90 lines (90 with data), 2.5 kB
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"machine_shape": "hm"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 0
},
"id": "gwcBUTqt0Uuz",
"outputId": "c97a5433-ce1a-440f-888d-0c2ccbbc4a1a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Collecting selfies\n",
" Downloading selfies-2.1.1-py3-none-any.whl (35 kB)\n",
"Installing collected packages: selfies\n",
"Successfully installed selfies-2.1.1\n"
]
}
],
"source": [
"# !pip install selfies --upgrade\n",
"# github, aspuru-guzik-group/selfies, https://github.com/aspuru-guzik-group/selfies"
]
},
{
"cell_type": "code",
"source": [
"import selfies as sf\n",
"\n",
"adenine = \"Nc1c2ncNc2ncn1\"\n",
"\n",
"# SMILES -> SELFIES -> SMILES translation\n",
"try:\n",
" adenine_sf = sf.encoder(adenine) #\n",
" adenine_smi = sf.decoder(adenine_sf) # NC1=NC=NC2=C1N=CN2\n",
"except sf.EncoderError:\n",
" pass # sf.encoder error!\n",
"except sf.DecoderError:\n",
" pass # sf.decoder error!\n",
"\n",
"len_adenine = sf.len_selfies(adenine_sf) # 8\n",
"\n",
"symbols_adenine = list(sf.split_selfies(adenine_sf))\n",
"# ['[C]', '[=C]', '[C]', '[=C]', '[C]', '[=C]', '[Ring1]', '[=Branch1]']\n",
"print(\"SMILES =\", adenine_smi)\n",
"print(\"SELFIES =\", adenine_sf)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 0
},
"id": "m9c5VU9i0eEh",
"outputId": "2253edd6-ecf5-4282-e83f-27e6aa1e2708"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"SMILES = NC1=C2N=CNC2=NC=N1\n",
"SELFIES = [N][C][=C][N][=C][N][C][Ring1][Branch1][=N][C][=N][Ring1][=Branch2]\n"
]
}
]
}
]
}