LLMs-Pharmaceutical / Git / [404218] /Code/Drug Discovery/Meta-Llama-3/SMILES to SELFIES estimator.ipynb

Models:

Amanda-D/

LLMs-Pharmaceutical

Downloads: 1

[404218]: / Code / Drug Discovery / Meta-Llama-3 / SMILES to SELFIES estimator.ipynb

History

Download this file

90 lines (90 with data), 2.5 kB

{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "machine_shape": "hm"
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 0
        },
        "id": "gwcBUTqt0Uuz",
        "outputId": "c97a5433-ce1a-440f-888d-0c2ccbbc4a1a"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting selfies\n",
            "  Downloading selfies-2.1.1-py3-none-any.whl (35 kB)\n",
            "Installing collected packages: selfies\n",
            "Successfully installed selfies-2.1.1\n"
          ]
        }
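      ],
      "source": [
        "# One-time setup: uncomment the next line on a fresh runtime to install the dependency.\n",
        "# The recorded output of this cell comes from a run with the install line active.\n",
        "# !pip install selfies --upgrade\n",
        "# Project: aspuru-guzik-group/selfies, https://github.com/aspuru-guzik-group/selfies"
      ]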
    },
    {
      "cell_type": "code",
      "source": [
        "import selfies as sf\n",
        "\n",
        "adenine = \"Nc1c2ncNc2ncn1\"\n",
        "\n",
        "# SMILES -> SELFIES -> SMILES round trip.\n",
        "# A failed translation is reported and re-raised: every statement after the\n",
        "# try block needs both results, so swallowing the error would only turn it\n",
        "# into a NameError (or silently reuse values left over from an earlier run).\n",
        "try:\n",
        "    adenine_sf = sf.encoder(adenine)\n",
        "    # [N][C][=C][N][=C][N][C][Ring1][Branch1][=N][C][=N][Ring1][=Branch2]\n",
        "    adenine_smi = sf.decoder(adenine_sf)  # NC1=C2N=CNC2=NC=N1\n",
        "except sf.EncoderError as err:\n",
        "    print(\"SMILES -> SELFIES encoding failed:\", err)\n",
        "    raise\n",
        "except sf.DecoderError as err:\n",
        "    print(\"SELFIES -> SMILES decoding failed:\", err)\n",
        "    raise\n",
        "\n",
        "# Number of SELFIES symbols (not characters) in the encoded string.\n",
        "len_adenine = sf.len_selfies(adenine_sf)  # 14\n",
        "\n",
        "# The same string broken into its individual symbols.\n",
        "symbols_adenine = list(sf.split_selfies(adenine_sf))\n",
        "# ['[N]', '[C]', '[=C]', '[N]', '[=C]', '[N]', '[C]', '[Ring1]', '[Branch1]',\n",
        "#  '[=N]', '[C]', '[=N]', '[Ring1]', '[=Branch2]']\n",
        "print(\"SMILES =\", adenine_smi)\n",
        "print(\"SELFIES =\", adenine_sf)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 0
        },
        "id": "m9c5VU9i0eEh",
        "outputId": "2253edd6-ecf5-4282-e83f-27e6aa1e2708"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "SMILES = NC1=C2N=CNC2=NC=N1\n",
            "SELFIES = [N][C][=C][N][=C][N][C][Ring1][Branch1][=N][C][=N][Ring1][=Branch2]\n"
          ]
        }
      ]
    }
  ]
}