{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "J_8OxbTXRGUH", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "42e37ffc-3f3d-41a3-c579-de28e29ab26e" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "processor\t: 0\n", "vendor_id\t: GenuineIntel\n", "cpu family\t: 6\n", "model\t\t: 79\n", "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n", "stepping\t: 0\n", "microcode\t: 0x1\n", "cpu MHz\t\t: 2199.998\n", "cache size\t: 56320 KB\n", "physical id\t: 0\n", "siblings\t: 2\n", "core id\t\t: 0\n", "cpu cores\t: 1\n", "apicid\t\t: 0\n", "initial apicid\t: 0\n", "fpu\t\t: yes\n", "fpu_exception\t: yes\n", "cpuid level\t: 13\n", "wp\t\t: yes\n", "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx smap xsaveopt arat md_clear arch_capabilities\n", "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa\n", "bogomips\t: 4399.99\n", "clflush size\t: 64\n", "cache_alignment\t: 64\n", "address sizes\t: 46 bits physical, 48 bits virtual\n", "power management:\n", "\n", "processor\t: 1\n", "vendor_id\t: GenuineIntel\n", "cpu family\t: 6\n", "model\t\t: 79\n", "model name\t: Intel(R) Xeon(R) CPU @ 2.20GHz\n", "stepping\t: 0\n", "microcode\t: 0x1\n", "cpu MHz\t\t: 2199.998\n", "cache size\t: 56320 KB\n", "physical id\t: 0\n", "siblings\t: 2\n", "core id\t\t: 0\n", "cpu cores\t: 1\n", "apicid\t\t: 1\n", "initial apicid\t: 1\n", "fpu\t\t: yes\n", "fpu_exception\t: yes\n", "cpuid level\t: 13\n", "wp\t\t: yes\n", "flags\t\t: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc cpuid tsc_known_freq pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb stibp fsgsbase tsc_adjust bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx smap xsaveopt arat md_clear arch_capabilities\n", "bugs\t\t: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf mds swapgs taa\n", "bogomips\t: 4399.99\n", "clflush size\t: 64\n", "cache_alignment\t: 64\n", "address sizes\t: 46 bits physical, 48 bits virtual\n", "power management:\n", "\n" ] } ], "source": [ "!cat /proc/cpuinfo" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "m9S78vQIV0sZ" }, "outputs": [], "source": [ "!sudo apt-get -qq install dvipng texlive-latex-extra texlive-fonts-recommended cm-super" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "dckDYheNzuOs" }, "outputs": [], "source": [ "!pip install -qq selfies==2.1.1 pandas rdkit matplotlib SciencePlots" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ITdwe6ePmvGE" }, "outputs": [], "source": [ "%config InlineBackend.figure_formats = ['svg']" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "b218cY-3aMVc" }, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd \n", "import random\n", "import selfies as sf \n", "import timeit\n", "\n", "from rdkit import Chem\n", "\n", "plt.style.use(\"science\")" ] }, { "cell_type": "markdown", "metadata": { "id": "_Afa-aGYXkDN" }, "source": [ "# SELFIES Translation" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Kjc0q7cYXmWY" }, "outputs": [], "source": [ "def time_roundtrip_translation(smiles):\n", " selfies = [sf.encoder(s) for s in smiles]\n", "\n", " def batch_encode():\n", " for s in smiles: \n", " sf.encoder(s)\n", "\n", " def batch_decode():\n", " for s in selfies:\n", " sf.decoder(s)\n", "\n", " n_trials = 3\n", " encode_time = timeit.timeit(stmt=batch_encode, number=n_trials) / n_trials\n", " decode_time = timeit.timeit(stmt=batch_decode, number=n_trials) / n_trials \n", " return encode_time, decode_time" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "c5yGYrHkkeIc" }, "outputs": [], "source": [ "def time_individual_roundtrip_translation(smiles):\n", " sizes = []\n", " times = []\n", " for s in smiles:\n", " n_trials = 3\n", " time = timeit.timeit(stmt=lambda: sf.decoder(sf.encoder(s)), number=n_trials) / n_trials\n", " mol = Chem.MolFromSmiles(s)\n", " if mol is not None:\n", " sizes.append(mol.GetNumAtoms())\n", " times.append(time)\n", " return sizes, times\n", "\n", "def plot_translation_sizes_vs_time(sizes, times):\n", " times = np.array(times) * 1000\n", " plt.scatter(sizes, times, s=2)\n", " plt.xlabel(\"Number of Atoms\")\n", " plt.ylabel(\"Roundtrip Time (ms)\")\n", " plt.xlim((0, 60))\n", " plt.tight_layout()\n", "\n", " plt.savefig(\"nci_open_compound_translation.pdf\")\n", " plt.show()" ] }, { "cell_type": "markdown", "metadata": { "id": "taID525mghWs" }, "source": [ "## NCI Open Compound" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "wzOubi6Vf63j", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "6167cf54-920a-4574-f3c6-6c5e8eeaba03" }, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py:2882: DtypeWarning: Columns (23) have mixed types.Specify dtype option on import or set low_memory=False.\n", " exec(code_obj, self.user_global_ns, self.user_ns)\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "301607" ] }, "metadata": {}, "execution_count": 8 } ], "source": [ "nci_open_compound = pd.read_csv(\"PubChem_compound_text_DTP_NCI.csv\")\n", "nci_open_compound = nci_open_compound[\"isosmiles\"].tolist()\n", "\n", "# csv file saves backslashes as \\\\, so we replace\n", "nci_open_compound = [s.replace(\"\\\\\\\\\", \"\\\\\") for s in nci_open_compound]\n", "\n", "len(nci_open_compound)" ] }, { "cell_type": "code", "source": [ "constraints = sf.get_preset_constraints(name=\"hypervalent\")\n", "constraints[\"P-1\"] = 6\n", "sf.set_semantic_constraints(constraints)" ], "metadata": { "id": "4eN92ylFuoxf" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "o5FQN2epgjY3", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "93c6232b-25f4-41b7-b18a-da7862db6b88" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Encode time: 136.29431584966673\n", "Decode time: 116.39895675733351\n", "Total time: 252.69327260700024\n" ] } ], "source": [ "encode_time, decode_time = time_roundtrip_translation(nci_open_compound)\n", "\n", "print(\"Encode time:\", encode_time)\n", "print(\"Decode time:\", decode_time)\n", "print(\"Total time: \", encode_time + decode_time)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Po-HDF2YkdOI", "colab": { "base_uri": "https://localhost:8080/", "height": 254 }, "outputId": "c0d0cb12-974c-4c83-f07b-76a577814709" }, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n" }, "metadata": { "needs_background": "light" } } ], "source": [ "random.seed(100)\n", "subset = random.sample(nci_open_compound, k=1000)\n", "\n", "sizes, times = time_individual_roundtrip_translation(subset)\n", "plot_translation_sizes_vs_time(sizes, times)" ] }, { "cell_type": "markdown", "metadata": { "id": "y9GXenMfaIMO" }, "source": [ "# SELFIES Randomization" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "u6cakNAoQssR" }, "outputs": [], "source": [ "sf.set_semantic_constraints() # reset to defaults\n", "random.seed(100)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "DjsoWd7boZzO" }, "outputs": [], "source": [ "def time_random_selfies(n, length):\n", " rand_selfies = [\"\".join(random.choices(alphabet, k=length)) for _ in range(n)]\n", "\n", " def batch_decode():\n", " for s in rand_selfies:\n", " sf.decoder(s)\n", "\n", " n_trials = 20\n", " decode_time = timeit.timeit(stmt=batch_decode, number=n_trials) / n_trials \n", "\n", " sizes = [Chem.MolFromSmiles(sf.decoder(s)).GetNumAtoms() for s in rand_selfies]\n", " return decode_time, sizes\n", "\n", "def plot_smiles_size_distribution(ax, sizes_log):\n", " bins = list(range(0, 251, 10))\n", " for l, sizes in sizes_log.items():\n", " ax.hist(sizes, bins=bins, density=True, label=str(l), alpha=0.5, zorder=(250 - l))\n", " ax.set_xlabel(\"Number of Atoms\")\n", " ax.set_ylabel(\"Normalized Counts\")\n", " ax.set_yscale(\"log\")\n", " ax.legend(title=\"Symbol Length\")" ] }, { "cell_type": "markdown", "metadata": { "id": "Wm1hjOA6Qgmr" }, "source": [ "## Standard Alphabet" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "lULQb1GzQPqR", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "ff20be9a-67cd-4fc1-bf43-51a26bbd42c8" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "['[=Ring3]', '[=P]', '[P-1]', '[O-1]', '[Br]', '[P]', '[#P-1]', '[=B]', '[#O+1]', '[=N+1]', '[=C]', '[C-1]', '[S-1]', '[=S+1]', '[B]', '[H]', '[=P-1]', '[#S+1]', '[=O]', '[O]', '[Ring1]', '[#N+1]', '[#S-1]', '[N+1]', '[Branch1]', '[#Branch2]', '[O+1]', '[#S]', '[=Branch1]', '[=N]', '[=Branch2]', '[=C+1]', '[B+1]', '[P+1]', '[=B-1]', '[Branch2]', '[=S]', '[Ring3]', '[S+1]', '[S]', '[Ring2]', '[N]', '[#N]', '[C]', '[#B]', '[=Ring2]', '[#C]', '[#Branch1]', '[=O+1]', '[Branch3]', '[B-1]', '[#B-1]', '[#C+1]', '[=P+1]', '[C+1]', '[Cl]', '[#Branch3]', '[#P]', '[I]', '[=N-1]', '[N-1]', '[=Ring1]', '[#C-1]', '[=Branch3]', '[#P+1]', '[=S-1]', '[=C-1]', '[F]', '[=B+1]']\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "69" ] }, "metadata": {}, "execution_count": 14 } ], "source": [ "alphabet = list(sf.get_semantic_robust_alphabet())\n", "print(alphabet)\n", "len(alphabet)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "Nwhl6-Xxsdsb", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "9738eebd-588e-40f9-fa15-2da883bfbe16" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Decode time (length= 10): 0.08223833985002785\n", "Decode time (length= 100): 0.1988577752499623\n", "Decode time (length= 250): 0.34106298934998447\n" ] } ], "source": [ "lengths = [10, 100, 250]\n", "sizes_log_stnd = dict()\n", "\n", "for l in lengths:\n", " decode_time, sizes_log_stnd[l] = time_random_selfies(n=1000, length=l)\n", " print(f\"Decode time (length={l:4}):\", decode_time)" ] }, { "cell_type": "markdown", "metadata": { "id": "RdOsrRoMQig6" }, "source": [ "## Filtered Alphabet" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "7Z5MfmEoQ8yx", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "796963a8-9af5-4e7d-bca5-a56cde92846d" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "['[P-1]', '[P]', '[C-1]', '[S-1]', '[B]', '[O]', '[N+1]', '[O+1]', '[B+1]', '[P+1]', '[S+1]', '[S]', '[N]', '[C]', '[B-1]', '[C+1]', '[N-1]', '[Branch1]', '[Ring1]']\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "19" ] }, "metadata": {}, "execution_count": 16 } ], "source": [ "def f(symbol): \n", " return all((s not in symbol) for s in (\"=\", \"#\", \"[F]\", \"[Cl]\", \"[Br]\", \"[I]\", \"[H]\", \"[O-1]\", \"Branch\", \"Ring\"))\n", "\n", "alphabet = list(filter(f, sf.get_semantic_robust_alphabet()))\n", "alphabet.extend([\"[Branch1]\", \"[Ring1]\"])\n", "print(alphabet)\n", "len(alphabet)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "wJFJZaxaRD2p", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "b6b3d9d7-398e-41eb-9615-650277437af3" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Decode time (length= 10): 0.13307460139999422\n", "Decode time (length= 100): 0.929271802400035\n", "Decode time (length= 250): 1.6330489501500325\n" ] } ], "source": [ "lengths = [10, 100, 250]\n", "sizes_log_filt = dict()\n", "\n", "for l in lengths:\n", " decode_time, sizes_log_filt[l] = time_random_selfies(n=1000, length=l)\n", " print(f\"Decode time (length={l:4}):\", decode_time)" ] }, { "cell_type": "markdown", "source": [ "## Plotting" ], "metadata": { "id": "Jy4AULrk5zLl" } }, { "cell_type": "code", "source": [ "fig, axes = plt.subplots(1, 2, sharex=True, sharey=True, figsize=(6.7, 2.8))\n", "\n", "plot_smiles_size_distribution(axes[0], sizes_log_stnd)\n", "plot_smiles_size_distribution(axes[1], sizes_log_filt)\n", "\n", "axes[1].yaxis.set_tick_params(labelbottom=True)\n", "\n", "plt.tight_layout()\n", "\n", "plt.savefig(\"n=1000_length=50_size_hist.pdf\")\n", "plt.show()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 271 }, "id": "iFdeLwe849ta", "outputId": "2b01f8dd-1b1f-4072-8579-43c737133b7b" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/svg+xml": "\n\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n\n" }, "metadata": { "needs_background": "light" } } ] } ], "metadata": { "colab": { "provenance": [], "authorship_tag": "ABX9TyPERxoH7XX/kl6A0GN6BQQe", "include_colab_link": true }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }