[dee452]: / Data_processing / drug_smiles_mapping.ipynb

Download this file

1 lines (1 with data), 155.3 kB

{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"drug_smiles_mapping.ipynb","provenance":[{"file_id":"1WBrGaXxcb39syozPKyZ_7--79VGa5rQv","timestamp":1651445556617}],"collapsed_sections":[],"toc_visible":true,"authorship_tag":"ABX9TyOjR37uASduG1OnYOzfUqUw"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["## Imports"],"metadata":{"id":"4da2Sb7ct74V"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1259,"status":"ok","timestamp":1651532334625,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"},"user_tz":240},"id":"avEwtXgX61wn","outputId":"e9023fbd-91cd-4a9b-f94c-0ba96f9d35af"},"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/gdrive')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xiC9awOs8aiv"},"outputs":[],"source":["import pandas as pd\n","import os\n","from tqdm import tqdm\n","import re\n","import numpy as np"]},{"cell_type":"markdown","source":["# Map "],"metadata":{"id":"1Sy3aj9D_iFO"}},{"cell_type":"code","source":["# Function that returns value if key in map, otherwise, returns a default value\n","def default(map, key, default):\n","  if key in map:\n","    return map[key]\n","  return default\n","\n","def for_all(list, f):\n","  for e in list:\n","    if not f(e):\n","      return False\n","  return True"],"metadata":{"id":"huUSgupWcDT4"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lDKYr8LWYqDO"},"outputs":[],"source":["# Directory to the project folder\n","deep_learning_dir = '/content/gdrive/My Drive/BMI 707 Project' "]},{"cell_type":"code","source":["# Reading formatted train, test, val data\n","train = pd.read_pickle(deep_learning_dir + '/data_formatting/training_data.pickle')\n","test = pd.read_pickle(deep_learning_dir + '/data_formatting/testing_data.pickle')\n","val = pd.read_pickle(deep_learning_dir + '/data_formatting/validation_data.pickle')"],"metadata":{"id":"Kh9N5P-inJBX"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Read mapping file that contains all drugs in drugbank, with  \n","# Columns: smiles, drugbank_id\n","mapping = pd.read_csv(deep_learning_dir + '/data/drugbank/drugbank_mappings.csv')\n","mapping['drug_name'] = mapping['drug_name'].map(lambda name : name.lower())"],"metadata":{"id":"8YTE9uSGcoG3"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Standardize drug names to facilitate matching\n","\n","# Remove salt from drugs that are in salt form (e.g., metformin hydrochloride)\n","words_to_remove = [\"HYDROCHLORIDE\",\"BISULFATE\",\"ALUMINUM\",\"ARGININE\",\"BENZATHINE\",\"CALCIUM\",\"CHLOROPROCAINE\",\"CHOLINE\",\"DIETHANOLAMINE\",\"ETHANOLAMINE\",\"ETHYLENEDIAMINE\",\"LYSINE\",\"MAGNESIUM\",\"HISTIDINE\",\"LITHIUM\",\"MEGLUMINE\",\"POTASSIUM\",\"PROCAINE\",\"SODIUM\",\"TRIETHYLAMINE\",\"ZINC\",\"ACETATE\",\"ASPARTATE\",\"BENZENESULFONATE\",\"BENZOATE\",\"BESYLATE\",\"BICARBONATE\",\"BITARTRATE\",\"BROMIDE\",\"CAMSYLATE\",\"CARBONATE\",\"CHLORIDE\",\"CITRATE\",\"DECANOATE\",\"EDETATE\",\"ESYLATE\",\"FUMARATE\",\"GLUCEPTATE\",\"GLUCONATE\",\"GLUTAMATE\",\"GLYCOLATE\",\"HEXANOATE\",\"HYDROXYNAPHTHOATE\",\"IODIDE\",\"ISETHIONATE\",\"LACTATE\",\"LACTOBIONATE\",\"MALATE\",\"MALEATE\",\"MANDELATE\",\"MESYLATE\",\"METHYLSULFATE\",\"MUCATE\",\"NAPSYLATE\",\"NITRATE\",\"OCTANOATE\",\"OLEATE\",\"PAMOATE\",\"PANTOTHENATE\",\"PHOSPHATE\",\"POLYGALACTURONATE\",\"PROPIONATE\",\"SALICYLATE\",\"STEARATE\",\"ACETATE\",\"SUCCINATE\",\"SULFATE\",\"TARTRATE\",\"TEOCLATE\",\"TOSYLATE\"]\n","words_to_remove = set(map(lambda w: w.lower(), words_to_remove))\n","\n","non_alphanum_patt = re.compile(r\"[^A-Za-z0-9]\")\n","multi_space_patt = re.compile(r\" +\")\n","\n","def clean_name(drug):\n","  drug = drug.lower()\n","  # Replace non-alphanumeric characters with spaces\n","  drug = non_alphanum_patt.sub(' ', drug)\n","  # Remove words we do not want\n","  splits = drug.split()\n","  if splits:\n","    drug = ' '.join([splits[0], ' '.join(filter(lambda w: w not in words_to_remove, splits[1:]))])\n","  # Replace multi-spaces into a single space\n","  drug = multi_space_patt.sub(' ', drug)\n","  # Trim invisible characters (spaces, tabs, new lines)\n","  drug = drug.strip()\n","  return drug"],"metadata":{"id":"HMsuFlzr1VpE"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["unique_drug = set()\n","pd.concat([train, test, val])['drugs'].map(lambda drugs : list(map(lambda d : unique_drug.add(d), drugs)))\n","len(unique_drug)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uTJKC-_8ym4b","executionInfo":{"status":"ok","timestamp":1651530059246,"user_tz":240,"elapsed":287,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"ad9b7c5e-0681-471f-cdad-5cc1d2532104"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4833"]},"metadata":{},"execution_count":8}]},{"cell_type":"code","source":["# Add drugbank data to the training, testing and validation sets\n","\n","# Dictionary mapping drugbank name to drugbank data\n","drugbank_dict = {}\n","for m in mapping.iterrows():\n","  drugbank_dict[clean_name(m[1]['drug_name'].lower())] = (m[1]['id'], m[1]['smiles'], m[1]['chembl'], m[1]['binding_db'], clean_name(m[1]['drug_name'].lower()))\n","\n","# This is a debug variable for counting matches\n","unmatched = {}\n","\n","drugbank_dict[\"DEBUG\"] = 0\n","drugbank_dict[\"DEBUG2\"] = 0\n","\n","# Drug name that need to be changed to map with drugbank\n","replacements = {\n","  \"bi 10773\": \"empagliflozin\",\n","  \"rbv\": \"ribavirin\",\n","  \"bay59 7939\": \"rivaroxaban\",\n","  \"sof vel\": \"sofosbuvir\",\n","  \"insulin degludec insulin aspart\": \"insulin degludec\",\n","  \"cp 690 550\": \"tofacitinib\",\n","  \"ftc tdf\": \"tenofovir\",\n","  \"risedronate\": \"risedronic acid\",\n","  \"nktr 118\": \"naloxegol\",\n","}\n","\n","# Returns a list of tuple with drugbank information\n","def map_drugbank_data(drugs):\n","  matched = False\n","  data = []\n","  for drug in drugs:\n","    drug = clean_name(drug)\n","\n","    if drug in replacements:\n","      drug = replacements[drug]\n","    \n","    # Placebos are a special case\n","    if 'placebo' in drug:\n","      drugbank_dict[\"DEBUG\"] += 1\n","      matched = True\n","      data.append(('placebo', 'placebo', 'placebo', 'placebo', 'placebo')) # all columns will contain the value placebo\n","      continue\n","    \n","    data.append(('none', 'none', 'none', 'none', 'none'))\n","\n","    if drug in drugbank_dict:\n","      drugbank_dict[\"DEBUG\"] += 1\n","      matched = True\n","      data[len(data)-1] = drugbank_dict[drug]\n","      continue\n","\n","    splits = drug.split(' ')\n","    for split in splits:\n","      if split in drugbank_dict:\n","        drugbank_dict[\"DEBUG\"] += 1\n","        matched = True\n","        data[len(data)-1] = drugbank_dict[split]\n","        break\n","    \n","    if data[len(data)-1][0] == \"none\":\n","      if not drug in unmatched:\n","        unmatched[drug] = 0\n","      unmatched[drug] += 1\n","  if matched:\n","    drugbank_dict[\"DEBUG2\"] += 1\n","  return data\n","\n","def add_drugbank_data(df, name_of_df):\n","  drugbank_dict[\"DEBUG\"] = 0\n","  # Add drugbank data\n","  df['drugbank_data'] = df['drugs'].map(lambda drugs : map_drugbank_data(drugs))\n","  # Splitting the tuples into different columns for readability\n","  df['drugbank_id'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[0], drugs)))\n","  df['smiles'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[1], drugs)))\n","  df['chembl'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[2], drugs)))\n","  df['binding_db'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[3], drugs)))\n","  df['clean_name'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[4], drugs)))\n","\n","  # Remove drugbank data now that we have split it\n","  del df['drugbank_data']\n","  print(name_of_df + \" % of drugs mapped: \" + str(drugbank_dict[\"DEBUG\"]/df['drugs'].map(len).sum()*100))\n","\n","add_drugbank_data(train, \"training\")\n","add_drugbank_data(test, \"testing\")\n","add_drugbank_data(val, \"validation\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_0AKzXI4c-9H","executionInfo":{"status":"ok","timestamp":1651530333169,"user_tz":240,"elapsed":1532,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"db15981f-4014-4d4e-a967-bd03af47532e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["training % of drugs mapped: 83.38505301267132\n","testing % of drugs mapped: 78.1437125748503\n","validation % of drugs mapped: 86.17021276595744\n"]}]},{"cell_type":"code","source":["# Count the number of unique smiles we have matched\n","smiles_set = set()\n","\n","def add_to_smiles_set(row):\n","  for i in range(len(row.drugs)):\n","    smiles_set.add(row.smiles[i])\n","\n","train.apply(add_to_smiles_set, axis=1)\n","test.apply(add_to_smiles_set, axis=1)\n","val.apply(add_to_smiles_set, axis=1)\n","\n","len(smiles_set)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Ep2RxiG68gZo","executionInfo":{"status":"ok","timestamp":1651531251060,"user_tz":240,"elapsed":304,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"4e494abb-59ca-4e79-f367-3f7228e3b979"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["881"]},"metadata":{},"execution_count":10}]},{"cell_type":"code","source":["# Debugging code to help find out the drug names that are the most unmatched\n","unmatched_list = list(zip(unmatched.keys(), unmatched.values()))\n","unmatched_list.sort(key=lambda x : int(x[1]), reverse=True)\n","unmatched_list[0:10]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SRGFmLg2dFLH","executionInfo":{"status":"ok","timestamp":1651531253825,"user_tz":240,"elapsed":4,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"59f61752-be9b-45ba-fe98-d06b1395b342"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[('epoetin alfa', 15),\n"," ('chemotherapy', 12),\n"," ('corticosteroids', 9),\n"," ('qva149', 9),\n"," ('aspirin', 9),\n"," ('ly2189265', 9),\n"," ('tak 438', 9),\n"," ('bay43 9006', 8),\n"," ('vi 0521', 8),\n"," ('nva237', 8)]"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":["train.to_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/training_data_drugbank.pickle')\n","test.to_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/testing_data_drugbank.pickle')\n","val.to_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/validation_data_drugbank.pickle')"],"metadata":{"id":"a_PM8Mw6uwX8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train = pd.read_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/training_data_drugbank.pickle')\n","test = pd.read_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/testing_data_drugbank.pickle')\n","val = pd.read_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/validation_data_drugbank.pickle')\n","dictionary = pd.read_csv(deep_learning_dir + '/data/mapping/dictionary.csv', sep='|')\n","cui2vec = pd.read_csv(deep_learning_dir + '/data/mapping/cui2vec_pretrained.csv')"],"metadata":{"id":"39IIMcH1AB8b"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Create a map from clean name to CUI\n","keys = list(dictionary['STR'].map(lambda s : clean_name(str(s))))\n","cui_map = dict(zip(keys, dictionary['CUI']))"],"metadata":{"id":"SXt0WyrNAgJe"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Add CUIs to the datasets\n","\n","mapped_cuis = set()\n","unmapped_cuis = set()\n","counter = [0]\n","\n","def get_cuis(drug_names):\n","  cuis = []\n","  for drug in drug_names:\n","    drug = clean_name(drug)\n","\n","    if drug in replacements:\n","      drug = replacements[drug]\n","    \n","    # Placebos are a special case\n","    if 'placebo' in drug:\n","      cuis.append(cui_map['placebo'])\n","      mapped_cuis.add(cui_map['placebo'])\n","      continue\n","    \n","    cuis.append('none')\n","\n","    if drug in cui_map:\n","      cuis[len(cuis)-1] = cui_map[drug]\n","      mapped_cuis.add(cui_map[drug])\n","      continue\n","\n","    splits = drug.split(' ')\n","    for split in splits:\n","      if split in cui_map:\n","        cuis[len(cuis)-1] = cui_map[split]\n","        mapped_cuis.add(cui_map[split])\n","        break\n","  if for_all(cuis, lambda cui : cui == 'none'):\n","    counter[0] += 1\n","  return cuis\n","\n","train['cuis'] = train['clean_name'].map(get_cuis)\n","test['cuis'] = test['clean_name'].map(get_cuis)\n","val['cuis'] = val['clean_name'].map(get_cuis)"],"metadata":{"id":"1tUXp9ksANeb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["counter[0]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Oj3zWoyKnh8M","executionInfo":{"status":"ok","timestamp":1651448107117,"user_tz":240,"elapsed":350,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"dee841a9-a57a-4deb-da00-a1cffae86a56"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["333"]},"metadata":{},"execution_count":90}]},{"cell_type":"code","source":["len(mapped_cuis)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"g8ZPDXHuaibu","executionInfo":{"status":"ok","timestamp":1651448108400,"user_tz":240,"elapsed":2,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"a05a6c08-3e9f-4c0f-9c73-cca69c8ee051"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1043"]},"metadata":{},"execution_count":91}]},{"cell_type":"code","source":["pd.DataFrame(mapped_cuis).to_csv(deep_learning_dir + '/data_formatting/drug_cui2vec_data/mapped_cuis.csv')"],"metadata":{"id":"6Gldwd4XFgp8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["cui2vec_map = {}\n","for row in cui2vec.iterrows():\n","  cui2vec_map[row[1][0]] = row[1][1:]"],"metadata":{"id":"fzjcqbmkCvAL"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train['cui_vectors'] = train['cuis'].map(lambda cuis : [default(cui2vec_map, cui, []) for cui in cuis])\n","test['cui_vectors'] = test['cuis'].map(lambda cuis : [default(cui2vec_map, cui, []) for cui in cuis])\n","val['cui_vectors'] = val['cuis'].map(lambda cuis : [default(cui2vec_map, cui, []) for cui in cuis])"],"metadata":{"id":"Zm1J4eLZaKb6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["counter[0] = 0\n","def pick_one_cui(cui_vectors):\n","  for vector in cui_vectors:\n","    if len(vector) != 0:\n","      return vector\n","  return []\n","\n","train['primary_cui_vector'] = train['cui_vectors'].map(pick_one_cui)\n","test['primary_cui_vector'] = test['cui_vectors'].map(pick_one_cui)\n","val['primary_cui_vector'] = val['cui_vectors'].map(pick_one_cui)"],"metadata":{"id":"-5eCOK8Aa3u3"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train.to_pickle(deep_learning_dir + '/data_formatting/drug_cui2vec_data/training_data_cui.pickle')\n","test.to_pickle(deep_learning_dir + '/data_formatting/drug_cui2vec_data/testing_data_cui.pickle')\n","val.to_pickle(deep_learning_dir + '/data_formatting/drug_cui2vec_data/validation_data_cui.pickle')"],"metadata":{"id":"GVVlHmJPhCf1"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(len(train[train['primary_cui_vector'].str.len() == 0])/len(train)*100)\n","print(len(test[test['primary_cui_vector'].str.len() == 0])/len(test)*100)\n","print(len(val[val['primary_cui_vector'].str.len() == 0])/len(val)*100)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"YTrMLcQWevmT","executionInfo":{"status":"ok","timestamp":1651446307906,"user_tz":240,"elapsed":15,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"a36887fa-8722-4d29-e61f-14b01e35c04d"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["42.82482223658694\n","58.90052356020943\n","43.895348837209305\n"]}]},{"cell_type":"code","source":["train['smiles'].map(lambda smiles : for_all(smiles, lambda smile : smile == \"none\")).sum()/len(train)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vUmNESqeo2cK","executionInfo":{"status":"ok","timestamp":1651446307906,"user_tz":240,"elapsed":12,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"50ecbf2b-9316-4518-dc61-83f2503001e4"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.11667744020685197"]},"metadata":{},"execution_count":41}]},{"cell_type":"code","source":["smiles_set = set()\n","\n","def add_to_smiles_set(row):\n","  for i in range(len(row.drugs)):\n","    smiles_set.add(row.smiles[i])\n","\n","train.apply(add_to_smiles_set, axis=1)\n","test.apply(add_to_smiles_set, axis=1)\n","val.apply(add_to_smiles_set, axis=1)"],"metadata":{"id":"MmW8JR5KutXl","executionInfo":{"status":"ok","timestamp":1651447435584,"user_tz":240,"elapsed":373,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"f04628bb-6dd2-4b8f-e63a-9e9369b4bd29","colab":{"base_uri":"https://localhost:8080/"}},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0      None\n","1      None\n","2      None\n","3      None\n","4      None\n","       ... \n","339    None\n","340    None\n","341    None\n","342    None\n","343    None\n","Length: 344, dtype: object"]},"metadata":{},"execution_count":68}]},{"cell_type":"code","source":["unique_smiles = pd.DataFrame(smiles_set, columns=[\"smiles\"])"],"metadata":{"id":"aIgKo2vCyyqY"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["unique_smiles"],"metadata":{"id":"EOloL6jRzrJO","executionInfo":{"status":"ok","timestamp":1651447437170,"user_tz":240,"elapsed":4,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"4f5dcdf0-465c-4ce9-866e-d9ca4bf7ba46","colab":{"base_uri":"https://localhost:8080/","height":419}},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["                                                smiles\n","0     CCCOC(C(=O)OC1CCN(C)CC1)(C1=CC=CC=C1)C1=CC=CC=C1\n","1         NCCNC1=CC=C(NCCN)C2=C1C(=O)C1=C(C=NC=C1)C2=O\n","2      CC1=C(OCC(F)(F)F)C=CN=C1CS(=O)C1=NC2=CC=CC=C2N1\n","3    CCCCC1=NC2(CCCC2)C(=O)N1CC1=CC=C(C=C1)C1=CC=CC...\n","4    [H][C@]12[C@@H](C)C(S[C@]3([H])CN[C@H](CNS(N)(...\n","..                                                 ...\n","876  COC1=C(C=C(Cl)C=C1)C(=O)NCCC1=CC=C(C=C1)S(=O)(...\n","877  CC[C@H](C)[C@@H]1NC(=O)[C@H](CC2=CC=CC=C2)NC(=...\n","878  [H][C@@]12OC3=C(O)C=CC4=C3[C@@]11CCN(C)[C@]([H...\n","879  CC1=CC2=C(C=C1C(=C)C1=CC=C(C=C1)C(O)=O)C(C)(C)...\n","880  CN(CCN1CCC(CC1)OC(=O)NC1=CC=CC=C1C1=CC=CC=C1)C...\n","\n","[881 rows x 1 columns]"],"text/html":["\n","  <div id=\"df-3b0b9851-ca78-466e-8983-43847fa27ce9\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>smiles</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>CCCOC(C(=O)OC1CCN(C)CC1)(C1=CC=CC=C1)C1=CC=CC=C1</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>NCCNC1=CC=C(NCCN)C2=C1C(=O)C1=C(C=NC=C1)C2=O</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>CC1=C(OCC(F)(F)F)C=CN=C1CS(=O)C1=NC2=CC=CC=C2N1</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>CCCCC1=NC2(CCCC2)C(=O)N1CC1=CC=C(C=C1)C1=CC=CC...</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>[H][C@]12[C@@H](C)C(S[C@]3([H])CN[C@H](CNS(N)(...</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>876</th>\n","      <td>COC1=C(C=C(Cl)C=C1)C(=O)NCCC1=CC=C(C=C1)S(=O)(...</td>\n","    </tr>\n","    <tr>\n","      <th>877</th>\n","      <td>CC[C@H](C)[C@@H]1NC(=O)[C@H](CC2=CC=CC=C2)NC(=...</td>\n","    </tr>\n","    <tr>\n","      <th>878</th>\n","      <td>[H][C@@]12OC3=C(O)C=CC4=C3[C@@]11CCN(C)[C@]([H...</td>\n","    </tr>\n","    <tr>\n","      <th>879</th>\n","      <td>CC1=CC2=C(C=C1C(=C)C1=CC=C(C=C1)C(O)=O)C(C)(C)...</td>\n","    </tr>\n","    <tr>\n","      <th>880</th>\n","      <td>CN(CCN1CCC(CC1)OC(=O)NC1=CC=CC=C1C1=CC=CC=C1)C...</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>881 rows × 1 columns</p>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3b0b9851-ca78-466e-8983-43847fa27ce9')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-3b0b9851-ca78-466e-8983-43847fa27ce9 button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-3b0b9851-ca78-466e-8983-43847fa27ce9');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":70}]},{"cell_type":"code","source":["train[train['smiles'].map(lambda smiles : for_all(smiles, lambda smile : smile == \"none\"))].head(50)"],"metadata":{"id":"BYzvBQNrqQ2V","executionInfo":{"status":"ok","timestamp":1651447477441,"user_tz":240,"elapsed":758,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"563e00d6-7474-4eab-d326-61bdc1a7e594","colab":{"base_uri":"https://localhost:8080/","height":1000}},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["           nctid  n_participants  \\\n","1    NCT01626859           152.0   \n","7    NCT00605293           578.0   \n","8    NCT00331864          4189.0   \n","11   NCT00124982         27188.0   \n","20   NCT00437112             NaN   \n","51   NCT00316719           928.0   \n","63   NCT00311402         12910.0   \n","73   NCT00221845             NaN   \n","75   NCT00206089             NaN   \n","77   NCT00250965             NaN   \n","91   NCT00220831             NaN   \n","102  NCT00567697             NaN   \n","107  NCT00330460          5745.0   \n","143  NCT01019694         16447.0   \n","145  NCT01336023          6445.0   \n","155  NCT00334880             NaN   \n","156  NCT00309972             NaN   \n","158  NCT01327222             NaN   \n","168  NCT01421459         10357.0   \n","169  NCT01059812          2080.0   \n","177  NCT00350415             NaN   \n","183  NCT00127634             NaN   \n","184  NCT01404234           601.0   \n","197  NCT01438710            38.0   \n","201  NCT00597753          2379.0   \n","209  NCT01511939            78.0   \n","210  NCT00668850             NaN   \n","218  NCT00119119             NaN   \n","226  NCT00064116             NaN   \n","230  NCT00735241             NaN   \n","243  NCT01106079             NaN   \n","289  NCT01326026          1100.0   \n","301  NCT01365507          1372.0   \n","307  NCT00369278           554.0   \n","311  NCT00536263          6874.0   \n","323  NCT00394589            17.0   \n","338  NCT01074268          4974.0   \n","348  NCT00295776             NaN   \n","373  NCT00390806         13169.0   \n","388  NCT00717678             NaN   \n","402  NCT00184600         10620.0   \n","414  NCT01245569             NaN   \n","420  NCT00486018          2769.0   \n","436  NCT00000419             NaN   \n","441  NCT00360334          6327.0   \n","445  NCT00330473             NaN   \n","452  NCT00372333             NaN   \n","466  NCT00510952          5016.0   \n","498  NCT00422383         11992.0   \n","501  NCT01689142             NaN   \n","\n","                                                 drugs  \\\n","1    [mp-214 low dose, mp-214 middle dose, mp-214 h...   \n","7    [methoxy polyethylene glycol-epoetin beta, epo...   \n","8                                        [ranibizumab]   \n","11   [abatacept, non-biologic disease modifying ant...   \n","20   [human insulin inhalation powder, insulin glar...   \n","51                              [lam group, adv group]   \n","63                                  [aggrenox capsule]   \n","73   [ace inhibition, intensified blood pressure co...   \n","75                                            [exanta]   \n","77                             [intravenous magnesium]   \n","91                [natural source vitamin e 400iu/day]   \n","102                         [ranibizumab, ranibizumab]   \n","107                           [alendronate, denosumab]   \n","143  [combivent cfc-mdi, combivent respimat 20/100 ...   \n","145  [insulin degludec/liraglutide, insulin deglude...   \n","155                                           [nrp104]   \n","156      [control arm (seq):, experimental arm (con):]   \n","158                                      [bevacizumab]   \n","168                          [ly2963016, lantus, oams]   \n","169  [insulin degludec/insulin aspart, biphasic ins...   \n","177                                       [mesalamine]   \n","183  [human insulin inhalation powder, injectable i...   \n","184                                             [azli]   \n","197                               [prograf, lcp-tacro]   \n","201                       [peginesatide, epoetin alfa]   \n","209                                         [pennsaid]   \n","210         [generex oral-lyn™, regular human insulin]   \n","218                      [pentoxyphilline, tocopherol]   \n","226                                     [chop regimen]   \n","230  [folfox6 cycles 1-3, folfox6 cycles 4 onwards,...   \n","243  [intensive management or tight control, standa...   \n","289               [insulin degludec, insulin degludec]   \n","301                  [insulin degludec/insulin aspart]   \n","307     [enteric-coated mycophenolate sodium (ec-mps)]   \n","311  [pegylated interferon alpha-2b, pegylated inte...   \n","323  [infliximab increased frequency, infliximab in...   \n","338  [insulin degludec, insulin detemir, insulin as...   \n","348  [lamictal in the treatment of post-herpetic ne...   \n","373                          [hycamtin, oral capsules]   \n","388                         [prograf-xl, prograf, mmf]   \n","402  [biphasic insulin aspart, insulin detemir, ins...   \n","414  [foster® 100/6 µg/unit dose, seretide accuhale...   \n","420  [ranibizumab injection 0.3 mg, ranibizumab inj...   \n","436                             [premarin and provera]   \n","441                      [exenatide, insulin glargine]   \n","445         [human insulin inhalation powder, insulin]   \n","452                                         [idea-033]   \n","466  [insulin lispro protamine suspension, insulin ...   \n","498  [rituximab mabthera/rituxan, rituximab mabther...   \n","501  [insulin glargine new formulation (hoe901), in...   \n","\n","                                              diseases  \\\n","1                                      [schizophrenia]   \n","7                                             [anemia]   \n","8    [age related macular degeneration, choroidal n...   \n","11                              [rheumatoid arthritis]   \n","20                         [diabetes mellitus, type 2]   \n","51                               [chronic hepatitis b]   \n","63                          [cerebrovascular accident]   \n","73   [children, chronic renal failure, hypertension...   \n","75                                   [thromboembolism]   \n","77   [coronary artery disease, valvular heart disease]   \n","91   [diabetes, myocardial infarction, cardiovascul...   \n","102    [central retinal vein occlusion, macular edema]   \n","107                         [osteoporosis, osteopenia]   \n","143           [pulmonary disease, chronic obstructive]   \n","145              [diabetes, diabetes mellitus, type 2]   \n","155  [attention deficit hyperactivity disorder, att...   \n","156                                      [lung cancer]   \n","158                 [age related macular degeneration]   \n","168                        [diabetes mellitus, type 2]   \n","169              [diabetes, diabetes mellitus, type 2]   \n","177                               [ulcerative colitis]   \n","183                        [diabetes mellitus, type 1]   \n","184          [cystic fibrosis, pseudomonas aeruginosa]   \n","197                           [renal failure, tremors]   \n","201  [chronic renal failure, chronic kidney disease...   \n","209         [osteoarthritis of the knee, coagulopathy]   \n","210                                [diabetes mellitus]   \n","218             [hepatitis c, chronic, liver fibrosis]   \n","226                                         [lymphoma]   \n","230           [colorectal carcinoma, liver metastases]   \n","243                              [psoriatic arthritis]   \n","289              [diabetes, diabetes mellitus, type 2]   \n","301              [diabetes, diabetes mellitus, type 2]   \n","307                            [renal transplantation]   \n","311                             [hepatitis b, chronic]   \n","323                             [rheumatoid arthritis]   \n","338              [diabetes, diabetes mellitus, type 1]   \n","348                          [neuralgia, postherpetic]   \n","373                      [lung cancer, non-small cell]   \n","388  [kidney transplantation, transplantation immun...   \n","402              [diabetes, diabetes mellitus, type 2]   \n","414            [chronic obstructive pulmonary disease]   \n","420            [macular edema, retinal vein occlusion]   \n","436                     [systemic lupus erythematosus]   \n","441                                  [type 2 diabetes]   \n","445                        [diabetes mellitus, type 2]   \n","452  [joint pain, musculoskeletal pain, stiffness, ...   \n","466                        [diabetes mellitus, type 2]   \n","498                             [rheumatoid arthritis]   \n","501                         [type 2 diabetes mellitus]   \n","\n","                                              icdcodes  \\\n","1    [F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2...   \n","7    [D53.2, D64.9, D46.4, D53.0, D53.9, D61.3, D61.9]   \n","8    [H35.3130, H35.3230, H35.3110, H35.3120, H35.3...   \n","11   [M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...   \n","20   [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...   \n","51                 [B18.0, B18.1, B18.2, B18.8, B18.9]   \n","63   [A52.05, I67.81, I67.89, I67.9, I67.841, I67.8...   \n","73   [Y93.6A, Y92.110, Y92.111, Y92.112, Y92.113, Y...   \n","75   [O88.22, O88.23, O88.211, O88.212, O88.213, O8...   \n","77        [I25.10, I25.110, I25.119, I25.111, I25.118]   \n","91   [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","102  [H34.8132, H34.8131, H34.8111, H34.8121, H34.8...   \n","107  [M81.6, Z82.62, Z13.820, M81.8, Z87.310, M81.0...   \n","143                              [J44.9, J44.1, J44.0]   \n","145  [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","155  [F90.2, F90.8, F90.9, F90.0, F90.1, F90.2, F90...   \n","156  [C78.00, C78.01, C78.02, D14.30, D14.31, D14.3...   \n","158  [H35.3130, H35.3230, H35.3110, H35.3120, H35.3...   \n","168  [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...   \n","169  [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","177  [K51.80, K51.813, K51.814, K51.90, K51.913, K5...   \n","183  [E10.65, E10.9, E10.21, E10.36, E10.41, E10.42...   \n","184  [E84.9, Z14.1, E84.0, E84.11, E84.8, E84.19, P...   \n","197  [P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,...   \n","201  [I13.11, I13.2, I12.9, N18.9, I12.0, D63.1, N1...   \n","209  [M15.4, M15.0, M16.9, M17.9, M19.011, M19.012,...   \n","210  [P70.2, O24.92, Z83.3, E10.65, E10.9, E11.65, ...   \n","218  [B18.2, B18.0, B18.1, B18.8, B18.9, K71.3, K71...   \n","226  [S33.110S, S33.111S, S33.120S, S33.121S, S33.1...   \n","230  [C22.0, C22.1, C4A.9, C7B.1, D09.9, C4A.0, C4A...   \n","243                                           [L40.52]   \n","289  [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","301  [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","307    [N25.0, Q61.4, N23, N26.9, P96.0, Q60.0, Q60.1]   \n","311                [B18.0, B18.1, B18.2, B18.8, B18.9]   \n","323  [M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...   \n","338  [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","348                                           [B02.22]   \n","373  [C78.00, C78.01, C78.02, D14.30, D14.31, D14.3...   \n","388   [N26.2, Q63.0, Q63.2, Z52.4, I75.81, N19, N20.0]   \n","402  [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","414                              [J44.9, J44.1, J44.0]   \n","420  [H59.033, H34.8130, H59.031, H59.032, H59.039,...   \n","436  [M32.9, M32.0, M32.11, M32.12, M32.13, M32.14,...   \n","441  [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...   \n","445  [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...   \n","452  [M25.50, M25.59, M25.541, M25.542, M25.549, M2...   \n","466  [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...   \n","498  [M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...   \n","501  [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...   \n","\n","                                              criteria  label  \\\n","1    \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","7    \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","8    \\n        Patients who participated in this st...      1   \n","11   \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","20   \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","51   \\n        Inclusion criteria:\\n\\n          -  ...      1   \n","63   \\n        Inclusion Criteria:\\n\\n        Patie...      0   \n","73   \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","75   \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","77   \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","91   \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","102  \\n        Inclusion Criteria:\\n\\n          1. ...      1   \n","107  \\n        Inclusion Criteria: - Patient is an ...      1   \n","143  \\n        Inclusion criteria:\\n\\n          1. ...      1   \n","145  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","155  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","156  \\n        DISEASE CHARACTERISTICS:\\n\\n        ...      1   \n","158  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","168  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","169  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","177  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","183  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","184  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","197  \\n        Inclusion Criteria:\\n\\n          1. ...      1   \n","201  \\n        Inclusion Criteria\\n\\n          1. P...      1   \n","209  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","210  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","218  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","226  \\n        DISEASE CHARACTERISTICS:\\n\\n        ...      1   \n","230  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","243  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","289  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","301  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","307  \\n        Inclusion criteria\\n\\n          1. R...      1   \n","311  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","323  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","338  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","348  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","373  \\n        Inclusion criteria:\\n\\n          -  ...      0   \n","388  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","402  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","414  \\n        Inclusion Criteria:\\n\\n          1. ...      1   \n","420  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","436  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","441  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","445  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","452  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","466  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","498  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","501  \\n        Inclusion criteria :\\n\\n        o Pa...      1   \n","\n","                     drugbank_id              smiles  \\\n","1             [none, none, none]  [none, none, none]   \n","7                [DB09107, none]        [none, none]   \n","8                      [DB01270]              [none]   \n","11         [DB01281, none, none]  [none, none, none]   \n","20               [none, DB00047]        [none, none]   \n","51                  [none, none]        [none, none]   \n","63                        [none]              [none]   \n","73            [none, none, none]  [none, none, none]   \n","75                        [none]              [none]   \n","77                        [none]              [none]   \n","91                        [none]              [none]   \n","102           [DB01270, DB01270]        [none, none]   \n","107              [none, DB06643]        [none, none]   \n","143           [none, none, none]  [none, none, none]   \n","145  [DB06655, DB09564, DB06655]  [none, none, none]   \n","155                       [none]              [none]   \n","156                 [none, none]        [none, none]   \n","158                    [DB00112]              [none]   \n","168           [none, none, none]  [none, none, none]   \n","169              [DB09564, none]        [none, none]   \n","177                       [none]              [none]   \n","183        [none, none, DB00047]  [none, none, none]   \n","184                       [none]              [none]   \n","197                 [none, none]        [none, none]   \n","201              [DB08894, none]        [none, none]   \n","209                       [none]              [none]   \n","210                 [none, none]        [none, none]   \n","218              [none, DB11251]        [none, none]   \n","226                       [none]              [none]   \n","230        [none, none, DB00112]  [none, none, none]   \n","243                 [none, none]        [none, none]   \n","289           [DB09564, DB09564]        [none, none]   \n","301                    [DB09564]              [none]   \n","307                       [none]              [none]   \n","311           [none, none, none]  [none, none, none]   \n","323  [DB00065, DB00065, DB00065]  [none, none, none]   \n","338  [DB09564, DB01307, DB01306]  [none, none, none]   \n","348                       [none]              [none]   \n","373                 [none, none]        [none, none]   \n","388           [none, none, none]  [none, none, none]   \n","402     [none, DB01307, DB01306]  [none, none, none]   \n","414                 [none, none]        [none, none]   \n","420     [DB01270, DB01270, none]  [none, none, none]   \n","436                       [none]              [none]   \n","441           [DB01276, DB00047]        [none, none]   \n","445                 [none, none]        [none, none]   \n","452                       [none]              [none]   \n","466           [DB13700, DB00047]        [none, none]   \n","498  [DB00073, DB00073, DB00073]  [none, none, none]   \n","501                 [none, none]        [none, none]   \n","\n","                                            chembl          binding_db  \\\n","1                               [none, none, none]  [none, none, none]   \n","7                                     [none, none]        [none, none]   \n","8                                  [CHEMBL1201825]              [none]   \n","11                     [CHEMBL1201823, none, none]  [none, none, none]   \n","20                           [none, CHEMBL1201497]        [none, none]   \n","51                                    [none, none]        [none, none]   \n","63                                          [none]              [none]   \n","73                              [none, none, none]  [none, none, none]   \n","75                                          [none]              [none]   \n","77                                          [none]              [none]   \n","91                                          [none]              [none]   \n","102                 [CHEMBL1201825, CHEMBL1201825]        [none, none]   \n","107                          [none, CHEMBL1237023]        [none, none]   \n","143                             [none, none, none]  [none, none, none]   \n","145  [CHEMBL1201866, CHEMBL2107869, CHEMBL1201866]  [none, none, none]   \n","155                                         [none]              [none]   \n","156                                   [none, none]        [none, none]   \n","158                                [CHEMBL1201583]              [none]   \n","168                             [none, none, none]  [none, none, none]   \n","169                          [CHEMBL2107869, none]        [none, none]   \n","177                                         [none]              [none]   \n","183                    [none, none, CHEMBL1201497]  [none, none, none]   \n","184                                         [none]              [none]   \n","197                                   [none, none]        [none, none]   \n","201                          [CHEMBL2107866, none]        [none, none]   \n","209                                         [none]              [none]   \n","210                                   [none, none]        [none, none]   \n","218                                   [none, none]        [none, none]   \n","226                                         [none]              [none]   \n","230                    [none, none, CHEMBL1201583]  [none, none, none]   \n","243                                   [none, none]        [none, none]   \n","289                 [CHEMBL2107869, CHEMBL2107869]        [none, none]   \n","301                                [CHEMBL2107869]              [none]   \n","307                                         [none]              [none]   \n","311                             [none, none, none]  [none, none, none]   \n","323  [CHEMBL1201581, CHEMBL1201581, CHEMBL1201581]  [none, none, none]   \n","338  [CHEMBL2107869, CHEMBL2104391, CHEMBL1201496]  [none, none, none]   \n","348                                         [none]              [none]   \n","373                                   [none, none]        [none, none]   \n","388                             [none, none, none]  [none, none, none]   \n","402           [none, CHEMBL2104391, CHEMBL1201496]  [none, none, none]   \n","414                                   [none, none]        [none, none]   \n","420           [CHEMBL1201825, CHEMBL1201825, none]  [none, none, none]   \n","436                                         [none]              [none]   \n","441                  [CHEMBL414357, CHEMBL1201497]        [none, none]   \n","445                                   [none, none]        [none, none]   \n","452                                         [none]              [none]   \n","466                          [none, CHEMBL1201497]        [none, none]   \n","498  [CHEMBL1201576, CHEMBL1201576, CHEMBL1201576]  [none, none, none]   \n","501                                   [none, none]        [none, none]   \n","\n","                                            clean_name  \\\n","1                                   [none, none, none]   \n","7     [methoxy polyethylene glycol epoetin beta, none]   \n","8                                        [ranibizumab]   \n","11                             [abatacept, none, none]   \n","20                            [none, insulin glargine]   \n","51                                        [none, none]   \n","63                                              [none]   \n","73                                  [none, none, none]   \n","75                                              [none]   \n","77                                              [none]   \n","91                                              [none]   \n","102                         [ranibizumab, ranibizumab]   \n","107                                  [none, denosumab]   \n","143                                 [none, none, none]   \n","145       [liraglutide, insulin degludec, liraglutide]   \n","155                                             [none]   \n","156                                       [none, none]   \n","158                                      [bevacizumab]   \n","168                                 [none, none, none]   \n","169                           [insulin degludec, none]   \n","177                                             [none]   \n","183                     [none, none, insulin glargine]   \n","184                                             [none]   \n","197                                       [none, none]   \n","201                               [peginesatide, none]   \n","209                                             [none]   \n","210                                       [none, none]   \n","218                                 [none, tocopherol]   \n","226                                             [none]   \n","230                          [none, none, bevacizumab]   \n","243                                       [none, none]   \n","289               [insulin degludec, insulin degludec]   \n","301                                 [insulin degludec]   \n","307                                             [none]   \n","311                                 [none, none, none]   \n","323               [infliximab, infliximab, infliximab]   \n","338  [insulin degludec, insulin detemir, insulin as...   \n","348                                             [none]   \n","373                                       [none, none]   \n","388                                 [none, none, none]   \n","402            [none, insulin detemir, insulin aspart]   \n","414                                       [none, none]   \n","420                   [ranibizumab, ranibizumab, none]   \n","436                                             [none]   \n","441                      [exenatide, insulin glargine]   \n","445                                       [none, none]   \n","452                                             [none]   \n","466                      [protamine, insulin glargine]   \n","498                  [rituximab, rituximab, rituximab]   \n","501                                       [none, none]   \n","\n","                               cuis  \\\n","1                [none, none, none]   \n","7                  [C1328071, none]   \n","8                        [C1566537]   \n","11           [C1619966, none, none]   \n","20                 [none, C0907402]   \n","51                     [none, none]   \n","63                           [none]   \n","73               [none, none, none]   \n","75                           [none]   \n","77                           [none]   \n","91                           [none]   \n","102            [C1566537, C1566537]   \n","107                [none, C1690432]   \n","143              [none, none, none]   \n","145  [C1456408, C3491971, C1456408]   \n","155                          [none]   \n","156                    [none, none]   \n","158                      [C0796392]   \n","168              [none, none, none]   \n","169                [C3491971, none]   \n","177                          [none]   \n","183          [none, none, C0907402]   \n","184                          [none]   \n","197                    [none, none]   \n","201                [C3281388, none]   \n","209                          [none]   \n","210                    [none, none]   \n","218                [none, C3255108]   \n","226                          [none]   \n","230          [none, none, C0796392]   \n","243                    [none, none]   \n","289            [C3491971, C3491971]   \n","301                      [C3491971]   \n","307                          [none]   \n","311              [none, none, none]   \n","323  [C5238750, C5238750, C5238750]   \n","338  [C3491971, C0537270, C1708521]   \n","348                          [none]   \n","373                    [none, none]   \n","388              [none, none, none]   \n","402      [none, C0537270, C1708521]   \n","414                    [none, none]   \n","420      [C1566537, C1566537, none]   \n","436                          [none]   \n","441            [C0167117, C0907402]   \n","445                    [none, none]   \n","452                          [none]   \n","466            [C0771747, C0907402]   \n","498  [C0393022, C0393022, C0393022]   \n","501                    [none, none]   \n","\n","                                           cui_vectors  \\\n","1                                         [[], [], []]   \n","7                                             [[], []]   \n","8    [[-0.0198971997276426, 0.0166968261538703, -8....   \n","11   [[-0.0140159579976783, 0.0032744963090471, -2....   \n","20   [[], [-0.0088522591226082, 0.0038537407826502,...   \n","51                                            [[], []]   \n","63                                                [[]]   \n","73                                        [[], [], []]   \n","75                                                [[]]   \n","77                                                [[]]   \n","91                                                [[]]   \n","102  [[-0.0198971997276426, 0.0166968261538703, -8....   \n","107  [[], [-0.0159405395376339, 0.0027208530185431,...   \n","143                                       [[], [], []]   \n","145  [[-0.0140124803475009, 0.0037774540637431, 2.6...   \n","155                                               [[]]   \n","156                                           [[], []]   \n","158  [[-0.0341094153430391, 0.0219485207498133, -1....   \n","168                                       [[], [], []]   \n","169                                           [[], []]   \n","177                                               [[]]   \n","183  [[], [], [-0.0088522591226082, 0.0038537407826...   \n","184                                               [[]]   \n","197                                           [[], []]   \n","201                                           [[], []]   \n","209                                               [[]]   \n","210                                           [[], []]   \n","218                                           [[], []]   \n","226                                               [[]]   \n","230  [[], [], [-0.0341094153430391, 0.0219485207498...   \n","243                                           [[], []]   \n","289                                           [[], []]   \n","301                                               [[]]   \n","307                                               [[]]   \n","311                                       [[], [], []]   \n","323                                       [[], [], []]   \n","338  [[], [-0.0051244166680686, 0.0016540584969475,...   \n","348                                               [[]]   \n","373                                           [[], []]   \n","388                                       [[], [], []]   \n","402  [[], [-0.0051244166680686, 0.0016540584969475,...   \n","414                                           [[], []]   \n","420  [[-0.0198971997276426, 0.0166968261538703, -8....   \n","436                                               [[]]   \n","441  [[-0.0123086924602175, 0.0042105967441839, -3....   \n","445                                           [[], []]   \n","452                                               [[]]   \n","466  [[], [-0.0088522591226082, 0.0038537407826502,...   \n","498  [[-0.0339438843118617, 0.0180569791652772, -3....   \n","501                                           [[], []]   \n","\n","                                    primary_cui_vector  \n","1                                                   []  \n","7                                                   []  \n","8    V1     -0.019897\n","V2      0.016697\n","V3          ...  \n","11   V1     -0.014016\n","V2      0.003274\n","V3          ...  \n","20   V1     -0.008852\n","V2      0.003854\n","V3          ...  \n","51                                                  []  \n","63                                                  []  \n","73                                                  []  \n","75                                                  []  \n","77                                                  []  \n","91                                                  []  \n","102  V1     -0.019897\n","V2      0.016697\n","V3          ...  \n","107  V1     -0.015941\n","V2      0.002721\n","V3          ...  \n","143                                                 []  \n","145  V1     -0.014012\n","V2      0.003777\n","V3          ...  \n","155                                                 []  \n","156                                                 []  \n","158  V1     -0.034109\n","V2      0.021949\n","V3          ...  \n","168                                                 []  \n","169                                                 []  \n","177                                                 []  \n","183  V1     -0.008852\n","V2      0.003854\n","V3          ...  \n","184                                                 []  \n","197                                                 []  \n","201                                                 []  \n","209                                                 []  \n","210                                                 []  \n","218                                                 []  \n","226                                                 []  \n","230  V1     -0.034109\n","V2      0.021949\n","V3          ...  \n","243                                                 []  \n","289                                                 []  \n","301                                                 []  \n","307                                                 []  \n","311                                                 []  \n","323                                                 []  \n","338  V1     -0.005124\n","V2      0.001654\n","V3          ...  \n","348                                                 []  \n","373                                                 []  \n","388                                                 []  \n","402  V1     -0.005124\n","V2      0.001654\n","V3          ...  \n","414                                                 []  \n","420  V1     -0.019897\n","V2      0.016697\n","V3          ...  \n","436                                                 []  \n","441  V1     -0.012309\n","V2      0.004211\n","V3          ...  \n","445                                                 []  \n","452                                                 []  \n","466  V1     -0.008852\n","V2      0.003854\n","V3          ...  \n","498  V1     -0.033944\n","V2      0.018057\n","V3          ...  \n","501                                                 []  "],"text/html":["\n","  <div id=\"df-6cd79391-c807-4181-8abe-c43b25f19170\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>nctid</th>\n","      <th>n_participants</th>\n","      <th>drugs</th>\n","      <th>diseases</th>\n","      <th>icdcodes</th>\n","      <th>criteria</th>\n","      <th>label</th>\n","      <th>drugbank_id</th>\n","      <th>smiles</th>\n","      <th>chembl</th>\n","      <th>binding_db</th>\n","      <th>clean_name</th>\n","      <th>cuis</th>\n","      <th>cui_vectors</th>\n","      <th>primary_cui_vector</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>1</th>\n","      <td>NCT01626859</td>\n","      <td>152.0</td>\n","      <td>[mp-214 low dose, mp-214 middle dose, mp-214 h...</td>\n","      <td>[schizophrenia]</td>\n","      <td>[F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>7</th>\n","      <td>NCT00605293</td>\n","      <td>578.0</td>\n","      <td>[methoxy polyethylene glycol-epoetin beta, epo...</td>\n","      <td>[anemia]</td>\n","      <td>[D53.2, D64.9, D46.4, D53.0, D53.9, D61.3, D61.9]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB09107, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[methoxy polyethylene glycol epoetin beta, none]</td>\n","      <td>[C1328071, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>8</th>\n","      <td>NCT00331864</td>\n","      <td>4189.0</td>\n","      <td>[ranibizumab]</td>\n","      <td>[age related macular degeneration, choroidal n...</td>\n","      <td>[H35.3130, H35.3230, H35.3110, H35.3120, H35.3...</td>\n","      <td>\\n        Patients who participated in this st...</td>\n","      <td>1</td>\n","      <td>[DB01270]</td>\n","      <td>[none]</td>\n","      <td>[CHEMBL1201825]</td>\n","      <td>[none]</td>\n","      <td>[ranibizumab]</td>\n","      <td>[C1566537]</td>\n","      <td>[[-0.0198971997276426, 0.0166968261538703, -8....</td>\n","      <td>V1     -0.019897\n","V2      0.016697\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>11</th>\n","      <td>NCT00124982</td>\n","      <td>27188.0</td>\n","      <td>[abatacept, non-biologic disease modifying ant...</td>\n","      <td>[rheumatoid arthritis]</td>\n","      <td>[M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB01281, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[CHEMBL1201823, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[abatacept, none, none]</td>\n","      <td>[C1619966, none, none]</td>\n","      <td>[[-0.0140159579976783, 0.0032744963090471, -2....</td>\n","      <td>V1     -0.014016\n","V2      0.003274\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>20</th>\n","      <td>NCT00437112</td>\n","      <td>NaN</td>\n","      <td>[human insulin inhalation powder, insulin glar...</td>\n","      <td>[diabetes mellitus, type 2]</td>\n","      <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none, DB00047]</td>\n","      <td>[none, none]</td>\n","      <td>[none, CHEMBL1201497]</td>\n","      <td>[none, none]</td>\n","      <td>[none, insulin glargine]</td>\n","      <td>[none, C0907402]</td>\n","      <td>[[], [-0.0088522591226082, 0.0038537407826502,...</td>\n","      <td>V1     -0.008852\n","V2      0.003854\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>51</th>\n","      <td>NCT00316719</td>\n","      <td>928.0</td>\n","      <td>[lam group, adv group]</td>\n","      <td>[chronic hepatitis b]</td>\n","      <td>[B18.0, B18.1, B18.2, B18.8, B18.9]</td>\n","      <td>\\n        Inclusion criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>63</th>\n","      <td>NCT00311402</td>\n","      <td>12910.0</td>\n","      <td>[aggrenox capsule]</td>\n","      <td>[cerebrovascular accident]</td>\n","      <td>[A52.05, I67.81, I67.89, I67.9, I67.841, I67.8...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n        Patie...</td>\n","      <td>0</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>73</th>\n","      <td>NCT00221845</td>\n","      <td>NaN</td>\n","      <td>[ace inhibition, intensified blood pressure co...</td>\n","      <td>[children, chronic renal failure, hypertension...</td>\n","      <td>[Y93.6A, Y92.110, Y92.111, Y92.112, Y92.113, Y...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>75</th>\n","      <td>NCT00206089</td>\n","      <td>NaN</td>\n","      <td>[exanta]</td>\n","      <td>[thromboembolism]</td>\n","      <td>[O88.22, O88.23, O88.211, O88.212, O88.213, O8...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>77</th>\n","      <td>NCT00250965</td>\n","      <td>NaN</td>\n","      <td>[intravenous magnesium]</td>\n","      <td>[coronary artery disease, valvular heart disease]</td>\n","      <td>[I25.10, I25.110, I25.119, I25.111, I25.118]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>91</th>\n","      <td>NCT00220831</td>\n","      <td>NaN</td>\n","      <td>[natural source vitamin e 400iu/day]</td>\n","      <td>[diabetes, myocardial infarction, cardiovascul...</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>102</th>\n","      <td>NCT00567697</td>\n","      <td>NaN</td>\n","      <td>[ranibizumab, ranibizumab]</td>\n","      <td>[central retinal vein occlusion, macular edema]</td>\n","      <td>[H34.8132, H34.8131, H34.8111, H34.8121, H34.8...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          1. ...</td>\n","      <td>1</td>\n","      <td>[DB01270, DB01270]</td>\n","      <td>[none, none]</td>\n","      <td>[CHEMBL1201825, CHEMBL1201825]</td>\n","      <td>[none, none]</td>\n","      <td>[ranibizumab, ranibizumab]</td>\n","      <td>[C1566537, C1566537]</td>\n","      <td>[[-0.0198971997276426, 0.0166968261538703, -8....</td>\n","      <td>V1     -0.019897\n","V2      0.016697\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>107</th>\n","      <td>NCT00330460</td>\n","      <td>5745.0</td>\n","      <td>[alendronate, denosumab]</td>\n","      <td>[osteoporosis, osteopenia]</td>\n","      <td>[M81.6, Z82.62, Z13.820, M81.8, Z87.310, M81.0...</td>\n","      <td>\\n        Inclusion Criteria: - Patient is an ...</td>\n","      <td>1</td>\n","      <td>[none, DB06643]</td>\n","      <td>[none, none]</td>\n","      <td>[none, CHEMBL1237023]</td>\n","      <td>[none, none]</td>\n","      <td>[none, denosumab]</td>\n","      <td>[none, C1690432]</td>\n","      <td>[[], [-0.0159405395376339, 0.0027208530185431,...</td>\n","      <td>V1     -0.015941\n","V2      0.002721\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>143</th>\n","      <td>NCT01019694</td>\n","      <td>16447.0</td>\n","      <td>[combivent cfc-mdi, combivent respimat 20/100 ...</td>\n","      <td>[pulmonary disease, chronic obstructive]</td>\n","      <td>[J44.9, J44.1, J44.0]</td>\n","      <td>\\n        Inclusion criteria:\\n\\n          1. ...</td>\n","      <td>1</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>145</th>\n","      <td>NCT01336023</td>\n","      <td>6445.0</td>\n","      <td>[insulin degludec/liraglutide, insulin deglude...</td>\n","      <td>[diabetes, diabetes mellitus, type 2]</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB06655, DB09564, DB06655]</td>\n","      <td>[none, none, none]</td>\n","      <td>[CHEMBL1201866, CHEMBL2107869, CHEMBL1201866]</td>\n","      <td>[none, none, none]</td>\n","      <td>[liraglutide, insulin degludec, liraglutide]</td>\n","      <td>[C1456408, C3491971, C1456408]</td>\n","      <td>[[-0.0140124803475009, 0.0037774540637431, 2.6...</td>\n","      <td>V1     -0.014012\n","V2      0.003777\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>155</th>\n","      <td>NCT00334880</td>\n","      <td>NaN</td>\n","      <td>[nrp104]</td>\n","      <td>[attention deficit hyperactivity disorder, att...</td>\n","      <td>[F90.2, F90.8, F90.9, F90.0, F90.1, F90.2, F90...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>156</th>\n","      <td>NCT00309972</td>\n","      <td>NaN</td>\n","      <td>[control arm (seq):, experimental arm (con):]</td>\n","      <td>[lung cancer]</td>\n","      <td>[C78.00, C78.01, C78.02, D14.30, D14.31, D14.3...</td>\n","      <td>\\n        DISEASE CHARACTERISTICS:\\n\\n        ...</td>\n","      <td>1</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>158</th>\n","      <td>NCT01327222</td>\n","      <td>NaN</td>\n","      <td>[bevacizumab]</td>\n","      <td>[age related macular degeneration]</td>\n","      <td>[H35.3130, H35.3230, H35.3110, H35.3120, H35.3...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[DB00112]</td>\n","      <td>[none]</td>\n","      <td>[CHEMBL1201583]</td>\n","      <td>[none]</td>\n","      <td>[bevacizumab]</td>\n","      <td>[C0796392]</td>\n","      <td>[[-0.0341094153430391, 0.0219485207498133, -1....</td>\n","      <td>V1     -0.034109\n","V2      0.021949\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>168</th>\n","      <td>NCT01421459</td>\n","      <td>10357.0</td>\n","      <td>[ly2963016, lantus, oams]</td>\n","      <td>[diabetes mellitus, type 2]</td>\n","      <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>169</th>\n","      <td>NCT01059812</td>\n","      <td>2080.0</td>\n","      <td>[insulin degludec/insulin aspart, biphasic ins...</td>\n","      <td>[diabetes, diabetes mellitus, type 2]</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB09564, none]</td>\n","      <td>[none, none]</td>\n","      <td>[CHEMBL2107869, none]</td>\n","      <td>[none, none]</td>\n","      <td>[insulin degludec, none]</td>\n","      <td>[C3491971, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>177</th>\n","      <td>NCT00350415</td>\n","      <td>NaN</td>\n","      <td>[mesalamine]</td>\n","      <td>[ulcerative colitis]</td>\n","      <td>[K51.80, K51.813, K51.814, K51.90, K51.913, K5...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>183</th>\n","      <td>NCT00127634</td>\n","      <td>NaN</td>\n","      <td>[human insulin inhalation powder, injectable i...</td>\n","      <td>[diabetes mellitus, type 1]</td>\n","      <td>[E10.65, E10.9, E10.21, E10.36, E10.41, E10.42...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none, none, DB00047]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, CHEMBL1201497]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, insulin glargine]</td>\n","      <td>[none, none, C0907402]</td>\n","      <td>[[], [], [-0.0088522591226082, 0.0038537407826...</td>\n","      <td>V1     -0.008852\n","V2      0.003854\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>184</th>\n","      <td>NCT01404234</td>\n","      <td>601.0</td>\n","      <td>[azli]</td>\n","      <td>[cystic fibrosis, pseudomonas aeruginosa]</td>\n","      <td>[E84.9, Z14.1, E84.0, E84.11, E84.8, E84.19, P...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>197</th>\n","      <td>NCT01438710</td>\n","      <td>38.0</td>\n","      <td>[prograf, lcp-tacro]</td>\n","      <td>[renal failure, tremors]</td>\n","      <td>[P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          1. ...</td>\n","      <td>1</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>201</th>\n","      <td>NCT00597753</td>\n","      <td>2379.0</td>\n","      <td>[peginesatide, epoetin alfa]</td>\n","      <td>[chronic renal failure, chronic kidney disease...</td>\n","      <td>[I13.11, I13.2, I12.9, N18.9, I12.0, D63.1, N1...</td>\n","      <td>\\n        Inclusion Criteria\\n\\n          1. P...</td>\n","      <td>1</td>\n","      <td>[DB08894, none]</td>\n","      <td>[none, none]</td>\n","      <td>[CHEMBL2107866, none]</td>\n","      <td>[none, none]</td>\n","      <td>[peginesatide, none]</td>\n","      <td>[C3281388, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>209</th>\n","      <td>NCT01511939</td>\n","      <td>78.0</td>\n","      <td>[pennsaid]</td>\n","      <td>[osteoarthritis of the knee, coagulopathy]</td>\n","      <td>[M15.4, M15.0, M16.9, M17.9, M19.011, M19.012,...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>210</th>\n","      <td>NCT00668850</td>\n","      <td>NaN</td>\n","      <td>[generex oral-lyn™, regular human insulin]</td>\n","      <td>[diabetes mellitus]</td>\n","      <td>[P70.2, O24.92, Z83.3, E10.65, E10.9, E11.65, ...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>218</th>\n","      <td>NCT00119119</td>\n","      <td>NaN</td>\n","      <td>[pentoxyphilline, tocopherol]</td>\n","      <td>[hepatitis c, chronic, liver fibrosis]</td>\n","      <td>[B18.2, B18.0, B18.1, B18.8, B18.9, K71.3, K71...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none, DB11251]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, tocopherol]</td>\n","      <td>[none, C3255108]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>226</th>\n","      <td>NCT00064116</td>\n","      <td>NaN</td>\n","      <td>[chop regimen]</td>\n","      <td>[lymphoma]</td>\n","      <td>[S33.110S, S33.111S, S33.120S, S33.121S, S33.1...</td>\n","      <td>\\n        DISEASE CHARACTERISTICS:\\n\\n        ...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>230</th>\n","      <td>NCT00735241</td>\n","      <td>NaN</td>\n","      <td>[folfox6 cycles 1-3, folfox6 cycles 4 onwards,...</td>\n","      <td>[colorectal carcinoma, liver metastases]</td>\n","      <td>[C22.0, C22.1, C4A.9, C7B.1, D09.9, C4A.0, C4A...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none, none, DB00112]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, CHEMBL1201583]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, bevacizumab]</td>\n","      <td>[none, none, C0796392]</td>\n","      <td>[[], [], [-0.0341094153430391, 0.0219485207498...</td>\n","      <td>V1     -0.034109\n","V2      0.021949\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>243</th>\n","      <td>NCT01106079</td>\n","      <td>NaN</td>\n","      <td>[intensive management or tight control, standa...</td>\n","      <td>[psoriatic arthritis]</td>\n","      <td>[L40.52]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>289</th>\n","      <td>NCT01326026</td>\n","      <td>1100.0</td>\n","      <td>[insulin degludec, insulin degludec]</td>\n","      <td>[diabetes, diabetes mellitus, type 2]</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB09564, DB09564]</td>\n","      <td>[none, none]</td>\n","      <td>[CHEMBL2107869, CHEMBL2107869]</td>\n","      <td>[none, none]</td>\n","      <td>[insulin degludec, insulin degludec]</td>\n","      <td>[C3491971, C3491971]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>301</th>\n","      <td>NCT01365507</td>\n","      <td>1372.0</td>\n","      <td>[insulin degludec/insulin aspart]</td>\n","      <td>[diabetes, diabetes mellitus, type 2]</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB09564]</td>\n","      <td>[none]</td>\n","      <td>[CHEMBL2107869]</td>\n","      <td>[none]</td>\n","      <td>[insulin degludec]</td>\n","      <td>[C3491971]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>307</th>\n","      <td>NCT00369278</td>\n","      <td>554.0</td>\n","      <td>[enteric-coated mycophenolate sodium (ec-mps)]</td>\n","      <td>[renal transplantation]</td>\n","      <td>[N25.0, Q61.4, N23, N26.9, P96.0, Q60.0, Q60.1]</td>\n","      <td>\\n        Inclusion criteria\\n\\n          1. R...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>311</th>\n","      <td>NCT00536263</td>\n","      <td>6874.0</td>\n","      <td>[pegylated interferon alpha-2b, pegylated inte...</td>\n","      <td>[hepatitis b, chronic]</td>\n","      <td>[B18.0, B18.1, B18.2, B18.8, B18.9]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>323</th>\n","      <td>NCT00394589</td>\n","      <td>17.0</td>\n","      <td>[infliximab increased frequency, infliximab in...</td>\n","      <td>[rheumatoid arthritis]</td>\n","      <td>[M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[DB00065, DB00065, DB00065]</td>\n","      <td>[none, none, none]</td>\n","      <td>[CHEMBL1201581, CHEMBL1201581, CHEMBL1201581]</td>\n","      <td>[none, none, none]</td>\n","      <td>[infliximab, infliximab, infliximab]</td>\n","      <td>[C5238750, C5238750, C5238750]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>338</th>\n","      <td>NCT01074268</td>\n","      <td>4974.0</td>\n","      <td>[insulin degludec, insulin detemir, insulin as...</td>\n","      <td>[diabetes, diabetes mellitus, type 1]</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB09564, DB01307, DB01306]</td>\n","      <td>[none, none, none]</td>\n","      <td>[CHEMBL2107869, CHEMBL2104391, CHEMBL1201496]</td>\n","      <td>[none, none, none]</td>\n","      <td>[insulin degludec, insulin detemir, insulin as...</td>\n","      <td>[C3491971, C0537270, C1708521]</td>\n","      <td>[[], [-0.0051244166680686, 0.0016540584969475,...</td>\n","      <td>V1     -0.005124\n","V2      0.001654\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>348</th>\n","      <td>NCT00295776</td>\n","      <td>NaN</td>\n","      <td>[lamictal in the treatment of post-herpetic ne...</td>\n","      <td>[neuralgia, postherpetic]</td>\n","      <td>[B02.22]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>373</th>\n","      <td>NCT00390806</td>\n","      <td>13169.0</td>\n","      <td>[hycamtin, oral capsules]</td>\n","      <td>[lung cancer, non-small cell]</td>\n","      <td>[C78.00, C78.01, C78.02, D14.30, D14.31, D14.3...</td>\n","      <td>\\n        Inclusion criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>388</th>\n","      <td>NCT00717678</td>\n","      <td>NaN</td>\n","      <td>[prograf-xl, prograf, mmf]</td>\n","      <td>[kidney transplantation, transplantation immun...</td>\n","      <td>[N26.2, Q63.0, Q63.2, Z52.4, I75.81, N19, N20.0]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>402</th>\n","      <td>NCT00184600</td>\n","      <td>10620.0</td>\n","      <td>[biphasic insulin aspart, insulin detemir, ins...</td>\n","      <td>[diabetes, diabetes mellitus, type 2]</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, DB01307, DB01306]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, CHEMBL2104391, CHEMBL1201496]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, insulin detemir, insulin aspart]</td>\n","      <td>[none, C0537270, C1708521]</td>\n","      <td>[[], [-0.0051244166680686, 0.0016540584969475,...</td>\n","      <td>V1     -0.005124\n","V2      0.001654\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>414</th>\n","      <td>NCT01245569</td>\n","      <td>NaN</td>\n","      <td>[foster® 100/6 µg/unit dose, seretide accuhale...</td>\n","      <td>[chronic obstructive pulmonary disease]</td>\n","      <td>[J44.9, J44.1, J44.0]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          1. ...</td>\n","      <td>1</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>420</th>\n","      <td>NCT00486018</td>\n","      <td>2769.0</td>\n","      <td>[ranibizumab injection 0.3 mg, ranibizumab inj...</td>\n","      <td>[macular edema, retinal vein occlusion]</td>\n","      <td>[H59.033, H34.8130, H59.031, H59.032, H59.039,...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB01270, DB01270, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[CHEMBL1201825, CHEMBL1201825, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[ranibizumab, ranibizumab, none]</td>\n","      <td>[C1566537, C1566537, none]</td>\n","      <td>[[-0.0198971997276426, 0.0166968261538703, -8....</td>\n","      <td>V1     -0.019897\n","V2      0.016697\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>436</th>\n","      <td>NCT00000419</td>\n","      <td>NaN</td>\n","      <td>[premarin and provera]</td>\n","      <td>[systemic lupus erythematosus]</td>\n","      <td>[M32.9, M32.0, M32.11, M32.12, M32.13, M32.14,...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>441</th>\n","      <td>NCT00360334</td>\n","      <td>6327.0</td>\n","      <td>[exenatide, insulin glargine]</td>\n","      <td>[type 2 diabetes]</td>\n","      <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB01276, DB00047]</td>\n","      <td>[none, none]</td>\n","      <td>[CHEMBL414357, CHEMBL1201497]</td>\n","      <td>[none, none]</td>\n","      <td>[exenatide, insulin glargine]</td>\n","      <td>[C0167117, C0907402]</td>\n","      <td>[[-0.0123086924602175, 0.0042105967441839, -3....</td>\n","      <td>V1     -0.012309\n","V2      0.004211\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>445</th>\n","      <td>NCT00330473</td>\n","      <td>NaN</td>\n","      <td>[human insulin inhalation powder, insulin]</td>\n","      <td>[diabetes mellitus, type 2]</td>\n","      <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>452</th>\n","      <td>NCT00372333</td>\n","      <td>NaN</td>\n","      <td>[idea-033]</td>\n","      <td>[joint pain, musculoskeletal pain, stiffness, ...</td>\n","      <td>[M25.50, M25.59, M25.541, M25.542, M25.549, M2...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[none]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","    </tr>\n","    <tr>\n","      <th>466</th>\n","      <td>NCT00510952</td>\n","      <td>5016.0</td>\n","      <td>[insulin lispro protamine suspension, insulin ...</td>\n","      <td>[diabetes mellitus, type 2]</td>\n","      <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB13700, DB00047]</td>\n","      <td>[none, none]</td>\n","      <td>[none, CHEMBL1201497]</td>\n","      <td>[none, none]</td>\n","      <td>[protamine, insulin glargine]</td>\n","      <td>[C0771747, C0907402]</td>\n","      <td>[[], [-0.0088522591226082, 0.0038537407826502,...</td>\n","      <td>V1     -0.008852\n","V2      0.003854\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>498</th>\n","      <td>NCT00422383</td>\n","      <td>11992.0</td>\n","      <td>[rituximab mabthera/rituxan, rituximab mabther...</td>\n","      <td>[rheumatoid arthritis]</td>\n","      <td>[M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB00073, DB00073, DB00073]</td>\n","      <td>[none, none, none]</td>\n","      <td>[CHEMBL1201576, CHEMBL1201576, CHEMBL1201576]</td>\n","      <td>[none, none, none]</td>\n","      <td>[rituximab, rituximab, rituximab]</td>\n","      <td>[C0393022, C0393022, C0393022]</td>\n","      <td>[[-0.0339438843118617, 0.0180569791652772, -3....</td>\n","      <td>V1     -0.033944\n","V2      0.018057\n","V3          ...</td>\n","    </tr>\n","    <tr>\n","      <th>501</th>\n","      <td>NCT01689142</td>\n","      <td>NaN</td>\n","      <td>[insulin glargine new formulation (hoe901), in...</td>\n","      <td>[type 2 diabetes mellitus]</td>\n","      <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n","      <td>\\n        Inclusion criteria :\\n\\n        o Pa...</td>\n","      <td>1</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[none, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6cd79391-c807-4181-8abe-c43b25f19170')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-6cd79391-c807-4181-8abe-c43b25f19170 button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-6cd79391-c807-4181-8abe-c43b25f19170');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":71}]},{"cell_type":"code","source":["# Debug code to figure out which unique tuples were matched\n","all_data = pd.concat([train, test, val])\n","\n","def extract_info(r):\n","  assert len(r.drugs) == len(r.drugbank_id), \"%d not equal to %d\" % (len(r.drugs), len(r.drugbank_id))\n","  out = []\n","  for i in range(len(r.drugs)):\n","    out.append((r.drugs[i], r.drugbank_id[i], r.smiles[i], r.chembl[i], r.binding_db[i]))\n","  return out\n","\n","def flatten(lists):\n","  out = []\n","  for l in lists:\n","    out += l\n","  return out\n","\n","pd.DataFrame(set(flatten(all_data.apply(extract_info, axis=1))), columns=[\"drug\", \"drugbank_id\", \"smiles\", \"chembl\", \"binding_db\"]).to_pickle(deep_learning_dir + '/AChander_Targets/unique_drug_tuple.pickle')"],"metadata":{"id":"bXw2yyzlHf3J"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["smiles_embedding = pd.read_pickle(deep_learning_dir + '/embeddings/SMILES_embedding.pkl')"],"metadata":{"id":"U7eV2fVh_G8c"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train['embeddings'] = train['smiles'].map(lambda smiles : list(map(lambda smile : default(smiles_embedding, smile, []), smiles)))\n","test['embeddings'] = test['smiles'].map(lambda smiles : list(map(lambda smile : default(smiles_embedding, smile, []), smiles)))\n","val['embeddings'] = val['smiles'].map(lambda smiles : list(map(lambda smile : default(smiles_embedding, smile, []), smiles)))"],"metadata":{"id":"jGczJrJn_ZIB"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def pick_one_embedding(embeddings):\n","  for embedding in embeddings:\n","    if len(embedding) != 0:\n","      return embedding\n","  return []\n","\n","train['embedding'] = train['embeddings'].map(pick_one_embedding)\n","test['embedding'] = test['embeddings'].map(pick_one_embedding)\n","val['embedding'] = val['embeddings'].map(pick_one_embedding)"],"metadata":{"id":"QOiUTWNFBk2h"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def pick_one_embedding(embeddings):\n","  for i in range(len(embeddings)):\n","    if len(embeddings[i]) != 0:\n","      return i\n","  return -1\n","\n","train['embedding_id'] = train['embeddings'].map(pick_one_embedding)\n","test['embedding_id'] = test['embeddings'].map(pick_one_embedding)\n","val['embedding_id'] = val['embeddings'].map(pick_one_embedding)"],"metadata":{"id":"wCuVrAuaeXfa"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train.columns"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dji3CKv7ENe5","executionInfo":{"status":"ok","timestamp":1651456613916,"user_tz":240,"elapsed":288,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"4d3ffea6-802c-46a5-d51f-79109636a08c"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['nctid', 'n_participants', 'drugs', 'diseases', 'icdcodes', 'criteria',\n","       'label', 'drugbank_id', 'smiles', 'chembl', 'binding_db', 'clean_name',\n","       'cuis', 'cui_vectors', 'primary_cui_vector', 'embeddings', 'embedding',\n","       'embedding_id'],\n","      dtype='object')"]},"metadata":{},"execution_count":175}]},{"cell_type":"code","source":["ctid_set = set()\n","ctid_embeddings = []\n","\n","def add_to_ctid_embeddings(row):\n","  if row.nctid in ctid_set:\n","    return\n","  if row.embedding_id == -1:\n","    ctid_embeddings.append((row.nctid, np.zeros(1024), \"none\"))\n","  else:\n","    ctid_embeddings.append((row.nctid, row.embeddings[row.embedding_id], row.drugs[row.embedding_id]))\n","  ctid_set.add(row.nctid)\n","\n","train.apply(add_to_ctid_embeddings, axis=1)\n","test.apply(add_to_ctid_embeddings, axis=1)\n","val.apply(add_to_ctid_embeddings, axis=1)\n","\n","ctid_embeddings_df = pd.DataFrame(ctid_embeddings, columns=[\"nctid\", \"embedding\", \"drug\"])"],"metadata":{"id":"GdGdkSK9D9s0"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":30000},"id":"hhILDzZkfIVZ","executionInfo":{"status":"ok","timestamp":1651456642615,"user_tz":240,"elapsed":375,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"0623f508-b28b-4db1-e42b-8c5a32a1d739"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["            nctid  n_participants  \\\n","0     NCT00475085           944.0   \n","1     NCT01626859           152.0   \n","2     NCT00203957             NaN   \n","3     NCT00169832             NaN   \n","4     NCT01249352             NaN   \n","...           ...             ...   \n","3089  NCT01015118         12294.0   \n","3090  NCT01127217             NaN   \n","3091  NCT01187953          1086.0   \n","3092  NCT01364649          1186.0   \n","3093  NCT01097018             NaN   \n","\n","                                                  drugs  \\\n","0     [aprepitant, dexamethasone, granisetron hydroc...   \n","1     [mp-214 low dose, mp-214 middle dose, mp-214 h...   \n","2                      [istradefylline, istradefylline]   \n","3                            [rosiglitazone or placebo]   \n","4                [nimotuzumab, cisplatin, fluorouracil]   \n","...                                                 ...   \n","3089  [placebo, paclitaxel, bibf 1120, carboplatin, ...   \n","3090                  [amlodipine/losartan, amlodipine]   \n","3091                  [prograf (tacrolimus), lcp-tacro]   \n","3092              [vortioxetine, escitalopram, placebo]   \n","3093                [capecitabine, perifosine, placebo]   \n","\n","                                         diseases  \\\n","0                                        [nausea]   \n","1                                 [schizophrenia]   \n","2                            [parkinsons disease]   \n","3     [diabetes, coronary artery bypass grafting]   \n","4             [esophageal cancer, adenocarcinoma]   \n","...                                           ...   \n","3089    [ovarian neoplasms, peritoneal neoplasms]   \n","3090                               [hypertension]   \n","3091                              [renal failure]   \n","3092                          [treatment outcome]   \n","3093                          [colorectal cancer]   \n","\n","                                               icdcodes  \\\n","0                                [R11.0, R11.11, R11.2]   \n","1     [F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2...   \n","2                                                 [G20]   \n","3     [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...   \n","4     [K22.2, K22.81, Q39.4, P78.83, I85.00, I85.01,...   \n","...                                                 ...   \n","3089  [C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17...   \n","3090  [I15.0, I97.3, K76.6, P29.2, G93.2, H40.053, I10]   \n","3091  [P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,...   \n","3092  [Z01.12, Z92.89, Z75.2, M27.59, Z53.9, Z91.19,...   \n","3093  [C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17.2]   \n","\n","                                               criteria  label  \\\n","0     \\n        Inclusion criteria:\\n\\n          -  ...      1   \n","1     \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","2     \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","3     \\n        Inclusion Criteria:\\n\\n        AT SC...      0   \n","4     \\n        Inclusion Criteria:\\n\\n          1. ...      1   \n","...                                                 ...    ...   \n","3089  \\n        Inclusion criteria:\\n\\n          -  ...      1   \n","3090  \\n        Inclusion Criteria:\\n\\n          -  ...      1   \n","3091  \\n        Inclusion Criteria:\\n\\n          1. ...      1   \n","3092  \\n        Inclusion Criteria:\\n\\n          1. ...      1   \n","3093  \\n        Inclusion Criteria:\\n\\n          -  ...      0   \n","\n","                                            drugbank_id  \\\n","0     [DB00673, DB14649, DB00889, DB00377, DB00433, ...   \n","1                                    [none, none, none]   \n","2                                    [DB11757, DB11757]   \n","3                                             [placebo]   \n","4                           [DB06192, DB00515, DB00544]   \n","...                                                 ...   \n","3089  [placebo, DB01229, none, DB00958, DB01229, DB0...   \n","3090                                 [DB00381, DB00381]   \n","3091                                    [DB00864, none]   \n","3092                        [DB09068, DB01175, placebo]   \n","3093                        [DB01101, DB06641, placebo]   \n","\n","                                                 smiles  \\\n","0     [C[C@@H](O[C@H]1OCCN(CC2=NNC(=O)N2)[C@H]1C1=CC...   \n","1                                    [none, none, none]   \n","2     [[H]\\C(=C(\\[H])C1=CC(OC)=C(OC)C=C1)C1=NC2=C(N1...   \n","3                                             [placebo]   \n","4     [none, [H][N]([H])([H])[Pt](Cl)(Cl)[N]([H])([H...   \n","...                                                 ...   \n","3089  [placebo, [H][C@]12[C@H](OC(=O)C3=CC=CC=C3)[C@...   \n","3090  [CCOC(=O)C1=C(COCCN)NC(C)=C(C1C1=CC=CC=C1Cl)C(...   \n","3091  [CO[C@@H]1C[C@@H](CC[C@H]1O)\\C=C(/C)[C@H]1OC(=...   \n","3092  [CC1=CC=C(SC2=CC=CC=C2N2CCNCC2)C(C)=C1, CN(C)C...   \n","3093  [CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@@H]1O[C@H](C)...   \n","\n","                                                 chembl  \\\n","0     [CHEMBL1471, CHEMBL1530428, CHEMBL1290003, CHE...   \n","1                                    [none, none, none]   \n","2                          [CHEMBL431770, CHEMBL431770]   \n","3                                             [placebo]   \n","4                      [none, CHEMBL2068237, CHEMBL185]   \n","...                                                 ...   \n","3089  [placebo, CHEMBL428647, none, CHEMBL1351, CHEM...   \n","3090                           [CHEMBL1491, CHEMBL1491]   \n","3091                               [CHEMBL269732, none]   \n","3092               [CHEMBL2104993, CHEMBL1508, placebo]   \n","3093                [CHEMBL1773, CHEMBL372764, placebo]   \n","\n","                                             binding_db  \\\n","0     [50220136, 50103620, 50443668, 50417287, 78434...   \n","1                                    [none, none, none]   \n","2                                  [50176050, 50176050]   \n","3                                             [placebo]   \n","4                            [none, 50028111, 50340677]   \n","...                                                 ...   \n","3089    [placebo, 50001839, none, none, 50001839, none]   \n","3090                               [50088383, 50088383]   \n","3091                                   [50030448, none]   \n","3092                      [50400902, 50302225, placebo]   \n","3093                          [none, 50431630, placebo]   \n","\n","                                             clean_name  \\\n","0     [aprepitant, dexamethasone, granisetron, palon...   \n","1                                    [none, none, none]   \n","2                      [istradefylline, istradefylline]   \n","3                                             [placebo]   \n","4                [nimotuzumab, cisplatin, fluorouracil]   \n","...                                                 ...   \n","3089  [placebo, paclitaxel, none, carboplatin, pacli...   \n","3090                           [amlodipine, amlodipine]   \n","3091                                 [tacrolimus, none]   \n","3092              [vortioxetine, escitalopram, placebo]   \n","3093                [capecitabine, perifosine, placebo]   \n","\n","                                                   cuis  \\\n","0     [C1176306, C2930043, C0543476, C1310734, C0770...   \n","1                                    [none, none, none]   \n","2                                  [C0673470, C0673470]   \n","3                                            [C1706408]   \n","4                        [C1570308, C0008838, C2711401]   \n","...                                                 ...   \n","3089  [C1706408, C0144576, none, C0079083, C0144576,...   \n","3090                               [C5195719, C5195719]   \n","3091                                   [C0519826, none]   \n","3092                     [C3661282, C1099456, C1706408]   \n","3093                     [C0671970, C0754570, C1706408]   \n","\n","                                            cui_vectors  \\\n","0     [[-0.0133983809219361, 0.0038140331326222, -3....   \n","1                                          [[], [], []]   \n","2                                              [[], []]   \n","3                                                  [[]]   \n","4     [[], [-0.0160435887106513, 0.0074711445684327,...   \n","...                                                 ...   \n","3089  [[], [-0.0152721016686416, 0.0059331896906342,...   \n","3090                                           [[], []]   \n","3091                                           [[], []]   \n","3092  [[], [-0.0147954572699932, 0.003364188566606, ...   \n","3093  [[-0.0206859657439039, 0.0082703372165789, 1.3...   \n","\n","                                     primary_cui_vector  \\\n","0     V1     -0.013398\n","V2      0.003814\n","V3          ...   \n","1                                                    []   \n","2                                                    []   \n","3                                                    []   \n","4     V1     -0.016044\n","V2      0.007471\n","V3          ...   \n","...                                                 ...   \n","3089  V1     -0.015272\n","V2      0.005933\n","V3          ...   \n","3090                                                 []   \n","3091                                                 []   \n","3092  V1     -0.014795\n","V2      0.003364\n","V3          ...   \n","3093  V1     -0.020686\n","V2       0.00827\n","V3          ...   \n","\n","                                             embeddings  \\\n","0     [[13.561273574829102, -13.577717781066895, 0.9...   \n","1                                          [[], [], []]   \n","2                                              [[], []]   \n","3                                                  [[]]   \n","4     [[], [], [4.89539098739624, 3.768472671508789,...   \n","...                                                 ...   \n","3089                           [[], [], [], [], [], []]   \n","3090  [[5.37669563293457, -5.854226589202881, -4.580...   \n","3091  [[8.613880157470703, -9.339082717895508, 19.17...   \n","3092  [[-3.836270332336426, -12.949006080627441, 3.9...   \n","3093  [[7.950209617614746, -1.9675993919372559, 3.18...   \n","\n","                                              embedding  embedding_id  \n","0     [13.561273574829102, -13.577717781066895, 0.95...             0  \n","1                                                    []            -1  \n","2                                                    []            -1  \n","3                                                    []            -1  \n","4     [4.89539098739624, 3.768472671508789, 4.447010...             2  \n","...                                                 ...           ...  \n","3089                                                 []            -1  \n","3090  [5.37669563293457, -5.854226589202881, -4.5800...             0  \n","3091  [8.613880157470703, -9.339082717895508, 19.174...             0  \n","3092  [-3.836270332336426, -12.949006080627441, 3.97...             0  \n","3093  [7.950209617614746, -1.9675993919372559, 3.184...             0  \n","\n","[3094 rows x 18 columns]"],"text/html":["\n","  <div id=\"df-16c71b1f-9777-4076-b77b-0a212cb8390b\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>nctid</th>\n","      <th>n_participants</th>\n","      <th>drugs</th>\n","      <th>diseases</th>\n","      <th>icdcodes</th>\n","      <th>criteria</th>\n","      <th>label</th>\n","      <th>drugbank_id</th>\n","      <th>smiles</th>\n","      <th>chembl</th>\n","      <th>binding_db</th>\n","      <th>clean_name</th>\n","      <th>cuis</th>\n","      <th>cui_vectors</th>\n","      <th>primary_cui_vector</th>\n","      <th>embeddings</th>\n","      <th>embedding</th>\n","      <th>embedding_id</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>NCT00475085</td>\n","      <td>944.0</td>\n","      <td>[aprepitant, dexamethasone, granisetron hydroc...</td>\n","      <td>[nausea]</td>\n","      <td>[R11.0, R11.11, R11.2]</td>\n","      <td>\\n        Inclusion criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB00673, DB14649, DB00889, DB00377, DB00433, ...</td>\n","      <td>[C[C@@H](O[C@H]1OCCN(CC2=NNC(=O)N2)[C@H]1C1=CC...</td>\n","      <td>[CHEMBL1471, CHEMBL1530428, CHEMBL1290003, CHE...</td>\n","      <td>[50220136, 50103620, 50443668, 50417287, 78434...</td>\n","      <td>[aprepitant, dexamethasone, granisetron, palon...</td>\n","      <td>[C1176306, C2930043, C0543476, C1310734, C0770...</td>\n","      <td>[[-0.0133983809219361, 0.0038140331326222, -3....</td>\n","      <td>V1     -0.013398\n","V2      0.003814\n","V3          ...</td>\n","      <td>[[13.561273574829102, -13.577717781066895, 0.9...</td>\n","      <td>[13.561273574829102, -13.577717781066895, 0.95...</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>NCT01626859</td>\n","      <td>152.0</td>\n","      <td>[mp-214 low dose, mp-214 middle dose, mp-214 h...</td>\n","      <td>[schizophrenia]</td>\n","      <td>[F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[none, none, none]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","      <td>[[], [], []]</td>\n","      <td>[]</td>\n","      <td>-1</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>NCT00203957</td>\n","      <td>NaN</td>\n","      <td>[istradefylline, istradefylline]</td>\n","      <td>[parkinsons disease]</td>\n","      <td>[G20]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB11757, DB11757]</td>\n","      <td>[[H]\\C(=C(\\[H])C1=CC(OC)=C(OC)C=C1)C1=NC2=C(N1...</td>\n","      <td>[CHEMBL431770, CHEMBL431770]</td>\n","      <td>[50176050, 50176050]</td>\n","      <td>[istradefylline, istradefylline]</td>\n","      <td>[C0673470, C0673470]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","      <td>-1</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>NCT00169832</td>\n","      <td>NaN</td>\n","      <td>[rosiglitazone or placebo]</td>\n","      <td>[diabetes, coronary artery bypass grafting]</td>\n","      <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n        AT SC...</td>\n","      <td>0</td>\n","      <td>[placebo]</td>\n","      <td>[placebo]</td>\n","      <td>[placebo]</td>\n","      <td>[placebo]</td>\n","      <td>[placebo]</td>\n","      <td>[C1706408]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","      <td>[[]]</td>\n","      <td>[]</td>\n","      <td>-1</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>NCT01249352</td>\n","      <td>NaN</td>\n","      <td>[nimotuzumab, cisplatin, fluorouracil]</td>\n","      <td>[esophageal cancer, adenocarcinoma]</td>\n","      <td>[K22.2, K22.81, Q39.4, P78.83, I85.00, I85.01,...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          1. ...</td>\n","      <td>1</td>\n","      <td>[DB06192, DB00515, DB00544]</td>\n","      <td>[none, [H][N]([H])([H])[Pt](Cl)(Cl)[N]([H])([H...</td>\n","      <td>[none, CHEMBL2068237, CHEMBL185]</td>\n","      <td>[none, 50028111, 50340677]</td>\n","      <td>[nimotuzumab, cisplatin, fluorouracil]</td>\n","      <td>[C1570308, C0008838, C2711401]</td>\n","      <td>[[], [-0.0160435887106513, 0.0074711445684327,...</td>\n","      <td>V1     -0.016044\n","V2      0.007471\n","V3          ...</td>\n","      <td>[[], [], [4.89539098739624, 3.768472671508789,...</td>\n","      <td>[4.89539098739624, 3.768472671508789, 4.447010...</td>\n","      <td>2</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>3089</th>\n","      <td>NCT01015118</td>\n","      <td>12294.0</td>\n","      <td>[placebo, paclitaxel, bibf 1120, carboplatin, ...</td>\n","      <td>[ovarian neoplasms, peritoneal neoplasms]</td>\n","      <td>[C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17...</td>\n","      <td>\\n        Inclusion criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[placebo, DB01229, none, DB00958, DB01229, DB0...</td>\n","      <td>[placebo, [H][C@]12[C@H](OC(=O)C3=CC=CC=C3)[C@...</td>\n","      <td>[placebo, CHEMBL428647, none, CHEMBL1351, CHEM...</td>\n","      <td>[placebo, 50001839, none, none, 50001839, none]</td>\n","      <td>[placebo, paclitaxel, none, carboplatin, pacli...</td>\n","      <td>[C1706408, C0144576, none, C0079083, C0144576,...</td>\n","      <td>[[], [-0.0152721016686416, 0.0059331896906342,...</td>\n","      <td>V1     -0.015272\n","V2      0.005933\n","V3          ...</td>\n","      <td>[[], [], [], [], [], []]</td>\n","      <td>[]</td>\n","      <td>-1</td>\n","    </tr>\n","    <tr>\n","      <th>3090</th>\n","      <td>NCT01127217</td>\n","      <td>NaN</td>\n","      <td>[amlodipine/losartan, amlodipine]</td>\n","      <td>[hypertension]</td>\n","      <td>[I15.0, I97.3, K76.6, P29.2, G93.2, H40.053, I10]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>1</td>\n","      <td>[DB00381, DB00381]</td>\n","      <td>[CCOC(=O)C1=C(COCCN)NC(C)=C(C1C1=CC=CC=C1Cl)C(...</td>\n","      <td>[CHEMBL1491, CHEMBL1491]</td>\n","      <td>[50088383, 50088383]</td>\n","      <td>[amlodipine, amlodipine]</td>\n","      <td>[C5195719, C5195719]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","      <td>[[5.37669563293457, -5.854226589202881, -4.580...</td>\n","      <td>[5.37669563293457, -5.854226589202881, -4.5800...</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3091</th>\n","      <td>NCT01187953</td>\n","      <td>1086.0</td>\n","      <td>[prograf (tacrolimus), lcp-tacro]</td>\n","      <td>[renal failure]</td>\n","      <td>[P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          1. ...</td>\n","      <td>1</td>\n","      <td>[DB00864, none]</td>\n","      <td>[CO[C@@H]1C[C@@H](CC[C@H]1O)\\C=C(/C)[C@H]1OC(=...</td>\n","      <td>[CHEMBL269732, none]</td>\n","      <td>[50030448, none]</td>\n","      <td>[tacrolimus, none]</td>\n","      <td>[C0519826, none]</td>\n","      <td>[[], []]</td>\n","      <td>[]</td>\n","      <td>[[8.613880157470703, -9.339082717895508, 19.17...</td>\n","      <td>[8.613880157470703, -9.339082717895508, 19.174...</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3092</th>\n","      <td>NCT01364649</td>\n","      <td>1186.0</td>\n","      <td>[vortioxetine, escitalopram, placebo]</td>\n","      <td>[treatment outcome]</td>\n","      <td>[Z01.12, Z92.89, Z75.2, M27.59, Z53.9, Z91.19,...</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          1. ...</td>\n","      <td>1</td>\n","      <td>[DB09068, DB01175, placebo]</td>\n","      <td>[CC1=CC=C(SC2=CC=CC=C2N2CCNCC2)C(C)=C1, CN(C)C...</td>\n","      <td>[CHEMBL2104993, CHEMBL1508, placebo]</td>\n","      <td>[50400902, 50302225, placebo]</td>\n","      <td>[vortioxetine, escitalopram, placebo]</td>\n","      <td>[C3661282, C1099456, C1706408]</td>\n","      <td>[[], [-0.0147954572699932, 0.003364188566606, ...</td>\n","      <td>V1     -0.014795\n","V2      0.003364\n","V3          ...</td>\n","      <td>[[-3.836270332336426, -12.949006080627441, 3.9...</td>\n","      <td>[-3.836270332336426, -12.949006080627441, 3.97...</td>\n","      <td>0</td>\n","    </tr>\n","    <tr>\n","      <th>3093</th>\n","      <td>NCT01097018</td>\n","      <td>NaN</td>\n","      <td>[capecitabine, perifosine, placebo]</td>\n","      <td>[colorectal cancer]</td>\n","      <td>[C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17.2]</td>\n","      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n","      <td>0</td>\n","      <td>[DB01101, DB06641, placebo]</td>\n","      <td>[CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@@H]1O[C@H](C)...</td>\n","      <td>[CHEMBL1773, CHEMBL372764, placebo]</td>\n","      <td>[none, 50431630, placebo]</td>\n","      <td>[capecitabine, perifosine, placebo]</td>\n","      <td>[C0671970, C0754570, C1706408]</td>\n","      <td>[[-0.0206859657439039, 0.0082703372165789, 1.3...</td>\n","      <td>V1     -0.020686\n","V2       0.00827\n","V3          ...</td>\n","      <td>[[7.950209617614746, -1.9675993919372559, 3.18...</td>\n","      <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n","      <td>0</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>3094 rows × 18 columns</p>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-16c71b1f-9777-4076-b77b-0a212cb8390b')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-16c71b1f-9777-4076-b77b-0a212cb8390b button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-16c71b1f-9777-4076-b77b-0a212cb8390b');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":179}]},{"cell_type":"code","source":["ctid_embeddings_df"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"OgopTFN6fE2I","executionInfo":{"status":"ok","timestamp":1651456805363,"user_tz":240,"elapsed":336,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"bfd6f32a-3600-4dc9-d2a3-c1eeba6e97a9"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["            nctid                                          embedding  \\\n","0     NCT00475085  [13.561273574829102, -13.577717781066895, 0.95...   \n","1     NCT01626859  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","2     NCT00203957  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","3     NCT00169832  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","4     NCT01249352  [4.89539098739624, 3.768472671508789, 4.447010...   \n","...           ...                                                ...   \n","4579  NCT00679484  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","4580  NCT01057407  [-3.7375659942626953, -3.272908926010132, 4.68...   \n","4581  NCT01126580  [9.013765335083008, -4.713770866394043, 15.139...   \n","4582  NCT00423813  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","4583  NCT00237653  [12.198214530944824, 2.7601611614227295, 12.73...   \n","\n","                         drug  \n","0                  aprepitant  \n","1                        none  \n","2                        none  \n","3                        none  \n","4                fluorouracil  \n","...                       ...  \n","4579                     none  \n","4580  sevelamer hydrochloride  \n","4581                metformin  \n","4582                     none  \n","4583           valganciclovir  \n","\n","[4584 rows x 3 columns]"],"text/html":["\n","  <div id=\"df-56f1d3e3-9289-4499-a24e-24335a3c9e6d\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>nctid</th>\n","      <th>embedding</th>\n","      <th>drug</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>0</th>\n","      <td>NCT00475085</td>\n","      <td>[13.561273574829102, -13.577717781066895, 0.95...</td>\n","      <td>aprepitant</td>\n","    </tr>\n","    <tr>\n","      <th>1</th>\n","      <td>NCT01626859</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>2</th>\n","      <td>NCT00203957</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>3</th>\n","      <td>NCT00169832</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>4</th>\n","      <td>NCT01249352</td>\n","      <td>[4.89539098739624, 3.768472671508789, 4.447010...</td>\n","      <td>fluorouracil</td>\n","    </tr>\n","    <tr>\n","      <th>...</th>\n","      <td>...</td>\n","      <td>...</td>\n","      <td>...</td>\n","    </tr>\n","    <tr>\n","      <th>4579</th>\n","      <td>NCT00679484</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>4580</th>\n","      <td>NCT01057407</td>\n","      <td>[-3.7375659942626953, -3.272908926010132, 4.68...</td>\n","      <td>sevelamer hydrochloride</td>\n","    </tr>\n","    <tr>\n","      <th>4581</th>\n","      <td>NCT01126580</td>\n","      <td>[9.013765335083008, -4.713770866394043, 15.139...</td>\n","      <td>metformin</td>\n","    </tr>\n","    <tr>\n","      <th>4582</th>\n","      <td>NCT00423813</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>4583</th>\n","      <td>NCT00237653</td>\n","      <td>[12.198214530944824, 2.7601611614227295, 12.73...</td>\n","      <td>valganciclovir</td>\n","    </tr>\n","  </tbody>\n","</table>\n","<p>4584 rows × 3 columns</p>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-56f1d3e3-9289-4499-a24e-24335a3c9e6d')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-56f1d3e3-9289-4499-a24e-24335a3c9e6d button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-56f1d3e3-9289-4499-a24e-24335a3c9e6d');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":180}]},{"cell_type":"code","source":["ctid_embeddings_df.sample(50)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"HzmTMSbrHwcD","executionInfo":{"status":"ok","timestamp":1651457317585,"user_tz":240,"elapsed":408,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"58f17885-67a3-415b-aab3-92dfed8d4c83"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["            nctid                                          embedding  \\\n","761   NCT00174720  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","3383  NCT02220894  [14.387839317321777, -10.28243350982666, 1.606...   \n","1451  NCT00113386  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","2678  NCT00002597  [10.195808410644531, -9.866142272949219, 0.332...   \n","3475  NCT02292446  [23.38435935974121, -0.7330405712127686, 11.95...   \n","2898  NCT01768286  [3.361865282058716, 0.7817785739898682, 10.766...   \n","1653  NCT01049334  [6.104618072509766, -5.057725429534912, -14.15...   \n","3307  NCT02149121  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","3569  NCT02388906  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","1292  NCT01348243  [-2.2266159057617188, -5.000643253326416, 5.43...   \n","1578  NCT02182479  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","812   NCT00253968  [9.196856498718262, -10.230440139770508, -13.4...   \n","4468  NCT00698581  [8.661005020141602, -0.3385846018791199, 11.90...   \n","1146  NCT01512108  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","3392  NCT02226003  [0.14281639456748962, -10.426595687866211, -14...   \n","3873  NCT02679573  [14.945191383361816, -25.684415817260742, 17.6...   \n","1398  NCT01342913  [-6.010187149047852, 7.235744476318359, 0.6008...   \n","2380  NCT00286468  [6.733612060546875, -12.518169403076172, 1.304...   \n","2340  NCT00227747  [7.950209617614746, -1.9675993919372559, 3.184...   \n","183   NCT00127634  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","574   NCT00265616  [7.544963359832764, 6.164093017578125, -10.620...   \n","1999  NCT00540449  [12.20997142791748, -2.5129404067993164, -1.25...   \n","114   NCT01672762  [-8.753070831298828, -18.59516143798828, -4.12...   \n","358   NCT01763918  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","2330  NCT00484939  [7.950209617614746, -1.9675993919372559, 3.184...   \n","1179  NCT00114127  [0.26812058687210083, -11.34184741973877, -2.4...   \n","2369  NCT00151255  [-2.3481616973876953, -2.4751017093658447, 4.7...   \n","1277  NCT00163293  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","2316  NCT00668525  [9.756115913391113, -13.786759376525879, -8.16...   \n","875   NCT00506285  [4.142498970031738, -1.2075169086456299, -2.51...   \n","1086  NCT00056407  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","261   NCT00628589  [-3.4664182662963867, -6.698912143707275, -0.5...   \n","1329  NCT01510535  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","3964  NCT02785939  [17.70220184326172, -18.377199172973633, 11.80...   \n","231   NCT00355641  [6.557114124298096, -12.076240539550781, 0.163...   \n","1849  NCT00347412  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","107   NCT00330460  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","316   NCT00113815  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","1431  NCT01277666  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","1167  NCT00385138  [12.460927963256836, -18.833032608032227, 14.6...   \n","1795  NCT01573351  [13.946184158325195, -10.585319519042969, 7.30...   \n","1315  NCT00449930  [11.46723747253418, -19.053577423095703, 2.737...   \n","3061  NCT01287260  [7.839696407318115, -1.8401050567626953, -8.38...   \n","507   NCT00363584  [7.950209617614746, -1.9675993919372559, 3.184...   \n","4139  NCT03123471  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","2194  NCT00604383  [10.526212692260742, -15.975157737731934, -3.7...   \n","3424  NCT02252042  [23.116355895996094, -7.407402515411377, 16.62...   \n","1934  NCT00699153  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","637   NCT01433523  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...   \n","3574  NCT02393859  [-3.2333426475524902, -13.376590728759766, 10....   \n","\n","                                                   drug  \n","761                                                none  \n","3383                                         pemetrexed  \n","1451                                               none  \n","2678                                          flutamide  \n","3475                                        ruxolitinib  \n","2898                                                rbv  \n","1653                                       flurbiprofen  \n","3307                                               none  \n","3569                                               none  \n","1292  disodium clodronate 200 mg/4 ml with 1% lidocaine  \n","1578                                               none  \n","812                                        eplivanserin  \n","4468                                       brivaracetam  \n","1146                                               none  \n","3392                                      ertugliflozin  \n","3873                                       delafloxacin  \n","1398        fluticasone furoate 100mcg/vilanterol 25mcg  \n","2380                           alogliptin and glyburide  \n","2340                                       capecitabine  \n","183                                                none  \n","574                                            propofol  \n","1999                                          efavirenz  \n","114                                       ipragliflozin  \n","358                                                none  \n","2330                                       capecitabine  \n","1179                                         duloxetine  \n","2369                                         cytarabine  \n","1277                                               none  \n","2316                                       escitalopram  \n","875            methylphenidate transdermal system (mts)  \n","1086                                               none  \n","261                               inhaled loxapine 5 mg  \n","1329                                               none  \n","3964                                        palbociclib  \n","231                    ropinirole extended release (xr)  \n","1849                                               none  \n","107                                                none  \n","316                                                none  \n","1431                                               none  \n","1167                                          cangrelor  \n","1795                                        asunaprevir  \n","1315                              sitagliptin phosphate  \n","3061                                 nifedipine (adalat  \n","507                                        capecitabine  \n","4139                                               none  \n","2194                                      ruboxistaurin  \n","3424                                       methotrexate  \n","1934                                               none  \n","637                                                none  \n","3574                                       daunorubicin  "],"text/html":["\n","  <div id=\"df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa\">\n","    <div class=\"colab-df-container\">\n","      <div>\n","<style scoped>\n","    .dataframe tbody tr th:only-of-type {\n","        vertical-align: middle;\n","    }\n","\n","    .dataframe tbody tr th {\n","        vertical-align: top;\n","    }\n","\n","    .dataframe thead th {\n","        text-align: right;\n","    }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n","  <thead>\n","    <tr style=\"text-align: right;\">\n","      <th></th>\n","      <th>nctid</th>\n","      <th>embedding</th>\n","      <th>drug</th>\n","    </tr>\n","  </thead>\n","  <tbody>\n","    <tr>\n","      <th>761</th>\n","      <td>NCT00174720</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>3383</th>\n","      <td>NCT02220894</td>\n","      <td>[14.387839317321777, -10.28243350982666, 1.606...</td>\n","      <td>pemetrexed</td>\n","    </tr>\n","    <tr>\n","      <th>1451</th>\n","      <td>NCT00113386</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>2678</th>\n","      <td>NCT00002597</td>\n","      <td>[10.195808410644531, -9.866142272949219, 0.332...</td>\n","      <td>flutamide</td>\n","    </tr>\n","    <tr>\n","      <th>3475</th>\n","      <td>NCT02292446</td>\n","      <td>[23.38435935974121, -0.7330405712127686, 11.95...</td>\n","      <td>ruxolitinib</td>\n","    </tr>\n","    <tr>\n","      <th>2898</th>\n","      <td>NCT01768286</td>\n","      <td>[3.361865282058716, 0.7817785739898682, 10.766...</td>\n","      <td>rbv</td>\n","    </tr>\n","    <tr>\n","      <th>1653</th>\n","      <td>NCT01049334</td>\n","      <td>[6.104618072509766, -5.057725429534912, -14.15...</td>\n","      <td>flurbiprofen</td>\n","    </tr>\n","    <tr>\n","      <th>3307</th>\n","      <td>NCT02149121</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>3569</th>\n","      <td>NCT02388906</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>1292</th>\n","      <td>NCT01348243</td>\n","      <td>[-2.2266159057617188, -5.000643253326416, 5.43...</td>\n","      <td>disodium clodronate 200 mg/4 ml with 1% lidocaine</td>\n","    </tr>\n","    <tr>\n","      <th>1578</th>\n","      <td>NCT02182479</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>812</th>\n","      <td>NCT00253968</td>\n","      <td>[9.196856498718262, -10.230440139770508, -13.4...</td>\n","      <td>eplivanserin</td>\n","    </tr>\n","    <tr>\n","      <th>4468</th>\n","      <td>NCT00698581</td>\n","      <td>[8.661005020141602, -0.3385846018791199, 11.90...</td>\n","      <td>brivaracetam</td>\n","    </tr>\n","    <tr>\n","      <th>1146</th>\n","      <td>NCT01512108</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>3392</th>\n","      <td>NCT02226003</td>\n","      <td>[0.14281639456748962, -10.426595687866211, -14...</td>\n","      <td>ertugliflozin</td>\n","    </tr>\n","    <tr>\n","      <th>3873</th>\n","      <td>NCT02679573</td>\n","      <td>[14.945191383361816, -25.684415817260742, 17.6...</td>\n","      <td>delafloxacin</td>\n","    </tr>\n","    <tr>\n","      <th>1398</th>\n","      <td>NCT01342913</td>\n","      <td>[-6.010187149047852, 7.235744476318359, 0.6008...</td>\n","      <td>fluticasone furoate 100mcg/vilanterol 25mcg</td>\n","    </tr>\n","    <tr>\n","      <th>2380</th>\n","      <td>NCT00286468</td>\n","      <td>[6.733612060546875, -12.518169403076172, 1.304...</td>\n","      <td>alogliptin and glyburide</td>\n","    </tr>\n","    <tr>\n","      <th>2340</th>\n","      <td>NCT00227747</td>\n","      <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n","      <td>capecitabine</td>\n","    </tr>\n","    <tr>\n","      <th>183</th>\n","      <td>NCT00127634</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>574</th>\n","      <td>NCT00265616</td>\n","      <td>[7.544963359832764, 6.164093017578125, -10.620...</td>\n","      <td>propofol</td>\n","    </tr>\n","    <tr>\n","      <th>1999</th>\n","      <td>NCT00540449</td>\n","      <td>[12.20997142791748, -2.5129404067993164, -1.25...</td>\n","      <td>efavirenz</td>\n","    </tr>\n","    <tr>\n","      <th>114</th>\n","      <td>NCT01672762</td>\n","      <td>[-8.753070831298828, -18.59516143798828, -4.12...</td>\n","      <td>ipragliflozin</td>\n","    </tr>\n","    <tr>\n","      <th>358</th>\n","      <td>NCT01763918</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>2330</th>\n","      <td>NCT00484939</td>\n","      <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n","      <td>capecitabine</td>\n","    </tr>\n","    <tr>\n","      <th>1179</th>\n","      <td>NCT00114127</td>\n","      <td>[0.26812058687210083, -11.34184741973877, -2.4...</td>\n","      <td>duloxetine</td>\n","    </tr>\n","    <tr>\n","      <th>2369</th>\n","      <td>NCT00151255</td>\n","      <td>[-2.3481616973876953, -2.4751017093658447, 4.7...</td>\n","      <td>cytarabine</td>\n","    </tr>\n","    <tr>\n","      <th>1277</th>\n","      <td>NCT00163293</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>2316</th>\n","      <td>NCT00668525</td>\n","      <td>[9.756115913391113, -13.786759376525879, -8.16...</td>\n","      <td>escitalopram</td>\n","    </tr>\n","    <tr>\n","      <th>875</th>\n","      <td>NCT00506285</td>\n","      <td>[4.142498970031738, -1.2075169086456299, -2.51...</td>\n","      <td>methylphenidate transdermal system (mts)</td>\n","    </tr>\n","    <tr>\n","      <th>1086</th>\n","      <td>NCT00056407</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>261</th>\n","      <td>NCT00628589</td>\n","      <td>[-3.4664182662963867, -6.698912143707275, -0.5...</td>\n","      <td>inhaled loxapine 5 mg</td>\n","    </tr>\n","    <tr>\n","      <th>1329</th>\n","      <td>NCT01510535</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>3964</th>\n","      <td>NCT02785939</td>\n","      <td>[17.70220184326172, -18.377199172973633, 11.80...</td>\n","      <td>palbociclib</td>\n","    </tr>\n","    <tr>\n","      <th>231</th>\n","      <td>NCT00355641</td>\n","      <td>[6.557114124298096, -12.076240539550781, 0.163...</td>\n","      <td>ropinirole extended release (xr)</td>\n","    </tr>\n","    <tr>\n","      <th>1849</th>\n","      <td>NCT00347412</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>107</th>\n","      <td>NCT00330460</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>316</th>\n","      <td>NCT00113815</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>1431</th>\n","      <td>NCT01277666</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>1167</th>\n","      <td>NCT00385138</td>\n","      <td>[12.460927963256836, -18.833032608032227, 14.6...</td>\n","      <td>cangrelor</td>\n","    </tr>\n","    <tr>\n","      <th>1795</th>\n","      <td>NCT01573351</td>\n","      <td>[13.946184158325195, -10.585319519042969, 7.30...</td>\n","      <td>asunaprevir</td>\n","    </tr>\n","    <tr>\n","      <th>1315</th>\n","      <td>NCT00449930</td>\n","      <td>[11.46723747253418, -19.053577423095703, 2.737...</td>\n","      <td>sitagliptin phosphate</td>\n","    </tr>\n","    <tr>\n","      <th>3061</th>\n","      <td>NCT01287260</td>\n","      <td>[7.839696407318115, -1.8401050567626953, -8.38...</td>\n","      <td>nifedipine (adalat</td>\n","    </tr>\n","    <tr>\n","      <th>507</th>\n","      <td>NCT00363584</td>\n","      <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n","      <td>capecitabine</td>\n","    </tr>\n","    <tr>\n","      <th>4139</th>\n","      <td>NCT03123471</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>2194</th>\n","      <td>NCT00604383</td>\n","      <td>[10.526212692260742, -15.975157737731934, -3.7...</td>\n","      <td>ruboxistaurin</td>\n","    </tr>\n","    <tr>\n","      <th>3424</th>\n","      <td>NCT02252042</td>\n","      <td>[23.116355895996094, -7.407402515411377, 16.62...</td>\n","      <td>methotrexate</td>\n","    </tr>\n","    <tr>\n","      <th>1934</th>\n","      <td>NCT00699153</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>637</th>\n","      <td>NCT01433523</td>\n","      <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n","      <td>none</td>\n","    </tr>\n","    <tr>\n","      <th>3574</th>\n","      <td>NCT02393859</td>\n","      <td>[-3.2333426475524902, -13.376590728759766, 10....</td>\n","      <td>daunorubicin</td>\n","    </tr>\n","  </tbody>\n","</table>\n","</div>\n","      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa')\"\n","              title=\"Convert this dataframe to an interactive table.\"\n","              style=\"display:none;\">\n","        \n","  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n","       width=\"24px\">\n","    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n","    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n","  </svg>\n","      </button>\n","      \n","  <style>\n","    .colab-df-container {\n","      display:flex;\n","      flex-wrap:wrap;\n","      gap: 12px;\n","    }\n","\n","    .colab-df-convert {\n","      background-color: #E8F0FE;\n","      border: none;\n","      border-radius: 50%;\n","      cursor: pointer;\n","      display: none;\n","      fill: #1967D2;\n","      height: 32px;\n","      padding: 0 0 0 0;\n","      width: 32px;\n","    }\n","\n","    .colab-df-convert:hover {\n","      background-color: #E2EBFA;\n","      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n","      fill: #174EA6;\n","    }\n","\n","    [theme=dark] .colab-df-convert {\n","      background-color: #3B4455;\n","      fill: #D2E3FC;\n","    }\n","\n","    [theme=dark] .colab-df-convert:hover {\n","      background-color: #434B5C;\n","      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n","      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n","      fill: #FFFFFF;\n","    }\n","  </style>\n","\n","      <script>\n","        const buttonEl =\n","          document.querySelector('#df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa button.colab-df-convert');\n","        buttonEl.style.display =\n","          google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n","        async function convertToInteractive(key) {\n","          const element = document.querySelector('#df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa');\n","          const dataTable =\n","            await google.colab.kernel.invokeFunction('convertToInteractive',\n","                                                     [key], {});\n","          if (!dataTable) return;\n","\n","          const docLinkHtml = 'Like what you see? Visit the ' +\n","            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n","            + ' to learn more about interactive tables.';\n","          element.innerHTML = '';\n","          dataTable['output_type'] = 'display_data';\n","          await google.colab.output.renderOutput(dataTable, element);\n","          const docLink = document.createElement('div');\n","          docLink.innerHTML = docLinkHtml;\n","          element.appendChild(docLink);\n","        }\n","      </script>\n","    </div>\n","  </div>\n","  "]},"metadata":{},"execution_count":183}]},{"cell_type":"code","source":["ctid_embeddings_df.to_pickle(deep_learning_dir + '/embeddings/drug_embeddings.pickle')"],"metadata":{"id":"nWzJXSVCFTII"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["ctid_embeddings_dict = dict(zip(ctid_embeddings_df[\"nctid\"], ctid_embeddings_df[\"embedding\"]))"],"metadata":{"id":"xj_FACXuTE3j"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import pickle"],"metadata":{"id":"XQQul_IQT-aj"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["with open(\"nctid2drugs.pkl\", 'wb') as handle:\n","    pickle.dump(ctid_embeddings_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)"],"metadata":{"id":"RVYjsLkyTmHJ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["!mv nctid2drugs.pkl \"/content/gdrive/My Drive/BMI 707 Project/embeddings/\""],"metadata":{"id":"mZ9hk1ZeT4u3"},"execution_count":null,"outputs":[]}]}