1 lines (1 with data), 155.3 kB
{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"drug_smiles_mapping.ipynb","provenance":[{"file_id":"1WBrGaXxcb39syozPKyZ_7--79VGa5rQv","timestamp":1651445556617}],"collapsed_sections":[],"toc_visible":true,"authorship_tag":"ABX9TyOjR37uASduG1OnYOzfUqUw"},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"}},"cells":[{"cell_type":"markdown","source":["## Imports"],"metadata":{"id":"4da2Sb7ct74V"}},{"cell_type":"code","execution_count":null,"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"executionInfo":{"elapsed":1259,"status":"ok","timestamp":1651532334625,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"},"user_tz":240},"id":"avEwtXgX61wn","outputId":"e9023fbd-91cd-4a9b-f94c-0ba96f9d35af"},"outputs":[{"output_type":"stream","name":"stdout","text":["Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n"]}],"source":["from google.colab import drive\n","drive.mount('/content/gdrive')"]},{"cell_type":"code","execution_count":null,"metadata":{"id":"xiC9awOs8aiv"},"outputs":[],"source":["import pandas as pd\n","import os\n","from tqdm import tqdm\n","import re\n","import numpy as np"]},{"cell_type":"markdown","source":["# Map "],"metadata":{"id":"1Sy3aj9D_iFO"}},{"cell_type":"code","source":["# Function that returns value if key in map, otherwise, returns a default value\n","def default(map, key, default):\n"," if key in map:\n"," return map[key]\n"," return default\n","\n","def for_all(list, f):\n"," for e in list:\n"," if not f(e):\n"," return False\n"," return True"],"metadata":{"id":"huUSgupWcDT4"},"execution_count":null,"outputs":[]},{"cell_type":"code","execution_count":null,"metadata":{"id":"lDKYr8LWYqDO"},"outputs":[],"source":["# Directory to the project folder\n","deep_learning_dir = '/content/gdrive/My Drive/BMI 707 Project' "]},{"cell_type":"code","source":["# Reading formatted train, test, val data\n","train = pd.read_pickle(deep_learning_dir + '/data_formatting/training_data.pickle')\n","test = pd.read_pickle(deep_learning_dir + '/data_formatting/testing_data.pickle')\n","val = pd.read_pickle(deep_learning_dir + '/data_formatting/validation_data.pickle')"],"metadata":{"id":"Kh9N5P-inJBX"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Read mapping file that contains all drugs in drugbank, with \n","# Columns: smiles, drugbank_id\n","mapping = pd.read_csv(deep_learning_dir + '/data/drugbank/drugbank_mappings.csv')\n","mapping['drug_name'] = mapping['drug_name'].map(lambda name : name.lower())"],"metadata":{"id":"8YTE9uSGcoG3"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Standardize drug names to facilitate matching\n","\n","# Remove salt from drugs that are in salt form (e.g., metformin hydrochloride)\n","words_to_remove = [\"HYDROCHLORIDE\",\"BISULFATE\",\"ALUMINUM\",\"ARGININE\",\"BENZATHINE\",\"CALCIUM\",\"CHLOROPROCAINE\",\"CHOLINE\",\"DIETHANOLAMINE\",\"ETHANOLAMINE\",\"ETHYLENEDIAMINE\",\"LYSINE\",\"MAGNESIUM\",\"HISTIDINE\",\"LITHIUM\",\"MEGLUMINE\",\"POTASSIUM\",\"PROCAINE\",\"SODIUM\",\"TRIETHYLAMINE\",\"ZINC\",\"ACETATE\",\"ASPARTATE\",\"BENZENESULFONATE\",\"BENZOATE\",\"BESYLATE\",\"BICARBONATE\",\"BITARTRATE\",\"BROMIDE\",\"CAMSYLATE\",\"CARBONATE\",\"CHLORIDE\",\"CITRATE\",\"DECANOATE\",\"EDETATE\",\"ESYLATE\",\"FUMARATE\",\"GLUCEPTATE\",\"GLUCONATE\",\"GLUTAMATE\",\"GLYCOLATE\",\"HEXANOATE\",\"HYDROXYNAPHTHOATE\",\"IODIDE\",\"ISETHIONATE\",\"LACTATE\",\"LACTOBIONATE\",\"MALATE\",\"MALEATE\",\"MANDELATE\",\"MESYLATE\",\"METHYLSULFATE\",\"MUCATE\",\"NAPSYLATE\",\"NITRATE\",\"OCTANOATE\",\"OLEATE\",\"PAMOATE\",\"PANTOTHENATE\",\"PHOSPHATE\",\"POLYGALACTURONATE\",\"PROPIONATE\",\"SALICYLATE\",\"STEARATE\",\"ACETATE\",\"SUCCINATE\",\"SULFATE\",\"TARTRATE\",\"TEOCLATE\",\"TOSYLATE\"]\n","words_to_remove = set(map(lambda w: w.lower(), words_to_remove))\n","\n","non_alphanum_patt = re.compile(r\"[^A-Za-z0-9]\")\n","multi_space_patt = re.compile(r\" +\")\n","\n","def clean_name(drug):\n"," drug = drug.lower()\n"," # Replace non-alphanumeric characters with spaces\n"," drug = non_alphanum_patt.sub(' ', drug)\n"," # Remove words we do not want\n"," splits = drug.split()\n"," if splits:\n"," drug = ' '.join([splits[0], ' '.join(filter(lambda w: w not in words_to_remove, splits[1:]))])\n"," # Replace multi-spaces into a single space\n"," drug = multi_space_patt.sub(' ', drug)\n"," # Trim invisible characters (spaces, tabs, new lines)\n"," drug = drug.strip()\n"," return drug"],"metadata":{"id":"HMsuFlzr1VpE"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["unique_drug = set()\n","pd.concat([train, test, val])['drugs'].map(lambda drugs : list(map(lambda d : unique_drug.add(d), drugs)))\n","len(unique_drug)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"uTJKC-_8ym4b","executionInfo":{"status":"ok","timestamp":1651530059246,"user_tz":240,"elapsed":287,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"ad9b7c5e-0681-471f-cdad-5cc1d2532104"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["4833"]},"metadata":{},"execution_count":8}]},{"cell_type":"code","source":["# Add drugbank data to the training, testing and validation sets\n","\n","# Dictionary mapping drugbank name to drugbank data\n","drugbank_dict = {}\n","for m in mapping.iterrows():\n"," drugbank_dict[clean_name(m[1]['drug_name'].lower())] = (m[1]['id'], m[1]['smiles'], m[1]['chembl'], m[1]['binding_db'], clean_name(m[1]['drug_name'].lower()))\n","\n","# This is a debug variable for counting matches\n","unmatched = {}\n","\n","drugbank_dict[\"DEBUG\"] = 0\n","drugbank_dict[\"DEBUG2\"] = 0\n","\n","# Drug name that need to be changed to map with drugbank\n","replacements = {\n"," \"bi 10773\": \"empagliflozin\",\n"," \"rbv\": \"ribavirin\",\n"," \"bay59 7939\": \"rivaroxaban\",\n"," \"sof vel\": \"sofosbuvir\",\n"," \"insulin degludec insulin aspart\": \"insulin degludec\",\n"," \"cp 690 550\": \"tofacitinib\",\n"," \"ftc tdf\": \"tenofovir\",\n"," \"risedronate\": \"risedronic acid\",\n"," \"nktr 118\": \"naloxegol\",\n","}\n","\n","# Returns a list of tuple with drugbank information\n","def map_drugbank_data(drugs):\n"," matched = False\n"," data = []\n"," for drug in drugs:\n"," drug = clean_name(drug)\n","\n"," if drug in replacements:\n"," drug = replacements[drug]\n"," \n"," # Placebos are a special case\n"," if 'placebo' in drug:\n"," drugbank_dict[\"DEBUG\"] += 1\n"," matched = True\n"," data.append(('placebo', 'placebo', 'placebo', 'placebo', 'placebo')) # all columns will contain the value placebo\n"," continue\n"," \n"," data.append(('none', 'none', 'none', 'none', 'none'))\n","\n"," if drug in drugbank_dict:\n"," drugbank_dict[\"DEBUG\"] += 1\n"," matched = True\n"," data[len(data)-1] = drugbank_dict[drug]\n"," continue\n","\n"," splits = drug.split(' ')\n"," for split in splits:\n"," if split in drugbank_dict:\n"," drugbank_dict[\"DEBUG\"] += 1\n"," matched = True\n"," data[len(data)-1] = drugbank_dict[split]\n"," break\n"," \n"," if data[len(data)-1][0] == \"none\":\n"," if not drug in unmatched:\n"," unmatched[drug] = 0\n"," unmatched[drug] += 1\n"," if matched:\n"," drugbank_dict[\"DEBUG2\"] += 1\n"," return data\n","\n","def add_drugbank_data(df, name_of_df):\n"," drugbank_dict[\"DEBUG\"] = 0\n"," # Add drugbank data\n"," df['drugbank_data'] = df['drugs'].map(lambda drugs : map_drugbank_data(drugs))\n"," # Splitting the tuples into different columns for readability\n"," df['drugbank_id'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[0], drugs)))\n"," df['smiles'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[1], drugs)))\n"," df['chembl'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[2], drugs)))\n"," df['binding_db'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[3], drugs)))\n"," df['clean_name'] = df['drugbank_data'].map(lambda drugs : list(map(lambda d : d[4], drugs)))\n","\n"," # Remove drugbank data now that we have split it\n"," del df['drugbank_data']\n"," print(name_of_df + \" % of drugs mapped: \" + str(drugbank_dict[\"DEBUG\"]/df['drugs'].map(len).sum()*100))\n","\n","add_drugbank_data(train, \"training\")\n","add_drugbank_data(test, \"testing\")\n","add_drugbank_data(val, \"validation\")"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"_0AKzXI4c-9H","executionInfo":{"status":"ok","timestamp":1651530333169,"user_tz":240,"elapsed":1532,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"db15981f-4014-4d4e-a967-bd03af47532e"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["training % of drugs mapped: 83.38505301267132\n","testing % of drugs mapped: 78.1437125748503\n","validation % of drugs mapped: 86.17021276595744\n"]}]},{"cell_type":"code","source":["# Count the number of unique smiles we have matched\n","smiles_set = set()\n","\n","def add_to_smiles_set(row):\n"," for i in range(len(row.drugs)):\n"," smiles_set.add(row.smiles[i])\n","\n","train.apply(add_to_smiles_set, axis=1)\n","test.apply(add_to_smiles_set, axis=1)\n","val.apply(add_to_smiles_set, axis=1)\n","\n","len(smiles_set)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Ep2RxiG68gZo","executionInfo":{"status":"ok","timestamp":1651531251060,"user_tz":240,"elapsed":304,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"4e494abb-59ca-4e79-f367-3f7228e3b979"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["881"]},"metadata":{},"execution_count":10}]},{"cell_type":"code","source":["# Debugging code to help find out the drug names that are the most unmatched\n","unmatched_list = list(zip(unmatched.keys(), unmatched.values()))\n","unmatched_list.sort(key=lambda x : int(x[1]), reverse=True)\n","unmatched_list[0:10]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"SRGFmLg2dFLH","executionInfo":{"status":"ok","timestamp":1651531253825,"user_tz":240,"elapsed":4,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"59f61752-be9b-45ba-fe98-d06b1395b342"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["[('epoetin alfa', 15),\n"," ('chemotherapy', 12),\n"," ('corticosteroids', 9),\n"," ('qva149', 9),\n"," ('aspirin', 9),\n"," ('ly2189265', 9),\n"," ('tak 438', 9),\n"," ('bay43 9006', 8),\n"," ('vi 0521', 8),\n"," ('nva237', 8)]"]},"metadata":{},"execution_count":11}]},{"cell_type":"code","source":["train.to_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/training_data_drugbank.pickle')\n","test.to_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/testing_data_drugbank.pickle')\n","val.to_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/validation_data_drugbank.pickle')"],"metadata":{"id":"a_PM8Mw6uwX8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train = pd.read_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/training_data_drugbank.pickle')\n","test = pd.read_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/testing_data_drugbank.pickle')\n","val = pd.read_pickle(deep_learning_dir + '/data_formatting/drug_drugbank/validation_data_drugbank.pickle')\n","dictionary = pd.read_csv(deep_learning_dir + '/data/mapping/dictionary.csv', sep='|')\n","cui2vec = pd.read_csv(deep_learning_dir + '/data/mapping/cui2vec_pretrained.csv')"],"metadata":{"id":"39IIMcH1AB8b"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Create a map from clean name to CUI\n","keys = list(dictionary['STR'].map(lambda s : clean_name(str(s))))\n","cui_map = dict(zip(keys, dictionary['CUI']))"],"metadata":{"id":"SXt0WyrNAgJe"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["# Add CUIs to the datasets\n","\n","mapped_cuis = set()\n","unmapped_cuis = set()\n","counter = [0]\n","\n","def get_cuis(drug_names):\n"," cuis = []\n"," for drug in drug_names:\n"," drug = clean_name(drug)\n","\n"," if drug in replacements:\n"," drug = replacements[drug]\n"," \n"," # Placebos are a special case\n"," if 'placebo' in drug:\n"," cuis.append(cui_map['placebo'])\n"," mapped_cuis.add(cui_map['placebo'])\n"," continue\n"," \n"," cuis.append('none')\n","\n"," if drug in cui_map:\n"," cuis[len(cuis)-1] = cui_map[drug]\n"," mapped_cuis.add(cui_map[drug])\n"," continue\n","\n"," splits = drug.split(' ')\n"," for split in splits:\n"," if split in cui_map:\n"," cuis[len(cuis)-1] = cui_map[split]\n"," mapped_cuis.add(cui_map[split])\n"," break\n"," if for_all(cuis, lambda cui : cui == 'none'):\n"," counter[0] += 1\n"," return cuis\n","\n","train['cuis'] = train['clean_name'].map(get_cuis)\n","test['cuis'] = test['clean_name'].map(get_cuis)\n","val['cuis'] = val['clean_name'].map(get_cuis)"],"metadata":{"id":"1tUXp9ksANeb"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["counter[0]"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Oj3zWoyKnh8M","executionInfo":{"status":"ok","timestamp":1651448107117,"user_tz":240,"elapsed":350,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"dee841a9-a57a-4deb-da00-a1cffae86a56"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["333"]},"metadata":{},"execution_count":90}]},{"cell_type":"code","source":["len(mapped_cuis)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"g8ZPDXHuaibu","executionInfo":{"status":"ok","timestamp":1651448108400,"user_tz":240,"elapsed":2,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"a05a6c08-3e9f-4c0f-9c73-cca69c8ee051"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["1043"]},"metadata":{},"execution_count":91}]},{"cell_type":"code","source":["pd.DataFrame(mapped_cuis).to_csv(deep_learning_dir + '/data_formatting/drug_cui2vec_data/mapped_cuis.csv')"],"metadata":{"id":"6Gldwd4XFgp8"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["cui2vec_map = {}\n","for row in cui2vec.iterrows():\n"," cui2vec_map[row[1][0]] = row[1][1:]"],"metadata":{"id":"fzjcqbmkCvAL"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train['cui_vectors'] = train['cuis'].map(lambda cuis : [default(cui2vec_map, cui, []) for cui in cuis])\n","test['cui_vectors'] = test['cuis'].map(lambda cuis : [default(cui2vec_map, cui, []) for cui in cuis])\n","val['cui_vectors'] = val['cuis'].map(lambda cuis : [default(cui2vec_map, cui, []) for cui in cuis])"],"metadata":{"id":"Zm1J4eLZaKb6"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["counter[0] = 0\n","def pick_one_cui(cui_vectors):\n"," for vector in cui_vectors:\n"," if len(vector) != 0:\n"," return vector\n"," return []\n","\n","train['primary_cui_vector'] = train['cui_vectors'].map(pick_one_cui)\n","test['primary_cui_vector'] = test['cui_vectors'].map(pick_one_cui)\n","val['primary_cui_vector'] = val['cui_vectors'].map(pick_one_cui)"],"metadata":{"id":"-5eCOK8Aa3u3"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train.to_pickle(deep_learning_dir + '/data_formatting/drug_cui2vec_data/training_data_cui.pickle')\n","test.to_pickle(deep_learning_dir + '/data_formatting/drug_cui2vec_data/testing_data_cui.pickle')\n","val.to_pickle(deep_learning_dir + '/data_formatting/drug_cui2vec_data/validation_data_cui.pickle')"],"metadata":{"id":"GVVlHmJPhCf1"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["print(len(train[train['primary_cui_vector'].str.len() == 0])/len(train)*100)\n","print(len(test[test['primary_cui_vector'].str.len() == 0])/len(test)*100)\n","print(len(val[val['primary_cui_vector'].str.len() == 0])/len(val)*100)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"YTrMLcQWevmT","executionInfo":{"status":"ok","timestamp":1651446307906,"user_tz":240,"elapsed":15,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"a36887fa-8722-4d29-e61f-14b01e35c04d"},"execution_count":null,"outputs":[{"output_type":"stream","name":"stdout","text":["42.82482223658694\n","58.90052356020943\n","43.895348837209305\n"]}]},{"cell_type":"code","source":["train['smiles'].map(lambda smiles : for_all(smiles, lambda smile : smile == \"none\")).sum()/len(train)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"vUmNESqeo2cK","executionInfo":{"status":"ok","timestamp":1651446307906,"user_tz":240,"elapsed":12,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"50ecbf2b-9316-4518-dc61-83f2503001e4"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0.11667744020685197"]},"metadata":{},"execution_count":41}]},{"cell_type":"code","source":["smiles_set = set()\n","\n","def add_to_smiles_set(row):\n"," for i in range(len(row.drugs)):\n"," smiles_set.add(row.smiles[i])\n","\n","train.apply(add_to_smiles_set, axis=1)\n","test.apply(add_to_smiles_set, axis=1)\n","val.apply(add_to_smiles_set, axis=1)"],"metadata":{"id":"MmW8JR5KutXl","executionInfo":{"status":"ok","timestamp":1651447435584,"user_tz":240,"elapsed":373,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"f04628bb-6dd2-4b8f-e63a-9e9369b4bd29","colab":{"base_uri":"https://localhost:8080/"}},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["0 None\n","1 None\n","2 None\n","3 None\n","4 None\n"," ... \n","339 None\n","340 None\n","341 None\n","342 None\n","343 None\n","Length: 344, dtype: object"]},"metadata":{},"execution_count":68}]},{"cell_type":"code","source":["unique_smiles = pd.DataFrame(smiles_set, columns=[\"smiles\"])"],"metadata":{"id":"aIgKo2vCyyqY"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["unique_smiles"],"metadata":{"id":"EOloL6jRzrJO","executionInfo":{"status":"ok","timestamp":1651447437170,"user_tz":240,"elapsed":4,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"4f5dcdf0-465c-4ce9-866e-d9ca4bf7ba46","colab":{"base_uri":"https://localhost:8080/","height":419}},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" smiles\n","0 CCCOC(C(=O)OC1CCN(C)CC1)(C1=CC=CC=C1)C1=CC=CC=C1\n","1 NCCNC1=CC=C(NCCN)C2=C1C(=O)C1=C(C=NC=C1)C2=O\n","2 CC1=C(OCC(F)(F)F)C=CN=C1CS(=O)C1=NC2=CC=CC=C2N1\n","3 CCCCC1=NC2(CCCC2)C(=O)N1CC1=CC=C(C=C1)C1=CC=CC...\n","4 [H][C@]12[C@@H](C)C(S[C@]3([H])CN[C@H](CNS(N)(...\n",".. ...\n","876 COC1=C(C=C(Cl)C=C1)C(=O)NCCC1=CC=C(C=C1)S(=O)(...\n","877 CC[C@H](C)[C@@H]1NC(=O)[C@H](CC2=CC=CC=C2)NC(=...\n","878 [H][C@@]12OC3=C(O)C=CC4=C3[C@@]11CCN(C)[C@]([H...\n","879 CC1=CC2=C(C=C1C(=C)C1=CC=C(C=C1)C(O)=O)C(C)(C)...\n","880 CN(CCN1CCC(CC1)OC(=O)NC1=CC=CC=C1C1=CC=CC=C1)C...\n","\n","[881 rows x 1 columns]"],"text/html":["\n"," <div id=\"df-3b0b9851-ca78-466e-8983-43847fa27ce9\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>smiles</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>CCCOC(C(=O)OC1CCN(C)CC1)(C1=CC=CC=C1)C1=CC=CC=C1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>NCCNC1=CC=C(NCCN)C2=C1C(=O)C1=C(C=NC=C1)C2=O</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>CC1=C(OCC(F)(F)F)C=CN=C1CS(=O)C1=NC2=CC=CC=C2N1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>CCCCC1=NC2(CCCC2)C(=O)N1CC1=CC=C(C=C1)C1=CC=CC...</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>[H][C@]12[C@@H](C)C(S[C@]3([H])CN[C@H](CNS(N)(...</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>876</th>\n"," <td>COC1=C(C=C(Cl)C=C1)C(=O)NCCC1=CC=C(C=C1)S(=O)(...</td>\n"," </tr>\n"," <tr>\n"," <th>877</th>\n"," <td>CC[C@H](C)[C@@H]1NC(=O)[C@H](CC2=CC=CC=C2)NC(=...</td>\n"," </tr>\n"," <tr>\n"," <th>878</th>\n"," <td>[H][C@@]12OC3=C(O)C=CC4=C3[C@@]11CCN(C)[C@]([H...</td>\n"," </tr>\n"," <tr>\n"," <th>879</th>\n"," <td>CC1=CC2=C(C=C1C(=C)C1=CC=C(C=C1)C(O)=O)C(C)(C)...</td>\n"," </tr>\n"," <tr>\n"," <th>880</th>\n"," <td>CN(CCN1CCC(CC1)OC(=O)NC1=CC=CC=C1C1=CC=CC=C1)C...</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>881 rows × 1 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-3b0b9851-ca78-466e-8983-43847fa27ce9')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-3b0b9851-ca78-466e-8983-43847fa27ce9 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-3b0b9851-ca78-466e-8983-43847fa27ce9');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":70}]},{"cell_type":"code","source":["train[train['smiles'].map(lambda smiles : for_all(smiles, lambda smile : smile == \"none\"))].head(50)"],"metadata":{"id":"BYzvBQNrqQ2V","executionInfo":{"status":"ok","timestamp":1651447477441,"user_tz":240,"elapsed":758,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"563e00d6-7474-4eab-d326-61bdc1a7e594","colab":{"base_uri":"https://localhost:8080/","height":1000}},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" nctid n_participants \\\n","1 NCT01626859 152.0 \n","7 NCT00605293 578.0 \n","8 NCT00331864 4189.0 \n","11 NCT00124982 27188.0 \n","20 NCT00437112 NaN \n","51 NCT00316719 928.0 \n","63 NCT00311402 12910.0 \n","73 NCT00221845 NaN \n","75 NCT00206089 NaN \n","77 NCT00250965 NaN \n","91 NCT00220831 NaN \n","102 NCT00567697 NaN \n","107 NCT00330460 5745.0 \n","143 NCT01019694 16447.0 \n","145 NCT01336023 6445.0 \n","155 NCT00334880 NaN \n","156 NCT00309972 NaN \n","158 NCT01327222 NaN \n","168 NCT01421459 10357.0 \n","169 NCT01059812 2080.0 \n","177 NCT00350415 NaN \n","183 NCT00127634 NaN \n","184 NCT01404234 601.0 \n","197 NCT01438710 38.0 \n","201 NCT00597753 2379.0 \n","209 NCT01511939 78.0 \n","210 NCT00668850 NaN \n","218 NCT00119119 NaN \n","226 NCT00064116 NaN \n","230 NCT00735241 NaN \n","243 NCT01106079 NaN \n","289 NCT01326026 1100.0 \n","301 NCT01365507 1372.0 \n","307 NCT00369278 554.0 \n","311 NCT00536263 6874.0 \n","323 NCT00394589 17.0 \n","338 NCT01074268 4974.0 \n","348 NCT00295776 NaN \n","373 NCT00390806 13169.0 \n","388 NCT00717678 NaN \n","402 NCT00184600 10620.0 \n","414 NCT01245569 NaN \n","420 NCT00486018 2769.0 \n","436 NCT00000419 NaN \n","441 NCT00360334 6327.0 \n","445 NCT00330473 NaN \n","452 NCT00372333 NaN \n","466 NCT00510952 5016.0 \n","498 NCT00422383 11992.0 \n","501 NCT01689142 NaN \n","\n"," drugs \\\n","1 [mp-214 low dose, mp-214 middle dose, mp-214 h... \n","7 [methoxy polyethylene glycol-epoetin beta, epo... \n","8 [ranibizumab] \n","11 [abatacept, non-biologic disease modifying ant... \n","20 [human insulin inhalation powder, insulin glar... \n","51 [lam group, adv group] \n","63 [aggrenox capsule] \n","73 [ace inhibition, intensified blood pressure co... \n","75 [exanta] \n","77 [intravenous magnesium] \n","91 [natural source vitamin e 400iu/day] \n","102 [ranibizumab, ranibizumab] \n","107 [alendronate, denosumab] \n","143 [combivent cfc-mdi, combivent respimat 20/100 ... \n","145 [insulin degludec/liraglutide, insulin deglude... \n","155 [nrp104] \n","156 [control arm (seq):, experimental arm (con):] \n","158 [bevacizumab] \n","168 [ly2963016, lantus, oams] \n","169 [insulin degludec/insulin aspart, biphasic ins... \n","177 [mesalamine] \n","183 [human insulin inhalation powder, injectable i... \n","184 [azli] \n","197 [prograf, lcp-tacro] \n","201 [peginesatide, epoetin alfa] \n","209 [pennsaid] \n","210 [generex oral-lyn™, regular human insulin] \n","218 [pentoxyphilline, tocopherol] \n","226 [chop regimen] \n","230 [folfox6 cycles 1-3, folfox6 cycles 4 onwards,... \n","243 [intensive management or tight control, standa... \n","289 [insulin degludec, insulin degludec] \n","301 [insulin degludec/insulin aspart] \n","307 [enteric-coated mycophenolate sodium (ec-mps)] \n","311 [pegylated interferon alpha-2b, pegylated inte... \n","323 [infliximab increased frequency, infliximab in... \n","338 [insulin degludec, insulin detemir, insulin as... \n","348 [lamictal in the treatment of post-herpetic ne... \n","373 [hycamtin, oral capsules] \n","388 [prograf-xl, prograf, mmf] \n","402 [biphasic insulin aspart, insulin detemir, ins... \n","414 [foster® 100/6 µg/unit dose, seretide accuhale... \n","420 [ranibizumab injection 0.3 mg, ranibizumab inj... \n","436 [premarin and provera] \n","441 [exenatide, insulin glargine] \n","445 [human insulin inhalation powder, insulin] \n","452 [idea-033] \n","466 [insulin lispro protamine suspension, insulin ... \n","498 [rituximab mabthera/rituxan, rituximab mabther... \n","501 [insulin glargine new formulation (hoe901), in... \n","\n"," diseases \\\n","1 [schizophrenia] \n","7 [anemia] \n","8 [age related macular degeneration, choroidal n... \n","11 [rheumatoid arthritis] \n","20 [diabetes mellitus, type 2] \n","51 [chronic hepatitis b] \n","63 [cerebrovascular accident] \n","73 [children, chronic renal failure, hypertension... \n","75 [thromboembolism] \n","77 [coronary artery disease, valvular heart disease] \n","91 [diabetes, myocardial infarction, cardiovascul... \n","102 [central retinal vein occlusion, macular edema] \n","107 [osteoporosis, osteopenia] \n","143 [pulmonary disease, chronic obstructive] \n","145 [diabetes, diabetes mellitus, type 2] \n","155 [attention deficit hyperactivity disorder, att... \n","156 [lung cancer] \n","158 [age related macular degeneration] \n","168 [diabetes mellitus, type 2] \n","169 [diabetes, diabetes mellitus, type 2] \n","177 [ulcerative colitis] \n","183 [diabetes mellitus, type 1] \n","184 [cystic fibrosis, pseudomonas aeruginosa] \n","197 [renal failure, tremors] \n","201 [chronic renal failure, chronic kidney disease... \n","209 [osteoarthritis of the knee, coagulopathy] \n","210 [diabetes mellitus] \n","218 [hepatitis c, chronic, liver fibrosis] \n","226 [lymphoma] \n","230 [colorectal carcinoma, liver metastases] \n","243 [psoriatic arthritis] \n","289 [diabetes, diabetes mellitus, type 2] \n","301 [diabetes, diabetes mellitus, type 2] \n","307 [renal transplantation] \n","311 [hepatitis b, chronic] \n","323 [rheumatoid arthritis] \n","338 [diabetes, diabetes mellitus, type 1] \n","348 [neuralgia, postherpetic] \n","373 [lung cancer, non-small cell] \n","388 [kidney transplantation, transplantation immun... \n","402 [diabetes, diabetes mellitus, type 2] \n","414 [chronic obstructive pulmonary disease] \n","420 [macular edema, retinal vein occlusion] \n","436 [systemic lupus erythematosus] \n","441 [type 2 diabetes] \n","445 [diabetes mellitus, type 2] \n","452 [joint pain, musculoskeletal pain, stiffness, ... \n","466 [diabetes mellitus, type 2] \n","498 [rheumatoid arthritis] \n","501 [type 2 diabetes mellitus] \n","\n"," icdcodes \\\n","1 [F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2... \n","7 [D53.2, D64.9, D46.4, D53.0, D53.9, D61.3, D61.9] \n","8 [H35.3130, H35.3230, H35.3110, H35.3120, H35.3... \n","11 [M06.9, M05.9, M06.08, M06.00, M06.011, M06.01... \n","20 [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42... \n","51 [B18.0, B18.1, B18.2, B18.8, B18.9] \n","63 [A52.05, I67.81, I67.89, I67.9, I67.841, I67.8... \n","73 [Y93.6A, Y92.110, Y92.111, Y92.112, Y92.113, Y... \n","75 [O88.22, O88.23, O88.211, O88.212, O88.213, O8... \n","77 [I25.10, I25.110, I25.119, I25.111, I25.118] \n","91 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","102 [H34.8132, H34.8131, H34.8111, H34.8121, H34.8... \n","107 [M81.6, Z82.62, Z13.820, M81.8, Z87.310, M81.0... \n","143 [J44.9, J44.1, J44.0] \n","145 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","155 [F90.2, F90.8, F90.9, F90.0, F90.1, F90.2, F90... \n","156 [C78.00, C78.01, C78.02, D14.30, D14.31, D14.3... \n","158 [H35.3130, H35.3230, H35.3110, H35.3120, H35.3... \n","168 [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42... \n","169 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","177 [K51.80, K51.813, K51.814, K51.90, K51.913, K5... \n","183 [E10.65, E10.9, E10.21, E10.36, E10.41, E10.42... \n","184 [E84.9, Z14.1, E84.0, E84.11, E84.8, E84.19, P... \n","197 [P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,... \n","201 [I13.11, I13.2, I12.9, N18.9, I12.0, D63.1, N1... \n","209 [M15.4, M15.0, M16.9, M17.9, M19.011, M19.012,... \n","210 [P70.2, O24.92, Z83.3, E10.65, E10.9, E11.65, ... \n","218 [B18.2, B18.0, B18.1, B18.8, B18.9, K71.3, K71... \n","226 [S33.110S, S33.111S, S33.120S, S33.121S, S33.1... \n","230 [C22.0, C22.1, C4A.9, C7B.1, D09.9, C4A.0, C4A... \n","243 [L40.52] \n","289 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","301 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","307 [N25.0, Q61.4, N23, N26.9, P96.0, Q60.0, Q60.1] \n","311 [B18.0, B18.1, B18.2, B18.8, B18.9] \n","323 [M06.9, M05.9, M06.08, M06.00, M06.011, M06.01... \n","338 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","348 [B02.22] \n","373 [C78.00, C78.01, C78.02, D14.30, D14.31, D14.3... \n","388 [N26.2, Q63.0, Q63.2, Z52.4, I75.81, N19, N20.0] \n","402 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","414 [J44.9, J44.1, J44.0] \n","420 [H59.033, H34.8130, H59.031, H59.032, H59.039,... \n","436 [M32.9, M32.0, M32.11, M32.12, M32.13, M32.14,... \n","441 [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42... \n","445 [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42... \n","452 [M25.50, M25.59, M25.541, M25.542, M25.549, M2... \n","466 [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42... \n","498 [M06.9, M05.9, M06.08, M06.00, M06.011, M06.01... \n","501 [E11.65, E11.9, E11.21, E11.36, E11.41, E11.42... \n","\n"," criteria label \\\n","1 \\n Inclusion Criteria:\\n\\n - ... 1 \n","7 \\n Inclusion Criteria:\\n\\n - ... 1 \n","8 \\n Patients who participated in this st... 1 \n","11 \\n Inclusion Criteria:\\n\\n - ... 1 \n","20 \\n Inclusion Criteria:\\n\\n - ... 0 \n","51 \\n Inclusion criteria:\\n\\n - ... 1 \n","63 \\n Inclusion Criteria:\\n\\n Patie... 0 \n","73 \\n Inclusion Criteria:\\n\\n - ... 1 \n","75 \\n Inclusion Criteria:\\n\\n - ... 0 \n","77 \\n Inclusion Criteria:\\n\\n - ... 0 \n","91 \\n Inclusion Criteria:\\n\\n - ... 1 \n","102 \\n Inclusion Criteria:\\n\\n 1. ... 1 \n","107 \\n Inclusion Criteria: - Patient is an ... 1 \n","143 \\n Inclusion criteria:\\n\\n 1. ... 1 \n","145 \\n Inclusion Criteria:\\n\\n - ... 1 \n","155 \\n Inclusion Criteria:\\n\\n - ... 1 \n","156 \\n DISEASE CHARACTERISTICS:\\n\\n ... 1 \n","158 \\n Inclusion Criteria:\\n\\n - ... 0 \n","168 \\n Inclusion Criteria:\\n\\n - ... 1 \n","169 \\n Inclusion Criteria:\\n\\n - ... 1 \n","177 \\n Inclusion Criteria:\\n\\n - ... 1 \n","183 \\n Inclusion Criteria:\\n\\n - ... 0 \n","184 \\n Inclusion Criteria:\\n\\n - ... 1 \n","197 \\n Inclusion Criteria:\\n\\n 1. ... 1 \n","201 \\n Inclusion Criteria\\n\\n 1. P... 1 \n","209 \\n Inclusion Criteria:\\n\\n - ... 1 \n","210 \\n Inclusion Criteria:\\n\\n - ... 1 \n","218 \\n Inclusion Criteria:\\n\\n - ... 0 \n","226 \\n DISEASE CHARACTERISTICS:\\n\\n ... 1 \n","230 \\n Inclusion Criteria:\\n\\n - ... 0 \n","243 \\n Inclusion Criteria:\\n\\n - ... 1 \n","289 \\n Inclusion Criteria:\\n\\n - ... 1 \n","301 \\n Inclusion Criteria:\\n\\n - ... 1 \n","307 \\n Inclusion criteria\\n\\n 1. R... 1 \n","311 \\n Inclusion Criteria:\\n\\n - ... 1 \n","323 \\n Inclusion Criteria:\\n\\n - ... 0 \n","338 \\n Inclusion Criteria:\\n\\n - ... 1 \n","348 \\n Inclusion Criteria:\\n\\n - ... 0 \n","373 \\n Inclusion criteria:\\n\\n - ... 0 \n","388 \\n Inclusion Criteria:\\n\\n - ... 1 \n","402 \\n Inclusion Criteria:\\n\\n - ... 1 \n","414 \\n Inclusion Criteria:\\n\\n 1. ... 1 \n","420 \\n Inclusion Criteria:\\n\\n - ... 1 \n","436 \\n Inclusion Criteria:\\n\\n - ... 0 \n","441 \\n Inclusion Criteria:\\n\\n - ... 1 \n","445 \\n Inclusion Criteria:\\n\\n - ... 0 \n","452 \\n Inclusion Criteria:\\n\\n - ... 1 \n","466 \\n Inclusion Criteria:\\n\\n - ... 1 \n","498 \\n Inclusion Criteria:\\n\\n - ... 1 \n","501 \\n Inclusion criteria :\\n\\n o Pa... 1 \n","\n"," drugbank_id smiles \\\n","1 [none, none, none] [none, none, none] \n","7 [DB09107, none] [none, none] \n","8 [DB01270] [none] \n","11 [DB01281, none, none] [none, none, none] \n","20 [none, DB00047] [none, none] \n","51 [none, none] [none, none] \n","63 [none] [none] \n","73 [none, none, none] [none, none, none] \n","75 [none] [none] \n","77 [none] [none] \n","91 [none] [none] \n","102 [DB01270, DB01270] [none, none] \n","107 [none, DB06643] [none, none] \n","143 [none, none, none] [none, none, none] \n","145 [DB06655, DB09564, DB06655] [none, none, none] \n","155 [none] [none] \n","156 [none, none] [none, none] \n","158 [DB00112] [none] \n","168 [none, none, none] [none, none, none] \n","169 [DB09564, none] [none, none] \n","177 [none] [none] \n","183 [none, none, DB00047] [none, none, none] \n","184 [none] [none] \n","197 [none, none] [none, none] \n","201 [DB08894, none] [none, none] \n","209 [none] [none] \n","210 [none, none] [none, none] \n","218 [none, DB11251] [none, none] \n","226 [none] [none] \n","230 [none, none, DB00112] [none, none, none] \n","243 [none, none] [none, none] \n","289 [DB09564, DB09564] [none, none] \n","301 [DB09564] [none] \n","307 [none] [none] \n","311 [none, none, none] [none, none, none] \n","323 [DB00065, DB00065, DB00065] [none, none, none] \n","338 [DB09564, DB01307, DB01306] [none, none, none] \n","348 [none] [none] \n","373 [none, none] [none, none] \n","388 [none, none, none] [none, none, none] \n","402 [none, DB01307, DB01306] [none, none, none] \n","414 [none, none] [none, none] \n","420 [DB01270, DB01270, none] [none, none, none] \n","436 [none] [none] \n","441 [DB01276, DB00047] [none, none] \n","445 [none, none] [none, none] \n","452 [none] [none] \n","466 [DB13700, DB00047] [none, none] \n","498 [DB00073, DB00073, DB00073] [none, none, none] \n","501 [none, none] [none, none] \n","\n"," chembl binding_db \\\n","1 [none, none, none] [none, none, none] \n","7 [none, none] [none, none] \n","8 [CHEMBL1201825] [none] \n","11 [CHEMBL1201823, none, none] [none, none, none] \n","20 [none, CHEMBL1201497] [none, none] \n","51 [none, none] [none, none] \n","63 [none] [none] \n","73 [none, none, none] [none, none, none] \n","75 [none] [none] \n","77 [none] [none] \n","91 [none] [none] \n","102 [CHEMBL1201825, CHEMBL1201825] [none, none] \n","107 [none, CHEMBL1237023] [none, none] \n","143 [none, none, none] [none, none, none] \n","145 [CHEMBL1201866, CHEMBL2107869, CHEMBL1201866] [none, none, none] \n","155 [none] [none] \n","156 [none, none] [none, none] \n","158 [CHEMBL1201583] [none] \n","168 [none, none, none] [none, none, none] \n","169 [CHEMBL2107869, none] [none, none] \n","177 [none] [none] \n","183 [none, none, CHEMBL1201497] [none, none, none] \n","184 [none] [none] \n","197 [none, none] [none, none] \n","201 [CHEMBL2107866, none] [none, none] \n","209 [none] [none] \n","210 [none, none] [none, none] \n","218 [none, none] [none, none] \n","226 [none] [none] \n","230 [none, none, CHEMBL1201583] [none, none, none] \n","243 [none, none] [none, none] \n","289 [CHEMBL2107869, CHEMBL2107869] [none, none] \n","301 [CHEMBL2107869] [none] \n","307 [none] [none] \n","311 [none, none, none] [none, none, none] \n","323 [CHEMBL1201581, CHEMBL1201581, CHEMBL1201581] [none, none, none] \n","338 [CHEMBL2107869, CHEMBL2104391, CHEMBL1201496] [none, none, none] \n","348 [none] [none] \n","373 [none, none] [none, none] \n","388 [none, none, none] [none, none, none] \n","402 [none, CHEMBL2104391, CHEMBL1201496] [none, none, none] \n","414 [none, none] [none, none] \n","420 [CHEMBL1201825, CHEMBL1201825, none] [none, none, none] \n","436 [none] [none] \n","441 [CHEMBL414357, CHEMBL1201497] [none, none] \n","445 [none, none] [none, none] \n","452 [none] [none] \n","466 [none, CHEMBL1201497] [none, none] \n","498 [CHEMBL1201576, CHEMBL1201576, CHEMBL1201576] [none, none, none] \n","501 [none, none] [none, none] \n","\n"," clean_name \\\n","1 [none, none, none] \n","7 [methoxy polyethylene glycol epoetin beta, none] \n","8 [ranibizumab] \n","11 [abatacept, none, none] \n","20 [none, insulin glargine] \n","51 [none, none] \n","63 [none] \n","73 [none, none, none] \n","75 [none] \n","77 [none] \n","91 [none] \n","102 [ranibizumab, ranibizumab] \n","107 [none, denosumab] \n","143 [none, none, none] \n","145 [liraglutide, insulin degludec, liraglutide] \n","155 [none] \n","156 [none, none] \n","158 [bevacizumab] \n","168 [none, none, none] \n","169 [insulin degludec, none] \n","177 [none] \n","183 [none, none, insulin glargine] \n","184 [none] \n","197 [none, none] \n","201 [peginesatide, none] \n","209 [none] \n","210 [none, none] \n","218 [none, tocopherol] \n","226 [none] \n","230 [none, none, bevacizumab] \n","243 [none, none] \n","289 [insulin degludec, insulin degludec] \n","301 [insulin degludec] \n","307 [none] \n","311 [none, none, none] \n","323 [infliximab, infliximab, infliximab] \n","338 [insulin degludec, insulin detemir, insulin as... \n","348 [none] \n","373 [none, none] \n","388 [none, none, none] \n","402 [none, insulin detemir, insulin aspart] \n","414 [none, none] \n","420 [ranibizumab, ranibizumab, none] \n","436 [none] \n","441 [exenatide, insulin glargine] \n","445 [none, none] \n","452 [none] \n","466 [protamine, insulin glargine] \n","498 [rituximab, rituximab, rituximab] \n","501 [none, none] \n","\n"," cuis \\\n","1 [none, none, none] \n","7 [C1328071, none] \n","8 [C1566537] \n","11 [C1619966, none, none] \n","20 [none, C0907402] \n","51 [none, none] \n","63 [none] \n","73 [none, none, none] \n","75 [none] \n","77 [none] \n","91 [none] \n","102 [C1566537, C1566537] \n","107 [none, C1690432] \n","143 [none, none, none] \n","145 [C1456408, C3491971, C1456408] \n","155 [none] \n","156 [none, none] \n","158 [C0796392] \n","168 [none, none, none] \n","169 [C3491971, none] \n","177 [none] \n","183 [none, none, C0907402] \n","184 [none] \n","197 [none, none] \n","201 [C3281388, none] \n","209 [none] \n","210 [none, none] \n","218 [none, C3255108] \n","226 [none] \n","230 [none, none, C0796392] \n","243 [none, none] \n","289 [C3491971, C3491971] \n","301 [C3491971] \n","307 [none] \n","311 [none, none, none] \n","323 [C5238750, C5238750, C5238750] \n","338 [C3491971, C0537270, C1708521] \n","348 [none] \n","373 [none, none] \n","388 [none, none, none] \n","402 [none, C0537270, C1708521] \n","414 [none, none] \n","420 [C1566537, C1566537, none] \n","436 [none] \n","441 [C0167117, C0907402] \n","445 [none, none] \n","452 [none] \n","466 [C0771747, C0907402] \n","498 [C0393022, C0393022, C0393022] \n","501 [none, none] \n","\n"," cui_vectors \\\n","1 [[], [], []] \n","7 [[], []] \n","8 [[-0.0198971997276426, 0.0166968261538703, -8.... \n","11 [[-0.0140159579976783, 0.0032744963090471, -2.... \n","20 [[], [-0.0088522591226082, 0.0038537407826502,... \n","51 [[], []] \n","63 [[]] \n","73 [[], [], []] \n","75 [[]] \n","77 [[]] \n","91 [[]] \n","102 [[-0.0198971997276426, 0.0166968261538703, -8.... \n","107 [[], [-0.0159405395376339, 0.0027208530185431,... \n","143 [[], [], []] \n","145 [[-0.0140124803475009, 0.0037774540637431, 2.6... \n","155 [[]] \n","156 [[], []] \n","158 [[-0.0341094153430391, 0.0219485207498133, -1.... \n","168 [[], [], []] \n","169 [[], []] \n","177 [[]] \n","183 [[], [], [-0.0088522591226082, 0.0038537407826... \n","184 [[]] \n","197 [[], []] \n","201 [[], []] \n","209 [[]] \n","210 [[], []] \n","218 [[], []] \n","226 [[]] \n","230 [[], [], [-0.0341094153430391, 0.0219485207498... \n","243 [[], []] \n","289 [[], []] \n","301 [[]] \n","307 [[]] \n","311 [[], [], []] \n","323 [[], [], []] \n","338 [[], [-0.0051244166680686, 0.0016540584969475,... \n","348 [[]] \n","373 [[], []] \n","388 [[], [], []] \n","402 [[], [-0.0051244166680686, 0.0016540584969475,... \n","414 [[], []] \n","420 [[-0.0198971997276426, 0.0166968261538703, -8.... \n","436 [[]] \n","441 [[-0.0123086924602175, 0.0042105967441839, -3.... \n","445 [[], []] \n","452 [[]] \n","466 [[], [-0.0088522591226082, 0.0038537407826502,... \n","498 [[-0.0339438843118617, 0.0180569791652772, -3.... \n","501 [[], []] \n","\n"," primary_cui_vector \n","1 [] \n","7 [] \n","8 V1 -0.019897\n","V2 0.016697\n","V3 ... \n","11 V1 -0.014016\n","V2 0.003274\n","V3 ... \n","20 V1 -0.008852\n","V2 0.003854\n","V3 ... \n","51 [] \n","63 [] \n","73 [] \n","75 [] \n","77 [] \n","91 [] \n","102 V1 -0.019897\n","V2 0.016697\n","V3 ... \n","107 V1 -0.015941\n","V2 0.002721\n","V3 ... \n","143 [] \n","145 V1 -0.014012\n","V2 0.003777\n","V3 ... \n","155 [] \n","156 [] \n","158 V1 -0.034109\n","V2 0.021949\n","V3 ... \n","168 [] \n","169 [] \n","177 [] \n","183 V1 -0.008852\n","V2 0.003854\n","V3 ... \n","184 [] \n","197 [] \n","201 [] \n","209 [] \n","210 [] \n","218 [] \n","226 [] \n","230 V1 -0.034109\n","V2 0.021949\n","V3 ... \n","243 [] \n","289 [] \n","301 [] \n","307 [] \n","311 [] \n","323 [] \n","338 V1 -0.005124\n","V2 0.001654\n","V3 ... \n","348 [] \n","373 [] \n","388 [] \n","402 V1 -0.005124\n","V2 0.001654\n","V3 ... \n","414 [] \n","420 V1 -0.019897\n","V2 0.016697\n","V3 ... \n","436 [] \n","441 V1 -0.012309\n","V2 0.004211\n","V3 ... \n","445 [] \n","452 [] \n","466 V1 -0.008852\n","V2 0.003854\n","V3 ... \n","498 V1 -0.033944\n","V2 0.018057\n","V3 ... \n","501 [] "],"text/html":["\n"," <div id=\"df-6cd79391-c807-4181-8abe-c43b25f19170\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>nctid</th>\n"," <th>n_participants</th>\n"," <th>drugs</th>\n"," <th>diseases</th>\n"," <th>icdcodes</th>\n"," <th>criteria</th>\n"," <th>label</th>\n"," <th>drugbank_id</th>\n"," <th>smiles</th>\n"," <th>chembl</th>\n"," <th>binding_db</th>\n"," <th>clean_name</th>\n"," <th>cuis</th>\n"," <th>cui_vectors</th>\n"," <th>primary_cui_vector</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>1</th>\n"," <td>NCT01626859</td>\n"," <td>152.0</td>\n"," <td>[mp-214 low dose, mp-214 middle dose, mp-214 h...</td>\n"," <td>[schizophrenia]</td>\n"," <td>[F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>NCT00605293</td>\n"," <td>578.0</td>\n"," <td>[methoxy polyethylene glycol-epoetin beta, epo...</td>\n"," <td>[anemia]</td>\n"," <td>[D53.2, D64.9, D46.4, D53.0, D53.9, D61.3, D61.9]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB09107, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[methoxy polyethylene glycol epoetin beta, none]</td>\n"," <td>[C1328071, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>NCT00331864</td>\n"," <td>4189.0</td>\n"," <td>[ranibizumab]</td>\n"," <td>[age related macular degeneration, choroidal n...</td>\n"," <td>[H35.3130, H35.3230, H35.3110, H35.3120, H35.3...</td>\n"," <td>\\n Patients who participated in this st...</td>\n"," <td>1</td>\n"," <td>[DB01270]</td>\n"," <td>[none]</td>\n"," <td>[CHEMBL1201825]</td>\n"," <td>[none]</td>\n"," <td>[ranibizumab]</td>\n"," <td>[C1566537]</td>\n"," <td>[[-0.0198971997276426, 0.0166968261538703, -8....</td>\n"," <td>V1 -0.019897\n","V2 0.016697\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>11</th>\n"," <td>NCT00124982</td>\n"," <td>27188.0</td>\n"," <td>[abatacept, non-biologic disease modifying ant...</td>\n"," <td>[rheumatoid arthritis]</td>\n"," <td>[M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB01281, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[CHEMBL1201823, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[abatacept, none, none]</td>\n"," <td>[C1619966, none, none]</td>\n"," <td>[[-0.0140159579976783, 0.0032744963090471, -2....</td>\n"," <td>V1 -0.014016\n","V2 0.003274\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>20</th>\n"," <td>NCT00437112</td>\n"," <td>NaN</td>\n"," <td>[human insulin inhalation powder, insulin glar...</td>\n"," <td>[diabetes mellitus, type 2]</td>\n"," <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none, DB00047]</td>\n"," <td>[none, none]</td>\n"," <td>[none, CHEMBL1201497]</td>\n"," <td>[none, none]</td>\n"," <td>[none, insulin glargine]</td>\n"," <td>[none, C0907402]</td>\n"," <td>[[], [-0.0088522591226082, 0.0038537407826502,...</td>\n"," <td>V1 -0.008852\n","V2 0.003854\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>51</th>\n"," <td>NCT00316719</td>\n"," <td>928.0</td>\n"," <td>[lam group, adv group]</td>\n"," <td>[chronic hepatitis b]</td>\n"," <td>[B18.0, B18.1, B18.2, B18.8, B18.9]</td>\n"," <td>\\n Inclusion criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>63</th>\n"," <td>NCT00311402</td>\n"," <td>12910.0</td>\n"," <td>[aggrenox capsule]</td>\n"," <td>[cerebrovascular accident]</td>\n"," <td>[A52.05, I67.81, I67.89, I67.9, I67.841, I67.8...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n Patie...</td>\n"," <td>0</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>73</th>\n"," <td>NCT00221845</td>\n"," <td>NaN</td>\n"," <td>[ace inhibition, intensified blood pressure co...</td>\n"," <td>[children, chronic renal failure, hypertension...</td>\n"," <td>[Y93.6A, Y92.110, Y92.111, Y92.112, Y92.113, Y...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>75</th>\n"," <td>NCT00206089</td>\n"," <td>NaN</td>\n"," <td>[exanta]</td>\n"," <td>[thromboembolism]</td>\n"," <td>[O88.22, O88.23, O88.211, O88.212, O88.213, O8...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>77</th>\n"," <td>NCT00250965</td>\n"," <td>NaN</td>\n"," <td>[intravenous magnesium]</td>\n"," <td>[coronary artery disease, valvular heart disease]</td>\n"," <td>[I25.10, I25.110, I25.119, I25.111, I25.118]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>91</th>\n"," <td>NCT00220831</td>\n"," <td>NaN</td>\n"," <td>[natural source vitamin e 400iu/day]</td>\n"," <td>[diabetes, myocardial infarction, cardiovascul...</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>102</th>\n"," <td>NCT00567697</td>\n"," <td>NaN</td>\n"," <td>[ranibizumab, ranibizumab]</td>\n"," <td>[central retinal vein occlusion, macular edema]</td>\n"," <td>[H34.8132, H34.8131, H34.8111, H34.8121, H34.8...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n 1. ...</td>\n"," <td>1</td>\n"," <td>[DB01270, DB01270]</td>\n"," <td>[none, none]</td>\n"," <td>[CHEMBL1201825, CHEMBL1201825]</td>\n"," <td>[none, none]</td>\n"," <td>[ranibizumab, ranibizumab]</td>\n"," <td>[C1566537, C1566537]</td>\n"," <td>[[-0.0198971997276426, 0.0166968261538703, -8....</td>\n"," <td>V1 -0.019897\n","V2 0.016697\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>107</th>\n"," <td>NCT00330460</td>\n"," <td>5745.0</td>\n"," <td>[alendronate, denosumab]</td>\n"," <td>[osteoporosis, osteopenia]</td>\n"," <td>[M81.6, Z82.62, Z13.820, M81.8, Z87.310, M81.0...</td>\n"," <td>\\n Inclusion Criteria: - Patient is an ...</td>\n"," <td>1</td>\n"," <td>[none, DB06643]</td>\n"," <td>[none, none]</td>\n"," <td>[none, CHEMBL1237023]</td>\n"," <td>[none, none]</td>\n"," <td>[none, denosumab]</td>\n"," <td>[none, C1690432]</td>\n"," <td>[[], [-0.0159405395376339, 0.0027208530185431,...</td>\n"," <td>V1 -0.015941\n","V2 0.002721\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>143</th>\n"," <td>NCT01019694</td>\n"," <td>16447.0</td>\n"," <td>[combivent cfc-mdi, combivent respimat 20/100 ...</td>\n"," <td>[pulmonary disease, chronic obstructive]</td>\n"," <td>[J44.9, J44.1, J44.0]</td>\n"," <td>\\n Inclusion criteria:\\n\\n 1. ...</td>\n"," <td>1</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>145</th>\n"," <td>NCT01336023</td>\n"," <td>6445.0</td>\n"," <td>[insulin degludec/liraglutide, insulin deglude...</td>\n"," <td>[diabetes, diabetes mellitus, type 2]</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB06655, DB09564, DB06655]</td>\n"," <td>[none, none, none]</td>\n"," <td>[CHEMBL1201866, CHEMBL2107869, CHEMBL1201866]</td>\n"," <td>[none, none, none]</td>\n"," <td>[liraglutide, insulin degludec, liraglutide]</td>\n"," <td>[C1456408, C3491971, C1456408]</td>\n"," <td>[[-0.0140124803475009, 0.0037774540637431, 2.6...</td>\n"," <td>V1 -0.014012\n","V2 0.003777\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>155</th>\n"," <td>NCT00334880</td>\n"," <td>NaN</td>\n"," <td>[nrp104]</td>\n"," <td>[attention deficit hyperactivity disorder, att...</td>\n"," <td>[F90.2, F90.8, F90.9, F90.0, F90.1, F90.2, F90...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>156</th>\n"," <td>NCT00309972</td>\n"," <td>NaN</td>\n"," <td>[control arm (seq):, experimental arm (con):]</td>\n"," <td>[lung cancer]</td>\n"," <td>[C78.00, C78.01, C78.02, D14.30, D14.31, D14.3...</td>\n"," <td>\\n DISEASE CHARACTERISTICS:\\n\\n ...</td>\n"," <td>1</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>158</th>\n"," <td>NCT01327222</td>\n"," <td>NaN</td>\n"," <td>[bevacizumab]</td>\n"," <td>[age related macular degeneration]</td>\n"," <td>[H35.3130, H35.3230, H35.3110, H35.3120, H35.3...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[DB00112]</td>\n"," <td>[none]</td>\n"," <td>[CHEMBL1201583]</td>\n"," <td>[none]</td>\n"," <td>[bevacizumab]</td>\n"," <td>[C0796392]</td>\n"," <td>[[-0.0341094153430391, 0.0219485207498133, -1....</td>\n"," <td>V1 -0.034109\n","V2 0.021949\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>168</th>\n"," <td>NCT01421459</td>\n"," <td>10357.0</td>\n"," <td>[ly2963016, lantus, oams]</td>\n"," <td>[diabetes mellitus, type 2]</td>\n"," <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>169</th>\n"," <td>NCT01059812</td>\n"," <td>2080.0</td>\n"," <td>[insulin degludec/insulin aspart, biphasic ins...</td>\n"," <td>[diabetes, diabetes mellitus, type 2]</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB09564, none]</td>\n"," <td>[none, none]</td>\n"," <td>[CHEMBL2107869, none]</td>\n"," <td>[none, none]</td>\n"," <td>[insulin degludec, none]</td>\n"," <td>[C3491971, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>177</th>\n"," <td>NCT00350415</td>\n"," <td>NaN</td>\n"," <td>[mesalamine]</td>\n"," <td>[ulcerative colitis]</td>\n"," <td>[K51.80, K51.813, K51.814, K51.90, K51.913, K5...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>183</th>\n"," <td>NCT00127634</td>\n"," <td>NaN</td>\n"," <td>[human insulin inhalation powder, injectable i...</td>\n"," <td>[diabetes mellitus, type 1]</td>\n"," <td>[E10.65, E10.9, E10.21, E10.36, E10.41, E10.42...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none, none, DB00047]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, CHEMBL1201497]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, insulin glargine]</td>\n"," <td>[none, none, C0907402]</td>\n"," <td>[[], [], [-0.0088522591226082, 0.0038537407826...</td>\n"," <td>V1 -0.008852\n","V2 0.003854\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>184</th>\n"," <td>NCT01404234</td>\n"," <td>601.0</td>\n"," <td>[azli]</td>\n"," <td>[cystic fibrosis, pseudomonas aeruginosa]</td>\n"," <td>[E84.9, Z14.1, E84.0, E84.11, E84.8, E84.19, P...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>197</th>\n"," <td>NCT01438710</td>\n"," <td>38.0</td>\n"," <td>[prograf, lcp-tacro]</td>\n"," <td>[renal failure, tremors]</td>\n"," <td>[P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n 1. ...</td>\n"," <td>1</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>201</th>\n"," <td>NCT00597753</td>\n"," <td>2379.0</td>\n"," <td>[peginesatide, epoetin alfa]</td>\n"," <td>[chronic renal failure, chronic kidney disease...</td>\n"," <td>[I13.11, I13.2, I12.9, N18.9, I12.0, D63.1, N1...</td>\n"," <td>\\n Inclusion Criteria\\n\\n 1. P...</td>\n"," <td>1</td>\n"," <td>[DB08894, none]</td>\n"," <td>[none, none]</td>\n"," <td>[CHEMBL2107866, none]</td>\n"," <td>[none, none]</td>\n"," <td>[peginesatide, none]</td>\n"," <td>[C3281388, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>209</th>\n"," <td>NCT01511939</td>\n"," <td>78.0</td>\n"," <td>[pennsaid]</td>\n"," <td>[osteoarthritis of the knee, coagulopathy]</td>\n"," <td>[M15.4, M15.0, M16.9, M17.9, M19.011, M19.012,...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>210</th>\n"," <td>NCT00668850</td>\n"," <td>NaN</td>\n"," <td>[generex oral-lyn™, regular human insulin]</td>\n"," <td>[diabetes mellitus]</td>\n"," <td>[P70.2, O24.92, Z83.3, E10.65, E10.9, E11.65, ...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>218</th>\n"," <td>NCT00119119</td>\n"," <td>NaN</td>\n"," <td>[pentoxyphilline, tocopherol]</td>\n"," <td>[hepatitis c, chronic, liver fibrosis]</td>\n"," <td>[B18.2, B18.0, B18.1, B18.8, B18.9, K71.3, K71...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none, DB11251]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, tocopherol]</td>\n"," <td>[none, C3255108]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>226</th>\n"," <td>NCT00064116</td>\n"," <td>NaN</td>\n"," <td>[chop regimen]</td>\n"," <td>[lymphoma]</td>\n"," <td>[S33.110S, S33.111S, S33.120S, S33.121S, S33.1...</td>\n"," <td>\\n DISEASE CHARACTERISTICS:\\n\\n ...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>230</th>\n"," <td>NCT00735241</td>\n"," <td>NaN</td>\n"," <td>[folfox6 cycles 1-3, folfox6 cycles 4 onwards,...</td>\n"," <td>[colorectal carcinoma, liver metastases]</td>\n"," <td>[C22.0, C22.1, C4A.9, C7B.1, D09.9, C4A.0, C4A...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none, none, DB00112]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, CHEMBL1201583]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, bevacizumab]</td>\n"," <td>[none, none, C0796392]</td>\n"," <td>[[], [], [-0.0341094153430391, 0.0219485207498...</td>\n"," <td>V1 -0.034109\n","V2 0.021949\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>243</th>\n"," <td>NCT01106079</td>\n"," <td>NaN</td>\n"," <td>[intensive management or tight control, standa...</td>\n"," <td>[psoriatic arthritis]</td>\n"," <td>[L40.52]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>289</th>\n"," <td>NCT01326026</td>\n"," <td>1100.0</td>\n"," <td>[insulin degludec, insulin degludec]</td>\n"," <td>[diabetes, diabetes mellitus, type 2]</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB09564, DB09564]</td>\n"," <td>[none, none]</td>\n"," <td>[CHEMBL2107869, CHEMBL2107869]</td>\n"," <td>[none, none]</td>\n"," <td>[insulin degludec, insulin degludec]</td>\n"," <td>[C3491971, C3491971]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>301</th>\n"," <td>NCT01365507</td>\n"," <td>1372.0</td>\n"," <td>[insulin degludec/insulin aspart]</td>\n"," <td>[diabetes, diabetes mellitus, type 2]</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB09564]</td>\n"," <td>[none]</td>\n"," <td>[CHEMBL2107869]</td>\n"," <td>[none]</td>\n"," <td>[insulin degludec]</td>\n"," <td>[C3491971]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>307</th>\n"," <td>NCT00369278</td>\n"," <td>554.0</td>\n"," <td>[enteric-coated mycophenolate sodium (ec-mps)]</td>\n"," <td>[renal transplantation]</td>\n"," <td>[N25.0, Q61.4, N23, N26.9, P96.0, Q60.0, Q60.1]</td>\n"," <td>\\n Inclusion criteria\\n\\n 1. R...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>311</th>\n"," <td>NCT00536263</td>\n"," <td>6874.0</td>\n"," <td>[pegylated interferon alpha-2b, pegylated inte...</td>\n"," <td>[hepatitis b, chronic]</td>\n"," <td>[B18.0, B18.1, B18.2, B18.8, B18.9]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>323</th>\n"," <td>NCT00394589</td>\n"," <td>17.0</td>\n"," <td>[infliximab increased frequency, infliximab in...</td>\n"," <td>[rheumatoid arthritis]</td>\n"," <td>[M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[DB00065, DB00065, DB00065]</td>\n"," <td>[none, none, none]</td>\n"," <td>[CHEMBL1201581, CHEMBL1201581, CHEMBL1201581]</td>\n"," <td>[none, none, none]</td>\n"," <td>[infliximab, infliximab, infliximab]</td>\n"," <td>[C5238750, C5238750, C5238750]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>338</th>\n"," <td>NCT01074268</td>\n"," <td>4974.0</td>\n"," <td>[insulin degludec, insulin detemir, insulin as...</td>\n"," <td>[diabetes, diabetes mellitus, type 1]</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB09564, DB01307, DB01306]</td>\n"," <td>[none, none, none]</td>\n"," <td>[CHEMBL2107869, CHEMBL2104391, CHEMBL1201496]</td>\n"," <td>[none, none, none]</td>\n"," <td>[insulin degludec, insulin detemir, insulin as...</td>\n"," <td>[C3491971, C0537270, C1708521]</td>\n"," <td>[[], [-0.0051244166680686, 0.0016540584969475,...</td>\n"," <td>V1 -0.005124\n","V2 0.001654\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>348</th>\n"," <td>NCT00295776</td>\n"," <td>NaN</td>\n"," <td>[lamictal in the treatment of post-herpetic ne...</td>\n"," <td>[neuralgia, postherpetic]</td>\n"," <td>[B02.22]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>373</th>\n"," <td>NCT00390806</td>\n"," <td>13169.0</td>\n"," <td>[hycamtin, oral capsules]</td>\n"," <td>[lung cancer, non-small cell]</td>\n"," <td>[C78.00, C78.01, C78.02, D14.30, D14.31, D14.3...</td>\n"," <td>\\n Inclusion criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>388</th>\n"," <td>NCT00717678</td>\n"," <td>NaN</td>\n"," <td>[prograf-xl, prograf, mmf]</td>\n"," <td>[kidney transplantation, transplantation immun...</td>\n"," <td>[N26.2, Q63.0, Q63.2, Z52.4, I75.81, N19, N20.0]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>402</th>\n"," <td>NCT00184600</td>\n"," <td>10620.0</td>\n"," <td>[biphasic insulin aspart, insulin detemir, ins...</td>\n"," <td>[diabetes, diabetes mellitus, type 2]</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, DB01307, DB01306]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, CHEMBL2104391, CHEMBL1201496]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, insulin detemir, insulin aspart]</td>\n"," <td>[none, C0537270, C1708521]</td>\n"," <td>[[], [-0.0051244166680686, 0.0016540584969475,...</td>\n"," <td>V1 -0.005124\n","V2 0.001654\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>414</th>\n"," <td>NCT01245569</td>\n"," <td>NaN</td>\n"," <td>[foster® 100/6 µg/unit dose, seretide accuhale...</td>\n"," <td>[chronic obstructive pulmonary disease]</td>\n"," <td>[J44.9, J44.1, J44.0]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n 1. ...</td>\n"," <td>1</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>420</th>\n"," <td>NCT00486018</td>\n"," <td>2769.0</td>\n"," <td>[ranibizumab injection 0.3 mg, ranibizumab inj...</td>\n"," <td>[macular edema, retinal vein occlusion]</td>\n"," <td>[H59.033, H34.8130, H59.031, H59.032, H59.039,...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB01270, DB01270, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[CHEMBL1201825, CHEMBL1201825, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[ranibizumab, ranibizumab, none]</td>\n"," <td>[C1566537, C1566537, none]</td>\n"," <td>[[-0.0198971997276426, 0.0166968261538703, -8....</td>\n"," <td>V1 -0.019897\n","V2 0.016697\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>436</th>\n"," <td>NCT00000419</td>\n"," <td>NaN</td>\n"," <td>[premarin and provera]</td>\n"," <td>[systemic lupus erythematosus]</td>\n"," <td>[M32.9, M32.0, M32.11, M32.12, M32.13, M32.14,...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>441</th>\n"," <td>NCT00360334</td>\n"," <td>6327.0</td>\n"," <td>[exenatide, insulin glargine]</td>\n"," <td>[type 2 diabetes]</td>\n"," <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB01276, DB00047]</td>\n"," <td>[none, none]</td>\n"," <td>[CHEMBL414357, CHEMBL1201497]</td>\n"," <td>[none, none]</td>\n"," <td>[exenatide, insulin glargine]</td>\n"," <td>[C0167117, C0907402]</td>\n"," <td>[[-0.0123086924602175, 0.0042105967441839, -3....</td>\n"," <td>V1 -0.012309\n","V2 0.004211\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>445</th>\n"," <td>NCT00330473</td>\n"," <td>NaN</td>\n"," <td>[human insulin inhalation powder, insulin]</td>\n"," <td>[diabetes mellitus, type 2]</td>\n"," <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>452</th>\n"," <td>NCT00372333</td>\n"," <td>NaN</td>\n"," <td>[idea-033]</td>\n"," <td>[joint pain, musculoskeletal pain, stiffness, ...</td>\n"," <td>[M25.50, M25.59, M25.541, M25.542, M25.549, M2...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[none]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," </tr>\n"," <tr>\n"," <th>466</th>\n"," <td>NCT00510952</td>\n"," <td>5016.0</td>\n"," <td>[insulin lispro protamine suspension, insulin ...</td>\n"," <td>[diabetes mellitus, type 2]</td>\n"," <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB13700, DB00047]</td>\n"," <td>[none, none]</td>\n"," <td>[none, CHEMBL1201497]</td>\n"," <td>[none, none]</td>\n"," <td>[protamine, insulin glargine]</td>\n"," <td>[C0771747, C0907402]</td>\n"," <td>[[], [-0.0088522591226082, 0.0038537407826502,...</td>\n"," <td>V1 -0.008852\n","V2 0.003854\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>498</th>\n"," <td>NCT00422383</td>\n"," <td>11992.0</td>\n"," <td>[rituximab mabthera/rituxan, rituximab mabther...</td>\n"," <td>[rheumatoid arthritis]</td>\n"," <td>[M06.9, M05.9, M06.08, M06.00, M06.011, M06.01...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB00073, DB00073, DB00073]</td>\n"," <td>[none, none, none]</td>\n"," <td>[CHEMBL1201576, CHEMBL1201576, CHEMBL1201576]</td>\n"," <td>[none, none, none]</td>\n"," <td>[rituximab, rituximab, rituximab]</td>\n"," <td>[C0393022, C0393022, C0393022]</td>\n"," <td>[[-0.0339438843118617, 0.0180569791652772, -3....</td>\n"," <td>V1 -0.033944\n","V2 0.018057\n","V3 ...</td>\n"," </tr>\n"," <tr>\n"," <th>501</th>\n"," <td>NCT01689142</td>\n"," <td>NaN</td>\n"," <td>[insulin glargine new formulation (hoe901), in...</td>\n"," <td>[type 2 diabetes mellitus]</td>\n"," <td>[E11.65, E11.9, E11.21, E11.36, E11.41, E11.42...</td>\n"," <td>\\n Inclusion criteria :\\n\\n o Pa...</td>\n"," <td>1</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[none, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6cd79391-c807-4181-8abe-c43b25f19170')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-6cd79391-c807-4181-8abe-c43b25f19170 button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-6cd79391-c807-4181-8abe-c43b25f19170');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":71}]},{"cell_type":"code","source":["# Debug code to figure out which unique tuples were matched\n","all_data = pd.concat([train, test, val])\n","\n","def extract_info(r):\n"," assert len(r.drugs) == len(r.drugbank_id), \"%d not equal to %d\" % (len(r.drugs), len(r.drugbank_id))\n"," out = []\n"," for i in range(len(r.drugs)):\n"," out.append((r.drugs[i], r.drugbank_id[i], r.smiles[i], r.chembl[i], r.binding_db[i]))\n"," return out\n","\n","def flatten(lists):\n"," out = []\n"," for l in lists:\n"," out += l\n"," return out\n","\n","pd.DataFrame(set(flatten(all_data.apply(extract_info, axis=1))), columns=[\"drug\", \"drugbank_id\", \"smiles\", \"chembl\", \"binding_db\"]).to_pickle(deep_learning_dir + '/AChander_Targets/unique_drug_tuple.pickle')"],"metadata":{"id":"bXw2yyzlHf3J"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["smiles_embedding = pd.read_pickle(deep_learning_dir + '/embeddings/SMILES_embedding.pkl')"],"metadata":{"id":"U7eV2fVh_G8c"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train['embeddings'] = train['smiles'].map(lambda smiles : list(map(lambda smile : default(smiles_embedding, smile, []), smiles)))\n","test['embeddings'] = test['smiles'].map(lambda smiles : list(map(lambda smile : default(smiles_embedding, smile, []), smiles)))\n","val['embeddings'] = val['smiles'].map(lambda smiles : list(map(lambda smile : default(smiles_embedding, smile, []), smiles)))"],"metadata":{"id":"jGczJrJn_ZIB"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def pick_one_embedding(embeddings):\n"," for embedding in embeddings:\n"," if len(embedding) != 0:\n"," return embedding\n"," return []\n","\n","train['embedding'] = train['embeddings'].map(pick_one_embedding)\n","test['embedding'] = test['embeddings'].map(pick_one_embedding)\n","val['embedding'] = val['embeddings'].map(pick_one_embedding)"],"metadata":{"id":"QOiUTWNFBk2h"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["def pick_one_embedding(embeddings):\n"," for i in range(len(embeddings)):\n"," if len(embeddings[i]) != 0:\n"," return i\n"," return -1\n","\n","train['embedding_id'] = train['embeddings'].map(pick_one_embedding)\n","test['embedding_id'] = test['embeddings'].map(pick_one_embedding)\n","val['embedding_id'] = val['embeddings'].map(pick_one_embedding)"],"metadata":{"id":"wCuVrAuaeXfa"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train.columns"],"metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"dji3CKv7ENe5","executionInfo":{"status":"ok","timestamp":1651456613916,"user_tz":240,"elapsed":288,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"4d3ffea6-802c-46a5-d51f-79109636a08c"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['nctid', 'n_participants', 'drugs', 'diseases', 'icdcodes', 'criteria',\n"," 'label', 'drugbank_id', 'smiles', 'chembl', 'binding_db', 'clean_name',\n"," 'cuis', 'cui_vectors', 'primary_cui_vector', 'embeddings', 'embedding',\n"," 'embedding_id'],\n"," dtype='object')"]},"metadata":{},"execution_count":175}]},{"cell_type":"code","source":["ctid_set = set()\n","ctid_embeddings = []\n","\n","def add_to_ctid_embeddings(row):\n"," if row.nctid in ctid_set:\n"," return\n"," if row.embedding_id == -1:\n"," ctid_embeddings.append((row.nctid, np.zeros(1024), \"none\"))\n"," else:\n"," ctid_embeddings.append((row.nctid, row.embeddings[row.embedding_id], row.drugs[row.embedding_id]))\n"," ctid_set.add(row.nctid)\n","\n","train.apply(add_to_ctid_embeddings, axis=1)\n","test.apply(add_to_ctid_embeddings, axis=1)\n","val.apply(add_to_ctid_embeddings, axis=1)\n","\n","ctid_embeddings_df = pd.DataFrame(ctid_embeddings, columns=[\"nctid\", \"embedding\", \"drug\"])"],"metadata":{"id":"GdGdkSK9D9s0"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["train"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":30000},"id":"hhILDzZkfIVZ","executionInfo":{"status":"ok","timestamp":1651456642615,"user_tz":240,"elapsed":375,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"0623f508-b28b-4db1-e42b-8c5a32a1d739"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" nctid n_participants \\\n","0 NCT00475085 944.0 \n","1 NCT01626859 152.0 \n","2 NCT00203957 NaN \n","3 NCT00169832 NaN \n","4 NCT01249352 NaN \n","... ... ... \n","3089 NCT01015118 12294.0 \n","3090 NCT01127217 NaN \n","3091 NCT01187953 1086.0 \n","3092 NCT01364649 1186.0 \n","3093 NCT01097018 NaN \n","\n"," drugs \\\n","0 [aprepitant, dexamethasone, granisetron hydroc... \n","1 [mp-214 low dose, mp-214 middle dose, mp-214 h... \n","2 [istradefylline, istradefylline] \n","3 [rosiglitazone or placebo] \n","4 [nimotuzumab, cisplatin, fluorouracil] \n","... ... \n","3089 [placebo, paclitaxel, bibf 1120, carboplatin, ... \n","3090 [amlodipine/losartan, amlodipine] \n","3091 [prograf (tacrolimus), lcp-tacro] \n","3092 [vortioxetine, escitalopram, placebo] \n","3093 [capecitabine, perifosine, placebo] \n","\n"," diseases \\\n","0 [nausea] \n","1 [schizophrenia] \n","2 [parkinsons disease] \n","3 [diabetes, coronary artery bypass grafting] \n","4 [esophageal cancer, adenocarcinoma] \n","... ... \n","3089 [ovarian neoplasms, peritoneal neoplasms] \n","3090 [hypertension] \n","3091 [renal failure] \n","3092 [treatment outcome] \n","3093 [colorectal cancer] \n","\n"," icdcodes \\\n","0 [R11.0, R11.11, R11.2] \n","1 [F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2... \n","2 [G20] \n","3 [E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E... \n","4 [K22.2, K22.81, Q39.4, P78.83, I85.00, I85.01,... \n","... ... \n","3089 [C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17... \n","3090 [I15.0, I97.3, K76.6, P29.2, G93.2, H40.053, I10] \n","3091 [P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,... \n","3092 [Z01.12, Z92.89, Z75.2, M27.59, Z53.9, Z91.19,... \n","3093 [C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17.2] \n","\n"," criteria label \\\n","0 \\n Inclusion criteria:\\n\\n - ... 1 \n","1 \\n Inclusion Criteria:\\n\\n - ... 1 \n","2 \\n Inclusion Criteria:\\n\\n - ... 1 \n","3 \\n Inclusion Criteria:\\n\\n AT SC... 0 \n","4 \\n Inclusion Criteria:\\n\\n 1. ... 1 \n","... ... ... \n","3089 \\n Inclusion criteria:\\n\\n - ... 1 \n","3090 \\n Inclusion Criteria:\\n\\n - ... 1 \n","3091 \\n Inclusion Criteria:\\n\\n 1. ... 1 \n","3092 \\n Inclusion Criteria:\\n\\n 1. ... 1 \n","3093 \\n Inclusion Criteria:\\n\\n - ... 0 \n","\n"," drugbank_id \\\n","0 [DB00673, DB14649, DB00889, DB00377, DB00433, ... \n","1 [none, none, none] \n","2 [DB11757, DB11757] \n","3 [placebo] \n","4 [DB06192, DB00515, DB00544] \n","... ... \n","3089 [placebo, DB01229, none, DB00958, DB01229, DB0... \n","3090 [DB00381, DB00381] \n","3091 [DB00864, none] \n","3092 [DB09068, DB01175, placebo] \n","3093 [DB01101, DB06641, placebo] \n","\n"," smiles \\\n","0 [C[C@@H](O[C@H]1OCCN(CC2=NNC(=O)N2)[C@H]1C1=CC... \n","1 [none, none, none] \n","2 [[H]\\C(=C(\\[H])C1=CC(OC)=C(OC)C=C1)C1=NC2=C(N1... \n","3 [placebo] \n","4 [none, [H][N]([H])([H])[Pt](Cl)(Cl)[N]([H])([H... \n","... ... \n","3089 [placebo, [H][C@]12[C@H](OC(=O)C3=CC=CC=C3)[C@... \n","3090 [CCOC(=O)C1=C(COCCN)NC(C)=C(C1C1=CC=CC=C1Cl)C(... \n","3091 [CO[C@@H]1C[C@@H](CC[C@H]1O)\\C=C(/C)[C@H]1OC(=... \n","3092 [CC1=CC=C(SC2=CC=CC=C2N2CCNCC2)C(C)=C1, CN(C)C... \n","3093 [CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@@H]1O[C@H](C)... \n","\n"," chembl \\\n","0 [CHEMBL1471, CHEMBL1530428, CHEMBL1290003, CHE... \n","1 [none, none, none] \n","2 [CHEMBL431770, CHEMBL431770] \n","3 [placebo] \n","4 [none, CHEMBL2068237, CHEMBL185] \n","... ... \n","3089 [placebo, CHEMBL428647, none, CHEMBL1351, CHEM... \n","3090 [CHEMBL1491, CHEMBL1491] \n","3091 [CHEMBL269732, none] \n","3092 [CHEMBL2104993, CHEMBL1508, placebo] \n","3093 [CHEMBL1773, CHEMBL372764, placebo] \n","\n"," binding_db \\\n","0 [50220136, 50103620, 50443668, 50417287, 78434... \n","1 [none, none, none] \n","2 [50176050, 50176050] \n","3 [placebo] \n","4 [none, 50028111, 50340677] \n","... ... \n","3089 [placebo, 50001839, none, none, 50001839, none] \n","3090 [50088383, 50088383] \n","3091 [50030448, none] \n","3092 [50400902, 50302225, placebo] \n","3093 [none, 50431630, placebo] \n","\n"," clean_name \\\n","0 [aprepitant, dexamethasone, granisetron, palon... \n","1 [none, none, none] \n","2 [istradefylline, istradefylline] \n","3 [placebo] \n","4 [nimotuzumab, cisplatin, fluorouracil] \n","... ... \n","3089 [placebo, paclitaxel, none, carboplatin, pacli... \n","3090 [amlodipine, amlodipine] \n","3091 [tacrolimus, none] \n","3092 [vortioxetine, escitalopram, placebo] \n","3093 [capecitabine, perifosine, placebo] \n","\n"," cuis \\\n","0 [C1176306, C2930043, C0543476, C1310734, C0770... \n","1 [none, none, none] \n","2 [C0673470, C0673470] \n","3 [C1706408] \n","4 [C1570308, C0008838, C2711401] \n","... ... \n","3089 [C1706408, C0144576, none, C0079083, C0144576,... \n","3090 [C5195719, C5195719] \n","3091 [C0519826, none] \n","3092 [C3661282, C1099456, C1706408] \n","3093 [C0671970, C0754570, C1706408] \n","\n"," cui_vectors \\\n","0 [[-0.0133983809219361, 0.0038140331326222, -3.... \n","1 [[], [], []] \n","2 [[], []] \n","3 [[]] \n","4 [[], [-0.0160435887106513, 0.0074711445684327,... \n","... ... \n","3089 [[], [-0.0152721016686416, 0.0059331896906342,... \n","3090 [[], []] \n","3091 [[], []] \n","3092 [[], [-0.0147954572699932, 0.003364188566606, ... \n","3093 [[-0.0206859657439039, 0.0082703372165789, 1.3... \n","\n"," primary_cui_vector \\\n","0 V1 -0.013398\n","V2 0.003814\n","V3 ... \n","1 [] \n","2 [] \n","3 [] \n","4 V1 -0.016044\n","V2 0.007471\n","V3 ... \n","... ... \n","3089 V1 -0.015272\n","V2 0.005933\n","V3 ... \n","3090 [] \n","3091 [] \n","3092 V1 -0.014795\n","V2 0.003364\n","V3 ... \n","3093 V1 -0.020686\n","V2 0.00827\n","V3 ... \n","\n"," embeddings \\\n","0 [[13.561273574829102, -13.577717781066895, 0.9... \n","1 [[], [], []] \n","2 [[], []] \n","3 [[]] \n","4 [[], [], [4.89539098739624, 3.768472671508789,... \n","... ... \n","3089 [[], [], [], [], [], []] \n","3090 [[5.37669563293457, -5.854226589202881, -4.580... \n","3091 [[8.613880157470703, -9.339082717895508, 19.17... \n","3092 [[-3.836270332336426, -12.949006080627441, 3.9... \n","3093 [[7.950209617614746, -1.9675993919372559, 3.18... \n","\n"," embedding embedding_id \n","0 [13.561273574829102, -13.577717781066895, 0.95... 0 \n","1 [] -1 \n","2 [] -1 \n","3 [] -1 \n","4 [4.89539098739624, 3.768472671508789, 4.447010... 2 \n","... ... ... \n","3089 [] -1 \n","3090 [5.37669563293457, -5.854226589202881, -4.5800... 0 \n","3091 [8.613880157470703, -9.339082717895508, 19.174... 0 \n","3092 [-3.836270332336426, -12.949006080627441, 3.97... 0 \n","3093 [7.950209617614746, -1.9675993919372559, 3.184... 0 \n","\n","[3094 rows x 18 columns]"],"text/html":["\n"," <div id=\"df-16c71b1f-9777-4076-b77b-0a212cb8390b\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>nctid</th>\n"," <th>n_participants</th>\n"," <th>drugs</th>\n"," <th>diseases</th>\n"," <th>icdcodes</th>\n"," <th>criteria</th>\n"," <th>label</th>\n"," <th>drugbank_id</th>\n"," <th>smiles</th>\n"," <th>chembl</th>\n"," <th>binding_db</th>\n"," <th>clean_name</th>\n"," <th>cuis</th>\n"," <th>cui_vectors</th>\n"," <th>primary_cui_vector</th>\n"," <th>embeddings</th>\n"," <th>embedding</th>\n"," <th>embedding_id</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>NCT00475085</td>\n"," <td>944.0</td>\n"," <td>[aprepitant, dexamethasone, granisetron hydroc...</td>\n"," <td>[nausea]</td>\n"," <td>[R11.0, R11.11, R11.2]</td>\n"," <td>\\n Inclusion criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB00673, DB14649, DB00889, DB00377, DB00433, ...</td>\n"," <td>[C[C@@H](O[C@H]1OCCN(CC2=NNC(=O)N2)[C@H]1C1=CC...</td>\n"," <td>[CHEMBL1471, CHEMBL1530428, CHEMBL1290003, CHE...</td>\n"," <td>[50220136, 50103620, 50443668, 50417287, 78434...</td>\n"," <td>[aprepitant, dexamethasone, granisetron, palon...</td>\n"," <td>[C1176306, C2930043, C0543476, C1310734, C0770...</td>\n"," <td>[[-0.0133983809219361, 0.0038140331326222, -3....</td>\n"," <td>V1 -0.013398\n","V2 0.003814\n","V3 ...</td>\n"," <td>[[13.561273574829102, -13.577717781066895, 0.9...</td>\n"," <td>[13.561273574829102, -13.577717781066895, 0.95...</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>NCT01626859</td>\n"," <td>152.0</td>\n"," <td>[mp-214 low dose, mp-214 middle dose, mp-214 h...</td>\n"," <td>[schizophrenia]</td>\n"," <td>[F20.0, F20.1, F20.2, F20.3, F20.5, F20.89, F2...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[none, none, none]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," <td>[[], [], []]</td>\n"," <td>[]</td>\n"," <td>-1</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>NCT00203957</td>\n"," <td>NaN</td>\n"," <td>[istradefylline, istradefylline]</td>\n"," <td>[parkinsons disease]</td>\n"," <td>[G20]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB11757, DB11757]</td>\n"," <td>[[H]\\C(=C(\\[H])C1=CC(OC)=C(OC)C=C1)C1=NC2=C(N1...</td>\n"," <td>[CHEMBL431770, CHEMBL431770]</td>\n"," <td>[50176050, 50176050]</td>\n"," <td>[istradefylline, istradefylline]</td>\n"," <td>[C0673470, C0673470]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," <td>-1</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>NCT00169832</td>\n"," <td>NaN</td>\n"," <td>[rosiglitazone or placebo]</td>\n"," <td>[diabetes, coronary artery bypass grafting]</td>\n"," <td>[E23.2, N25.1, P70.2, O24.92, Z83.3, Z86.32, E...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n AT SC...</td>\n"," <td>0</td>\n"," <td>[placebo]</td>\n"," <td>[placebo]</td>\n"," <td>[placebo]</td>\n"," <td>[placebo]</td>\n"," <td>[placebo]</td>\n"," <td>[C1706408]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," <td>[[]]</td>\n"," <td>[]</td>\n"," <td>-1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>NCT01249352</td>\n"," <td>NaN</td>\n"," <td>[nimotuzumab, cisplatin, fluorouracil]</td>\n"," <td>[esophageal cancer, adenocarcinoma]</td>\n"," <td>[K22.2, K22.81, Q39.4, P78.83, I85.00, I85.01,...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n 1. ...</td>\n"," <td>1</td>\n"," <td>[DB06192, DB00515, DB00544]</td>\n"," <td>[none, [H][N]([H])([H])[Pt](Cl)(Cl)[N]([H])([H...</td>\n"," <td>[none, CHEMBL2068237, CHEMBL185]</td>\n"," <td>[none, 50028111, 50340677]</td>\n"," <td>[nimotuzumab, cisplatin, fluorouracil]</td>\n"," <td>[C1570308, C0008838, C2711401]</td>\n"," <td>[[], [-0.0160435887106513, 0.0074711445684327,...</td>\n"," <td>V1 -0.016044\n","V2 0.007471\n","V3 ...</td>\n"," <td>[[], [], [4.89539098739624, 3.768472671508789,...</td>\n"," <td>[4.89539098739624, 3.768472671508789, 4.447010...</td>\n"," <td>2</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>3089</th>\n"," <td>NCT01015118</td>\n"," <td>12294.0</td>\n"," <td>[placebo, paclitaxel, bibf 1120, carboplatin, ...</td>\n"," <td>[ovarian neoplasms, peritoneal neoplasms]</td>\n"," <td>[C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17...</td>\n"," <td>\\n Inclusion criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[placebo, DB01229, none, DB00958, DB01229, DB0...</td>\n"," <td>[placebo, [H][C@]12[C@H](OC(=O)C3=CC=CC=C3)[C@...</td>\n"," <td>[placebo, CHEMBL428647, none, CHEMBL1351, CHEM...</td>\n"," <td>[placebo, 50001839, none, none, 50001839, none]</td>\n"," <td>[placebo, paclitaxel, none, carboplatin, pacli...</td>\n"," <td>[C1706408, C0144576, none, C0079083, C0144576,...</td>\n"," <td>[[], [-0.0152721016686416, 0.0059331896906342,...</td>\n"," <td>V1 -0.015272\n","V2 0.005933\n","V3 ...</td>\n"," <td>[[], [], [], [], [], []]</td>\n"," <td>[]</td>\n"," <td>-1</td>\n"," </tr>\n"," <tr>\n"," <th>3090</th>\n"," <td>NCT01127217</td>\n"," <td>NaN</td>\n"," <td>[amlodipine/losartan, amlodipine]</td>\n"," <td>[hypertension]</td>\n"," <td>[I15.0, I97.3, K76.6, P29.2, G93.2, H40.053, I10]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>1</td>\n"," <td>[DB00381, DB00381]</td>\n"," <td>[CCOC(=O)C1=C(COCCN)NC(C)=C(C1C1=CC=CC=C1Cl)C(...</td>\n"," <td>[CHEMBL1491, CHEMBL1491]</td>\n"," <td>[50088383, 50088383]</td>\n"," <td>[amlodipine, amlodipine]</td>\n"," <td>[C5195719, C5195719]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," <td>[[5.37669563293457, -5.854226589202881, -4.580...</td>\n"," <td>[5.37669563293457, -5.854226589202881, -4.5800...</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3091</th>\n"," <td>NCT01187953</td>\n"," <td>1086.0</td>\n"," <td>[prograf (tacrolimus), lcp-tacro]</td>\n"," <td>[renal failure]</td>\n"," <td>[P96.0, O03.32, O04.82, O08.4, O03.82, O07.32,...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n 1. ...</td>\n"," <td>1</td>\n"," <td>[DB00864, none]</td>\n"," <td>[CO[C@@H]1C[C@@H](CC[C@H]1O)\\C=C(/C)[C@H]1OC(=...</td>\n"," <td>[CHEMBL269732, none]</td>\n"," <td>[50030448, none]</td>\n"," <td>[tacrolimus, none]</td>\n"," <td>[C0519826, none]</td>\n"," <td>[[], []]</td>\n"," <td>[]</td>\n"," <td>[[8.613880157470703, -9.339082717895508, 19.17...</td>\n"," <td>[8.613880157470703, -9.339082717895508, 19.174...</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3092</th>\n"," <td>NCT01364649</td>\n"," <td>1186.0</td>\n"," <td>[vortioxetine, escitalopram, placebo]</td>\n"," <td>[treatment outcome]</td>\n"," <td>[Z01.12, Z92.89, Z75.2, M27.59, Z53.9, Z91.19,...</td>\n"," <td>\\n Inclusion Criteria:\\n\\n 1. ...</td>\n"," <td>1</td>\n"," <td>[DB09068, DB01175, placebo]</td>\n"," <td>[CC1=CC=C(SC2=CC=CC=C2N2CCNCC2)C(C)=C1, CN(C)C...</td>\n"," <td>[CHEMBL2104993, CHEMBL1508, placebo]</td>\n"," <td>[50400902, 50302225, placebo]</td>\n"," <td>[vortioxetine, escitalopram, placebo]</td>\n"," <td>[C3661282, C1099456, C1706408]</td>\n"," <td>[[], [-0.0147954572699932, 0.003364188566606, ...</td>\n"," <td>V1 -0.014795\n","V2 0.003364\n","V3 ...</td>\n"," <td>[[-3.836270332336426, -12.949006080627441, 3.9...</td>\n"," <td>[-3.836270332336426, -12.949006080627441, 3.97...</td>\n"," <td>0</td>\n"," </tr>\n"," <tr>\n"," <th>3093</th>\n"," <td>NCT01097018</td>\n"," <td>NaN</td>\n"," <td>[capecitabine, perifosine, placebo]</td>\n"," <td>[colorectal cancer]</td>\n"," <td>[C05.2, C10.0, C16.0, C16.4, C17.0, C17.1, C17.2]</td>\n"," <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n"," <td>0</td>\n"," <td>[DB01101, DB06641, placebo]</td>\n"," <td>[CCCCCOC(=O)NC1=NC(=O)N(C=C1F)[C@@H]1O[C@H](C)...</td>\n"," <td>[CHEMBL1773, CHEMBL372764, placebo]</td>\n"," <td>[none, 50431630, placebo]</td>\n"," <td>[capecitabine, perifosine, placebo]</td>\n"," <td>[C0671970, C0754570, C1706408]</td>\n"," <td>[[-0.0206859657439039, 0.0082703372165789, 1.3...</td>\n"," <td>V1 -0.020686\n","V2 0.00827\n","V3 ...</td>\n"," <td>[[7.950209617614746, -1.9675993919372559, 3.18...</td>\n"," <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n"," <td>0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>3094 rows × 18 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-16c71b1f-9777-4076-b77b-0a212cb8390b')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-16c71b1f-9777-4076-b77b-0a212cb8390b button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-16c71b1f-9777-4076-b77b-0a212cb8390b');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":179}]},{"cell_type":"code","source":["ctid_embeddings_df"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":419},"id":"OgopTFN6fE2I","executionInfo":{"status":"ok","timestamp":1651456805363,"user_tz":240,"elapsed":336,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"bfd6f32a-3600-4dc9-d2a3-c1eeba6e97a9"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" nctid embedding \\\n","0 NCT00475085 [13.561273574829102, -13.577717781066895, 0.95... \n","1 NCT01626859 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","2 NCT00203957 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","3 NCT00169832 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","4 NCT01249352 [4.89539098739624, 3.768472671508789, 4.447010... \n","... ... ... \n","4579 NCT00679484 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","4580 NCT01057407 [-3.7375659942626953, -3.272908926010132, 4.68... \n","4581 NCT01126580 [9.013765335083008, -4.713770866394043, 15.139... \n","4582 NCT00423813 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","4583 NCT00237653 [12.198214530944824, 2.7601611614227295, 12.73... \n","\n"," drug \n","0 aprepitant \n","1 none \n","2 none \n","3 none \n","4 fluorouracil \n","... ... \n","4579 none \n","4580 sevelamer hydrochloride \n","4581 metformin \n","4582 none \n","4583 valganciclovir \n","\n","[4584 rows x 3 columns]"],"text/html":["\n"," <div id=\"df-56f1d3e3-9289-4499-a24e-24335a3c9e6d\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>nctid</th>\n"," <th>embedding</th>\n"," <th>drug</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>NCT00475085</td>\n"," <td>[13.561273574829102, -13.577717781066895, 0.95...</td>\n"," <td>aprepitant</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>NCT01626859</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>NCT00203957</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>NCT00169832</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>NCT01249352</td>\n"," <td>[4.89539098739624, 3.768472671508789, 4.447010...</td>\n"," <td>fluorouracil</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>4579</th>\n"," <td>NCT00679484</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>4580</th>\n"," <td>NCT01057407</td>\n"," <td>[-3.7375659942626953, -3.272908926010132, 4.68...</td>\n"," <td>sevelamer hydrochloride</td>\n"," </tr>\n"," <tr>\n"," <th>4581</th>\n"," <td>NCT01126580</td>\n"," <td>[9.013765335083008, -4.713770866394043, 15.139...</td>\n"," <td>metformin</td>\n"," </tr>\n"," <tr>\n"," <th>4582</th>\n"," <td>NCT00423813</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>4583</th>\n"," <td>NCT00237653</td>\n"," <td>[12.198214530944824, 2.7601611614227295, 12.73...</td>\n"," <td>valganciclovir</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>4584 rows × 3 columns</p>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-56f1d3e3-9289-4499-a24e-24335a3c9e6d')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-56f1d3e3-9289-4499-a24e-24335a3c9e6d button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-56f1d3e3-9289-4499-a24e-24335a3c9e6d');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":180}]},{"cell_type":"code","source":["ctid_embeddings_df.sample(50)"],"metadata":{"colab":{"base_uri":"https://localhost:8080/","height":1000},"id":"HzmTMSbrHwcD","executionInfo":{"status":"ok","timestamp":1651457317585,"user_tz":240,"elapsed":408,"user":{"displayName":"Man Qing Liang","userId":"03044866353882001989"}},"outputId":"58f17885-67a3-415b-aab3-92dfed8d4c83"},"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":[" nctid embedding \\\n","761 NCT00174720 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","3383 NCT02220894 [14.387839317321777, -10.28243350982666, 1.606... \n","1451 NCT00113386 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","2678 NCT00002597 [10.195808410644531, -9.866142272949219, 0.332... \n","3475 NCT02292446 [23.38435935974121, -0.7330405712127686, 11.95... \n","2898 NCT01768286 [3.361865282058716, 0.7817785739898682, 10.766... \n","1653 NCT01049334 [6.104618072509766, -5.057725429534912, -14.15... \n","3307 NCT02149121 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","3569 NCT02388906 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","1292 NCT01348243 [-2.2266159057617188, -5.000643253326416, 5.43... \n","1578 NCT02182479 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","812 NCT00253968 [9.196856498718262, -10.230440139770508, -13.4... \n","4468 NCT00698581 [8.661005020141602, -0.3385846018791199, 11.90... \n","1146 NCT01512108 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","3392 NCT02226003 [0.14281639456748962, -10.426595687866211, -14... \n","3873 NCT02679573 [14.945191383361816, -25.684415817260742, 17.6... \n","1398 NCT01342913 [-6.010187149047852, 7.235744476318359, 0.6008... \n","2380 NCT00286468 [6.733612060546875, -12.518169403076172, 1.304... \n","2340 NCT00227747 [7.950209617614746, -1.9675993919372559, 3.184... \n","183 NCT00127634 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","574 NCT00265616 [7.544963359832764, 6.164093017578125, -10.620... \n","1999 NCT00540449 [12.20997142791748, -2.5129404067993164, -1.25... \n","114 NCT01672762 [-8.753070831298828, -18.59516143798828, -4.12... \n","358 NCT01763918 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","2330 NCT00484939 [7.950209617614746, -1.9675993919372559, 3.184... \n","1179 NCT00114127 [0.26812058687210083, -11.34184741973877, -2.4... \n","2369 NCT00151255 [-2.3481616973876953, -2.4751017093658447, 4.7... \n","1277 NCT00163293 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","2316 NCT00668525 [9.756115913391113, -13.786759376525879, -8.16... \n","875 NCT00506285 [4.142498970031738, -1.2075169086456299, -2.51... \n","1086 NCT00056407 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","261 NCT00628589 [-3.4664182662963867, -6.698912143707275, -0.5... \n","1329 NCT01510535 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","3964 NCT02785939 [17.70220184326172, -18.377199172973633, 11.80... \n","231 NCT00355641 [6.557114124298096, -12.076240539550781, 0.163... \n","1849 NCT00347412 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","107 NCT00330460 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","316 NCT00113815 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","1431 NCT01277666 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","1167 NCT00385138 [12.460927963256836, -18.833032608032227, 14.6... \n","1795 NCT01573351 [13.946184158325195, -10.585319519042969, 7.30... \n","1315 NCT00449930 [11.46723747253418, -19.053577423095703, 2.737... \n","3061 NCT01287260 [7.839696407318115, -1.8401050567626953, -8.38... \n","507 NCT00363584 [7.950209617614746, -1.9675993919372559, 3.184... \n","4139 NCT03123471 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","2194 NCT00604383 [10.526212692260742, -15.975157737731934, -3.7... \n","3424 NCT02252042 [23.116355895996094, -7.407402515411377, 16.62... \n","1934 NCT00699153 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","637 NCT01433523 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n","3574 NCT02393859 [-3.2333426475524902, -13.376590728759766, 10.... \n","\n"," drug \n","761 none \n","3383 pemetrexed \n","1451 none \n","2678 flutamide \n","3475 ruxolitinib \n","2898 rbv \n","1653 flurbiprofen \n","3307 none \n","3569 none \n","1292 disodium clodronate 200 mg/4 ml with 1% lidocaine \n","1578 none \n","812 eplivanserin \n","4468 brivaracetam \n","1146 none \n","3392 ertugliflozin \n","3873 delafloxacin \n","1398 fluticasone furoate 100mcg/vilanterol 25mcg \n","2380 alogliptin and glyburide \n","2340 capecitabine \n","183 none \n","574 propofol \n","1999 efavirenz \n","114 ipragliflozin \n","358 none \n","2330 capecitabine \n","1179 duloxetine \n","2369 cytarabine \n","1277 none \n","2316 escitalopram \n","875 methylphenidate transdermal system (mts) \n","1086 none \n","261 inhaled loxapine 5 mg \n","1329 none \n","3964 palbociclib \n","231 ropinirole extended release (xr) \n","1849 none \n","107 none \n","316 none \n","1431 none \n","1167 cangrelor \n","1795 asunaprevir \n","1315 sitagliptin phosphate \n","3061 nifedipine (adalat \n","507 capecitabine \n","4139 none \n","2194 ruboxistaurin \n","3424 methotrexate \n","1934 none \n","637 none \n","3574 daunorubicin "],"text/html":["\n"," <div id=\"df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa\">\n"," <div class=\"colab-df-container\">\n"," <div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>nctid</th>\n"," <th>embedding</th>\n"," <th>drug</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>761</th>\n"," <td>NCT00174720</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>3383</th>\n"," <td>NCT02220894</td>\n"," <td>[14.387839317321777, -10.28243350982666, 1.606...</td>\n"," <td>pemetrexed</td>\n"," </tr>\n"," <tr>\n"," <th>1451</th>\n"," <td>NCT00113386</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>2678</th>\n"," <td>NCT00002597</td>\n"," <td>[10.195808410644531, -9.866142272949219, 0.332...</td>\n"," <td>flutamide</td>\n"," </tr>\n"," <tr>\n"," <th>3475</th>\n"," <td>NCT02292446</td>\n"," <td>[23.38435935974121, -0.7330405712127686, 11.95...</td>\n"," <td>ruxolitinib</td>\n"," </tr>\n"," <tr>\n"," <th>2898</th>\n"," <td>NCT01768286</td>\n"," <td>[3.361865282058716, 0.7817785739898682, 10.766...</td>\n"," <td>rbv</td>\n"," </tr>\n"," <tr>\n"," <th>1653</th>\n"," <td>NCT01049334</td>\n"," <td>[6.104618072509766, -5.057725429534912, -14.15...</td>\n"," <td>flurbiprofen</td>\n"," </tr>\n"," <tr>\n"," <th>3307</th>\n"," <td>NCT02149121</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>3569</th>\n"," <td>NCT02388906</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>1292</th>\n"," <td>NCT01348243</td>\n"," <td>[-2.2266159057617188, -5.000643253326416, 5.43...</td>\n"," <td>disodium clodronate 200 mg/4 ml with 1% lidocaine</td>\n"," </tr>\n"," <tr>\n"," <th>1578</th>\n"," <td>NCT02182479</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>812</th>\n"," <td>NCT00253968</td>\n"," <td>[9.196856498718262, -10.230440139770508, -13.4...</td>\n"," <td>eplivanserin</td>\n"," </tr>\n"," <tr>\n"," <th>4468</th>\n"," <td>NCT00698581</td>\n"," <td>[8.661005020141602, -0.3385846018791199, 11.90...</td>\n"," <td>brivaracetam</td>\n"," </tr>\n"," <tr>\n"," <th>1146</th>\n"," <td>NCT01512108</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>3392</th>\n"," <td>NCT02226003</td>\n"," <td>[0.14281639456748962, -10.426595687866211, -14...</td>\n"," <td>ertugliflozin</td>\n"," </tr>\n"," <tr>\n"," <th>3873</th>\n"," <td>NCT02679573</td>\n"," <td>[14.945191383361816, -25.684415817260742, 17.6...</td>\n"," <td>delafloxacin</td>\n"," </tr>\n"," <tr>\n"," <th>1398</th>\n"," <td>NCT01342913</td>\n"," <td>[-6.010187149047852, 7.235744476318359, 0.6008...</td>\n"," <td>fluticasone furoate 100mcg/vilanterol 25mcg</td>\n"," </tr>\n"," <tr>\n"," <th>2380</th>\n"," <td>NCT00286468</td>\n"," <td>[6.733612060546875, -12.518169403076172, 1.304...</td>\n"," <td>alogliptin and glyburide</td>\n"," </tr>\n"," <tr>\n"," <th>2340</th>\n"," <td>NCT00227747</td>\n"," <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n"," <td>capecitabine</td>\n"," </tr>\n"," <tr>\n"," <th>183</th>\n"," <td>NCT00127634</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>574</th>\n"," <td>NCT00265616</td>\n"," <td>[7.544963359832764, 6.164093017578125, -10.620...</td>\n"," <td>propofol</td>\n"," </tr>\n"," <tr>\n"," <th>1999</th>\n"," <td>NCT00540449</td>\n"," <td>[12.20997142791748, -2.5129404067993164, -1.25...</td>\n"," <td>efavirenz</td>\n"," </tr>\n"," <tr>\n"," <th>114</th>\n"," <td>NCT01672762</td>\n"," <td>[-8.753070831298828, -18.59516143798828, -4.12...</td>\n"," <td>ipragliflozin</td>\n"," </tr>\n"," <tr>\n"," <th>358</th>\n"," <td>NCT01763918</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>2330</th>\n"," <td>NCT00484939</td>\n"," <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n"," <td>capecitabine</td>\n"," </tr>\n"," <tr>\n"," <th>1179</th>\n"," <td>NCT00114127</td>\n"," <td>[0.26812058687210083, -11.34184741973877, -2.4...</td>\n"," <td>duloxetine</td>\n"," </tr>\n"," <tr>\n"," <th>2369</th>\n"," <td>NCT00151255</td>\n"," <td>[-2.3481616973876953, -2.4751017093658447, 4.7...</td>\n"," <td>cytarabine</td>\n"," </tr>\n"," <tr>\n"," <th>1277</th>\n"," <td>NCT00163293</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>2316</th>\n"," <td>NCT00668525</td>\n"," <td>[9.756115913391113, -13.786759376525879, -8.16...</td>\n"," <td>escitalopram</td>\n"," </tr>\n"," <tr>\n"," <th>875</th>\n"," <td>NCT00506285</td>\n"," <td>[4.142498970031738, -1.2075169086456299, -2.51...</td>\n"," <td>methylphenidate transdermal system (mts)</td>\n"," </tr>\n"," <tr>\n"," <th>1086</th>\n"," <td>NCT00056407</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>261</th>\n"," <td>NCT00628589</td>\n"," <td>[-3.4664182662963867, -6.698912143707275, -0.5...</td>\n"," <td>inhaled loxapine 5 mg</td>\n"," </tr>\n"," <tr>\n"," <th>1329</th>\n"," <td>NCT01510535</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>3964</th>\n"," <td>NCT02785939</td>\n"," <td>[17.70220184326172, -18.377199172973633, 11.80...</td>\n"," <td>palbociclib</td>\n"," </tr>\n"," <tr>\n"," <th>231</th>\n"," <td>NCT00355641</td>\n"," <td>[6.557114124298096, -12.076240539550781, 0.163...</td>\n"," <td>ropinirole extended release (xr)</td>\n"," </tr>\n"," <tr>\n"," <th>1849</th>\n"," <td>NCT00347412</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>107</th>\n"," <td>NCT00330460</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>316</th>\n"," <td>NCT00113815</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>1431</th>\n"," <td>NCT01277666</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>1167</th>\n"," <td>NCT00385138</td>\n"," <td>[12.460927963256836, -18.833032608032227, 14.6...</td>\n"," <td>cangrelor</td>\n"," </tr>\n"," <tr>\n"," <th>1795</th>\n"," <td>NCT01573351</td>\n"," <td>[13.946184158325195, -10.585319519042969, 7.30...</td>\n"," <td>asunaprevir</td>\n"," </tr>\n"," <tr>\n"," <th>1315</th>\n"," <td>NCT00449930</td>\n"," <td>[11.46723747253418, -19.053577423095703, 2.737...</td>\n"," <td>sitagliptin phosphate</td>\n"," </tr>\n"," <tr>\n"," <th>3061</th>\n"," <td>NCT01287260</td>\n"," <td>[7.839696407318115, -1.8401050567626953, -8.38...</td>\n"," <td>nifedipine (adalat</td>\n"," </tr>\n"," <tr>\n"," <th>507</th>\n"," <td>NCT00363584</td>\n"," <td>[7.950209617614746, -1.9675993919372559, 3.184...</td>\n"," <td>capecitabine</td>\n"," </tr>\n"," <tr>\n"," <th>4139</th>\n"," <td>NCT03123471</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>2194</th>\n"," <td>NCT00604383</td>\n"," <td>[10.526212692260742, -15.975157737731934, -3.7...</td>\n"," <td>ruboxistaurin</td>\n"," </tr>\n"," <tr>\n"," <th>3424</th>\n"," <td>NCT02252042</td>\n"," <td>[23.116355895996094, -7.407402515411377, 16.62...</td>\n"," <td>methotrexate</td>\n"," </tr>\n"," <tr>\n"," <th>1934</th>\n"," <td>NCT00699153</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>637</th>\n"," <td>NCT01433523</td>\n"," <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n"," <td>none</td>\n"," </tr>\n"," <tr>\n"," <th>3574</th>\n"," <td>NCT02393859</td>\n"," <td>[-3.2333426475524902, -13.376590728759766, 10....</td>\n"," <td>daunorubicin</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>\n"," <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa')\"\n"," title=\"Convert this dataframe to an interactive table.\"\n"," style=\"display:none;\">\n"," \n"," <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n"," width=\"24px\">\n"," <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n"," <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n"," </svg>\n"," </button>\n"," \n"," <style>\n"," .colab-df-container {\n"," display:flex;\n"," flex-wrap:wrap;\n"," gap: 12px;\n"," }\n","\n"," .colab-df-convert {\n"," background-color: #E8F0FE;\n"," border: none;\n"," border-radius: 50%;\n"," cursor: pointer;\n"," display: none;\n"," fill: #1967D2;\n"," height: 32px;\n"," padding: 0 0 0 0;\n"," width: 32px;\n"," }\n","\n"," .colab-df-convert:hover {\n"," background-color: #E2EBFA;\n"," box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n"," fill: #174EA6;\n"," }\n","\n"," [theme=dark] .colab-df-convert {\n"," background-color: #3B4455;\n"," fill: #D2E3FC;\n"," }\n","\n"," [theme=dark] .colab-df-convert:hover {\n"," background-color: #434B5C;\n"," box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n"," filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n"," fill: #FFFFFF;\n"," }\n"," </style>\n","\n"," <script>\n"," const buttonEl =\n"," document.querySelector('#df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa button.colab-df-convert');\n"," buttonEl.style.display =\n"," google.colab.kernel.accessAllowed ? 'block' : 'none';\n","\n"," async function convertToInteractive(key) {\n"," const element = document.querySelector('#df-83ba7a9c-9b68-4ff7-9145-5e9879e53bfa');\n"," const dataTable =\n"," await google.colab.kernel.invokeFunction('convertToInteractive',\n"," [key], {});\n"," if (!dataTable) return;\n","\n"," const docLinkHtml = 'Like what you see? Visit the ' +\n"," '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n"," + ' to learn more about interactive tables.';\n"," element.innerHTML = '';\n"," dataTable['output_type'] = 'display_data';\n"," await google.colab.output.renderOutput(dataTable, element);\n"," const docLink = document.createElement('div');\n"," docLink.innerHTML = docLinkHtml;\n"," element.appendChild(docLink);\n"," }\n"," </script>\n"," </div>\n"," </div>\n"," "]},"metadata":{},"execution_count":183}]},{"cell_type":"code","source":["ctid_embeddings_df.to_pickle(deep_learning_dir + '/embeddings/drug_embeddings.pickle')"],"metadata":{"id":"nWzJXSVCFTII"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["ctid_embeddings_dict = dict(zip(ctid_embeddings_df[\"nctid\"], ctid_embeddings_df[\"embedding\"]))"],"metadata":{"id":"xj_FACXuTE3j"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["import pickle"],"metadata":{"id":"XQQul_IQT-aj"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["with open(\"nctid2drugs.pkl\", 'wb') as handle:\n"," pickle.dump(ctid_embeddings_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)"],"metadata":{"id":"RVYjsLkyTmHJ"},"execution_count":null,"outputs":[]},{"cell_type":"code","source":["!mv nctid2drugs.pkl \"/content/gdrive/My Drive/BMI 707 Project/embeddings/\""],"metadata":{"id":"mZ9hk1ZeT4u3"},"execution_count":null,"outputs":[]}]}