# Drug Interactions Network Analysis
### Part 1 - Data Exploraton
#### Author: Kenneth Leung

#### Data Sources:  
- https://snap.stanford.edu/biodata/datasets/10001/10001-ChCh-Miner.html
- https://github.com/snap-stanford/miner-data/tree/master/drugbank

___
### 1. Import dependencies

In [3]:
import pandas as pd
import numpy as np
import re
import zipfile
import json
from pyvis.network import Network
import networkx as nx

from collections import Counter
from  itertools import combinations

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

___
### 2. Data preparation

In [4]:
# Unzip all tar/zip files
zip_files_list = [i for i in os.listdir('data') if i.endswith('.zip')]

for file in zip_files_list:
    with zipfile.ZipFile(f'data/{file}', 'r') as zip_ref:
        zip_ref.extractall('data')

os.listdir('data')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

['ChCh-Miner_durgbank-chem-chem.tsv',
 'ChCh-Miner_durgbank-chem-chem.zip',
 'ChChSe-Decagon_polypharmacy.csv',
 'ChChSe-Decagon_polypharmacy.zip',
 'ChSe-Decagon_monopharmacy.csv',
 'ChSe-Decagon_monopharmacy.zip',
 'CID_mapping.json',
 'DB_mapping.json']

#### Drug Interactions (Drugbank)

In [5]:
# Read DB mapping JSON
with open('data/DB_mapping.json', 'r') as fp:
    db_mapping = json.load(fp)
    
db_mapping

{'DB00005': 'Etanercept',
 'DB00006': 'Bivalirudin',
 'DB00007': 'Leuprolide',
 'DB00008': 'Peginterferon alfa-2a',
 'DB00009': 'Alteplase',
 'DB00012': 'Darbepoetin alfa',
 'DB00013': 'Urokinase',
 'DB00014': 'Goserelin',
 'DB00015': 'Reteplase',
 'DB00016': 'Erythropoietin',
 'DB00017': 'Salmon calcitonin',
 'DB00018': 'Interferon alfa-n3',
 'DB00019': 'Pegfilgrastim',
 'DB00020': 'Sargramostim',
 'DB00021': 'Secretin human',
 'DB00022': 'Peginterferon alfa-2b',
 'DB00023': 'Asparaginase Escherichia coli',
 'DB00026': 'Anakinra',
 'DB00028': 'Human immunoglobulin G',
 'DB00029': 'Anistreplase',
 'DB00030': 'Insulin human',
 'DB00031': 'Tenecteplase',
 'DB00033': 'Interferon gamma-1b',
 'DB00035': 'Desmopressin',
 'DB00036': 'Coagulation factor VIIa Recombinant Human',
 'DB00039': 'Palifermin',
 'DB00040': 'Glucagon',
 'DB00041': 'Aldesleukin',
 'DB00042': 'Botulinum toxin type B',
 'DB00043': 'Omalizumab',
 'DB00046': 'Insulin lispro',
 'DB00047': 'Insulin glargine',
 'DB00048': 'Col

In [6]:
# Import raw drugbank dataset
df_db_int = pd.read_csv("data/ChCh-Miner_durgbank-chem-chem.tsv", sep='\t', header=None)
df_db_int.columns = ['drug_1_code', 'drug_2_code']

# Perform code-name mapping
df_db_int['drug_1_name'] = df_db_int['drug_1_code'].map(db_mapping)
df_db_int['drug_2_name'] = df_db_int['drug_2_code'].map(db_mapping)

new_cols = ['drug_1_code', 'drug_1_name', 'drug_2_code', 'drug_2_name']
df_db_int = df_db_int[new_cols]
df_db_int.head()

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name
0,DB00862,Vardenafil,DB00966,Telmisartan
1,DB00575,Clonidine,DB00806,Pentoxifylline
2,DB01242,Clomipramine,DB08893,Mirabegron
3,DB01151,Desipramine,DB08883,Perampanel
4,DB01235,Levodopa,DB01275,Hydralazine


#### 2(b) Polypharmacy side effects

In [7]:
# Read CID mapping JSON
with open('data/CID_mapping.json', 'r') as fp:
    cid_mapping = json.load(fp)
    
cid_mapping

{'CID000002173': '6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-oxo-4-thia-1-azabicyclo[3.2.0]heptane-2-carboxylate',
 'CID000005206': 'Sevoflurane',
 'CID000003929': 'N-[[(5S)-3-[3-Fluoro-4-(4-morpholinyl)phenyl]-2-oxo-5-oxazolidinyl]methyl]-acetamide',
 'CID000001302': '2-(6-Methoxy-2-naphthyl)propionic acid',
 'CID000005267': 'Duraspiron',
 'CID000004601': 'Orphenadrine',
 'CID000005090': 'Rofecoxib',
 'CID000004946': 'Propranolol',
 'CID000005391': 'Temazepam',
 'CID000002802': 'Clonazepam',
 'CID000004212': 'Mitoxantrone',
 'CID000000596': 'Depocyt',
 'CID000002522': '5-[2-[1-(5-Cyclopropyl-5-hydroxypent-3-en-2-yl)-7a-methyl-2,3,3a,5,6,7-hexahydro-1H-inden-4-ylidene]ethylidene]-4-methylenecyclohexane-1,3-diol',
 'CID000003405': 'CID 3405',
 'CID000003446': 'Gabapentin',
 'CID000004107': 'Methocarbamol',
 'CID000003161': 'CID 3161',
 'CID000003823': 'Ketoconazole',
 'CID000005556': 'Triazolam',
 'CID000002156': '[2-[4-[(2-Butylbenzofuran-3-yl)carbonyl]-2,6-diiodophenoxy]ethyl]dieth

In [8]:
# Import dataset
df_poly_se = pd.read_csv("data/ChChSe-Decagon_polypharmacy.csv")
df_poly_se.columns = ['drug_1_code', 'drug_2_code', 'side_effect_code', 'side_effect_description']

# Perform code-name mapping
df_poly_se['drug_1_name'] = df_poly_se['drug_1_code'].map(cid_mapping)
df_poly_se['drug_2_name'] = df_poly_se['drug_2_code'].map(cid_mapping)

# Rearrange columns
new_cols = ['drug_1_code', 'drug_1_name', 'drug_2_code', 'drug_2_name', 
            'side_effect_code', 'side_effect_description']

df_poly_se = df_poly_se[new_cols]
df_poly_se.head()

Unnamed: 0,drug_1_code,drug_1_name,drug_2_code,drug_2_name,side_effect_code,side_effect_description
0,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0151714,hypermagnesemia
1,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0035344,retinopathy of prematurity
2,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0004144,atelectasis
3,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0002063,alkalosis
4,CID000002173,"6-{[Amino(phenyl)acetyl]amino}-3,3-dimethyl-7-...",CID000003345,Fentanyl,C0004604,Back Ache


In [9]:
df_poly_se['side_effect_description'].value_counts()[:20]

arterial pressure NOS decreased    28568
anaemia                            27006
Difficulty breathing               26037
nausea                             25190
neumonia                           24430
Fatigue                            24260
Pain                               23894
diarrhea                           23848
asthenia                           23515
emesis                             23043
edema extremities                  21981
body temperature increased         21806
pleural pain                       21781
abdominal pain                     21410
Hypoventilation                    21322
chest pain                         21013
dizziness                          20204
Back Ache                          19930
Head ache                          19803
High blood pressure                19376
Name: side_effect_description, dtype: int64

In [10]:
drugs = (df_poly_se['drug_1_name'].value_counts()).append(df_poly_se['drug_2_name'].value_counts())
drugs[:20]

DL-Thyroxine                                                                                           68416
Salbutamol                                                                                             65709
Acetaminophen                                                                                          62959
Citalopram                                                                                             55784
Celecoxib                                                                                              54811
Bupropion                                                                                              54510
Alendronic acid                                                                                        49899
2-(6-Methoxy-2-naphthyl)propionic acid                                                                 48028
Heparin                                                                                                45956
Diazepam           

In [13]:
n = 20
df_poly_se_sm = df_poly_se[['drug_1_name', 'drug_2_name']]
L = Counter([y for x in df_poly_se_sm.values for y in combinations(x, 2)]).most_common(n)

combi_df = pd.DataFrame(L, columns=['Pair', 'Qty'])
print(combi_df)

                                                 Pair  Qty
0                          (Lansoprazole, Omeprazole)  540
1                           (Celecoxib, Lansoprazole)  524
2                             (Omeprazole, Rofecoxib)  494
3                               (Rofecoxib, Zolpidem)  478
4                             (Celecoxib, Gabapentin)  476
5                               (Cetirizine, delta-E)  468
6                              (Lorazepam, Rofecoxib)  466
7                                (delta-E, Rofecoxib)  465
8                                (Celecoxib, delta-E)  462
9                              (Omeprazole, Percodan)  461
10                              (Celecoxib, Zolpidem)  457
11  (Celecoxib, S-(Fluoromethyl) (6S,9R,10S,11S,13...  451
12  (3-(1,3-Benzodioxol-5-yloxymethyl)-4-(4-fluoro...  449
13                              (Salbutamol, delta-E)  448
14                           (Ciprofloxacin, delta-E)  448
15                            (Lansoprazole, delta-E)  4

___
#### 2(c) Monopharmacy side effects

In [9]:
df_mono_se = pd.read_csv("data/ChSe-Decagon_monopharmacy.csv")
df_mono_se.head()

Unnamed: 0,# STITCH,Individual Side Effect,Side Effect Name
0,CID003062316,C1096328,central nervous system mass
1,CID003062316,C0162830,Photosensitivity reaction
2,CID003062316,C1611725,leukaemic infiltration brain
3,CID003062316,C0541767,platelet adhesiveness abnormal
4,CID003062316,C0242973,Ventricular dysfunction


In [15]:
# Import dataset
df_mono_se = pd.read_csv("data/ChSe-Decagon_monopharmacy.csv")
df_mono_se.columns = ['drug_code', 'side_effect_code', 'side_effect_description']

# Perform code-name mapping
df_mono_se['drug_name'] = df_mono_se['drug_code'].map(cid_mapping)

# Rearrange columns
new_cols = ['drug_code', 'drug_name', 'side_effect_code', 'side_effect_description']

df_mono_se = df_mono_se[new_cols]
df_mono_se.head()

Unnamed: 0,drug_code,drug_name,side_effect_code,side_effect_description
0,CID003062316,Dasatinib,C1096328,central nervous system mass
1,CID003062316,Dasatinib,C0162830,Photosensitivity reaction
2,CID003062316,Dasatinib,C1611725,leukaemic infiltration brain
3,CID003062316,Dasatinib,C0541767,platelet adhesiveness abnormal
4,CID003062316,Dasatinib,C0242973,Ventricular dysfunction


In [17]:
df_mono_se['drug_name'].value_counts()[:15]

Zoledronic acid                                                                                    1550
Pamidronic acid                                                                                    1515
3,7-Dimethyl-9-(2,6,6-trimethylcyclohex-1-en-1-yl)nona-2,4,6,8-tetraenoic acid                     1229
Bupropion                                                                                          1186
1-[3,4-Dihydroxy-5-(hydroxymethyl)-2-oxolanyl]-1,2,4-triazole-3-carboxamide                        1045
Rofecoxib                                                                                           979
2-[4-(1,2-Diphenylbut-1-enyl)phenoxy]-N,N-dimethylethanamine                                        947
[(1S)-3-Methyl-1-[[(2R)-3-phenyl-2-(pyrazine-2-carbonylamino)propanoyl]amino]butyl]boronic acid     927
Letrozole                                                                                           896
CID 2818                                                        

In [18]:
df_mono_se['side_effect_description'].value_counts()[:15]

general physical health deterioration    301
hypoaesthesia                            279
mental status changes                    278
tooth extraction                         276
emotional distress                       275
alanine aminotransferase increased       273
condition aggravated                     270
pollakiuria                              267
staphylococcal infection                 266
blood creatinine increased               263
bone disorder                            263
spinal osteoarthritis                    263
dysgeusia                                257
anhedonia                                249
Gastrointestinal disorder                243
Name: side_effect_description, dtype: int64