In [1]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
import sklearn.preprocessing as sk
import seaborn as sns
from sklearn import metrics
from sklearn.feature_selection import VarianceThreshold
from sklearn.model_selection import train_test_split
from utils import AllTripletSelector,HardestNegativeTripletSelector, RandomNegativeTripletSelector, SemihardNegativeTripletSelector # Strategies for selecting triplets within a minibatch
from metrics import AverageNonzeroTripletsMetric
from torch.utils.data.sampler import WeightedRandomSampler
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
import random
from random import randint
from sklearn.model_selection import StratifiedKFold

save_results_to = '/home/hnoghabi/EGFR/'
torch.manual_seed(42)
random.seed(42)

GDSCE = pd.read_csv("GDSC_exprs.z.EGFRi.tsv", 
                    sep = "\t", index_col=0, decimal = ",")
GDSCE = pd.DataFrame.transpose(GDSCE)

GDSCM = pd.read_csv("GDSC_mutations.EGFRi.tsv", 
                    sep = "\t", index_col=0, decimal = ".")
GDSCM = pd.DataFrame.transpose(GDSCM)
GDSCM = GDSCM.loc[:,~GDSCM.columns.duplicated()]

GDSCC = pd.read_csv("GDSC_CNA.EGFRi.tsv", 
                    sep = "\t", index_col=0, decimal = ".")
GDSCC.drop_duplicates(keep='last')
GDSCC = pd.DataFrame.transpose(GDSCC)
GDSCC = GDSCC.loc[:,~GDSCC.columns.duplicated()]

PDXEerlo = pd.read_csv("PDX_exprs.Erlotinib.eb_with.GDSC_exprs.Erlotinib.tsv", 
                   sep = "\t", index_col=0, decimal = ",")
PDXEerlo = pd.DataFrame.transpose(PDXEerlo)
PDXMerlo = pd.read_csv("PDX_mutations.Erlotinib.tsv", 
                   sep = "\t", index_col=0, decimal = ",")
PDXMerlo = pd.DataFrame.transpose(PDXMerlo)
PDXCerlo = pd.read_csv("PDX_CNA.Erlotinib.tsv", 
                   sep = "\t", index_col=0, decimal = ",")
PDXCerlo.drop_duplicates(keep='last')
PDXCerlo = pd.DataFrame.transpose(PDXCerlo)
PDXCerlo = PDXCerlo.loc[:,~PDXCerlo.columns.duplicated()]

PDXEcet = pd.read_csv("PDX_exprs.Cetuximab.eb_with.GDSC_exprs.Cetuximab.tsv", 
                   sep = "\t", index_col=0, decimal = ",")
PDXEcet = pd.DataFrame.transpose(PDXEcet)
PDXMcet = pd.read_csv("PDX_mutations.Cetuximab.tsv", 
                   sep = "\t", index_col=0, decimal = ",")
PDXMcet = pd.DataFrame.transpose(PDXMcet)
PDXCcet = pd.read_csv("PDX_CNA.Cetuximab.tsv", 
                   sep = "\t", index_col=0, decimal = ",")
PDXCcet.drop_duplicates(keep='last')
PDXCcet = pd.DataFrame.transpose(PDXCcet)
PDXCcet = PDXCcet.loc[:,~PDXCcet.columns.duplicated()]

selector = VarianceThreshold(0.05)
selector.fit_transform(GDSCE)
GDSCE = GDSCE[GDSCE.columns[selector.get_support(indices=True)]]

GDSCM = GDSCM.fillna(0)
GDSCM[GDSCM != 0.0] = 1
GDSCC = GDSCC.fillna(0)
GDSCC[GDSCC != 0.0] = 1

ls = GDSCE.columns.intersection(GDSCM.columns)
ls = ls.intersection(GDSCC.columns)
ls = ls.intersection(PDXEerlo.columns)
ls = ls.intersection(PDXMerlo.columns)
ls = ls.intersection(PDXCerlo.columns)
ls = ls.intersection(PDXEcet.columns)
ls = ls.intersection(PDXMcet.columns)
ls = ls.intersection(PDXCcet.columns)
ls2 = GDSCE.index.intersection(GDSCM.index)
ls2 = ls2.intersection(GDSCC.index)
ls3 = PDXEerlo.index.intersection(PDXMerlo.index)
ls3 = ls3.intersection(PDXCerlo.index)
ls4 = PDXEcet.index.intersection(PDXMcet.index)
ls4 = ls4.intersection(PDXCcet.index)
ls = pd.unique(ls)

PDXEerlo = PDXEerlo.loc[ls3,ls]
PDXMerlo = PDXMerlo.loc[ls3,ls]
PDXCerlo = PDXCerlo.loc[ls3,ls]
PDXEcet = PDXEcet.loc[ls4,ls]
PDXMcet = PDXMcet.loc[ls4,ls]
PDXCcet = PDXCcet.loc[ls4,ls]
GDSCE = GDSCE.loc[:,ls]
GDSCM = GDSCM.loc[:,ls]
GDSCC = GDSCC.loc[:,ls]

GDSCR = pd.read_csv("GDSC_response.EGFRi.tsv", 
                    sep = "\t", index_col=0, decimal = ",")

GDSCR.rename(mapper = str, axis = 'index', inplace = True)

d = {"R":0,"S":1}
GDSCR["response"] = GDSCR.loc[:,"response"].apply(lambda x: d[x])

responses = GDSCR
drugs = set(responses["drug"].values)
exprs_z = GDSCE
cna = GDSCC
mut = GDSCM
expression_zscores = []
CNA=[]
mutations = []
for drug in drugs:
    samples = responses.loc[responses["drug"]==drug,:].index.values
    e_z = exprs_z.loc[samples,:]
    c = cna.loc[samples,:]
    m = mut.loc[samples,:]
    m = mut.loc[samples,:]
    # next 3 rows if you want non-unique sample names
    e_z.rename(lambda x : str(x)+"_"+drug, axis = "index", inplace=True)
    c.rename(lambda x : str(x)+"_"+drug, axis = "index", inplace=True)
    m.rename(lambda x : str(x)+"_"+drug, axis = "index", inplace=True)
    expression_zscores.append(e_z)
    CNA.append(c)
    mutations.append(m)
responses.index = responses.index.values +"_"+responses["drug"].values
GDSCEv2 = pd.concat(expression_zscores, axis =0 )
GDSCCv2 = pd.concat(CNA, axis =0 )
GDSCMv2 = pd.concat(mutations, axis =0 )
GDSCRv2 = responses

ls2 = GDSCEv2.index.intersection(GDSCMv2.index)
ls2 = ls2.intersection(GDSCCv2.index)
GDSCEv2 = GDSCEv2.loc[ls2,:]
GDSCMv2 = GDSCMv2.loc[ls2,:]
GDSCCv2 = GDSCCv2.loc[ls2,:]
GDSCRv2 = GDSCRv2.loc[ls2,:]

Y = GDSCRv2['response'].values

PDXRcet = pd.read_csv("PDX_response.Cetuximab.tsv", 
                       sep = "\t", index_col=0, decimal = ",")
PDXRcet.loc[PDXRcet.iloc[:,0] == 'R'] = 0
PDXRcet.loc[PDXRcet.iloc[:,0] == 'S'] = 1
PDXRcet = PDXRcet.loc[ls4,:]
Ytscet = PDXRcet['response'].values    

PDXRerlo = pd.read_csv("PDX_response.Erlotinib.tsv", 
                       sep = "\t", index_col=0, decimal = ",")
PDXRerlo.loc[PDXRerlo.iloc[:,0] == 'R'] = 0
PDXRerlo.loc[PDXRerlo.iloc[:,0] == 'S'] = 1
PDXRerlo = PDXRerlo.loc[ls3,:]
Ytserlo = PDXRerlo['response'].values  

hdm1 = 32
hdm2 = 16
hdm3 = 256
rate1 = 0.5
rate2 = 0.8
rate3 = 0.5
rate4 = 0.3

scalerGDSC = sk.StandardScaler()
scalerGDSC.fit(GDSCEv2.values)
X_trainE = scalerGDSC.transform(GDSCEv2.values)
X_testEerlo = scalerGDSC.transform(PDXEerlo.values)    
X_testEcet = scalerGDSC.transform(PDXEcet.values)    

X_trainM = np.nan_to_num(GDSCMv2.values)
X_trainC = np.nan_to_num(GDSCCv2.values)
X_testMerlo = np.nan_to_num(PDXMerlo.values)
X_testCerlo = np.nan_to_num(PDXCerlo.values)
X_testMcet = np.nan_to_num(PDXMcet.values)
X_testCcet = np.nan_to_num(PDXCcet.values)

TX_testEerlo = torch.FloatTensor(X_testEerlo)
TX_testMerlo = torch.FloatTensor(X_testMerlo)
TX_testCerlo = torch.FloatTensor(X_testCerlo)
ty_testEerlo = torch.FloatTensor(Ytserlo.astype(int))

TX_testEcet = torch.FloatTensor(X_testEcet)
TX_testMcet = torch.FloatTensor(X_testMcet)
TX_testCcet = torch.FloatTensor(X_testCcet)
ty_testEcet = torch.FloatTensor(Ytscet.astype(int))

n_sampE, IE_dim = X_trainE.shape
n_sampM, IM_dim = X_trainM.shape
n_sampC, IC_dim = X_trainC.shape

h_dim1 = hdm1
h_dim2 = hdm2
h_dim3 = hdm3        
Z_in = h_dim1 + h_dim2 + h_dim3

costtr = []
auctr = []
costts = []
aucts = []

class AEE(nn.Module):
    def __init__(self):
        super(AEE, self).__init__()
        self.EnE = torch.nn.Sequential(
            nn.Linear(IE_dim, h_dim1),
            nn.BatchNorm1d(h_dim1),
            nn.ReLU(),
            nn.Dropout(rate1))
    def forward(self, x):
        output = self.EnE(x)
        return output

class AEM(nn.Module):
    def __init__(self):
        super(AEM, self).__init__()
        self.EnM = torch.nn.Sequential(
            nn.Linear(IM_dim, h_dim2),
            nn.BatchNorm1d(h_dim2),
            nn.ReLU(),
            nn.Dropout(rate2))
    def forward(self, x):
        output = self.EnM(x)
        return output    


class AEC(nn.Module):
    def __init__(self):
        super(AEC, self).__init__()
        self.EnC = torch.nn.Sequential(
            nn.Linear(IM_dim, h_dim3),
            nn.BatchNorm1d(h_dim3),
            nn.ReLU(),
            nn.Dropout(rate3))
    def forward(self, x):
        output = self.EnC(x)
        return output       

class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.FC = torch.nn.Sequential(
            nn.Linear(Z_in, 1),
            nn.Dropout(rate4),
            nn.Sigmoid())
    def forward(self, x):
        return self.FC(x)

torch.cuda.manual_seed_all(42)

AutoencoderE = torch.load('EGFRv2Exprs.pt')
AutoencoderM = torch.load('EGFRv2Mut.pt')
AutoencoderC = torch.load('EGFRv2CNA.pt')

Clas = torch.load('EGFRv2Class.pt')

AutoencoderE.eval()
AutoencoderM.eval()
AutoencoderC.eval()
Clas.eval()

ZEX = AutoencoderE(torch.FloatTensor(X_trainE))
ZMX = AutoencoderM(torch.FloatTensor(X_trainM))
ZCX = AutoencoderC(torch.FloatTensor(X_trainC))
ZTX = torch.cat((ZEX, ZMX, ZCX), 1)
ZTX = F.normalize(ZTX, p=2, dim=0)
PredX = Clas(ZTX)
AUCt = roc_auc_score(Y, PredX.detach().numpy())
print(AUCt)

ZETerlo = AutoencoderE(TX_testEerlo)
ZMTerlo = AutoencoderM(TX_testMerlo)
ZCTerlo = AutoencoderC(TX_testCerlo)
ZTTerlo = torch.cat((ZETerlo, ZMTerlo, ZCTerlo), 1)
ZTTerlo = F.normalize(ZTTerlo, p=2, dim=0)
PredTerlo = Clas(ZTTerlo)
AUCterlo = roc_auc_score(Ytserlo, PredTerlo.detach().numpy())
print(AUCterlo)

ZETcet = AutoencoderE(TX_testEcet)
ZMTcet = AutoencoderM(TX_testMcet)
ZCTcet = AutoencoderC(TX_testCcet)
ZTTcet = torch.cat((ZETcet, ZMTcet, ZCTcet), 1)
ZTTcet = F.normalize(ZTTcet, p=2, dim=0)
PredTcet = Clas(ZTTcet)
AUCtcet = roc_auc_score(Ytscet, PredTcet.detach().numpy())
print(AUCtcet)



0.9440567436313729
0.7222222222222222
0.8


In [2]:
PRADE = pd.read_csv("TCGA-PRAD_exprs.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
PRADE = pd.DataFrame.transpose(PRADE)

PRADM = pd.read_csv("TCGA-PRAD_mutations.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
PRADM = pd.DataFrame.transpose(PRADM)
PRADM = PRADM.loc[:,~PRADM.columns.duplicated()]

PRADC = pd.read_csv("TCGA-PRAD_CNA.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
PRADC = pd.DataFrame.transpose(PRADC)
PRADC = PRADC.loc[:,~PRADC.columns.duplicated()]

PRADM = PRADM.fillna(0)
PRADM[PRADM != 0.0] = 1
PRADC = PRADC.fillna(0)
PRADC[PRADC != 0.0] = 1

#PRADE.rename(lambda x : x[0:11], axis = "index", inplace=True)  
#PRADM.rename(lambda x : x[0:11], axis = "index", inplace=True)   
#PRADC.rename(lambda x : x[0:11], axis = "index", inplace=True)   

lsPRAD = PRADE.index.intersection(PRADM.index)
lsPRAD = lsPRAD.intersection(PRADC.index)
lsPRAD = pd.unique(lsPRAD)

PRADE = PRADE.loc[lsPRAD,ls]
PRADM = PRADM.loc[lsPRAD,ls]
PRADC = PRADC.loc[lsPRAD,ls]

print(PRADE.shape)
print(PRADM.shape)
print(PRADC.shape)

AutoencoderE.eval()
AutoencoderM.eval()
AutoencoderC.eval()
Clas.eval()

PRADE2 = np.nan_to_num(PRADE.values)
PRADM2 = np.nan_to_num(PRADM.values)
PRADC2 = np.nan_to_num(PRADC.values)

NPRADE2 = scalerGDSC.transform(PRADE2)    

PRADexprs = torch.FloatTensor(NPRADE2)
PRADmut = torch.FloatTensor(PRADM2)
PRADcna = torch.FloatTensor(PRADC2)

PRADZE = AutoencoderE(PRADexprs)
PRADZM = AutoencoderM(PRADmut)
PRADZC = AutoencoderC(PRADcna)

PRADZT = torch.cat((PRADZE, PRADZM, PRADZC), 1)
PRADZTX = F.normalize(PRADZT, p=2, dim=0)
PredPRAD = Clas(PRADZTX)

#print(PredPRAD.detach().numpy())

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


(492, 13081)
(492, 13081)
(492, 13081)


In [3]:
KIRPE = pd.read_csv("TCGA-KIRP_exprs.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
KIRPE = pd.DataFrame.transpose(KIRPE)

KIRPM = pd.read_csv("TCGA-KIRP_mutations.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
KIRPM = pd.DataFrame.transpose(KIRPM)
KIRPM = KIRPM.loc[:,~KIRPM.columns.duplicated()]

KIRPC = pd.read_csv("TCGA-KIRP_CNA.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
KIRPC = pd.DataFrame.transpose(KIRPC)
KIRPC = KIRPC.loc[:,~KIRPC.columns.duplicated()]

KIRPM = KIRPM.fillna(0)
KIRPM[KIRPM != 0.0] = 1
KIRPC = KIRPC.fillna(0)
KIRPC[KIRPC != 0.0] = 1

#KIRPE.rename(lambda x : x[0:11], axis = "index", inplace=True)  
#KIRPM.rename(lambda x : x[0:11], axis = "index", inplace=True)   
#KIRPC.rename(lambda x : x[0:11], axis = "index", inplace=True)   

lsKIRP = KIRPE.index.intersection(KIRPM.index)
lsKIRP = lsKIRP.intersection(KIRPC.index)
lsKIRP = pd.unique(lsKIRP)

KIRPE = KIRPE.loc[lsKIRP,ls]
KIRPM = KIRPM.loc[lsKIRP,ls]
KIRPC = KIRPC.loc[lsKIRP,ls]

print(KIRPE.shape)
print(KIRPM.shape)
print(KIRPC.shape)

AutoencoderE.eval()
AutoencoderM.eval()
AutoencoderC.eval()
Clas.eval()

KIRPE2 = np.nan_to_num(KIRPE.values)
KIRPM2 = np.nan_to_num(KIRPM.values)
KIRPC2 = np.nan_to_num(KIRPC.values)

NKIRPE2 = scalerGDSC.transform(KIRPE2)    

KIRPexprs = torch.FloatTensor(NKIRPE2)
KIRPmut = torch.FloatTensor(KIRPM2)
KIRPcna = torch.FloatTensor(KIRPC2)

KIRPZE = AutoencoderE(KIRPexprs)
KIRPZM = AutoencoderM(KIRPmut)
KIRPZC = AutoencoderC(KIRPcna)

KIRPZT = torch.cat((KIRPZE, KIRPZM, KIRPZC), 1)
KIRPZTX = F.normalize(KIRPZT, p=2, dim=0)
PredKIRP = Clas(KIRPZTX)

#print(PredKIRP.detach().numpy())

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


(161, 13081)
(161, 13081)
(161, 13081)


In [4]:
BLCAE = pd.read_csv("TCGA-BLCA_exprs.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
BLCAE = pd.DataFrame.transpose(BLCAE)

BLCAM = pd.read_csv("TCGA-BLCA_mutations.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
BLCAM = pd.DataFrame.transpose(BLCAM)
BLCAM = BLCAM.loc[:,~BLCAM.columns.duplicated()]

BLCAC = pd.read_csv("TCGA-BLCA_CNA.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
BLCAC = pd.DataFrame.transpose(BLCAC)
BLCAC = BLCAC.loc[:,~BLCAC.columns.duplicated()]

BLCAM = BLCAM.fillna(0)
BLCAM[BLCAM != 0.0] = 1
BLCAC = BLCAC.fillna(0)
BLCAC[BLCAC != 0.0] = 1

#BLCAE.rename(lambda x : x[0:11], axis = "index", inplace=True)  
#BLCAM.rename(lambda x : x[0:11], axis = "index", inplace=True)   
#BLCAC.rename(lambda x : x[0:11], axis = "index", inplace=True)   

lsBLCA = BLCAE.index.intersection(BLCAM.index)
lsBLCA = lsBLCA.intersection(BLCAC.index)
lsBLCA = pd.unique(lsBLCA)

BLCAE = BLCAE.loc[lsBLCA,ls]
BLCAM = BLCAM.loc[lsBLCA,ls]
BLCAC = BLCAC.loc[lsBLCA,ls]

print(BLCAE.shape)
print(BLCAM.shape)
print(BLCAC.shape)

AutoencoderE.eval()
AutoencoderM.eval()
AutoencoderC.eval()
Clas.eval()

BLCAE2 = np.nan_to_num(BLCAE.values)
BLCAM2 = np.nan_to_num(BLCAM.values)
BLCAC2 = np.nan_to_num(BLCAC.values)

NBLCAE2 = scalerGDSC.transform(BLCAE2)    

BLCAexprs = torch.FloatTensor(NBLCAE2)
BLCAmut = torch.FloatTensor(BLCAM2)
BLCAcna = torch.FloatTensor(BLCAC2)

BLCAZE = AutoencoderE(BLCAexprs)
BLCAZM = AutoencoderM(BLCAmut)
BLCAZC = AutoencoderC(BLCAcna)

BLCAZT = torch.cat((BLCAZE, BLCAZM, BLCAZC), 1)
BLCAZTX = F.normalize(BLCAZT, p=2, dim=0)
PredBLCA = Clas(BLCAZTX)

#print(PredBLCA.detach().numpy())

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


(123, 13081)
(123, 13081)
(123, 13081)


In [5]:
BRCAE = pd.read_csv("TCGA-BRCA_exprs.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
BRCAE = pd.DataFrame.transpose(BRCAE)

BRCAM = pd.read_csv("TCGA-BRCA_mutations.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
BRCAM = pd.DataFrame.transpose(BRCAM)
BRCAM = BRCAM.loc[:,~BRCAM.columns.duplicated()]

BRCAC = pd.read_csv("TCGA-BRCA_CNA.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
BRCAC = pd.DataFrame.transpose(BRCAC)
BRCAC = BRCAC.loc[:,~BRCAC.columns.duplicated()]

BRCAM = BRCAM.fillna(0)
BRCAM[BRCAM != 0.0] = 1
BRCAC = BRCAC.fillna(0)
BRCAC[BRCAC != 0.0] = 1

#BRCAE.rename(lambda x : x[0:11], axis = "index", inplace=True)  
#BRCAM.rename(lambda x : x[0:11], axis = "index", inplace=True)   
#BRCAC.rename(lambda x : x[0:11], axis = "index", inplace=True)   

lsBRCA = BRCAE.index.intersection(BRCAM.index)
lsBRCA = lsBRCA.intersection(BRCAC.index)
lsBRCA = pd.unique(lsBRCA)

BRCAE = BRCAE.loc[lsBRCA,ls]
BRCAM = BRCAM.loc[lsBRCA,ls]
BRCAC = BRCAC.loc[lsBRCA,ls]

print(BRCAE.shape)
print(BRCAM.shape)
print(BRCAC.shape)

AutoencoderE.eval()
AutoencoderM.eval()
AutoencoderC.eval()
Clas.eval()

BRCAE2 = np.nan_to_num(BRCAE.values)
BRCAM2 = np.nan_to_num(BRCAM.values)
BRCAC2 = np.nan_to_num(BRCAC.values)

NBRCAE2 = scalerGDSC.transform(BRCAE2)    

BRCAexprs = torch.FloatTensor(NBRCAE2)
BRCAmut = torch.FloatTensor(BRCAM2)
BRCAcna = torch.FloatTensor(BRCAC2)

BRCAZE = AutoencoderE(BRCAexprs)
BRCAZM = AutoencoderM(BRCAmut)
BRCAZC = AutoencoderC(BRCAcna)

BRCAZT = torch.cat((BRCAZE, BRCAZM, BRCAZC), 1)
BRCAZTX = F.normalize(BRCAZT, p=2, dim=0)
PredBRCA = Clas(BRCAZTX)

#print(PredBRCA.detach().numpy())

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


(921, 13081)
(921, 13081)
(921, 13081)


In [6]:
PAADE = pd.read_csv("TCGA-PAAD_exprs.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
PAADE = pd.DataFrame.transpose(PAADE)

PAADM = pd.read_csv("TCGA-PAAD_mutations.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
PAADM = pd.DataFrame.transpose(PAADM)
PAADM = PAADM.loc[:,~PAADM.columns.duplicated()]

PAADC = pd.read_csv("TCGA-PAAD_CNA.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
PAADC = pd.DataFrame.transpose(PAADC)
PAADC = PAADC.loc[:,~PAADC.columns.duplicated()]

PAADM = PAADM.fillna(0)
PAADM[PAADM != 0.0] = 1
PAADC = PAADC.fillna(0)
PAADC[PAADC != 0.0] = 1

#PAADE.rename(lambda x : x[0:11], axis = "index", inplace=True)  
#PAADM.rename(lambda x : x[0:11], axis = "index", inplace=True)   
#PAADC.rename(lambda x : x[0:11], axis = "index", inplace=True)   

lsPAAD = PAADE.index.intersection(PAADM.index)
lsPAAD = lsPAAD.intersection(PAADC.index)
lsPAAD = pd.unique(lsPAAD)

PAADE = PAADE.loc[lsPAAD,ls]
PAADM = PAADM.loc[lsPAAD,ls]
PAADC = PAADC.loc[lsPAAD,ls]

print(PAADE.shape)
print(PAADM.shape)
print(PAADC.shape)

AutoencoderE.eval()
AutoencoderM.eval()
AutoencoderC.eval()
Clas.eval()

PAADE2 = np.nan_to_num(PAADE.values)
PAADM2 = np.nan_to_num(PAADM.values)
PAADC2 = np.nan_to_num(PAADC.values)

NPAADE2 = scalerGDSC.transform(PAADE2)    

PAADexprs = torch.FloatTensor(NPAADE2)
PAADmut = torch.FloatTensor(PAADM2)
PAADcna = torch.FloatTensor(PAADC2)

PAADZE = AutoencoderE(PAADexprs)
PAADZM = AutoencoderM(PAADmut)
PAADZC = AutoencoderC(PAADcna)

PAADZT = torch.cat((PAADZE, PAADZM, PAADZC), 1)
PAADZTX = F.normalize(PAADZT, p=2, dim=0)
PredPAAD = Clas(PAADZTX)

#print(PredPAAD.detach().numpy())

(130, 13081)
(130, 13081)
(130, 13081)


Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


In [7]:
LUADE = pd.read_csv("TCGA-LUAD_exprs.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
LUADE = pd.DataFrame.transpose(LUADE)

LUADM = pd.read_csv("TCGA-LUAD_mutations.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
LUADM = pd.DataFrame.transpose(LUADM)
LUADM = LUADM.loc[:,~LUADM.columns.duplicated()]

LUADC = pd.read_csv("TCGA-LUAD_CNA.tsv", 
                   sep = "\t", index_col=0, decimal = ".")
LUADC = pd.DataFrame.transpose(LUADC)
LUADC = LUADC.loc[:,~LUADC.columns.duplicated()]

LUADM = LUADM.fillna(0)
LUADM[LUADM != 0.0] = 1
LUADC = LUADC.fillna(0)
LUADC[LUADC != 0.0] = 1

#LUADE.rename(lambda x : x[0:11], axis = "index", inplace=True)  
#LUADM.rename(lambda x : x[0:11], axis = "index", inplace=True)   
#LUADC.rename(lambda x : x[0:11], axis = "index", inplace=True)   

lsLUAD = LUADE.index.intersection(LUADM.index)
lsLUAD = lsLUAD.intersection(LUADC.index)
lsLUAD = pd.unique(lsLUAD)

LUADE = LUADE.loc[lsLUAD,ls]
LUADM = LUADM.loc[lsLUAD,ls]
LUADC = LUADC.loc[lsLUAD,ls]

print(LUADE.shape)
print(LUADM.shape)
print(LUADC.shape)

AutoencoderE.eval()
AutoencoderM.eval()
AutoencoderC.eval()
Clas.eval()

LUADE2 = np.nan_to_num(LUADE.values)
LUADM2 = np.nan_to_num(LUADM.values)
LUADC2 = np.nan_to_num(LUADC.values)

NLUADE2 = scalerGDSC.transform(LUADE2)    

LUADexprs = torch.FloatTensor(NLUADE2)
LUADmut = torch.FloatTensor(LUADM2)
LUADcna = torch.FloatTensor(LUADC2)

LUADZE = AutoencoderE(LUADexprs)
LUADZM = AutoencoderM(LUADmut)
LUADZC = AutoencoderC(LUADcna)

LUADZT = torch.cat((LUADZE, LUADZM, LUADZC), 1)
LUADZTX = F.normalize(LUADZT, p=2, dim=0)
PredLUAD = Clas(LUADZTX)

#print(PredLUAD.detach().numpy())

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike


(475, 13081)
(475, 13081)
(475, 13081)


In [8]:
from scipy.stats.stats import pearsonr
from scipy.stats import spearmanr
import statsmodels.api as sm
from mne.stats import bonferroni_correction

lsEGFR = [10000, 102, 10252, 10253,10254,1026,1027,107,108,109,111,11140,112,113,114,1147,115,117145,1173,1175,1211,1213,1385,1445,156,160,161,163,1950,1956,196883,2060,207,208,2308,2309,23239,2475,253260,2549,26018,2885,2931,29924,30011,3164,3265,3320,3709,3710,3845,4193,4303,4893,5136,5153,5170,5290,5295,5335,5566,5567,5568,5573,5575,5576,5577,5578,5580,5581,5582,55824,5594,5595,5604,5605,572,5728,57761,58513,5894,6199,6233,64223,6456,6464,6654,6714,6868,7249,728590,729120,730418,7311,731292,7529,79109,801,8027,8038,805,808,814,842,84335,867,9146,983,998]

#lsEGFR = [10000,1026,1027,10298,10718,1398,1399,145957,1839,1950,1956,1978,2002,2064,2065,2066,2069,207,208,23533,23642,2475,25,2549,25759,27,2885,2932,3084,3265,369,3725,374,3845,399694,4609,4690,4893,5058,5062,5063,5290,5291,5293,5294,5295,5296,5335,53358,5336,5578,5579,5582,5594,5595,5599,5601,5602,5604,5605,5609,56924,57144,572,5747,5894,6198,6199,6416,6464,6654,6655,6714,673,6776,6777,685,7039,815,816,817,818,8440,8503,867,868,9542]

In [12]:
listEGFR = PRADE.columns.intersection(lsEGFR)
PRADEEGFR = PRADE[listEGFR]
PRADMEGFR = PRADM[listEGFR]
PRADCEGFR = PRADC[listEGFR]

In [13]:
X = PRADEEGFR
y = PredPRAD.detach().numpy()

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.999
Model:,OLS,Adj. R-squared:,0.999
Method:,Least Squares,F-statistic:,3331.0
Date:,"Sat, 12 Jan 2019",Prob (F-statistic):,0.0
Time:,18:16:35,Log-Likelihood:,1340.8
No. Observations:,492,AIC:,-2480.0
Df Residuals:,391,BIC:,-2056.0
Df Model:,101,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
102.0,0.0038,0.003,1.171,0.242,-0.003,0.010
107.0,-0.0015,0.001,-0.982,0.327,-0.004,0.001
108.0,-0.0006,0.002,-0.377,0.706,-0.004,0.003
109.0,-0.0021,0.004,-0.585,0.559,-0.009,0.005
111.0,-0.0038,0.003,-1.432,0.153,-0.009,0.001
112.0,-0.0092,0.004,-2.553,0.011,-0.016,-0.002
113.0,0.0046,0.003,1.325,0.186,-0.002,0.011
114.0,0.0184,0.021,0.868,0.386,-0.023,0.060
115.0,-0.0002,0.004,-0.047,0.963,-0.007,0.007

0,1,2,3
Omnibus:,63.897,Durbin-Watson:,1.907
Prob(Omnibus):,0.0,Jarque-Bera (JB):,218.126
Skew:,0.56,Prob(JB):,4.31e-48
Kurtosis:,6.063,Cond. No.,1340.0


In [14]:
print(bonferroni_correction(model.pvalues, alpha=0.05))

(array([False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False,  True,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False,  True]), array([2.44677740e+01, 3.29985438e+01, 7.13092918e+01, 5.64211888e+01,
       1.54509011e+01, 1.11658624e+00, 1.87812262e+01, 3.89558813e+01,
       9.72150779e+01, 5.78711869e+01, 1.56777500e+01, 8.9568282

In [15]:
listEGFR = KIRPE.columns.intersection(lsEGFR)
KIRPEEGFR = KIRPE[listEGFR]
KIRPMEGFR = KIRPM[listEGFR]
KIRPCEGFR = KIRPC[listEGFR]   

In [16]:
X = KIRPEEGFR
y = PredKIRP.detach().numpy()

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.998
Model:,OLS,Adj. R-squared:,0.996
Method:,Least Squares,F-statistic:,356.2
Date:,"Sat, 12 Jan 2019",Prob (F-statistic):,2.84e-62
Time:,18:16:45,Log-Likelihood:,409.04
No. Observations:,161,AIC:,-616.1
Df Residuals:,60,BIC:,-304.9
Df Model:,101,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
102.0,-0.0203,0.011,-1.884,0.064,-0.042,0.001
107.0,0.0050,0.015,0.340,0.735,-0.024,0.034
108.0,-0.0149,0.006,-2.486,0.016,-0.027,-0.003
109.0,0.0090,0.008,1.139,0.259,-0.007,0.025
111.0,0.0058,0.005,1.084,0.283,-0.005,0.016
112.0,0.0015,0.013,0.118,0.906,-0.024,0.027
113.0,-0.0260,0.012,-2.093,0.041,-0.051,-0.001
114.0,-0.0026,0.014,-0.180,0.858,-0.031,0.026
115.0,0.0049,0.012,0.412,0.682,-0.019,0.029

0,1,2,3
Omnibus:,0.349,Durbin-Watson:,1.769
Prob(Omnibus):,0.84,Jarque-Bera (JB):,0.161
Skew:,-0.067,Prob(JB):,0.922
Kurtosis:,3.077,Cond. No.,941.0


In [17]:
print(bonferroni_correction(model.pvalues, alpha=0.05))

(array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), array([6.50924133e+00, 7.42508317e+01, 1.58961376e+00, 2.61829337e+01,
       2.85630855e+01, 9.15220271e+01, 4.09864261e+00, 8.66437911e+01,
       6.88992687e+01, 8.67118407e+01, 1.81588898e+00, 7.7648348

In [18]:
listEGFR = BLCAE.columns.intersection(lsEGFR)
BLCAEEGFR = BLCAE[listEGFR]
BLCAMEGFR = BLCAM[listEGFR]
BLCACEGFR = BLCAC[listEGFR]   

In [19]:
X = BLCAEEGFR
y = PredBLCA.detach().numpy()

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.998
Model:,OLS,Adj. R-squared:,0.987
Method:,Least Squares,F-statistic:,91.05
Date:,"Sat, 12 Jan 2019",Prob (F-statistic):,4.86e-18
Time:,18:16:52,Log-Likelihood:,291.96
No. Observations:,123,AIC:,-381.9
Df Residuals:,22,BIC:,-97.88
Df Model:,101,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
102.0,0.0508,0.025,2.072,0.050,-4.41e-05,0.102
107.0,0.0081,0.011,0.733,0.471,-0.015,0.031
108.0,0.0587,0.039,1.503,0.147,-0.022,0.140
109.0,0.0054,0.021,0.253,0.803,-0.039,0.050
111.0,0.0114,0.018,0.620,0.541,-0.027,0.050
112.0,-0.0243,0.023,-1.037,0.311,-0.073,0.024
113.0,0.0279,0.022,1.292,0.210,-0.017,0.073
114.0,0.4896,0.285,1.715,0.100,-0.102,1.081
115.0,-0.0062,0.029,-0.215,0.831,-0.066,0.054

0,1,2,3
Omnibus:,7.83,Durbin-Watson:,2.262
Prob(Omnibus):,0.02,Jarque-Bera (JB):,8.086
Skew:,0.465,Prob(JB):,0.0175
Kurtosis:,3.845,Cond. No.,3010.0


In [20]:
print(bonferroni_correction(model.pvalues, alpha=0.05))

(array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), array([  5.06840256,  47.60953626,  14.86290717,  81.05458141,
        54.68706095,  31.41998312,  21.17450173,  10.13547604,
        83.97572787,  16.15018927,  14.39431539,  40.7389554 ,
        48.762267

In [21]:
listEGFR = BRCAE.columns.intersection(lsEGFR)
BRCAEEGFR = BRCAE[listEGFR]
BRCAMEGFR = BRCAM[listEGFR]
BRCACEGFR = BRCAC[listEGFR]  

In [22]:
X = BRCAEEGFR
y = PredBRCA.detach().numpy()

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.999
Model:,OLS,Adj. R-squared:,0.999
Method:,Least Squares,F-statistic:,6893.0
Date:,"Sat, 12 Jan 2019",Prob (F-statistic):,0.0
Time:,18:16:54,Log-Likelihood:,2466.9
No. Observations:,921,AIC:,-4732.0
Df Residuals:,820,BIC:,-4245.0
Df Model:,101,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
102.0,0.0011,0.002,0.672,0.502,-0.002,0.004
107.0,-0.0011,0.000,-2.468,0.014,-0.002,-0.000
108.0,-0.0013,0.001,-2.000,0.046,-0.003,-2.48e-05
109.0,0.0002,0.001,0.183,0.855,-0.002,0.003
111.0,-0.0003,0.001,-0.611,0.541,-0.001,0.001
112.0,0.0015,0.001,1.019,0.309,-0.001,0.004
113.0,-0.0020,0.001,-1.351,0.177,-0.005,0.001
114.0,-0.0026,0.002,-1.382,0.167,-0.006,0.001
115.0,0.0008,0.001,0.651,0.515,-0.002,0.003

0,1,2,3
Omnibus:,126.984,Durbin-Watson:,1.943
Prob(Omnibus):,0.0,Jarque-Bera (JB):,391.225
Skew:,0.676,Prob(JB):,1.1100000000000001e-85
Kurtosis:,5.893,Cond. No.,311.0


In [23]:
print(bonferroni_correction(model.pvalues, alpha=0.05))

(array([False, False, False, False, False, False, False, False, False,
        True, False, False, False, False, False, False,  True, False,
       False, False, False, False, False, False, False, False, False,
       False,  True, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), array([5.06825579e+01, 1.39415496e+00, 4.62728727e+00, 8.63416692e+01,
       5.46703973e+01, 3.11693984e+01, 1.78878580e+01, 1.69076294e+01,
       5.20628937e+01, 7.63775258e-03, 5.05012625e+01, 9.7174779

In [24]:
listEGFR = PAADE.columns.intersection(lsEGFR)
PAADEEGFR = PAADE[listEGFR]
PAADMEGFR = PAADM[listEGFR]
PAADCEGFR = PAADC[listEGFR]   

In [25]:
X = PAADEEGFR
y = PredPAAD.detach().numpy()

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.999
Model:,OLS,Adj. R-squared:,0.995
Method:,Least Squares,F-statistic:,251.5
Date:,"Sat, 12 Jan 2019",Prob (F-statistic):,1.7800000000000001e-29
Time:,18:17:00,Log-Likelihood:,364.21
No. Observations:,130,AIC:,-526.4
Df Residuals:,29,BIC:,-236.8
Df Model:,101,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
102.0,-0.0263,0.023,-1.162,0.255,-0.072,0.020
107.0,0.0055,0.013,0.414,0.682,-0.022,0.032
108.0,-0.0179,0.025,-0.717,0.479,-0.069,0.033
109.0,0.0112,0.019,0.600,0.553,-0.027,0.049
111.0,0.0037,0.013,0.282,0.780,-0.023,0.031
112.0,-0.0120,0.023,-0.527,0.602,-0.058,0.034
113.0,0.0143,0.014,1.031,0.311,-0.014,0.043
114.0,-0.0118,0.039,-0.302,0.765,-0.092,0.068
115.0,0.0008,0.036,0.022,0.983,-0.072,0.074

0,1,2,3
Omnibus:,0.52,Durbin-Watson:,2.062
Prob(Omnibus):,0.771,Jarque-Bera (JB):,0.425
Skew:,0.14,Prob(JB):,0.808
Kurtosis:,2.98,Cond. No.,1840.0


In [26]:
print(bonferroni_correction(model.pvalues, alpha=0.05))

(array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), array([ 25.71654309,  68.88518558,  48.36811448,  55.88769998,
        78.77410576,  60.81762683,  31.41328896,  77.27234893,
        99.26457938,  27.69865112,  99.63968843,  41.88315265,
        75.667971

In [27]:
listEGFR = LUADE.columns.intersection(lsEGFR)
LUADEEGFR = LUADE[listEGFR]
LUADMEGFR = LUADM[listEGFR]
LUADCEGFR = LUADC[listEGFR]

In [28]:
X = LUADEEGFR
y = PredLUAD.detach().numpy()

# Note the difference in argument order
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model

# Print out the statistics
model.summary()

0,1,2,3
Dep. Variable:,y,R-squared:,0.998
Model:,OLS,Adj. R-squared:,0.998
Method:,Least Squares,F-statistic:,1895.0
Date:,"Sat, 12 Jan 2019",Prob (F-statistic):,0.0
Time:,18:17:07,Log-Likelihood:,1160.5
No. Observations:,475,AIC:,-2119.0
Df Residuals:,374,BIC:,-1699.0
Df Model:,101,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
102.0,0.0028,0.003,0.825,0.410,-0.004,0.010
107.0,-0.0021,0.002,-1.161,0.247,-0.006,0.001
108.0,0.0013,0.003,0.412,0.681,-0.005,0.007
109.0,0.0013,0.003,0.524,0.600,-0.004,0.006
111.0,-0.0006,0.002,-0.411,0.681,-0.004,0.002
112.0,0.0020,0.003,0.771,0.441,-0.003,0.007
113.0,-0.0034,0.002,-1.652,0.099,-0.007,0.001
114.0,-0.0046,0.004,-1.172,0.242,-0.012,0.003
115.0,0.0020,0.003,0.798,0.426,-0.003,0.007

0,1,2,3
Omnibus:,43.558,Durbin-Watson:,2.088
Prob(Omnibus):,0.0,Jarque-Bera (JB):,91.38
Skew:,0.522,Prob(JB):,1.44e-20
Kurtosis:,4.878,Cond. No.,366.0


In [29]:
print(bonferroni_correction(model.pvalues, alpha=0.05))

(array([False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False]), array([4.14015235e+01, 2.48985067e+01, 6.87655871e+01, 6.06407957e+01,
       6.88273009e+01, 4.45728983e+01, 1.00290905e+01, 2.44236844e+01,
       4.29890891e+01, 2.25237889e+01, 9.06980328e+01, 2.2333854