[0aa069]: / src / pca_cancer.py

Download this file

31 lines (23 with data), 864 Bytes

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import time
from datetime import datetime
import csv
import numpy as np
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import StratifiedKFold
# Exploratory script: load the saved array, shuffle its rows reproducibly,
# hold out a stratified 25% test split and fit a PCA on the training rows.

# Built as one string (note the two spaces) so the line prints exactly what
# the old `print "Script start at ", ...` statement printed, while being
# valid under both the Python 2 print statement and the Python 3 function.
print("Script start at  %s" % datetime.now().isoformat())

X = np.load('F:/NYU/Hackathon/numpy_array.npy')
Y = X[:, :3]  # patient_id cancer_type tissue_type
X = X[:, 3:]  # rpm

# Shuffle the rows with a fixed seed so runs are repeatable. The permutation
# length is taken from the data instead of a hard-coded 678: with a different
# row count the literal would either silently drop rows or raise IndexError.
RS = np.random.RandomState(90)
perm = RS.permutation(X.shape[0])
Y = Y[perm]
X = X[perm]

# Column 1 of Y (cancer_type, per the comment above) is both the target and
# the stratification key, so class proportions are kept in each split.
# NOTE(review): train_test_split is imported from sklearn.cross_validation at
# the top of the file; that module no longer exists in scikit-learn >= 0.20
# (use sklearn.model_selection) -- confirm the targeted scikit-learn version.
labels = Y[:, 1]
X_train, X_test, Y_train, Y_test = train_test_split(
    X, labels, test_size=0.25, random_state=30, stratify=labels)

# A fractional n_components asks PCA to keep as many components as needed to
# reach that share of explained variance (here 50%) -- see the PCA docs.
p = PCA(n_components=0.5).fit(X_train)
print(p.explained_variance_)