Diff of /linear-reg.py [000000] .. [f8af2c]

Switch to unified view

a b/linear-reg.py
1
import json
2
import csv
3
import numpy as np
4
import os
5
import pandas as pd
6
import random
7
import tensorflow as tf
8
9
from functions import quan_detector, most_repeared_promoter,dataset
10
from sklearn.metrics import confusion_matrix
11
12
from sklearn import datasets, linear_model
13
from sklearn.metrics import mean_squared_error, r2_score
14
15
# Seed NumPy, TensorFlow and the stdlib `random` module with one shared value
# so every generator starts from the same state on each run.
SEED = 42
for seed_rng in (np.random.seed, tf.set_random_seed, random.seed):
    seed_rng(SEED)
18
19
20
# Label table: the `FID` column supplies the individual ids and the `Pheno`
# column supplies the class code of each individual.
# NOTE(review): the file name is spelled 'labes.csv' -- confirm it matches the
# file on disk.
labels_file = 'labes.csv'
labels_df = pd.read_csv(labels_file, index_col=0)
ids_csv = labels_df.FID.tolist()

# One-hot encoding of the phenotype codes; any code other than 1 or 2 raises
# KeyError below.
lab_num = {1: [1, 0],  # positive
           2: [0, 1]}  # negative

# The targets depend only on the label table, so they are encoded once here
# rather than once per promoter file.
dataset_Y = np.array([lab_num[code] for code in labels_df.Pheno.tolist()])

promoters_list = range(1, 2484)
dataset_X = []
for promoter_num in promoters_list:
    promoter_file = 'promoters/chr22_' + str(promoter_num) + '.json'
    # Each promoter file is a JSON object keyed by the individual id (as a
    # string); the value is a sequence whose items are converted with int().
    with open(promoter_file) as json_data:
        ind_var = json.load(json_data)

    # Rows follow the order of `ids_csv`, so they line up with `dataset_Y`.
    # A list comprehension (not map) keeps the rows real lists on Python 3.
    var_num = [[int(symbol) for symbol in ind_var[str(i)]] for i in ids_csv]
    dataset_X.append(var_num)

# NOTE(review): the target shape is hard-coded. Before this reshape the array
# is indexed promoter-first (one entry per file in `promoters_list`), and
# 11908 * 64 * 2580 is not a multiple of the 2483 files read above -- confirm
# these dimensions against the actual data, and that a plain reshape (no
# transpose) yields one row per individual as the models below expect.
dataset_X = np.array(dataset_X).reshape(11908, 64*2580, 1)
N = len(dataset_X)
56
57
58
# Baseline: ordinary least-squares regression fitted on the one-hot targets,
# scored as a classifier on a held-out 10% split.
# NOTE(review): `dataset` comes from the project's `functions` module; this
# code assumes it returns (x_train, y_train, x_test, y_test) in that order and
# feature arrays in the 2-D shape scikit-learn expects -- confirm.
x_train, y_train, x_test, y_test = dataset(dataset_X, dataset_Y, test_ratio=0.1)

# Create the linear regression object and train it on the training split
regr = linear_model.LinearRegression()
regr.fit(x_train, y_train)

# The regressor emits one score per one-hot column; the predicted class is the
# column with the larger score. The true class is recovered the same way.
y_pred = np.argmax(regr.predict(x_test), axis=1)
y_test_num = np.argmax(y_test, axis=1)

# Pin the label set so the matrix is always 2x2: without `labels`, a test
# split (or prediction vector) containing a single class yields a 1x1 matrix
# and the four-way unpack raises ValueError.
# Class index 1 (phenotype code 2) plays the "positive" role in the
# tn/fp/fn/tp naming; accuracy does not depend on that choice.
tn, fp, fn, tp = confusion_matrix(y_test_num, y_pred, labels=[0, 1]).ravel()

# `*1.` forces float division under Python 2
acc = (tp+tn)*1./(tp+fp+tn+fn)
print(acc)
75
# Logistic regression needs class indices rather than one-hot rows, so the
# targets are collapsed to the index of the hot column before re-splitting.
dataset_Y = np.argmax(dataset_Y, axis=-1)
x_train, y_train, x_test, y_test = dataset(dataset_X, dataset_Y, test_ratio=0.1)

# Fit and evaluate the logistic model itself. Previously the LinearRegression
# object `regr` was re-fitted here and `logisticRegr` was never used, so the
# reported number did not come from a logistic regression.
# NOTE(review): scikit-learn estimators expect 2-D feature matrices; confirm
# `dataset` returns x_train/x_test in that shape.
logisticRegr = linear_model.LogisticRegression()
logisticRegr.fit(x_train, y_train)

# Make predictions using the testing set; these are already class indices.
y_pred = logisticRegr.predict(x_test)

# The targets were collapsed to class indices above, so argmax over axis 1
# is only applied if the split still hands back 2-D (one-hot) targets.
y_test_num = y_test if np.ndim(y_test) == 1 else np.argmax(y_test, axis=1)

# Pin the label set so the matrix is always 2x2 and the four-way unpack cannot
# fail when only one class appears in the test split or the predictions.
tn, fp, fn, tp = confusion_matrix(y_test_num, y_pred, labels=[0, 1]).ravel()

# `*1.` forces float division under Python 2
acc = (tp+tn)*1./(tp+fp+tn+fn)
# Same output as the two-item print statement, valid on Python 2 and 3.
print("%s %s" % ("LogisticRegression acc is:", acc))