Diff of /promoter_generator.py [000000] .. [f8af2c]

Switch to side-by-side view

--- a
+++ b/promoter_generator.py
@@ -0,0 +1,52 @@
+import sys
+import os
+import vcf
+import pandas as pd
+import numpy as np
+import csv
+import time
+import json
+
+
+
+def subfinder(mylist, pattern):
+    """Locate the first contiguous occurrence of ``pattern`` inside ``mylist``.
+
+    Args:
+        mylist: Sequence to search in.
+        pattern: Sequence to look for; it is compared against slices of
+            ``mylist`` with ``==``, so both should be of the same type.
+
+    Returns:
+        A ``(start, end)`` tuple such that ``mylist[start:end] == pattern``
+        for the first match, or ``None`` when there is no match. An empty
+        ``pattern`` is treated as "not found" and also yields ``None``.
+    """
+    width = len(pattern)
+    if width == 0:
+        # Nothing to anchor on; avoids indexing pattern[0] on an empty pattern.
+        return None
+    head = pattern[0]
+    # Stop at the last position where the whole pattern can still fit.
+    for start in range(len(mylist) - width + 1):
+        # Cheap first-element check before paying for the slice comparison.
+        if mylist[start] == head and mylist[start:start + width] == pattern:
+            return start, start + width
+    return None
+
+
+def promoter_var_idx(p_idx, promoter, var_idx):
+    """Return the index range that promoter ``p_idx`` occupies in ``var_idx``.
+
+    Args:
+        p_idx: Index of the promoter row to look up in ``promoter``.
+        promoter: Sequence of promoter rows; each row is the list of
+            entries searched for as one contiguous run.
+        var_idx: List of all entries, searched with ``subfinder``.
+
+    Returns:
+        ``(p_start, p_end)`` such that ``var_idx[p_start:p_end]`` equals the
+        selected promoter row.
+
+    Raises:
+        IndexError: If ``p_idx`` is outside ``promoter``.
+        ValueError: If the promoter row does not occur in ``var_idx``.
+    """
+    pmt_var = promoter[p_idx]  # which promoter
+    span = subfinder(var_idx, pmt_var)
+    if span is None:
+        # Unpacking None would otherwise fail with an opaque TypeError.
+        raise ValueError(
+            "promoter %r was not found as a contiguous run in var_idx" % (p_idx,)
+        )
+    p_start, p_end = span
+    return p_start, p_end
+
+# IDs: taken from the "FID" column of the labels table; each one names a
+# per-individual file under 'individual/'.
+labels_file = 'labes.csv'
+labels_df = pd.read_csv(labels_file,index_col=0)
+ids = labels_df.FID.tolist()
+# Read the promoter table; every CSV row becomes one list of fields.
+# The file is opened in binary mode, as the Python 2 csv module expects.
+with open('promoter1.csv', 'rb') as f:
+    reader = csv.reader(f)
+    promoter = list(reader)
+# Read the used variant positions, one entry per line.
+# The comprehension variable is named `line` (not `i`): in Python 2 it leaks
+# into the enclosing scope and would otherwise clobber a loop index.
+with open('all_ALS_var.txt','r') as f:
+    var_idx = [line.replace('\n','') for line in f]
+
+print "number of promoters", len(promoter)
+
+# Only promoters 10..99 are generated here (not the full len(promoter) range).
+# The upper bound is clamped so a table with fewer than 100 rows ends the
+# loop cleanly instead of raising IndexError part-way through.
+for num_pro in range(10, min(100, len(promoter))):
+
+    print "Generating No.",num_pro," promoter in chr",22
+
+    p_start,p_end = promoter_var_idx(num_pro,promoter,var_idx=var_idx)
+    promoter_ind = {}
+    # NOTE(review): every individual's file is re-read for each promoter.
+    # Presumably each file lists one entry per line in the same order as
+    # 'all_ALS_var.txt', so the same slice applies - confirm.
+    for ind in ids:
+        indiv_file = 'individual/'+str(ind)+'.txt'
+        with open(indiv_file,'r') as f:
+            ind_v = [line.replace('\n','') for line in f]
+        promoter_ind[ind] = ind_v[p_start:p_end]
+
+    # One JSON file per promoter, mapping each ID to its slice of entries.
+    with open('promoters/chr22_'+str(num_pro)+'.json', 'w') as fp:
+        json.dump(promoter_ind, fp)
\ No newline at end of file