Switch to side-by-side view

--- a
+++ b/DeepMod_tools/sum_chr_mod.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+import os, sys, string
+import multiprocessing
+import time;
+import glob
+from collections import defaultdict;
+
+
+def mprint(mstr):
+   print(mstr); sys.stdout.flush()
+
+if len(sys.argv)<4:
+   print ("Usage: python {} pred_folder-of-DeepMod Base-of-interest unique-fileid-in-sum-file [chr-list]".format(sys.argv[0]))
+   print ("       pred_folder-of-DeepMod: the prediction must in its sub-folder.")
+   sys.exit(1)
+
+pred_folder = sys.argv[1]
+baseofint = sys.argv[2]
+sum_fileid = sys.argv[3]
+
+
+if len(sys.argv)>4:
+   chrkeys = ["%s" % cid for cid in sys.argv[4].split(',')]
+else:
+   chrkeys = []
+   for i in range(1, 23):
+      chrkeys.append("chr%d" % i)
+   chrkeys.append("chrX")
+   chrkeys.append("chrY")
+   chrkeys.append("chrM")
+
+chrkeys = set(chrkeys)
+
+#####################################
+def readbed2(bedf):
+   cur_ad = defaultdict();
+   with open(bedf, 'r') as mr:
+      while True:
+         line = mr.readline();
+         if not line: break;
+         line = line.strip();
+         lsp = line.split();
+         cur_ad[(lsp[0], int(lsp[1]), lsp[5])] = [int(lsp[9]), int(lsp[11])]
+   return cur_ad
+
+def mergeMod(g_ad, cur_ad):
+    for k in cur_ad:
+       if k in g_ad:
+            g_ad[k][0] += cur_ad[k][0]
+            g_ad[k][1] += cur_ad[k][1]
+       else: g_ad[k] = cur_ad[k]
+      
+def save_mod(res_file, amod_dict, baseOfin):
+   poskeys = sorted(list(amod_dict.keys()))
+   for pk in poskeys:
+      if amod_dict[pk][1]==0: del amod_dict[pk]
+
+   poskeys = list(amod_dict.keys())
+   poskeys = sorted(poskeys);
+   with open(res_file, 'w') as mw:
+      for pk in poskeys:
+          mw.write('%s %d %d %s %d %s  %d %d 0,0,0 %d %d %d\n' % (pk[0], pk[1],pk[1]+1, baseOfin,amod_dict[pk][0] if amod_dict[pk][0]<1000 else 1000,pk[2], pk[1],pk[1]+1, amod_dict[pk][0], int(amod_dict[pk][1]*100/amod_dict[pk][0]) if amod_dict[pk][0]>0 else 0, amod_dict[pk][1]  ))
+       
+
+def sum_amod_handler(run_Q):
+   handli = 0;
+   while not run_Q.empty():
+      try:
+          ck, pred_folder, baseOfin = run_Q.get(block=False)
+          #print (ck, pred_folder, baseOfin); continue
+      except:
+         break;
+
+      allbedfiles = glob.glob(os.path.join(pred_folder, ("*/*/*/*.%s-.%s.bed" % (ck, baseOfin)) ))
+      allbedfiles.extend(glob.glob(os.path.join(pred_folder, ("*/*/*.%s-.%s.bed" % (ck, baseOfin)) )))
+      allbedfiles.extend(glob.glob(os.path.join(pred_folder, ("*/*.%s-.%s.bed" % (ck, baseOfin)) )))
+      mprint ("%s - %s: %d" % (ck, baseOfin, len(allbedfiles) )) 
+      allbedfiles.extend(glob.glob(os.path.join(pred_folder, ("*/*/*/*.%s+.%s.bed" % (ck, baseOfin)) )))
+      allbedfiles.extend(glob.glob(os.path.join(pred_folder, ("*/*/*.%s+.%s.bed" % (ck, baseOfin)) )))
+      allbedfiles.extend(glob.glob(os.path.join(pred_folder, ("*/*.%s+.%s.bed" % (ck, baseOfin)) )))
+      mprint ("%s -+ %s: %d" % (ck, baseOfin, len(allbedfiles) ))
+
+      #  0    1     2     3 4 5   6      7      8   9 0 1
+      #chr1 949802 949803 T 1 - 949802 949803 0,0,0 1 0 0 
+      amod_dict = defaultdict();      
+      res_file = "%s/%s.%s.%s.bed" % (pred_folder, sum_fileid, ck, baseOfin)
+      for bedf_ind in range(len(allbedfiles)):
+          mprint("\t %s %s %d/%d" % (ck, baseOfin, bedf_ind+1, len(allbedfiles)))
+          cur_ad = readbed2(allbedfiles[bedf_ind])
+          mergeMod(amod_dict, cur_ad)
+
+      save_mod(res_file, amod_dict, baseOfin)
+
+##############################
+pmanager = multiprocessing.Manager();
+run_Q = pmanager.Queue();
+for ck in chrkeys:
+   run_Q.put((ck, pred_folder, baseofint))
+
+mhandlers = [];
+share_var = (run_Q, )
+m_thread_num = len(chrkeys);
+for i in range(m_thread_num+1):
+   p = multiprocessing.Process(target=sum_amod_handler, args=share_var)
+   p.start();
+   mhandlers.append(p);
+while any(p.is_alive() for p in mhandlers):
+   try:
+      time.sleep(1);
+   except:
+      time.sleep(1);
+      continue;
+
+
+
+
+