|
a |
|
b/bin/DeepMod.py |
|
|
1 |
#!/usr/bin/env python |
|
|
2 |
|
|
|
3 |
import os; |
|
|
4 |
import sys; |
|
|
5 |
|
|
|
6 |
import string; |
|
|
7 |
|
|
|
8 |
from collections import defaultdict |
|
|
9 |
|
|
|
10 |
import argparse; |
|
|
11 |
from argparse import RawTextHelpFormatter |
|
|
12 |
|
|
|
13 |
from DeepMod_scripts.myCom import * |
|
|
14 |
|
|
|
15 |
|
|
|
16 |
|
|
|
17 |
# three modules in DeepMod |
|
|
18 |
parser = argparse.ArgumentParser(description="Detect nucleotide modification from nanopore signals data.", epilog="For example, \n \ |
|
|
19 |
\tpython %(prog)s train: Training a modification classifier.\n \ |
|
|
20 |
\tpython %(prog)s detect: Detect modification by integrating all long reads. \n \ |
|
|
21 |
\tpython %(prog)s getfeatures: Get features for training a model. \n \ |
|
|
22 |
", formatter_class=RawTextHelpFormatter); |
|
|
23 |
|
|
|
24 |
|
|
|
25 |
# |
|
|
26 |
# Return error message when a value<1 |
|
|
27 |
# Return an empty string otherwise |
|
|
28 |
# |
|
|
29 |
def non_negative(i, mstr): |
|
|
30 |
if i<1: return (("\n\tError %d could not be negative(%d)" % (mstr, i))) |
|
|
31 |
else: return '' |
|
|
32 |
|
|
|
33 |
# |
|
|
34 |
# Print all parameters in stdout |
|
|
35 |
# |
|
|
36 |
def printParameters(moptions): |
|
|
37 |
mpkeys = moptions.keys(); #mpkeys.sort() |
|
|
38 |
sorted(mpkeys) |
|
|
39 |
print('%30s: %s' % ('Current directory', os.getcwd())) |
|
|
40 |
for mpk in mpkeys: |
|
|
41 |
print ('%30s: %s' % (mpk, str(moptions[mpk]))) |
|
|
42 |
sys.stdout.flush() |
|
|
43 |
|
|
|
44 |
# |
|
|
45 |
# Got common argument provided by users or default values. |
|
|
46 |
# |
|
|
47 |
# |
|
|
48 |
def mCommonParam(margs): |
|
|
49 |
|
|
|
50 |
ErrorMessage = "" |
|
|
51 |
moptions = defaultdict() |
|
|
52 |
# how to output running message: need more control now. |
|
|
53 |
moptions['outLevel'] = margs.outLevel |
|
|
54 |
# the input working base |
|
|
55 |
moptions["wrkBase"] = margs.wrkBase |
|
|
56 |
if moptions["wrkBase"]==None: |
|
|
57 |
ErrorMessage = ErrorMessage + ("\n\tThe input folder is None.") |
|
|
58 |
|
|
|
59 |
# An unique ID for output |
|
|
60 |
# Usefull for run the program in parallel |
|
|
61 |
moptions["FileID"] = margs.FileID |
|
|
62 |
# output folder; |
|
|
63 |
# make it if the output folder does not exist |
|
|
64 |
moptions['outFolder'] = margs.outFolder |
|
|
65 |
moptions['outFolder'] = format_last_letter_of_folder(moptions['outFolder']) |
|
|
66 |
if moptions['outFolder']==None or (not os.path.isdir(moptions['outFolder'])): |
|
|
67 |
try: |
|
|
68 |
os.system('mkdir -p '+moptions['outFolder']); |
|
|
69 |
except: |
|
|
70 |
ErrorMessage = ErrorMessage + ("\n\tThe output folder (%s) does not exist and cannot be created." % moptions['outFolder']) |
|
|
71 |
|
|
|
72 |
# check all data in a recurive way |
|
|
73 |
moptions['recursive'] = margs.recursive |
|
|
74 |
# the number of threads used and the number of files handled by each thread. |
|
|
75 |
moptions['files_per_thread'] = margs.files_per_thread |
|
|
76 |
if moptions['files_per_thread']<2: moptions['files_per_thread'] = 2 |
|
|
77 |
# the number of threads used |
|
|
78 |
moptions['threads'] = margs.threads |
|
|
79 |
if moptions['threads']<1: moptions['threads'] = 1 |
|
|
80 |
|
|
|
81 |
# windowsize: default=21 |
|
|
82 |
moptions['windowsize'] = margs.windowsize |
|
|
83 |
ErrorMessage = ErrorMessage + non_negative(moptions['windowsize'], 'windowsize') |
|
|
84 |
if moptions['windowsize']<1: moptions['windowsize'] = 1 |
|
|
85 |
|
|
|
86 |
# aligners: bwa-mem or minimap2 |
|
|
87 |
moptions['alignStr'] = margs.alignStr; |
|
|
88 |
|
|
|
89 |
moptions['SignalGroup'] = margs.SignalGroup; |
|
|
90 |
|
|
|
91 |
moptions['move'] = margs.move |
|
|
92 |
|
|
|
93 |
return [moptions, ErrorMessage] |
|
|
94 |
|
|
|
95 |
# |
|
|
96 |
# detect modification for bases of interests |
|
|
97 |
# input is a list of fast5 files, a reference genome and a well-trained model. |
|
|
98 |
# |
|
|
99 |
def mDetect(margs): |
|
|
100 |
# get common parameters |
|
|
101 |
moptions, ErrorMessage = mCommonParam(margs) |
|
|
102 |
|
|
|
103 |
# path for basecall information in fast5 files |
|
|
104 |
moptions['basecall_1d'] = margs.basecall_1d |
|
|
105 |
moptions['basecall_2strand'] = margs.basecall_2strand |
|
|
106 |
# Whether consider those chromosome which contain -_:/ |
|
|
107 |
# default: yes; |
|
|
108 |
moptions['ConUnk'] = margs.ConUnk |
|
|
109 |
# output layer information for deep learning |
|
|
110 |
moptions['outputlayer'] = margs.outputlayer |
|
|
111 |
# base of interest |
|
|
112 |
moptions['Base'] = margs.Base |
|
|
113 |
# whether take cluster effect of methylation into consideration |
|
|
114 |
moptions['mod_cluster'] = margs.mod_cluster |
|
|
115 |
# base of interest |
|
|
116 |
if moptions['Base'] in ["", None]: |
|
|
117 |
ErrorMessage = ErrorMessage + ("\n\t Please provide a base of interest.") |
|
|
118 |
|
|
|
119 |
# predict medification for bases of interest in long reads first |
|
|
120 |
# only summarize them for each genomic position of interest . |
|
|
121 |
moptions['predDet'] = margs.predDet |
|
|
122 |
if moptions['predDet']: |
|
|
123 |
# path to reference genome |
|
|
124 |
moptions['Ref'] = margs.Ref |
|
|
125 |
if moptions['Ref']==None or (not os.path.isfile(moptions['Ref'])): |
|
|
126 |
ErrorMessage = ErrorMessage + ("\n\t reference file does not exist (%s)" % moptions['Ref']) |
|
|
127 |
|
|
|
128 |
# the number of feature for each event |
|
|
129 |
moptions['fnum'] = margs.fnum |
|
|
130 |
ErrorMessage = ErrorMessage + non_negative(moptions['fnum'], 'fnum') |
|
|
131 |
# the number of hidden nodes |
|
|
132 |
moptions['hidden'] = margs.hidden |
|
|
133 |
ErrorMessage = ErrorMessage + non_negative(moptions['hidden'], 'hidden') |
|
|
134 |
# the well-trained model |
|
|
135 |
moptions['modfile'] = margs.modfile |
|
|
136 |
if moptions['modfile']==None: |
|
|
137 |
print("No mod file is provided. The default one is used") |
|
|
138 |
moptions['modfile'] = ('train_deepmod/rnn_P90wd%d_f53/mod_train_P90wd%d_f53' % (moptions['windowsize'], moptions['windowsize'])) |
|
|
139 |
if (not os.path.isfile(moptions['modfile']+'.meta')): |
|
|
140 |
moptions['modfile'] = ('{}/lib/python{}.{}/site-packages/DeepMod/train_deepmod/rnn_P90wd{}_f53/mod_train_P90wd{}_f53'.format(sys.prefix,sys.version_info.major,sys.version_info.minor, moptions['windowsize'], moptions['windowsize'])) |
|
|
141 |
if (not os.path.isfile(moptions['modfile']+'.meta')): |
|
|
142 |
ErrorMessage = ErrorMessage + ("\n\tThe meta file (%s) does not exist" % (moptions['modfile']+'.meta' if not moptions['modfile']==None else "")) |
|
|
143 |
else: |
|
|
144 |
# already done the prediction process? |
|
|
145 |
# Yes: summarize the results only |
|
|
146 |
moptions['predpath'] = margs.predpath |
|
|
147 |
if moptions['predpath']==None or (not os.path.isdir(moptions['predpath'])): |
|
|
148 |
ErrorMessage = ErrorMessage + ("\n\tThe predpath does not exist") |
|
|
149 |
|
|
|
150 |
# specify region of interest |
|
|
151 |
# not consider bases outside regions in a reference genome |
|
|
152 |
# None: all bases of interest |
|
|
153 |
moptions['region'] = [ ] |
|
|
154 |
if margs.region == None or len(margs.region)==0: |
|
|
155 |
moptions['region'].append([None, None, None]) |
|
|
156 |
else: |
|
|
157 |
mregionlist = margs.region.split(';') |
|
|
158 |
for mr in mregionlist: |
|
|
159 |
mr_sp = mr.split(':') |
|
|
160 |
moptions['region'].append([mr_sp[0], int(mr_sp[1]) if len(mr_sp)>1 else None, int(mr_sp[2]) if len(mr_sp)>2 else None ]) |
|
|
161 |
|
|
|
162 |
print("\nNanopore sequencing data analysis is resourece-intensive and time consuming. ") |
|
|
163 |
print("Some potential strong recommendations are below:") |
|
|
164 |
print("\tIf your reference genome is large as human genome and your Nanopore data is huge,") |
|
|
165 |
print("\tIt would be faster to run this program parallelly to speed up.") |
|
|
166 |
print("\tYou might run different input folders of your fast5 files and ") |
|
|
167 |
print("\tgive different output names (--FileID) or folders (--outFolder)") |
|
|
168 |
print("\tA good way for this is to run different chromosome individually.\n") |
|
|
169 |
|
|
|
170 |
# print help information if any necessary options are not provided. |
|
|
171 |
printParameters(moptions) |
|
|
172 |
if not ErrorMessage=="": |
|
|
173 |
ErrorMessage = "Please provide correct parameters" + ErrorMessage |
|
|
174 |
print(ErrorMessage) |
|
|
175 |
parser.print_help(); |
|
|
176 |
parser.parse_args(['detect', '--help']); |
|
|
177 |
sys.exit(1) |
|
|
178 |
|
|
|
179 |
from DeepMod_scripts import myDetect |
|
|
180 |
myDetect.mDetect_manager(moptions) |
|
|
181 |
|
|
|
182 |
# |
|
|
183 |
# Train a model |
|
|
184 |
# Need to get features first. |
|
|
185 |
# |
|
|
186 |
def mTrain(margs): |
|
|
187 |
from DeepMod_scripts import myMultiBiRNN |
|
|
188 |
|
|
|
189 |
# gent common options |
|
|
190 |
moptions, ErrorMessage = mCommonParam(margs) |
|
|
191 |
|
|
|
192 |
# network setting: the number of features and the number of hidden nodes |
|
|
193 |
moptions['fnum'] = margs.fnum |
|
|
194 |
ErrorMessage = ErrorMessage + non_negative(moptions['fnum'], 'fnum') |
|
|
195 |
moptions['hidden'] = margs.hidden |
|
|
196 |
ErrorMessage = ErrorMessage + non_negative(moptions['hidden'], 'hidden') |
|
|
197 |
|
|
|
198 |
# the output function of the deep learning model |
|
|
199 |
moptions['outputlayer'] = margs.outputlayer |
|
|
200 |
# whether using different class weights |
|
|
201 |
moptions['unbalanced'] = margs.unbalanced |
|
|
202 |
|
|
|
203 |
# re-load trained model and continue to train |
|
|
204 |
moptions['modfile'] = margs.modfile |
|
|
205 |
if moptions['modfile']==None: pass; |
|
|
206 |
elif (not os.path.isfile(moptions['modfile']+'.meta')): |
|
|
207 |
ErrorMessage = ErrorMessage + ("\n\tThe meta file (%s) does not exist" % (moptions['modfile']+'.meta' if not moptions['modfile']==None else "")) |
|
|
208 |
|
|
|
209 |
# read-based or region based independent training |
|
|
210 |
# E: region-based |
|
|
211 |
# P: read-based. |
|
|
212 |
if not margs.test==None: |
|
|
213 |
moptions['test'] = margs.test.split(',') |
|
|
214 |
if moptions['test'][0] == 'E': moptions['test'][0] = '-' |
|
|
215 |
elif moptions['test'][0] == 'P': moptions['test'][0] = '0' |
|
|
216 |
else: |
|
|
217 |
ErrorMessage = ErrorMessage + "Unknown option for test: the first character must be E or P "+margs.test |
|
|
218 |
if moptions['test'][0] in ['-']: |
|
|
219 |
moptions['test'][1] = int(moptions['test'][1]) * (10**6) |
|
|
220 |
moptions['test'][2] = int(moptions['test'][2]) * (10**6) |
|
|
221 |
else: moptions['test'][1] = int(moptions['test'][1])/100.0 |
|
|
222 |
else: moptions['test'] = ['N', '100'] |
|
|
223 |
|
|
|
224 |
# print help document if necessary options are not provided. |
|
|
225 |
print("Train") |
|
|
226 |
printParameters(moptions) |
|
|
227 |
if not ErrorMessage=="": |
|
|
228 |
ErrorMessage = "Please provide correct parameters" + ErrorMessage |
|
|
229 |
print(ErrorMessage) |
|
|
230 |
parser.print_help(); |
|
|
231 |
parser.parse_args(['train', '--help']); |
|
|
232 |
sys.exit(2) |
|
|
233 |
|
|
|
234 |
myMultiBiRNN.mMult_RNN_LSTM_train(moptions) |
|
|
235 |
|
|
|
236 |
# |
|
|
237 |
# get features for training |
|
|
238 |
# |
|
|
239 |
# |
|
|
240 |
def mGetFeatures(margs): |
|
|
241 |
from DeepMod_scripts import myGetFeatureBasedPos |
|
|
242 |
|
|
|
243 |
# get common options |
|
|
244 |
moptions, ErrorMessage = mCommonParam(margs) |
|
|
245 |
# motif-based data: positive or negative control data |
|
|
246 |
moptions['posneg'] = margs.posneg |
|
|
247 |
# the number of features: 7-description or 57-description |
|
|
248 |
moptions['fnum'] = margs.fnum |
|
|
249 |
ErrorMessage = ErrorMessage + non_negative(moptions['fnum'], 'fnum') |
|
|
250 |
# size of each bacth to store features |
|
|
251 |
moptions['size_per_batch'] = margs.size_per_batch |
|
|
252 |
if moptions['size_per_batch'] < 0.001: moptions['size_per_batch'] = 0.001 |
|
|
253 |
|
|
|
254 |
# path to basecall inform in fast5 files |
|
|
255 |
moptions['basecall_1d'] = margs.basecall_1d |
|
|
256 |
moptions['basecall_2strand'] = margs.basecall_2strand |
|
|
257 |
|
|
|
258 |
# regions of interest |
|
|
259 |
moptions['region'] = [None, None, None] |
|
|
260 |
if not (margs.region==None or margs.region.strip()==''): |
|
|
261 |
rsp = margs.region.split(':') |
|
|
262 |
for rv_ind in range(len(rsp)): |
|
|
263 |
rsp[rv_ind] = rsp[rv_ind].strip(); |
|
|
264 |
if not rsp[rv_ind]=='': |
|
|
265 |
moptions['region'][rv_ind] = rsp[rv_ind] |
|
|
266 |
|
|
|
267 |
# referene genome |
|
|
268 |
moptions['Ref'] = margs.Ref |
|
|
269 |
if moptions['Ref']==None or (not os.path.isfile(moptions['Ref'])): |
|
|
270 |
ErrorMessage = ErrorMessage + ("\n\t reference file does not exist (%s)" % moptions['Ref']) |
|
|
271 |
|
|
|
272 |
# get motif-based modification |
|
|
273 |
# or specify by --fulmod/--anymod/--nomod |
|
|
274 |
moptions['motifORPos'] = margs.motifORPos |
|
|
275 |
if margs.motifORPos==1: |
|
|
276 |
moptions['motif'] = [margs.motif.upper(), margs.ModinMotif] |
|
|
277 |
elif margs.motifORPos==2: |
|
|
278 |
moptions['fulmod'] = margs.fulmod |
|
|
279 |
if moptions['fulmod']==None: # completely modificated positions |
|
|
280 |
ErrorMessage = ErrorMessage + ("\t There is no parameter for --fulmod.") |
|
|
281 |
moptions['anymod'] = margs.anymod |
|
|
282 |
if moptions['anymod'] == None: # patially modificated positions |
|
|
283 |
ErrorMessage = ErrorMessage + ("\t There is no parameter for --anymod.") |
|
|
284 |
moptions['nomod'] = margs.nomod |
|
|
285 |
if moptions['nomod'] == None: # completely unmodified posisionts |
|
|
286 |
ErrorMessage = ErrorMessage + ("\t There is no parameter for --nomod.") |
|
|
287 |
else: |
|
|
288 |
ErrorMessage = ErrorMessage + ("\tmotifORPos value (%d) is not supported." % margs.motifORPos) |
|
|
289 |
|
|
|
290 |
# print help document if any required options are not provided. |
|
|
291 |
printParameters(moptions) |
|
|
292 |
if not ErrorMessage=="": |
|
|
293 |
ErrorMessage = "Please provide correct parameters" + ErrorMessage |
|
|
294 |
print(ErrorMessage) |
|
|
295 |
parser.print_help(); |
|
|
296 |
parser.parse_args(['getfeatures', '--help']); |
|
|
297 |
sys.exit(1) |
|
|
298 |
|
|
|
299 |
myGetFeatureBasedPos.getFeature_manager(moptions) |
|
|
300 |
|
|
|
301 |
|
|
|
302 |
##################################################################################### |
|
|
303 |
|
|
|
304 |
subparsers = parser.add_subparsers() |
|
|
305 |
parent_parser = argparse.ArgumentParser(add_help=False) |
|
|
306 |
|
|
|
307 |
# add common options |
|
|
308 |
com_group_for_comparison = parent_parser.add_argument_group('Common options.') |
|
|
309 |
com_group_for_comparison.add_argument("--outLevel", type=int, choices=[OUTPUT_DEBUG, OUTPUT_INFO, OUTPUT_WARNING, OUTPUT_ERROR], default=OUTPUT_WARNING, help=("The level for output: %d for DEBUG, %d for INFO, %d for WARNING, %d for ERROR. Default: %d" % (OUTPUT_DEBUG, OUTPUT_INFO, OUTPUT_WARNING, OUTPUT_ERROR, OUTPUT_WARNING))) |
|
|
310 |
com_group_for_comparison.add_argument("--wrkBase", help="The base folder for FAST5 files.") |
|
|
311 |
com_group_for_comparison.add_argument("--FileID", default="mod", help="The unique string for intermediate files and final output files. Default: 'mod'") |
|
|
312 |
com_group_for_comparison.add_argument("--outFolder", default='./mod_output', help="The default folder for outputing the results. Default: ./mod_output") |
|
|
313 |
com_group_for_comparison.add_argument("--recursive", type=int, default=1, choices=[0,1], help="Recurise to find fast5 files. Default:1") |
|
|
314 |
com_group_for_comparison.add_argument("--threads", type=int, default=4, help="The number of threads used (not for train). Default:4") |
|
|
315 |
com_group_for_comparison.add_argument("--files_per_thread", type=int, default=1000, help="The number of fast5 files for each thread (not for train). Default:500") |
|
|
316 |
com_group_for_comparison.add_argument("--windowsize", type=int, default=21, help="The window size to extract features. Default: 21") |
|
|
317 |
com_group_for_comparison.add_argument("--alignStr", type=str, default='minimap2', choices=["bwa","minimap2"], help="Alignment tools (bwa or minimap2 is supported). Default: minimap2") |
|
|
318 |
com_group_for_comparison.add_argument("--SignalGroup", type=str, default='simple', choices=["simple","rundif"], help="How to associate signals to each called bases. Default: simple") |
|
|
319 |
com_group_for_comparison.add_argument("--move", default=False, action="store_true", help="Whether the basecalled data use move tables instead of event tables. Default: False") |
|
|
320 |
|
|
|
321 |
# add detection options |
|
|
322 |
parser_detect = subparsers.add_parser('detect', parents=[parent_parser], help="Detect modifications at a genomic scale", description="Detect modifications by integrating all long reads for a genome", epilog="For example, \n \ |
|
|
323 |
python %(prog)s --wrkBase ctrl_oligo_SpeI_cut --FileID mod_det --outFolder ./mod_output/detect3 \n \ |
|
|
324 |
", formatter_class=RawTextHelpFormatter) |
|
|
325 |
parser_detect.add_argument("--Ref", help="The reference sequence") |
|
|
326 |
parser_detect.add_argument("--predDet", type=int, default=1, choices=[0,1], help="pred first and then detect (1) or only detect (0). Default: 1") |
|
|
327 |
parser_detect.add_argument("--predpath", default=None, help="The file path of predictions for each fast5 file. The file pattern is *_*.detail. Default: './mod_output/pred2/'") |
|
|
328 |
parser_detect.add_argument("--modfile", type=str, default=None, help="The path to load training model. Default: 'mod_output/'") |
|
|
329 |
parser_detect.add_argument("--fnum", type=int, default=7, help="The number of features. Default: 7") |
|
|
330 |
parser_detect.add_argument("--hidden", type=int, default=100, help="The number of hidden node. Default: 100") |
|
|
331 |
parser_detect.add_argument("--basecall_1d", default="Basecall_1D_000", help="Path for basecall_1d. Default: Basecall_1D_000") |
|
|
332 |
parser_detect.add_argument("--basecall_2strand", default="BaseCalled_template", help="Path for basecall_2strand. Default: BaseCalled_template") |
|
|
333 |
parser_detect.add_argument("--region", default=None, help="The region of interest: for example, chr:1:100000;chr2:10000"); |
|
|
334 |
parser_detect.add_argument("--ConUnk", default=True, choices=[False, True], help="Whether contain unknown chromosome"); |
|
|
335 |
parser_detect.add_argument("--outputlayer", default="", choices=["", "sigmoid"], help="how to put activation function for output layer") |
|
|
336 |
parser_detect.add_argument("--Base", type=str, default='C', choices=['A', 'C', 'G', 'T'], help="Interest of bases"); |
|
|
337 |
parser_detect.add_argument("--mod_cluster", default=0, choices=[0,1], help="1: CpG cluster effect; 0: not"); |
|
|
338 |
parser_detect.set_defaults(func=mDetect) |
|
|
339 |
|
|
|
340 |
# add training options |
|
|
341 |
parser_training = subparsers.add_parser('train', parents=[parent_parser], help="Training a modification classifier", description="Training a modification classifier", epilog="For example, \n \ |
|
|
342 |
python %(prog)s --wrkBase umr --wrkBase2 sss --FileID mod_train --outFolder ./mod_output/train1 \n \ |
|
|
343 |
", formatter_class=RawTextHelpFormatter) |
|
|
344 |
parser_training.add_argument("--wrkBase2", help="The base folder for long reads without any modifications.") |
|
|
345 |
parser_training.add_argument("--fnum", type=int, default=7, help="The number of features. Default: 7") |
|
|
346 |
parser_training.add_argument("--hidden", type=int, default=100, help="The number of hidden node. Default: 100") |
|
|
347 |
parser_training.add_argument("--modfile", type=str, default=None, help="The path to load training model. Default: 'mod_output/'") |
|
|
348 |
parser_training.add_argument("--test", help="The number of E Coli genomic position for testing. Default: 'E,1,2'") |
|
|
349 |
parser_training.add_argument("--outputlayer", default="", choices=["", "sigmoid"], help="how to put activation function for output layer") |
|
|
350 |
parser_training.add_argument("--unbalanced", type=int, default=0, choices=[1, 0, None], help="Whether data is unbalanced"); |
|
|
351 |
parser_training.set_defaults(func=mTrain) |
|
|
352 |
|
|
|
353 |
# add get-feature options |
|
|
354 |
parser_getfeatures = subparsers.add_parser('getfeatures', parents=[parent_parser], help="Get features for all fast5 files", description="Get features for all fast5 files", epilog="For example, \n \ |
|
|
355 |
python %(prog)s --wrkBase umr/160617_ecolilowinput_UMR9/called/pass --threads 48 --recursive 0 --posneg 0 --outFolder umr \n \ |
|
|
356 |
python %(prog)s --wrkBase sss/160617_ecolilowinput_sssiR9/called/pass --threads 48 --recursive 0 --posneg 1 --outFolder sss \n \ |
|
|
357 |
", formatter_class=RawTextHelpFormatter) |
|
|
358 |
parser_getfeatures.add_argument("--posneg", type=int, default=0, choices=[0,1], help="The positive(1) or negative(0) class. Default: 0") |
|
|
359 |
parser_getfeatures.add_argument("--size_per_batch", type=int, default=1, help="The size (unit: 10^7=10M) of a feature file. Default: 1") |
|
|
360 |
parser_getfeatures.add_argument("--fnum", type=int, default=7, help="The number of features. Default: 7") |
|
|
361 |
parser_getfeatures.add_argument("--region", type=str, help="The region of interest. Set to None or empty for all. Format is chr:start_pos:end_pos") |
|
|
362 |
parser_getfeatures.add_argument("--basecall_1d", default="Basecall_1D_000", help="Path for basecall_1d. Default: Basecall_1D_000") |
|
|
363 |
parser_getfeatures.add_argument("--basecall_2strand", default="BaseCalled_template", help="Path for basecall_2strand. Default: BaseCalled_template") |
|
|
364 |
|
|
|
365 |
parser_getfeatures.add_argument("--motifORPos", type=int, default=1, help="Use Motif (1) or pos (2) for modified bases. Default: 1") |
|
|
366 |
|
|
|
367 |
parser_getfeatures.add_argument("--motif", default='CG', type=str, help="The motif of interest") |
|
|
368 |
parser_getfeatures.add_argument("--ModinMotif", default=0, type=int, help="The position of modified base in the motif of interest") |
|
|
369 |
parser_getfeatures.add_argument("--Ref", help="The reference sequence") |
|
|
370 |
|
|
|
371 |
parser_getfeatures.add_argument("--fulmod", type=str, help="The file pattern for full modification: bisultfiteseq/chr20_C*_0.95.txt") |
|
|
372 |
parser_getfeatures.add_argument("--anymod", type=str, help="The file pattern for any modification: bisultfiteseq/chr20_any_0.95.txt") |
|
|
373 |
parser_getfeatures.add_argument("--nomod", type=str, help="The file pattern for any modification: bisultfiteseq/chr20_no1_0.95.txt") |
|
|
374 |
|
|
|
375 |
parser_getfeatures.set_defaults(func=mGetFeatures) |
|
|
376 |
|
|
|
377 |
# no provided argument |
|
|
378 |
# print help document |
|
|
379 |
if len(sys.argv)<2: |
|
|
380 |
parser.print_help(); |
|
|
381 |
else: |
|
|
382 |
args = parser.parse_args() |
|
|
383 |
args.func(args); |