|
a |
|
b/bin/DeepMod_scripts/MoveTable.py |
|
|
1 |
|
|
|
2 |
import os,sys |
|
|
3 |
import numpy as np |
|
|
4 |
import h5py |
|
|
5 |
|
|
|
6 |
|
|
|
7 |
def getMove_Info(moptions, sp_param, move_data): |
|
|
8 |
''' |
|
|
9 |
sp_param.keys: fq_seq, raw_signals, first_sample_template, duration_template |
|
|
10 |
''' |
|
|
11 |
|
|
|
12 |
#sp_param['first_sample_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['first_sample_template'] |
|
|
13 |
#sp_param['duration_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['duration_template'] |
|
|
14 |
|
|
|
15 |
seg = "Segmentation_" + moptions['basecall_1d'].split('_')[-1] |
|
|
16 |
attr_path = '/'.join(['', 'Analyses', seg, 'Summary', 'segmentation']) |
|
|
17 |
#mv_str = '/'.join(['', 'Analyses', moptions['basecall_1d'], moptions['basecall_2strand'], 'Move']) |
|
|
18 |
sp_param['first_sample_template'] = sp_param['f5reader'][attr_path].attrs['first_sample_template'] |
|
|
19 |
sp_param['duration_template'] = sp_param['f5reader'][attr_path].attrs['duration_template'] |
|
|
20 |
#move_data = sp_param['f5reader'][mv_str][()] |
|
|
21 |
nrow = len(sp_param['fq_seq']) # row number of event_info; equals to the base number |
|
|
22 |
nsig = len(sp_param['raw_signals']) |
|
|
23 |
first = int(sp_param['first_sample_template']) |
|
|
24 |
duration = int(sp_param['duration_template']) |
|
|
25 |
move_info = np.empty([nrow], dtype=[('mean', '<f4'), ('stdv', '<f4'), ('start', np.uint64), ('length', np.uint64), ('model_state', 'U5')]) |
|
|
26 |
effect_sig_index = list(range(first, nsig)) |
|
|
27 |
pivot = first |
|
|
28 |
seg_count = 0 #which segmentation |
|
|
29 |
for i in range(1, len(move_data)): |
|
|
30 |
if move_data[i] == 1: |
|
|
31 |
move_info[seg_count]['mean'] = np.mean(sp_param['raw_signals'][pivot:(2*i + first)]) |
|
|
32 |
move_info[seg_count]['length'] = 2*i + first - pivot |
|
|
33 |
move_info[seg_count]['stdv'] = np.std(sp_param['raw_signals'][pivot:(2*i + first)]) |
|
|
34 |
move_info[seg_count]['start'] = pivot |
|
|
35 |
if seg_count == 0: |
|
|
36 |
move_info[seg_count]['model_state'] = 'N'*2 + sp_param['fq_seq'][seg_count:seg_count+3] |
|
|
37 |
elif seg_count == 1: |
|
|
38 |
move_info[seg_count]['model_state'] = 'N' + sp_param['fq_seq'][seg_count-1:seg_count+3] |
|
|
39 |
elif seg_count == nrow-2: |
|
|
40 |
move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2:seg_count+2] + 'N' |
|
|
41 |
else: |
|
|
42 |
move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2 : seg_count+3] |
|
|
43 |
pivot = 2*i + first |
|
|
44 |
seg_count += 1 |
|
|
45 |
move_info[seg_count]['mean'] = np.mean(sp_param['raw_signals'][pivot:nsig]) |
|
|
46 |
move_info[seg_count]['length'] = nsig - pivot |
|
|
47 |
move_info[seg_count]['stdv'] = np.std(sp_param['raw_signals'][pivot:nsig]) |
|
|
48 |
move_info[seg_count]['start'] = pivot |
|
|
49 |
move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2:seg_count+1] + 'N'*2 |
|
|
50 |
return move_info |
|
|
51 |
|
|
|
52 |
|
|
|
53 |
if __name__=='__main__': |
|
|
54 |
moptions = {} |
|
|
55 |
sp_param = {} |
|
|
56 |
|
|
|
57 |
exple_data = ['/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch12_read102_strand.fast5', \ |
|
|
58 |
'/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch47_read165_strand.fast5', \ |
|
|
59 |
'/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch48_read38_strand.fast5', \ |
|
|
60 |
'/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch52_read12_strand.fast5' \ |
|
|
61 |
] |
|
|
62 |
|
|
|
63 |
sp_param['f5reader'] = h5py.File(exple_data[0], 'r'); |
|
|
64 |
|
|
|
65 |
|
|
|
66 |
fq_str = '/Analyses/Basecall_1D_001/BaseCalled_template/Fastq' |
|
|
67 |
mv_str = '/Analyses/Basecall_1D_001/BaseCalled_template/Move' |
|
|
68 |
sg_str = '/Raw/Reads/' |
|
|
69 |
|
|
|
70 |
sp_param['fq_seq'] = sp_param['f5reader'][fq_str][()].splitlines()[1] |
|
|
71 |
k = list(sp_param['f5reader'][sg_str].keys())[0] |
|
|
72 |
sp_param['raw_signals'] = sp_param['f5reader'][sg_str][k]['Signal'][()] |
|
|
73 |
sp_param['first_sample_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['first_sample_template'] |
|
|
74 |
sp_param['duration_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['duration_template'] |
|
|
75 |
move_data = sp_param['f5reader'][mv_str][()] |
|
|
76 |
|
|
|
77 |
getEvent_Info(moptions, sp_param, move_data) |
|
|
78 |
|
|
|
79 |
sp_param['f5reader'].close(); |