a b/bin/DeepMod_scripts/MoveTable.py
1
2
import os,sys
3
import numpy as np
4
import h5py
5
6
7
def getMove_Info(moptions, sp_param, move_data):
8
    '''
9
    sp_param.keys: fq_seq, raw_signals, first_sample_template, duration_template
10
    '''
11
12
    #sp_param['first_sample_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['first_sample_template']
13
    #sp_param['duration_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['duration_template']
14
15
    seg = "Segmentation_" + moptions['basecall_1d'].split('_')[-1]
16
    attr_path = '/'.join(['', 'Analyses', seg, 'Summary', 'segmentation'])
17
    #mv_str = '/'.join(['', 'Analyses', moptions['basecall_1d'], moptions['basecall_2strand'], 'Move'])
18
    sp_param['first_sample_template'] = sp_param['f5reader'][attr_path].attrs['first_sample_template']
19
    sp_param['duration_template'] = sp_param['f5reader'][attr_path].attrs['duration_template']
20
    #move_data = sp_param['f5reader'][mv_str][()]
21
    nrow = len(sp_param['fq_seq']) # row number of event_info; equals to the base number
22
    nsig = len(sp_param['raw_signals'])
23
    first = int(sp_param['first_sample_template'])
24
    duration = int(sp_param['duration_template'])
25
    move_info = np.empty([nrow], dtype=[('mean', '<f4'), ('stdv', '<f4'), ('start', np.uint64), ('length', np.uint64), ('model_state', 'U5')])
26
    effect_sig_index = list(range(first, nsig))
27
    pivot = first
28
    seg_count = 0 #which segmentation
29
    for i in range(1, len(move_data)):
30
        if move_data[i] == 1:
31
            move_info[seg_count]['mean'] = np.mean(sp_param['raw_signals'][pivot:(2*i + first)])
32
            move_info[seg_count]['length'] = 2*i + first - pivot
33
            move_info[seg_count]['stdv'] = np.std(sp_param['raw_signals'][pivot:(2*i + first)])
34
            move_info[seg_count]['start'] = pivot
35
            if seg_count == 0:
36
                move_info[seg_count]['model_state'] = 'N'*2 + sp_param['fq_seq'][seg_count:seg_count+3]
37
            elif seg_count == 1:
38
                move_info[seg_count]['model_state'] = 'N' + sp_param['fq_seq'][seg_count-1:seg_count+3]
39
            elif seg_count == nrow-2:
40
                move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2:seg_count+2] + 'N'
41
            else:
42
                move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2 : seg_count+3]
43
            pivot = 2*i + first
44
            seg_count += 1
45
    move_info[seg_count]['mean'] = np.mean(sp_param['raw_signals'][pivot:nsig])
46
    move_info[seg_count]['length'] = nsig - pivot
47
    move_info[seg_count]['stdv'] = np.std(sp_param['raw_signals'][pivot:nsig])
48
    move_info[seg_count]['start'] = pivot
49
    move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2:seg_count+1] + 'N'*2
50
    return move_info
51
52
53
if __name__=='__main__':
54
   moptions = {}
55
   sp_param = {}
56
57
   exple_data = ['/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch12_read102_strand.fast5', \
58
                 '/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch47_read165_strand.fast5', \
59
                 '/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch48_read38_strand.fast5', \
60
                 '/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch52_read12_strand.fast5' \
61
                 ]
62
63
   sp_param['f5reader'] = h5py.File(exple_data[0], 'r');
64
65
66
   fq_str = '/Analyses/Basecall_1D_001/BaseCalled_template/Fastq'
67
   mv_str = '/Analyses/Basecall_1D_001/BaseCalled_template/Move'
68
   sg_str = '/Raw/Reads/'
69
70
   sp_param['fq_seq'] = sp_param['f5reader'][fq_str][()].splitlines()[1]
71
   k = list(sp_param['f5reader'][sg_str].keys())[0]
72
   sp_param['raw_signals'] = sp_param['f5reader'][sg_str][k]['Signal'][()]
73
   sp_param['first_sample_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['first_sample_template']
74
   sp_param['duration_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['duration_template']
75
   move_data = sp_param['f5reader'][mv_str][()]
76
77
   getEvent_Info(moptions, sp_param, move_data)
78
79
   sp_param['f5reader'].close();