--- a +++ b/bin/DeepMod_scripts/MoveTable.py @@ -0,0 +1,79 @@ + +import os,sys +import numpy as np +import h5py + + +def getMove_Info(moptions, sp_param, move_data): + ''' + sp_param.keys: fq_seq, raw_signals, first_sample_template, duration_template + ''' + + #sp_param['first_sample_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['first_sample_template'] + #sp_param['duration_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['duration_template'] + + seg = "Segmentation_" + moptions['basecall_1d'].split('_')[-1] + attr_path = '/'.join(['', 'Analyses', seg, 'Summary', 'segmentation']) + #mv_str = '/'.join(['', 'Analyses', moptions['basecall_1d'], moptions['basecall_2strand'], 'Move']) + sp_param['first_sample_template'] = sp_param['f5reader'][attr_path].attrs['first_sample_template'] + sp_param['duration_template'] = sp_param['f5reader'][attr_path].attrs['duration_template'] + #move_data = sp_param['f5reader'][mv_str][()] + nrow = len(sp_param['fq_seq']) # row number of event_info; equals to the base number + nsig = len(sp_param['raw_signals']) + first = int(sp_param['first_sample_template']) + duration = int(sp_param['duration_template']) + move_info = np.empty([nrow], dtype=[('mean', '<f4'), ('stdv', '<f4'), ('start', np.uint64), ('length', np.uint64), ('model_state', 'U5')]) + effect_sig_index = list(range(first, nsig)) + pivot = first + seg_count = 0 #which segmentation + for i in range(1, len(move_data)): + if move_data[i] == 1: + move_info[seg_count]['mean'] = np.mean(sp_param['raw_signals'][pivot:(2*i + first)]) + move_info[seg_count]['length'] = 2*i + first - pivot + move_info[seg_count]['stdv'] = np.std(sp_param['raw_signals'][pivot:(2*i + first)]) + move_info[seg_count]['start'] = pivot + if seg_count == 0: + move_info[seg_count]['model_state'] = 'N'*2 + sp_param['fq_seq'][seg_count:seg_count+3] + elif seg_count == 1: + move_info[seg_count]['model_state'] = 'N' + sp_param['fq_seq'][seg_count-1:seg_count+3] + elif seg_count == nrow-2: + move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2:seg_count+2] + 'N' + else: + move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2 : seg_count+3] + pivot = 2*i + first + seg_count += 1 + move_info[seg_count]['mean'] = np.mean(sp_param['raw_signals'][pivot:nsig]) + move_info[seg_count]['length'] = nsig - pivot + move_info[seg_count]['stdv'] = np.std(sp_param['raw_signals'][pivot:nsig]) + move_info[seg_count]['start'] = pivot + move_info[seg_count]['model_state'] = sp_param['fq_seq'][seg_count-2:seg_count+1] + 'N'*2 + return move_info + + +if __name__=='__main__': + moptions = {} + sp_param = {} + + exple_data = ['/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch12_read102_strand.fast5', \ + '/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch47_read165_strand.fast5', \ + '/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch48_read38_strand.fast5', \ + '/mnt/isilon/wang_lab/shared/temp_shared/fast5_move/IBDUCAL377261L_20170201_FNfab41074_MN17640_mux_scan_X209_66786_ch52_read12_strand.fast5' \ + ] + + sp_param['f5reader'] = h5py.File(exple_data[0], 'r'); + + + fq_str = '/Analyses/Basecall_1D_001/BaseCalled_template/Fastq' + mv_str = '/Analyses/Basecall_1D_001/BaseCalled_template/Move' + sg_str = '/Raw/Reads/' + + sp_param['fq_seq'] = sp_param['f5reader'][fq_str][()].splitlines()[1] + k = list(sp_param['f5reader'][sg_str].keys())[0] + sp_param['raw_signals'] = sp_param['f5reader'][sg_str][k]['Signal'][()] + sp_param['first_sample_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['first_sample_template'] + sp_param['duration_template'] = sp_param['f5reader']['/Analyses/Segmentation_001/Summary/segmentation'].attrs['duration_template'] + move_data = sp_param['f5reader'][mv_str][()] + + getEvent_Info(moptions, sp_param, move_data) + + sp_param['f5reader'].close();