import pandas as pd
from singlecellmultiomics.bamProcessing import get_contig_sizes
import pysam
import pyBigWig
import numpy as np
import gzip
import pickle

def dataframe_to_wig(df: pd.DataFrame, wig_path: str, span: int = 1, stepper: str = "variableStep",
                     graphType: str = "points", offset: int = 1, extra_header: str = ''):
    """
    Export a DataFrame to a WIG file

    Args:
        df(pd.DataFrame): DataFrame to export, indexed by a multi-index in the
            format (chrom, position), with the values to write in the 'value'
            column (or in the first column when no 'value' column is present)
        wig_path(str): write the WIG file to this location
        span(int): number of bases covered by each value
        stepper(str): WIG stepper type, for example 'variableStep'
        graphType(str): bar/points/line
        offset(int): offset added to every position; WIG coordinates are
            1-based, so keep the default of 1 for 0-based input positions
        extra_header(str): extra text appended to every chromosome header line
    """
    with open(wig_path, 'w') as out:
        for chrom, chrom_data in df.sort_index().groupby(level=0):
            # Write the header line for this chromosome:
            out.write(f'{stepper} chrom={chrom} graphType={graphType} span={span} {extra_header}\n')
            for k, row in chrom_data.iterrows():
                if len(k) == 2:
                    (chrom, pos) = k
                elif len(k) == 3:
                    (chrom, pos, _) = k
                else:
                    raise ValueError('Supply a dataframe with a multi-index in the format (chrom, pos)')
                out.write(f"{pos + offset}\t{row['value'] if 'value' in row else row.iloc[0]}\n")
def series_to_bigwig(series: pd.Series, source_bam: str, write_path: str, bin_size: int = None):
    """
    Write a pd.Series to a bigwig file

    Args:
        series(pd.Series): series to write to the bigwig, indexed by a
            multi-index in the format (contig, start)
        source_bam(str): path to a source bam file to extract the contig
            ordering and sizes from
        write_path(str): write the bigwig file to this location
        bin_size(int): size of the bins, set to None to use a bin size of 1
    """
    series.sort_index(inplace=True)
    with pysam.AlignmentFile(source_bam) as alignments, pyBigWig.open(write_path, 'w') as out:
        cs = get_contig_sizes(alignments)
        # Write the header: one (contig, length) pair per reference
        out.addHeader(list(zip(alignments.references, alignments.lengths)))
        # Iterate the contigs in header order; pyBigWig expects entries
        # in the same order as the header:
        for contig in alignments.references:
            if contig not in series.index.get_level_values(0):
                continue
            start = series.loc[contig].index.astype(int)
            contig_list = [contig] * len(start)
            if bin_size is None:
                end = np.array(start) + 1
            else:
                # Clip the bin ends to the contig size:
                end = np.clip(np.array(start) + bin_size, 0, cs[contig])
            values_to_write = np.array(series.loc[contig].values, dtype=np.float64)
            out.addEntries(
                contig_list,             # contigs
                list(start),             # start coordinates
                ends=list(end),          # end coordinates
                values=values_to_write)  # values to write
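

# Example usage (a minimal sketch; 'sample.bam' is a hypothetical BAM whose
# header must list the contigs used in the series index):
#
#   idx = pd.MultiIndex.from_tuples([('chr1', 0), ('chr1', 10000)])
#   counts = pd.Series([3.0, 7.0], index=idx)
#   series_to_bigwig(counts, 'sample.bam', 'counts.bw', bin_size=10000)
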
def write_pickle(obj, output_path: str):
    """Pickle obj to a gzip-compressed file at output_path"""
    with gzip.open(output_path, 'wb') as out:
        pickle.dump(obj, out)


def read_pickle(path: str):
    """Load an object from a gzip-compressed pickle file written by write_pickle"""
    with gzip.open(path, 'rb') as handle:
        return pickle.load(handle)
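

# Example round trip (the path 'state.pickle.gz' is hypothetical):
#
#   write_pickle({'chr1': [1, 2, 3]}, 'state.pickle.gz')
#   state = read_pickle('state.pickle.gz')  # -> {'chr1': [1, 2, 3]}
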
def write_bigwig(write_locations: dict, write_values: dict, source_bam: str, write_path: str, bin_size: int = None):
    """
    Write location and value dictionaries to a bigwig file

    Args:
        write_locations(dict): locations to write to, grouped by contig,
            {contig: [location (int), ...], ...}
        write_values(dict): values to write, grouped by contig,
            {contig: {location: value (float), ...}, ...}
        source_bam(str): path to a source bam file to extract the contig
            ordering and sizes from
        write_path(str): write the bigwig file to this location
        bin_size(int): size of the bins, set to None to use a bin size of 1
    """
    with pysam.AlignmentFile(source_bam) as alignments, pyBigWig.open(write_path, 'w') as out:
        cs = get_contig_sizes(alignments)
        # Write the header: one (contig, length) pair per reference
        out.addHeader(list(zip(alignments.references, alignments.lengths)))
        # Iterate the contigs in header order; pyBigWig expects entries
        # in the same order as the header:
        for contig in alignments.references:
            if contig not in write_locations:
                continue
            contig_list = [contig] * len(write_locations[contig])
            start = sorted(write_locations[contig])
            if bin_size is None:
                end = np.array(start) + 1
            else:
                # Clip the bin ends to the contig size:
                end = np.clip(np.array(start) + bin_size, 0, cs[contig])
            values_to_write = np.array([write_values[contig][p] for p in start], dtype=np.float32)
            out.addEntries(
                contig_list,             # contigs
                list(start),             # start coordinates
                ends=list(end),          # end coordinates
                values=values_to_write)  # values to write
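

# Example usage (a minimal sketch; 'sample.bam' is a hypothetical BAM whose
# header must list 'chr1'):
#
#   locations = {'chr1': [0, 10000]}
#   values = {'chr1': {0: 1.0, 10000: 2.5}}
#   write_bigwig(locations, values, 'sample.bam', 'signal.bw', bin_size=10000)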