|
a |
|
b/modules/RawDB/IE/boolMapToFreq.py |
|
|
1 |
#!/usr/bin/python |
|
|
2 |
|
|
|
3 |
import pandas as pd |
|
|
4 |
import numpy as np |
|
|
5 |
import sys |
|
|
6 |
|
|
|
7 |
def getBoolFrequencies(boolData):
    """Sum every column of a tab-separated table per ``sample_name`` group.

    Args:
        boolData: Path or file-like object accepted by ``pd.read_table``.
            The table must contain a ``sample_name`` column.
            NOTE(review): presumably the other columns are 0/1 or boolean
            indicators -- confirm against the caller.

    Returns:
        DataFrame indexed by ``sample_name`` holding the per-group column
        sums.

    Raises:
        KeyError: If the table has no ``sample_name`` column.
    """
    df = pd.read_table(boolData, sep="\t")
    # Rows are grouped by default; the explicit ``axis=0`` keyword is
    # deprecated in recent pandas releases, so it is left out on purpose.
    frequencies = df.groupby('sample_name').sum()
    return frequencies
|
|
11 |
|
|
|
12 |
def getFrequencies(data):
    """Read a tab-separated table and return it with numeric columns.

    Each column is passed through ``pd.to_numeric`` and the converted
    column is stored back into the frame before it is returned.

    Args:
        data: Path or file-like object accepted by ``pd.read_table``.

    Returns:
        DataFrame with the same columns as the input table, each converted
        by ``pd.to_numeric``.

    Raises:
        ValueError: If a column holds a value ``pd.to_numeric`` cannot
            parse (conversion is strict, i.e. the default
            ``errors='raise'``).
    """
    frequencies = pd.read_table(data, sep="\t")
    for column in frequencies.columns:
        # pd.to_numeric returns a new Series; it must be assigned back or
        # the conversion is lost.
        frequencies[column] = pd.to_numeric(frequencies[column])
    return frequencies
|
|
20 |
|
|
|
21 |
def getCSVFrequencies(data):
    """Read a comma-separated table and return it transposed.

    After transposing, the first row (the values of the first CSV column)
    becomes the column labels of the result and is removed from the data,
    so the remaining CSV header names form the index.

    Args:
        data: Path or file-like object accepted by ``pd.read_table``.

    Returns:
        Transposed DataFrame: one row per remaining CSV column, one column
        per CSV data row, labelled with the first CSV column's values.
    """
    df = pd.read_table(data, sep=",")
    transposed = df.transpose()
    # The first transposed row carries the labels taken from the first CSV
    # column.
    labels = transposed.iloc[0].tolist()
    frequencies = transposed.drop(transposed.index[0])
    # DataFrame.rename is not in-place, so the labels are assigned
    # explicitly on the frame that is actually returned.
    frequencies.columns = labels
    return frequencies