Diff of /util/preprocess.py [000000] .. [03464c]

Switch to side-by-side view

--- a
+++ b/util/preprocess.py
@@ -0,0 +1,30 @@
+"""
+Contain some omics data preprocess functions
+"""
+import pandas as pd
+
+
+def separate_B(B_df_single):
+    """
+    Separate the DNA methylation dataframe into subsets according to their targeting chromosomes
+
+    Parameters:
+        B_df_single(DataFrame) -- a dataframe that contains the single DNA methylation matrix
+
+    Return:
+        B_df_list(list) -- a list with 23 subset dataframe
+        B_dim(list) -- the dims of each chromosome
+    """
+    anno = pd.read_csv('./anno/B_anno.csv', dtype={'CHR': str}, index_col=0)
+    anno_contain = anno.loc[B_df_single.index, :]
+    print('Separating B.tsv according the targeting chromosome...')
+    B_df_list, B_dim_list = [], []
+    ch_id = list(range(1, 23))
+    ch_id.append('X')
+    for ch in ch_id:
+        ch_index = anno_contain[anno_contain.CHR == str(ch)].index
+        ch_df = B_df_single.loc[ch_index, :]
+        B_df_list.append(ch_df)
+        B_dim_list.append(len(ch_df))
+
+    return B_df_list, B_dim_list