Diff of /scripts/_utils.py [000000] .. [efd906]

Switch to side-by-side view

--- a
+++ b/scripts/_utils.py
@@ -0,0 +1,77 @@
+import numpy as np
+import pandas as pd
+import yaml
+from joblib import Parallel
+from tqdm.auto import tqdm
+
+
+def encode_biopsy_site(df_rna):
+    d = {}
+    for site in df_rna["Biopsy site"].unique():
+        if pd.isnull(site) | (site == "Non disponible"):
+            d[site] = np.nan
+        elif site in ["PRIMITIF", "META_PULM", "META_PULM_HL", "META_PULM_CL"]:
+            d[site] = 0
+        elif site in ["META_PLEVRE", "META_PLEVRE_HL", "META_PLEVRE_CL"]:
+            d[site] = 1
+        elif site.split("_")[0] == "ADP":
+            d[site] = 2
+        elif site == "META_OS":
+            d[site] = 3
+        elif site == "META_FOIE":
+            d[site] = 4
+        elif site == "META_SURRENALE":
+            d[site] = 5
+        elif site == "META_BRAIN":
+            d[site] = 6
+        else:
+            d[site] = 7
+    return df_rna.replace({"Biopsy site": d})
+
+
+def process_radiomics(df_rad, transformed_features):
+    df_rad[transformed_features] = np.log(df_rad[transformed_features] + 1)
+    return df_rad
+
+
+def write_yaml(content, fname):
+    content = _clean_nested_dict(content)
+    with open(fname, "w") as yaml_file:
+        yaml.safe_dump(
+            content, yaml_file, default_flow_style=None
+        )  # , default_flow_style=False)
+
+
+def read_yaml(fname):
+    with open(fname) as yaml_file:
+        return yaml.safe_load(yaml_file)
+
+
+class ProgressParallel(Parallel):
+    def __init__(self, use_tqdm=True, total=None, *args, **kwargs):
+        self._use_tqdm = use_tqdm
+        self._total = total
+        super().__init__(*args, **kwargs)
+
+    def __call__(self, *args, **kwargs):
+        with tqdm(disable=not self._use_tqdm, total=self._total) as self._pbar:
+            return Parallel.__call__(self, *args, **kwargs)
+
+    def print_progress(self):
+        if self._total is None:
+            self._pbar.total = self.n_dispatched_tasks
+        self._pbar.n = self.n_completed_tasks
+        self._pbar.refresh()
+
+
+def _clean_nested_dict(d):
+    if isinstance(d, (np.int8, np.int16, np.int32, np.int64)):
+        return int(d)
+    if isinstance(d, (np.float16, np.float32, np.float64)):
+        return float(d)
+    if isinstance(d, list):
+        return [_clean_nested_dict(x) for x in d]
+    if isinstance(d, dict):
+        for key, value in d.items():
+            d.update({key: _clean_nested_dict(value)})
+    return d