Switch to unified view

a b/examples/tcga_lung/_utils.py
1
import numpy as np
2
import pandas as pd
3
import yaml
4
from joblib import Parallel
5
from tqdm.auto import tqdm
6
7
8
def encode_biopsy_site(df_rna):
    """Return a copy of *df_rna* with the "Biopsy site" column label-encoded.

    Each distinct value found in the column is mapped to a code:

    * missing values and "Non disponible" -> NaN
    * primary tumour / pulmonary metastasis labels -> 0
    * pleural metastasis labels -> 1
    * any label whose first "_"-separated token is "ADP" -> 2
    * "META_OS" -> 3, "META_FOIE" -> 4, "META_SURRENALE" -> 5,
      "META_BRAIN" -> 6
    * anything else -> 7

    The substitution is done with ``DataFrame.replace`` restricted to the
    "Biopsy site" column, so the input frame itself is not modified.
    """
    lung_labels = ("PRIMITIF", "META_PULM", "META_PULM_HL", "META_PULM_CL")
    pleura_labels = ("META_PLEVRE", "META_PLEVRE_HL", "META_PLEVRE_CL")
    # Sites that map one-to-one onto a code; unknown labels fall back to 7.
    single_site_codes = {
        "META_OS": 3,
        "META_FOIE": 4,
        "META_SURRENALE": 5,
        "META_BRAIN": 6,
    }

    mapping = {}
    for label in df_rna["Biopsy site"].unique():
        if pd.isnull(label) or label == "Non disponible":
            code = np.nan
        elif label in lung_labels:
            code = 0
        elif label in pleura_labels:
            code = 1
        elif label.split("_")[0] == "ADP":
            code = 2
        else:
            code = single_site_codes.get(label, 7)
        mapping[label] = code

    return df_rna.replace({"Biopsy site": mapping})
30
31
32
def process_radiomics(df_rad, transformed_features):
    """Apply a ``log(1 + x)`` transform to selected radiomic columns.

    Parameters
    ----------
    df_rad : pandas.DataFrame
        Table of radiomic features. It is modified in place.
    transformed_features : list
        Labels of the columns of ``df_rad`` to transform.

    Returns
    -------
    pandas.DataFrame
        The same ``df_rad`` object, with the selected columns overwritten.
    """
    # log1p is the numerically accurate form of log(x + 1): for very small x
    # the sum x + 1 rounds to exactly 1.0 and the plain log would return 0.
    df_rad[transformed_features] = np.log1p(df_rad[transformed_features])
    return df_rad
35
36
37
def write_yaml(content, fname):
    """Dump *content* to the YAML file *fname*, overwriting it if it exists.

    *content* is first passed through ``_clean_nested_dict`` so that NumPy
    scalar values become plain Python numbers before being handed to
    ``yaml.safe_dump``.
    """
    serializable = _clean_nested_dict(content)
    with open(fname, "w") as handle:
        yaml.safe_dump(serializable, handle, default_flow_style=None)
43
44
45
def read_yaml(fname):
    """Load the YAML file *fname* with ``yaml.safe_load`` and return the result."""
    with open(fname) as handle:
        loaded = yaml.safe_load(handle)
    return loaded
48
49
50
class ProgressParallel(Parallel):
    """``joblib.Parallel`` variant that reports progress through a tqdm bar.

    Parameters
    ----------
    use_tqdm : bool
        When false the tqdm bar is created in disabled mode.
    total : int or None
        Total shown on the bar. When ``None`` the total is updated to the
        number of tasks dispatched so far each time progress is reported.
    *args, **kwargs
        Forwarded unchanged to ``Parallel.__init__``.
    """

    def __init__(self, use_tqdm=True, total=None, *args, **kwargs):
        self._use_tqdm = use_tqdm
        self._total = total
        super().__init__(*args, **kwargs)

    def __call__(self, *args, **kwargs):
        """Run the parallel call with a tqdm bar open for its duration."""
        progress_bar = tqdm(disable=not self._use_tqdm, total=self._total)
        with progress_bar as self._pbar:
            return super().__call__(*args, **kwargs)

    def print_progress(self):
        """Synchronise the bar with the task counters kept on this object.

        NOTE(review): presumably invoked by joblib as tasks complete,
        overriding the base class's own progress printing - confirm.
        """
        if self._total is None:
            # No fixed total was given: follow the number of dispatched tasks.
            self._pbar.total = self.n_dispatched_tasks
        self._pbar.n = self.n_completed_tasks
        self._pbar.refresh()
65
66
67
def _clean_nested_dict(d):
    """Recursively convert NumPy values inside *d* to plain Python objects.

    The result is meant to be safe to hand to ``yaml.safe_dump``, which does
    not know how to represent NumPy types.

    Conversions applied:

    * ``np.bool_`` -> ``bool``
    * any NumPy integer (signed or unsigned, any width) -> ``int``
    * any NumPy floating type -> ``float``
    * ``np.ndarray`` -> (nested) ``list`` of cleaned elements
    * ``list`` / ``tuple`` -> new list / tuple with cleaned elements
    * ``dict`` -> new ``dict`` with the same keys and cleaned values

    Any other object is returned unchanged. The input is never mutated:
    containers are rebuilt rather than updated in place.
    """
    # np.bool_ is not a subclass of np.integer, so it needs its own branch.
    if isinstance(d, np.bool_):
        return bool(d)
    if isinstance(d, np.integer):
        return int(d)
    if isinstance(d, np.floating):
        return float(d)
    if isinstance(d, np.ndarray):
        # tolist() already yields native scalars; recurse in case the array
        # has object dtype and holds nested containers.
        return _clean_nested_dict(d.tolist())
    if isinstance(d, list):
        return [_clean_nested_dict(item) for item in d]
    if isinstance(d, tuple):
        return tuple(_clean_nested_dict(item) for item in d)
    if isinstance(d, dict):
        return {key: _clean_nested_dict(value) for key, value in d.items()}
    return d