|
a |
|
b/scripts/adicap.py |
|
|
1 |
""" |
|
|
2 |
Process ADICAP codes |
|
|
3 |
Thésaurus de la codification ADICAP - Index raisonné des lésions |
|
|
4 |
source : https://smt.esante.gouv.fr/terminologie-adicap/ |
|
|
5 |
|
|
|
6 |
""" |
|
|
7 |
|
|
|
8 |
import gzip |
|
|
9 |
import json |
|
|
10 |
import re |
|
|
11 |
from pathlib import Path |
|
|
12 |
|
|
|
13 |
import pandas as pd |
|
|
14 |
import typer |
|
|
15 |
|
|
|
16 |
|
|
|
17 |
def parse_each_dict(df, dictionaryCode: str):
    """
    Extract a single ADICAP dictionary from the raw table.

    Parameters
    ----------
    df : pd.DataFrame
        Raw table. The columns ``dictionaryCode``, ``code``, ``label`` and
        ``anatomyCode`` are read; ``df`` itself is not modified.
    dictionaryCode : str
        Identifier of the dictionary to extract (e.g. ``"D1"``).

    Returns
    -------
    dict
        ``{"label": <label of the dictionary>, "codes": {<key>: <label>}}``,
        where every key is ``str(anatomyCode) + str(code)`` with missing
        values replaced by the empty string.

    Raises
    ------
    KeyError
        If no selected row produces a key equal to ``dictionaryCode``, i.e.
        the row carrying the dictionary's own label is missing.
    """
    columns = ["code", "label", "anatomyCode"]

    # Boolean indexing followed by a non-inplace ``fillna`` builds a fresh
    # frame, so nothing is ever written into a slice of ``df``.
    rows = df.loc[df["dictionaryCode"] == dictionaryCode, columns].fillna("")

    is_d5 = dictionaryCode == "D5"
    leading_four_digits = re.compile(r"[0-9]{4}")

    codes = {}
    for code, label, anatomy_code in rows.itertuples(index=False, name=None):
        # Cells may hold numbers rather than strings; normalise once so the
        # regex test and the key construction agree on the same text.
        code = str(code)

        # In D5, codes that start with four digits are left out.
        if is_d5 and leading_four_digits.match(code) is not None:
            continue

        codes[str(anatomy_code) + code] = label

    # The entry keyed by the dictionary code itself holds the dictionary's
    # label rather than a regular code: move it out of the mapping.
    dictionary_label = codes.pop(dictionaryCode)

    return dict(label=dictionary_label, codes=codes)
|
|
33 |
|
|
|
34 |
|
|
|
35 |
def get_decode_dict(df, dict_keys=("D1", "D2", "D3", "D4", "D5", "D6", "D7")):
    """
    Build the decoding table for every requested ADICAP dictionary.

    Parameters
    ----------
    df : pd.DataFrame
        Raw table, passed unchanged to ``parse_each_dict``.
    dict_keys : iterable of str
        Dictionary identifiers to extract. The default is an immutable tuple
        so that it cannot be altered between calls.

    Returns
    -------
    dict
        Maps each key of ``dict_keys`` to the result of
        ``parse_each_dict(df, dictionaryCode=key)``.
    """
    return {key: parse_each_dict(df, dictionaryCode=key) for key in dict_keys}
|
|
42 |
|
|
|
43 |
|
|
|
44 |
def run(
    raw: Path = typer.Argument(..., help="Path to the raw file"),
    output: Path = typer.Option(
        "edsnlp/resources/adicap.json.gz",
        help="Path to the output gzipped JSON file.",
    ),
) -> None:
    """
    Convenience script to automatically process the ADICAP codes
    into a processable file.

    Reads the ``rawdatas`` sheet of the Excel file ``raw``, builds the
    decoding table with ``get_decode_dict`` and writes it to ``output`` as
    gzip-compressed, UTF-8 encoded JSON.
    """

    df = pd.read_excel(
        raw,
        sheet_name="rawdatas",
        header=0,
    )

    decode_dict = get_decode_dict(df)

    typer.echo(f"Saving to {output}")

    # Serialise before opening the destination: if the table cannot be
    # encoded, no empty archive is left behind.
    payload = json.dumps(decode_dict).encode("utf-8")

    with gzip.open(output, "wb") as f:
        f.write(payload)

    typer.echo("Done !")
|
|
69 |
|
|
|
70 |
|
|
|
71 |
if __name__ == "__main__":
    # Executed as a script: let typer build the command-line interface
    # from the signature of ``run`` and invoke it.
    typer.run(run)