[cad161]: / scripts / adicap.py

Download this file

73 lines (50 with data), 1.7 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
"""
Process ADICAP codes.

Convert the raw ADICAP thesaurus spreadsheet into a gzipped JSON lookup table.
Thesaurus title: "Thésaurus de la codification ADICAP - Index raisonné des lésions"
Source: https://smt.esante.gouv.fr/terminologie-adicap/
"""
import gzip
import json
import re
from pathlib import Path
import pandas as pd
import typer
def parse_each_dict(df, dictionaryCode: str):
    """
    Build the code -> label lookup for a single ADICAP dictionary.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns ``dictionaryCode``, ``code``,
        ``label`` and ``anatomyCode``.
    dictionaryCode : str
        Identifier of the dictionary to extract (e.g. ``"D1"``).

    Returns
    -------
    dict
        ``{"label": ..., "codes": {...}}`` where ``codes`` maps
        ``str(anatomyCode) + str(code)`` to the row label, and ``label`` is
        the label of the entry whose key equals ``dictionaryCode`` itself
        (presumably the dictionary's own title row). That entry is removed
        from ``codes``.

    Raises
    ------
    KeyError
        If no entry keyed by ``dictionaryCode`` exists among the selected rows.
    """
    # A boolean mask avoids interpolating the code into a query string, and
    # calling ``fillna`` on the fresh selection (instead of ``inplace=True`` on
    # a filtered frame) avoids pandas' chained-assignment pitfalls.
    wanted_columns = ["code", "label", "anatomyCode"]
    rows = df.loc[df["dictionaryCode"] == dictionaryCode, wanted_columns].fillna("")

    # Only the "D5" dictionary drops codes that start with four digits.
    skip_numeric_codes = dictionaryCode == "D5"
    four_digits = re.compile(r"[0-9]{4}")

    codes = {}
    for code, label, anatomy_code in rows.to_numpy():
        # Spreadsheet cells may be read as numbers; normalise to text before
        # matching so that ``re`` never receives a non-string.
        code = str(code)
        if skip_numeric_codes and four_digits.match(code) is not None:
            continue
        codes[str(anatomy_code) + code] = label

    try:
        dictionary_label = codes.pop(dictionaryCode)
    except KeyError:
        raise KeyError(
            f"No entry keyed {dictionaryCode!r} found for dictionary "
            f"{dictionaryCode!r}"
        ) from None

    return dict(label=dictionary_label, codes=codes)
def get_decode_dict(df, dict_keys=("D1", "D2", "D3", "D4", "D5", "D6", "D7")):
    """
    Build the lookup tables for several ADICAP dictionaries.

    Parameters
    ----------
    df : pandas.DataFrame
        Table passed unchanged to ``parse_each_dict`` for each key.
    dict_keys : iterable of str
        Dictionary codes to process. The default is an immutable tuple so the
        default value cannot be altered between calls.

    Returns
    -------
    dict
        Maps each key of ``dict_keys`` to the result of
        ``parse_each_dict(df, dictionaryCode=key)``.
    """
    return {key: parse_each_dict(df, dictionaryCode=key) for key in dict_keys}
def run(
    raw: Path = typer.Argument(..., help="Path to the raw file"),
    output: Path = typer.Option(
        "edsnlp/resources/adicap.json.gz",
        help="Path to the output gzipped JSON file.",
    ),
) -> None:
    """
    Convenience script to automatically process the ADICAP codes
    into a processable file.
    """
    # The raw file is an Excel workbook; the codes are read from its
    # "rawdatas" sheet, using the first row as column names.
    df = pd.read_excel(
        raw,
        sheet_name="rawdatas",
        header=0,
    )

    decode_dict = get_decode_dict(df)

    typer.echo(f"Saving to {output}")

    # The result is written as UTF-8 encoded JSON inside a gzip container,
    # hence the explicit binary mode.
    with gzip.open(output, "wb") as f:
        f.write(json.dumps(decode_dict).encode("utf-8"))

    typer.echo("Done !")
# When executed as a script, hand ``run`` to typer, which builds the
# command-line interface from its signature.
if __name__ == "__main__":
    typer.run(run)