|
a |
|
b/scripts/cim10.py |
|
|
1 |
""" |
|
|
2 |
Process CIM10 patterns. |
|
|
3 |
|
|
|
4 |
!!! warning "Watch out for the encoding" |
|
|
5 |
|
|
|
6 |
We had to convert the CIM-10 file from windows-1252 to utf-8. |
|
|
7 |
|
|
|
8 |
Source: https://www.atih.sante.fr/plateformes-de-transmission-et-logiciels/logiciels-espace-de-telechargement/id_lot/456 |
|
|
9 |
""" # noqa |
|
|
10 |
|
|
|
11 |
from pathlib import Path |
|
|
12 |
|
|
|
13 |
import pandas as pd |
|
|
14 |
import typer |
|
|
15 |
|
|
|
16 |
|
|
|
17 |
def run(
    raw: Path = typer.Argument(..., help="Path to the raw file"),
    output: Path = typer.Option(
        "edsnlp/resources/cim10.csv.gz", help="Path to the output CSV table."
    ),
) -> None:
    """
    Convert the raw CIM10 terminology file into a CSV table with named columns.

    The raw file is read as a header-less table whose fields are separated
    by `|`. Whitespace is stripped from the code and label columns before
    the table is written to the output path.
    """
    column_names = ["code", "type", "ssr", "psy", "short", "long"]
    text_columns = ("code", "short", "long")

    table = pd.read_csv(raw, header=None, sep="|")
    typer.echo(f"Processing {len(table)} French ICD codes...")

    # The raw file has no header row, so the column names are set here.
    table.columns = column_names

    # Strip leading and trailing whitespace from the textual columns only.
    table = table.assign(
        **{name: table[name].str.strip() for name in text_columns}
    )

    typer.echo(f"Saving to {output}")
    table.to_csv(output, index=False)

    typer.echo("Done !")
|
|
41 |
|
|
|
42 |
|
|
|
43 |
if __name__ == "__main__":
    # Expose `run` as a command-line program when the file is executed as a script.
    typer.run(run)