a b/scripts/cim10.py
1
"""
2
Process CIM10 patterns.
3
4
!!! warning "Watch out for the encoding"
5
6
    We had to convert the CIM-10 file from windows-1252 to utf-8.
7
8
Source: https://www.atih.sante.fr/plateformes-de-transmission-et-logiciels/logiciels-espace-de-telechargement/id_lot/456
9
"""  # noqa
10
11
from pathlib import Path
12
13
import pandas as pd
14
import typer
15
16
17
def run(
    raw: Path = typer.Argument(..., help="Path to the raw file"),
    output: Path = typer.Option(
        "edsnlp/resources/cim10.csv.gz", help="Path to the output CSV table."
    ),
) -> None:
    """
    Convenience script to automatically process the CIM10 terminology
    into a processable file.
    """
    # NOTE(review): the docstring above is kept verbatim because typer is
    # expected to surface it as the command's help text - confirm before
    # rewording it.

    # The raw file is parsed as a header-less, pipe-delimited table.
    cim10 = pd.read_csv(raw, sep="|", header=None)

    typer.echo(f"Processing {len(cim10)} French ICD codes...")

    # Name the six positional columns of the raw table.
    cim10.columns = ["code", "type", "ssr", "psy", "short", "long"]

    # Trim surrounding whitespace from the textual columns only; the
    # remaining columns are written out exactly as they were parsed.
    stripped = {
        name: cim10[name].str.strip() for name in ("code", "short", "long")
    }
    cim10 = cim10.assign(**stripped)

    typer.echo(f"Saving to {output}")

    # The positional index carries no information, so it is not written.
    cim10.to_csv(output, index=False)

    typer.echo("Done !")
41
42
43
if __name__ == "__main__":
    # Only when executed as a script: hand `run` over to typer so it is
    # invoked as a command-line entry point.
    typer.run(run)