Diff of /scripts/cim10.py [000000] .. [cad161]

Switch to side-by-side view

--- a
+++ b/scripts/cim10.py
@@ -0,0 +1,44 @@
+"""
+Process CIM10 patterns.
+
+!!! warning "Watch out for the encoding"
+
+    We had to convert the CIM-10 file from windows-1252 to utf-8.
+
+Source: https://www.atih.sante.fr/plateformes-de-transmission-et-logiciels/logiciels-espace-de-telechargement/id_lot/456
+"""  # noqa
+
+from pathlib import Path
+
+import pandas as pd
+import typer
+
+
+def run(
+    raw: Path = typer.Argument(..., help="Path to the raw file"),
+    output: Path = typer.Option(
+        "edsnlp/resources/cim10.csv.gz", help="Path to the output CSV table."
+    ),
+) -> None:
+    """
+    Convenience script to automatically process the CIM10 terminology
+    into a processable file.
+    """
+
+    df = pd.read_csv(raw, sep="|", header=None)
+
+    typer.echo(f"Processing {len(df)} French ICD codes...")
+
+    df.columns = ["code", "type", "ssr", "psy", "short", "long"]
+    for column in ["code", "short", "long"]:
+        df[column] = df[column].str.strip()
+
+    typer.echo(f"Saving to {output}")
+
+    df.to_csv(output, index=False)
+
+    typer.echo("Done !")
+
+
+if __name__ == "__main__":
+    typer.run(run)