[cad161]: / scripts / cim10.py

Download this file

45 lines (29 with data), 1.1 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
"""
Process CIM10 patterns.
!!! warning "Watch out for the encoding"

    We had to convert the CIM-10 file from windows-1252 to utf-8.

Source: https://www.atih.sante.fr/plateformes-de-transmission-et-logiciels/logiciels-espace-de-telechargement/id_lot/456
""" # noqa
from pathlib import Path
import pandas as pd
import typer
def run(
    raw: Path = typer.Argument(..., help="Path to the raw file"),
    output: Path = typer.Option(
        "edsnlp/resources/cim10.csv.gz", help="Path to the output CSV table."
    ),
) -> None:
    """
    Convenience script to automatically process the CIM10 terminology
    into a processable file.
    """
    # NOTE: the docstring wording is kept as-is on purpose; typer renders it
    # as the command's ``--help`` text.

    # The raw file is pipe-delimited and has no header row, so pandas assigns
    # positional integer column labels at this point.
    table = pd.read_csv(raw, sep="|", header=None)
    typer.echo(f"Processing {len(table)} French ICD codes...")

    # Give the six positional fields their names. Assigning ``columns``
    # raises if the file does not contain exactly six fields per row.
    table.columns = ["code", "type", "ssr", "psy", "short", "long"]

    # Only the code and the two label columns are cleaned: surrounding
    # whitespace is trimmed, the remaining columns are passed through.
    text_columns = ("code", "short", "long")
    for name in text_columns:
        trimmed = table[name].str.strip()
        table[name] = trimmed

    typer.echo(f"Saving to {output}")
    # The row index carries no information, so it is left out of the CSV.
    # pandas infers the compression from the output file extension.
    table.to_csv(output, index=False)
    typer.echo("Done !")
# Script entry point: hand the `run` function to typer, which parses the
# command-line arguments and options and then calls it.
if __name__ == "__main__":
    typer.run(run)