Diff of /tests/unit/test_codes.py [000000] .. [e988c2]

Switch to unified view

a b/tests/unit/test_codes.py
1
import textwrap
2
from pathlib import Path
3
4
import pytest
5
6
from ehrql.codes import (
7
    BNFCode,
8
    CodelistError,
9
    CTV3Code,
10
    DMDCode,
11
    ICD10Code,
12
    OPCS4Code,
13
    SNOMEDCTCode,
14
    codelist_from_csv,
15
    codelist_from_csv_lines,
16
)
17
18
19
# `codelist_from_csv` can take either a string or `pathlib.Path` and we want to check
20
# that we handle both correctly
21
@pytest.fixture(params=[str, Path])
def path_type(request):
    """Provide the callable used to represent a path: `str`, then `Path`."""
    path_factory = request.param
    return path_factory
24
25
26
def test_codelist_from_csv(path_type, tmp_path):
    """Codes are read from a CSV file whose path is given as `str` or `Path`."""
    csv_file = tmp_path / "codes.csv"
    csv_text = """
        CodeID,foo
        abc00,
        def00,
        """
    # Dedent *before* stripping: stripping first removes the header line's indent,
    # which leaves no common margin, so `dedent` would be a no-op and the code rows
    # would be written to disk with their leading spaces intact.
    csv_file.write_text(textwrap.dedent(csv_text).strip())
    codelist = codelist_from_csv(path_type(csv_file), column="CodeID")
    assert codelist == ["abc00", "def00"]
36
37
38
def test_codelist_from_csv_missing_file(path_type):
    """A path that does not exist raises `CodelistError` naming the file."""
    filename = "no_file_here.csv"
    target = path_type(Path(__file__) / filename)
    with pytest.raises(CodelistError, match=filename):
        codelist_from_csv(target, column="CodeID")
42
43
44
def test_codelist_from_csv_missing_file_hint(path_type):
    """A missing path containing a backslash gives an error mentioning "backslash"."""
    # The backslash must be escaped: a bare "\f" in a string literal is the
    # form-feed escape, which would leave no backslash in the path at all.
    bad_path = Path(__file__) / "bad\\file.csv"
    with pytest.raises(CodelistError, match="backslash"):
        codelist_from_csv(path_type(bad_path), column="CodeID")
48
49
50
def test_codelist_from_csv_lines():
    """Codes are trimmed, and blank and duplicate codes are left out of the result."""
    csv_lines = [
        "CodeID,foo",
        "abc00,",
        "def00,",
        # Check codes are trimmed
        "ghi00 ,",
        # Check blanks are ignored
        # (the trailing comma matters: without it this literal is implicitly
        # concatenated with the next one and the two rows collapse into one)
        "  ,",
        # Check duplicates are ignored
        " def00,",
    ]
    codelist = codelist_from_csv_lines(csv_lines, column="CodeID")
    assert codelist == ["abc00", "def00", "ghi00"]
64
65
66
def test_codelist_from_csv_lines_missing_column():
    """Asking for a column absent from the header raises `CodelistError`."""
    wanted_column = "no_col_here"
    rows = ["CodeID", "abc00"]
    with pytest.raises(CodelistError, match=wanted_column):
        codelist_from_csv_lines(rows, column=wanted_column)
73
74
75
def test_codelist_from_csv_lines_with_category_column():
    """With `category_column` given, the result maps each code to its category."""
    expected = {"abc00": "foo", "def00": "bar", "ghi00": ""}
    # One "code,category" row per expected entry, beneath the header row.
    rows = ["CodeID,Cat1"]
    rows.extend(f"{code},{category}" for code, category in expected.items())
    result = codelist_from_csv_lines(rows, column="CodeID", category_column="Cat1")
    assert result == expected
92
93
94
def test_codelist_from_csv_lines_with_missing_category_column():
    """Asking for a category column absent from the header raises `CodelistError`."""
    wanted_category = "no_col_here"
    rows = ["CodeID,Cat1", "abc00,foo"]
    with pytest.raises(CodelistError, match=wanted_category):
        codelist_from_csv_lines(
            rows, column="CodeID", category_column=wanted_category
        )
105
106
107
@pytest.mark.parametrize(
    ("cls", "value"),
    [
        (BNFCode, "0101010I0AAAEAE"),
        (BNFCode, "23965909711"),
        (CTV3Code, "ABC01"),
        (CTV3Code, "De4.."),
        (ICD10Code, "A01"),
        (ICD10Code, "A012"),
        (OPCS4Code, "B23"),
        (OPCS4Code, "B234"),
        (SNOMEDCTCode, "1234567890"),
    ],
)
def test_valid_codes(cls, value):
    """Each listed string is accepted by its code class and kept as `.value`."""
    code = cls(value)
    assert code.value == value
123
124
125
@pytest.mark.parametrize(
    ("cls", "value"),
    [
        # First character of Product is a digit (5) rather than a letter
        (BNFCode, "0101010I05AAEAE"),
        # Appliance, but with too many digits
        (BNFCode, "239659097111"),
        # Wrong length
        (CTV3Code, "ABC0"),
        # Dot somewhere other than at the end
        (CTV3Code, "ABC.0"),
        # Letter somewhere other than at the start
        (ICD10Code, "AA1"),
        # Wrong length
        (ICD10Code, "A0124"),
        # I is not an allowed first character
        (OPCS4Code, "I00"),
        # Too short
        (SNOMEDCTCode, "123"),
        # Too long
        (SNOMEDCTCode, "12345678901234567890"),
        # Leading zero
        (SNOMEDCTCode, "0123456789"),
    ],
)
def test_invalid_codes(cls, value):
    """Each listed malformed string is rejected with `ValueError`."""
    with pytest.raises(ValueError):
        cls(value)
153
154
155
def test_syntactically_equivalent_codes():
    """`DMDCode` and `SNOMEDCTCode` share the same `regex`."""
    # The SNOMEDCTCode cases are deliberately not repeated for DMDCode; if the two
    # classes ever stop sharing the regex, DMDCode will need cases of its own.
    assert SNOMEDCTCode.regex == DMDCode.regex