[e988c2]: / tests / unit / test_codes.py

Download this file

159 lines (134 with data), 4.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import textwrap
from pathlib import Path
import pytest
from ehrql.codes import (
BNFCode,
CodelistError,
CTV3Code,
DMDCode,
ICD10Code,
OPCS4Code,
SNOMEDCTCode,
codelist_from_csv,
codelist_from_csv_lines,
)
# `codelist_from_csv` can take either a string or `pathlib.Path` and we want to check
# that we handle both correctly
@pytest.fixture(params=[str, Path])
def path_type(request):
    """Supply each accepted path representation in turn.

    Any test requesting this fixture runs once per entry in ``params``. The
    value handed to the test is the callable (``str`` or ``Path``) that the
    test uses to convert a path before passing it to the function under test.
    """
    path_converter = request.param
    return path_converter
def test_codelist_from_csv(path_type, tmp_path):
    """Check that codes are read from a CSV file given as ``str`` or ``Path``.

    Writes a small two-column CSV into pytest's ``tmp_path`` directory and
    asserts that the values of the ``CodeID`` column come back in file order.
    """
    csv_file = tmp_path / "codes.csv"
    csv_text = """
        CodeID,foo
        abc00,
        def00,
        """
    # Dedent *before* stripping: once the leading newline has been stripped the
    # first line carries no indentation, so `dedent` would find no common
    # prefix and leave every following row indented in the written file.
    csv_file.write_text(textwrap.dedent(csv_text).strip())
    codelist = codelist_from_csv(path_type(csv_file), column="CodeID")
    assert codelist == ["abc00", "def00"]
def test_codelist_from_csv_missing_file(path_type):
    """A path with no file behind it raises ``CodelistError`` naming the file."""
    # Nest the name under this module's own file path so nothing exists there
    nonexistent = Path(__file__) / "no_file_here.csv"
    target = path_type(nonexistent)
    with pytest.raises(CodelistError, match="no_file_here.csv"):
        codelist_from_csv(target, column="CodeID")
def test_codelist_from_csv_missing_file_hint(path_type):
    """A missing path containing an escape character yields a "backslash" hint."""
    # NOTE(review): the backslash in the literal below is NOT escaped, so Python
    # reads `\f` as a single form-feed character. This looks deliberate -- the
    # test expects the error to mention "backslash" -- and presumably mimics a
    # Windows-style path written without a raw string. Do not "fix" the literal.
    mangled = Path(__file__) / "bad\file.csv"
    target = path_type(mangled)
    with pytest.raises(CodelistError, match="backslash"):
        codelist_from_csv(target, column="CodeID")
def test_codelist_from_csv_lines():
    """Check parsing of in-memory CSV lines into a list of codes.

    Covers three normalisations in one pass: surrounding whitespace on a code
    is trimmed, rows whose code is blank are dropped, and a code that appears
    more than once is only returned once (first position wins).
    """
    csv_lines = [
        "CodeID,foo",
        "abc00,",
        "def00,",
        # Check codes are trimmed
        "ghi00 ,",
        # Check blanks are ignored
        # (the trailing comma after this literal matters: without it Python
        # silently concatenates it with the next string into a single row)
        " ,",
        # Check duplicates are ignored
        " def00,",
    ]
    codelist = codelist_from_csv_lines(csv_lines, column="CodeID")
    assert codelist == ["abc00", "def00", "ghi00"]
def test_codelist_from_csv_lines_missing_column():
    """Asking for a column absent from the header raises ``CodelistError``.

    The error message is expected to mention the requested column name.
    """
    header_and_row = ["CodeID", "abc00"]
    with pytest.raises(CodelistError, match="no_col_here"):
        codelist_from_csv_lines(header_and_row, column="no_col_here")
def test_codelist_from_csv_lines_with_category_column():
    """With ``category_column`` given, the result maps each code to its category.

    A row whose category cell is empty is still included, mapped to ``""``.
    """
    rows = ["CodeID,Cat1", "abc00,foo", "def00,bar", "ghi00,"]
    expected = {"abc00": "foo", "def00": "bar", "ghi00": ""}
    actual = codelist_from_csv_lines(rows, column="CodeID", category_column="Cat1")
    assert actual == expected
def test_codelist_from_csv_lines_with_missing_category_column():
    """A ``category_column`` absent from the header raises ``CodelistError``.

    The code column itself is present, so the failure is attributable to the
    category column; the error message is expected to mention its name.
    """
    rows = ["CodeID,Cat1", "abc00,foo"]
    options = {"column": "CodeID", "category_column": "no_col_here"}
    with pytest.raises(CodelistError, match="no_col_here"):
        codelist_from_csv_lines(rows, **options)
@pytest.mark.parametrize(
    "cls,value",
    [
        # BNF: a 15-character code and an all-digit 11-character code
        (BNFCode, "0101010I0AAAEAE"),
        (BNFCode, "23965909711"),
        # CTV3: five characters, optionally padded with trailing dots
        (CTV3Code, "ABC01"),
        (CTV3Code, "De4.."),
        # ICD-10: three- and four-character forms
        (ICD10Code, "A01"),
        (ICD10Code, "A012"),
        # OPCS-4: three- and four-character forms
        (OPCS4Code, "B23"),
        (OPCS4Code, "B234"),
        # SNOMED-CT: a plain run of digits
        (SNOMEDCTCode, "1234567890"),
    ],
)
def test_valid_codes(cls, value):
    """Each code class accepts a well-formed string and exposes it as ``.value``."""
    code = cls(value)
    assert code.value == value
@pytest.mark.parametrize(
    "cls,value",
    [
        # BNF -- digit (5) instead of letter as first character of Product
        (BNFCode, "0101010I05AAEAE"),
        # BNF -- appliance but too many digits
        (BNFCode, "239659097111"),
        # CTV3 -- wrong length
        (CTV3Code, "ABC0"),
        # CTV3 -- dot other than at the end
        (CTV3Code, "ABC.0"),
        # ICD-10 -- letter other than at the start
        (ICD10Code, "AA1"),
        # ICD-10 -- wrong length
        (ICD10Code, "A0124"),
        # OPCS-4 -- I is not an allowed first character
        (OPCS4Code, "I00"),
        # SNOMED-CT -- too short
        (SNOMEDCTCode, "123"),
        # SNOMED-CT -- too long
        (SNOMEDCTCode, "12345678901234567890"),
        # SNOMED-CT -- leading zero
        (SNOMEDCTCode, "0123456789"),
    ],
)
def test_invalid_codes(cls, value):
    """Constructing a code class from a malformed string raises ``ValueError``."""
    with pytest.raises(ValueError):
        cls(value)
def test_syntactically_equivalent_codes():
    """DMD codes are validated with the very same regex as SNOMED-CT codes.

    Because the pattern is shared, the SNOMED-CT cases are not duplicated for
    DMD; dedicated DMD tests will be needed if the two ever stop sharing it.
    """
    dmd_pattern = DMDCode.regex
    snomed_pattern = SNOMEDCTCode.regex
    assert dmd_pattern == snomed_pattern