ehrql / Git / [e988c2] /tests/unit/test

Models:
philipB/
ehrql
Downloads: 1
[e988c2]: / tests / unit / test_codes.py
History
Download this file
159 lines (134 with data), 4.1 kB

import textwrap
from pathlib import Path

import pytest

from ehrql.codes import (
    BNFCode,
    CodelistError,
    CTV3Code,
    DMDCode,
    ICD10Code,
    OPCS4Code,
    SNOMEDCTCode,
    codelist_from_csv,
    codelist_from_csv_lines,
)


@pytest.fixture(params=(str, Path))
def path_type(request):
    """Yield each type that `codelist_from_csv` accepts for its path argument.

    `codelist_from_csv` can take either a string or a `pathlib.Path`, so tests
    using this fixture run once per type to check both are handled correctly.
    """
    path_cls = request.param
    return path_cls


def test_codelist_from_csv(path_type, tmp_path):
    """Check that a codelist can be read from a CSV file on disk.

    The path is passed through `path_type`, so the file location is supplied
    in each of the forms that fixture provides.
    """
    csv_file = tmp_path / "codes.csv"
    csv_text = """
        CodeID,foo
        abc00,
        def00,
        """
    # Dedent *before* stripping. Stripping first removes the indentation of the
    # header line only, which leaves `dedent` with no common margin to remove
    # and writes the data rows out with their leading whitespace intact.
    csv_file.write_text(textwrap.dedent(csv_text).strip())
    codelist = codelist_from_csv(path_type(csv_file), column="CodeID")
    assert codelist == ["abc00", "def00"]


def test_codelist_from_csv_missing_file(path_type):
    """Check that a non-existent path raises a `CodelistError` mentioning the file."""
    # Treating this test module as though it were a directory gives a path
    # which cannot exist
    nonexistent = Path(__file__).joinpath("no_file_here.csv")
    with pytest.raises(CodelistError, match="no_file_here.csv"):
        codelist_from_csv(path_type(nonexistent), column="CodeID")


def test_codelist_from_csv_missing_file_hint(path_type):
    """Check that the error for a suspicious missing path mentions "backslash"."""
    # The literal below is not a raw string, so `\f` is read as a form-feed
    # escape rather than a backslash followed by "f". NOTE(review): presumably
    # this imitates a user typing a Windows-style path; do not "fix" the
    # literal without confirming against `codelist_from_csv`.
    suspicious = Path(__file__).joinpath("bad\file.csv")
    with pytest.raises(CodelistError, match="backslash"):
        codelist_from_csv(path_type(suspicious), column="CodeID")


def test_codelist_from_csv_lines():
    """Check codes are trimmed, and blank and duplicate codes are dropped.

    Each element of `csv_lines` is one CSV row. Every row needs its own
    trailing comma in the list: adjacent string literals without one are
    silently concatenated into a single row.
    """
    csv_lines = [
        "CodeID,foo",
        "abc00,",
        "def00,",
        # Check codes are trimmed
        "ghi00 ,",
        # Check blanks are ignored
        "  ,",
        # Check duplicates are ignored
        " def00,",
    ]
    codelist = codelist_from_csv_lines(csv_lines, column="CodeID")
    assert codelist == ["abc00", "def00", "ghi00"]


def test_codelist_from_csv_lines_missing_column():
    """Check that requesting a column absent from the header raises `CodelistError`."""
    # A header and a single data row, with only the `CodeID` column present
    rows = ["CodeID", "abc00"]
    with pytest.raises(CodelistError, match="no_col_here"):
        codelist_from_csv_lines(rows, column="no_col_here")


def test_codelist_from_csv_lines_with_category_column():
    """Check that supplying `category_column` gives a code-to-category mapping."""
    rows = ["CodeID,Cat1", "abc00,foo", "def00,bar", "ghi00,"]
    # The last row has an empty category, which is kept as an empty string
    expected = {"abc00": "foo", "def00": "bar", "ghi00": ""}
    result = codelist_from_csv_lines(rows, column="CodeID", category_column="Cat1")
    assert result == expected


def test_codelist_from_csv_lines_with_missing_category_column():
    """Check that an unknown `category_column` raises `CodelistError`."""
    rows = ["CodeID,Cat1", "abc00,foo"]
    # The code column exists; only the category column is absent
    options = {"column": "CodeID", "category_column": "no_col_here"}
    with pytest.raises(CodelistError, match="no_col_here"):
        codelist_from_csv_lines(rows, **options)


@pytest.mark.parametrize(
    "cls,value",
    [
        # Two accepted shapes of BNF code: mixed alphanumeric and all-digit
        (BNFCode, "0101010I0AAAEAE"),
        (BNFCode, "23965909711"),
        # CTV3 codes, including one ending in dots
        (CTV3Code, "ABC01"),
        (CTV3Code, "De4.."),
        # ICD-10 and OPCS-4 codes with three and four characters
        (ICD10Code, "A01"),
        (ICD10Code, "A012"),
        (OPCS4Code, "B23"),
        (OPCS4Code, "B234"),
        (SNOMEDCTCode, "1234567890"),
    ],
)
def test_valid_codes(cls, value):
    """Check each well-formed code constructs and exposes its value unchanged."""
    code = cls(value)
    assert code.value == value


@pytest.mark.parametrize(
    "cls,value",
    [
        # Product starts with a digit (5) where a letter is required
        (BNFCode, "0101010I05AAEAE"),
        # Appliance code with one digit too many
        (BNFCode, "239659097111"),
        # Incorrect length
        (CTV3Code, "ABC0"),
        # Dot somewhere other than the end
        (CTV3Code, "ABC.0"),
        # Letter somewhere other than the start
        (ICD10Code, "AA1"),
        # Incorrect length
        (ICD10Code, "A0124"),
        # "I" is not permitted as the first character
        (OPCS4Code, "I00"),
        # Not enough digits
        (SNOMEDCTCode, "123"),
        # Too many digits
        (SNOMEDCTCode, "12345678901234567890"),
        # Starts with a zero
        (SNOMEDCTCode, "0123456789"),
    ],
)
def test_invalid_codes(cls, value):
    """Check each malformed code is rejected with `ValueError` on construction."""
    with pytest.raises(ValueError):
        # Construction alone must fail; the instance is never used
        _ = cls(value)


def test_syntactically_equivalent_codes():
    """Check `DMDCode` and `SNOMEDCTCode` still share the same regex."""
    # While the regex is shared there is nothing to gain from repeating the
    # SNOMED-CT cases for `DMDCode`. If this assertion ever fails, `DMDCode`
    # needs valid/invalid test cases of its own.
    shared_regex = SNOMEDCTCode.regex
    assert DMDCode.regex == shared_regex