--- a
+++ b/tests/unit/test_codes.py
@@ -0,0 +1,158 @@
+import textwrap
+from pathlib import Path
+
+import pytest
+
+from ehrql.codes import (
+    BNFCode,
+    CodelistError,
+    CTV3Code,
+    DMDCode,
+    ICD10Code,
+    OPCS4Code,
+    SNOMEDCTCode,
+    codelist_from_csv,
+    codelist_from_csv_lines,
+)
+
+
+# Fixture yielding `str` and then `Path`: `codelist_from_csv` accepts its path as
+# either type, so tests wrap their path in `path_type(...)` to exercise both forms
+@pytest.fixture(params=[str, Path])
+def path_type(request):
+    return request.param
+
+
+def test_codelist_from_csv(path_type, tmp_path):
+    """Read a codelist from a CSV file on disk, passing the path as `str` and `Path`."""
+    csv_file = tmp_path / "codes.csv"
+    csv_text = """
+        CodeID,foo
+        abc00,
+        def00,
+        """
+    csv_file.write_text(textwrap.dedent(csv_text).strip())  # dedent, then strip
+    assert codelist_from_csv(path_type(csv_file), column="CodeID") == ["abc00", "def00"]
+
+
+def test_codelist_from_csv_missing_file(path_type):
+    absent = path_type(Path(__file__) / "no_file_here.csv")  # nothing exists at this path
+    with pytest.raises(CodelistError, match="no_file_here.csv"):
+        codelist_from_csv(absent, column="CodeID")
+
+
+def test_codelist_from_csv_missing_file_hint(path_type):
+    mangled = path_type(Path(__file__) / "bad\file.csv")  # "\f" is a form-feed escape
+    with pytest.raises(CodelistError, match="backslash"):
+        codelist_from_csv(mangled, column="CodeID")
+
+
+def test_codelist_from_csv_lines():
+    """Check that codes are trimmed, blank codes are dropped and repeats collapse."""
+    csv_lines = [
+        "CodeID,foo",
+        "abc00,",
+        "def00,",
+        "ghi00 ,",  # trailing space: codes are trimmed
+        # The comma ending the next entry matters: without it Python joins the two
+        # adjacent literals into a single row and the duplicate is never exercised
+        "  ,",  # blank code: ignored
+        " def00,",  # repeat of "def00" once trimmed: ignored
+    ]
+    codelist = codelist_from_csv_lines(csv_lines, column="CodeID")
+    assert codelist == ["abc00", "def00", "ghi00"]
+
+
+def test_codelist_from_csv_lines_missing_column():
+    """Asking for a column absent from the header raises an error naming it."""
+    rows = ["CodeID", "abc00"]
+    # The only column in the header is `CodeID`
+    absent = "no_col_here"
+    with pytest.raises(CodelistError, match=absent):
+        codelist_from_csv_lines(rows, column=absent)
+
+
+def test_codelist_from_csv_lines_with_category_column():
+    """With `category_column` set, each code is paired with its category value."""
+    rows = [
+        "CodeID,Cat1",
+        "abc00,foo",
+        "def00,bar",
+        # This code has no category
+        "ghi00,",
+    ]
+    # An uncategorised code is kept and paired with the empty string
+    expected = {
+        "abc00": "foo",
+        "def00": "bar",
+        "ghi00": "",
+    }
+    result = codelist_from_csv_lines(rows, column="CodeID", category_column="Cat1")
+    assert result == expected
+
+
+def test_codelist_from_csv_lines_with_missing_category_column():
+    """A `category_column` absent from the header raises an error naming it."""
+    rows = ["CodeID,Cat1", "abc00,foo"]
+    # `CodeID` is present in the header; only `no_col_here` is not
+    absent = "no_col_here"
+    with pytest.raises(CodelistError, match=absent):
+        codelist_from_csv_lines(
+            rows,
+            column="CodeID",
+            category_column=absent,
+        )
+
+
+@pytest.mark.parametrize(
+    "cls,value",
+    [
+        (BNFCode, "0101010I0AAAEAE"),
+        (BNFCode, "23965909711"),  # all-digit form (presumably an appliance code)
+        (CTV3Code, "ABC01"),
+        (CTV3Code, "De4.."),  # trailing dots are accepted
+        (ICD10Code, "A01"),
+        (ICD10Code, "A012"),  # a fourth character is optional
+        (OPCS4Code, "B23"),
+        (OPCS4Code, "B234"),  # a fourth character is optional
+        (SNOMEDCTCode, "1234567890"),
+    ],
+)
+def test_valid_codes(cls, value):
+    assert cls(value).value == value  # constructs cleanly; `.value` is the input string
+
+
+@pytest.mark.parametrize(
+    "cls,value",
+    [
+        # Digit (5) instead of letter as first character of Product
+        (BNFCode, "0101010I05AAEAE"),
+        # Appliance but too many digits (12; the valid example has 11)
+        (BNFCode, "239659097111"),
+        # Wrong length (4 characters; the valid examples have 5)
+        (CTV3Code, "ABC0"),
+        # Dot other than at the end
+        (CTV3Code, "ABC.0"),
+        # Letter other than at the start
+        (ICD10Code, "AA1"),
+        # Wrong length (5 characters; the valid examples have 3 or 4)
+        (ICD10Code, "A0124"),
+        # I is not an allowed first character
+        (OPCS4Code, "I00"),
+        # Too short (3 digits)
+        (SNOMEDCTCode, "123"),
+        # Too long (20 digits)
+        (SNOMEDCTCode, "12345678901234567890"),
+        # Leading zero
+        (SNOMEDCTCode, "0123456789"),
+    ],
+)
+def test_invalid_codes(cls, value):
+    with pytest.raises(ValueError):
+        cls(value)  # the constructor itself must reject the value
+
+
+def test_syntactically_equivalent_codes():
+    # `DMDCode` currently shares its regex with `SNOMEDCTCode`, so it has no valid/invalid
+    # cases of its own; if this assertion ever fails, add dedicated DMD cases
+    assert DMDCode.regex == SNOMEDCTCode.regex