Switch to side-by-side view

--- a
+++ b/tests/unit/test_loaders.py
@@ -0,0 +1,264 @@
+import sys
+from functools import partial
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import patch
+
+import pytest
+
+from ehrql import loaders
+from ehrql.loaders import DefinitionError
+from ehrql.measures.measures import DisclosureControlConfig
+from ehrql.query_language import DummyDataConfig
+from ehrql.query_model.nodes import Dataset
+
+
+FIXTURES_GOOD = Path(__file__).parents[1] / "fixtures" / "good_definition_files"
+FIXTURES_BAD = Path(__file__).parents[1] / "fixtures" / "bad_definition_files"
+FIXTURES_DEBUG = Path(__file__).parents[1] / "fixtures" / "debug"
+
+
+# Parameterize all tests over all three of the isolated subprocess, subprocess and
+# unsafe versions of the loader functions so we can check they all behave the same
+@pytest.fixture(
+    params=[
+        pytest.param(
+            ("subprocess", True),
+            marks=pytest.mark.skipif(
+                not sys.platform.startswith("linux"),
+                reason="Subprocess isolation only works on Linux",
+            ),
+        ),
+        ("subprocess", False),
+        ("unsafe", None),
+    ]
+)
+def funcs(request):
+    default_kwargs = {"user_args": (), "environ": {}}
+    loader_type, use_isolation = request.param
+    if loader_type == "subprocess":
+        funcs = SimpleNamespace(
+            load_dataset_definition=partial(
+                loaders.load_dataset_definition, **default_kwargs
+            ),
+            load_measure_definitions=partial(
+                loaders.load_measure_definitions, **default_kwargs
+            ),
+            load_test_definition=partial(
+                loaders.load_test_definition, **default_kwargs
+            ),
+            load_debug_definition=partial(
+                loaders.load_debug_definition, **default_kwargs
+            ),
+        )
+    elif loader_type == "unsafe":
+        funcs = SimpleNamespace(
+            load_dataset_definition=partial(
+                loaders.load_definition_unsafe, "dataset", **default_kwargs
+            ),
+            load_measure_definitions=partial(
+                loaders.load_definition_unsafe, "measures", **default_kwargs
+            ),
+            load_test_definition=partial(
+                loaders.load_definition_unsafe, "test", **default_kwargs
+            ),
+            load_debug_definition=partial(
+                loaders.load_definition_unsafe, "debug", **default_kwargs
+            ),
+        )
+    else:
+        assert False
+    with patch.object(loaders, "isolation_is_supported", return_value=use_isolation):
+        yield funcs
+
+
+def test_load_dataset_definition(funcs, capsys):
+    filename = FIXTURES_GOOD / "dataset_definition.py"
+    dataset, dummy_data_config = funcs.load_dataset_definition(filename)
+    assert isinstance(dataset, Dataset)
+    assert isinstance(dummy_data_config, DummyDataConfig)
+    # Check the subprocess doesn't emit warnings
+    assert capsys.readouterr().err == ""
+
+
+def test_load_dataset_definition_with_print(funcs, capsys):
+    filename = FIXTURES_GOOD / "dataset_definition_with_print.py"
+    dataset, dummy_data_config = funcs.load_dataset_definition(filename)
+    assert isinstance(dataset, Dataset)
+    assert isinstance(dummy_data_config, DummyDataConfig)
+    out, err = capsys.readouterr()
+    assert "user stdout" not in out
+    assert "user stdout" in err
+
+
+def test_load_measure_definitions(funcs, capsys):
+    filename = FIXTURES_GOOD / "measure_definitions.py"
+    (
+        measures,
+        dummy_data_config,
+        disclosure_control_config,
+    ) = funcs.load_measure_definitions(filename)
+    assert isinstance(measures, list)
+    assert isinstance(dummy_data_config, DummyDataConfig)
+    assert isinstance(disclosure_control_config, DisclosureControlConfig)
+    # Check the subprocess doesn't emit warnings
+    assert capsys.readouterr().err == ""
+
+
+def test_load_test_definition(funcs, capsys):
+    filename = FIXTURES_GOOD / "assurance.py"
+    dataset, test_data = funcs.load_test_definition(filename)
+    assert isinstance(dataset, Dataset)
+    assert isinstance(test_data, dict)
+    # Check the subprocess doesn't emit warnings
+    assert capsys.readouterr().err == ""
+
+
+def test_load_debug_dataset_definition(funcs, capsys):
+    filename = FIXTURES_GOOD / "debug_definition.py"
+    funcs.load_debug_definition(
+        filename, dummy_tables_path=FIXTURES_DEBUG, render_format="ascii"
+    )
+    # show() messages are sent to stderr during the loading process
+    assert (
+        capsys.readouterr().err.strip()
+        == """
+Show line 7:
+patient_id
+-----------------
+""".strip()
+    )
+    assert capsys.readouterr().out == ""
+
+
+def test_load_dataset_definition_passes_stderr_through(funcs, capsys):
+    filename = FIXTURES_GOOD / "chatty_dataset_definition.py"
+    funcs.load_dataset_definition(filename)
+    assert capsys.readouterr().err == "I am a bit chatty\n"
+
+
+def test_load_dataset_definition_no_dataset(funcs):
+    filename = FIXTURES_BAD / "no_dataset.py"
+    with pytest.raises(
+        DefinitionError, match="Did not find a variable called 'dataset'"
+    ):
+        funcs.load_dataset_definition(filename)
+
+
+def test_load_dataset_definition_not_a_dataset(funcs):
+    filename = FIXTURES_BAD / "not_a_dataset.py"
+    with pytest.raises(
+        DefinitionError, match=r"'dataset' must be an instance of .*\.Dataset"
+    ):
+        funcs.load_dataset_definition(filename)
+
+
+def test_load_dataset_definition_no_population(funcs):
+    filename = FIXTURES_BAD / "no_population.py"
+    with pytest.raises(DefinitionError, match="A population has not been defined"):
+        funcs.load_dataset_definition(filename)
+
+
+def test_load_dataset_definition_bad_syntax(funcs):
+    filename = FIXTURES_BAD / "bad_syntax.py"
+    with pytest.raises(DefinitionError, match="what even is a Python"):
+        funcs.load_dataset_definition(filename)
+
+
+def test_load_dataset_definition_operator_error(funcs):
+    filename = FIXTURES_BAD / "operator_error.py"
+    with pytest.raises(
+        DefinitionError,
+        match=(
+            "WARNING: The `|` operator has different precedence rules from the "
+            "normal `or` operator"
+        ),
+    ):
+        funcs.load_dataset_definition(filename)
+
+
+def test_load_measure_definitions_no_measures(funcs):
+    filename = FIXTURES_BAD / "no_measures.py"
+    with pytest.raises(
+        DefinitionError, match="Did not find a variable called 'measures'"
+    ):
+        funcs.load_measure_definitions(filename)
+
+
+def test_load_measure_definitions_not_measures_instance(funcs):
+    filename = FIXTURES_BAD / "not_measures_instance.py"
+    with pytest.raises(
+        DefinitionError, match=r"'measures' must be an instance of .*\.Measures"
+    ):
+        funcs.load_measure_definitions(filename)
+
+
+def test_load_measure_definitions_empty_measures(funcs):
+    filename = FIXTURES_BAD / "empty_measures.py"
+    with pytest.raises(DefinitionError, match="No measures defined"):
+        funcs.load_measure_definitions(filename)
+
+
+@pytest.mark.parametrize(
+    "environ,expected",
+    [
+        ({}, False),
+        ({"DATABASE_URL": "foo://bar"}, True),
+        ({"EHRQL_ISOLATE_USER_CODE": "always"}, True),
+        ({"EHRQL_ISOLATE_USER_CODE": "never"}, False),
+        ({"EHRQL_ISOLATE_USER_CODE": "never", "DATABASE_URL": "foo://bar"}, False),
+    ],
+)
+def test_isolation_is_required(environ, expected):
+    assert loaders.isolation_is_required(environ) == expected
+
+
+def test_isolation_is_required_rejects_unknown_values():
+    with pytest.raises(RuntimeError, match="Invalid value"):
+        loaders.isolation_is_required({"EHRQL_ISOLATE_USER_CODE": "maybe"})
+
+
+@patch.object(loaders, "isolation_is_supported", return_value=False)
+def test_load_definition_raises_error_if_isolation_required_but_unavailable(_):
+    filename = FIXTURES_GOOD / "dataset_definition.py"
+    with pytest.raises(RuntimeError, match="current environment does not support"):
+        loaders.load_dataset_definition(
+            filename,
+            user_args=(),
+            environ={"EHRQL_ISOLATE_USER_CODE": "always"},
+        )
+
+
+def test_load_definition_unsafe_raises_error_if_isolation_required():
+    filename = FIXTURES_GOOD / "dataset_definition.py"
+    with pytest.raises(RuntimeError, match="call to unsafe loader function"):
+        loaders.load_definition_unsafe(
+            "dataset",
+            filename,
+            user_args=(),
+            environ={"EHRQL_ISOLATE_USER_CODE": "always"},
+        )
+
+
+# Confirm that various things we can do in an ordinary subprocess are blocked in an
+# isolated subprocess
+@pytest.mark.skipif(
+    not sys.platform.startswith("linux"),
+    reason="Subprocess isolation only works on Linux",
+)
+def test_isolation_report(tmp_path):
+    assert loaders.isolation_is_supported()
+    assert loaders.isolation_report(tmp_path) == {
+        "subprocess.run": {
+            "touch": "ALLOWED",
+            "open_socket": "ALLOWED",
+            "exec": "ALLOWED",
+            "read_env_vars": "ALLOWED",
+        },
+        "subprocess_run_isolated": {
+            "touch": "BLOCKED",
+            "open_socket": "BLOCKED",
+            "exec": "BLOCKED",
+            "read_env_vars": "BLOCKED",
+        },
+    }