a b/tests/unit/test_loaders.py
1
import sys
2
from functools import partial
3
from pathlib import Path
4
from types import SimpleNamespace
5
from unittest.mock import patch
6
7
import pytest
8
9
from ehrql import loaders
10
from ehrql.loaders import DefinitionError
11
from ehrql.measures.measures import DisclosureControlConfig
12
from ehrql.query_language import DummyDataConfig
13
from ehrql.query_model.nodes import Dataset
14
15
16
# Fixture directories, all resolved relative to the grandparent directory of
# this test module
FIXTURES_GOOD = Path(__file__).parents[1].joinpath("fixtures", "good_definition_files")
FIXTURES_BAD = Path(__file__).parents[1].joinpath("fixtures", "bad_definition_files")
FIXTURES_DEBUG = Path(__file__).parents[1].joinpath("fixtures", "debug")
19
20
21
# Parameterize all tests over all three of the isolated subprocess, subprocess and
# unsafe versions of the loader functions so we can check they all behave the same
@pytest.fixture(
    params=[
        pytest.param(
            ("subprocess", True),
            marks=pytest.mark.skipif(
                not sys.platform.startswith("linux"),
                reason="Subprocess isolation only works on Linux",
            ),
        ),
        ("subprocess", False),
        ("unsafe", None),
    ]
)
def funcs(request):
    """Yield a namespace of loader functions for the requested loader variant.

    Each param is a ``(loader_type, use_isolation)`` pair. The yielded namespace
    exposes ``load_dataset_definition``, ``load_measure_definitions``,
    ``load_test_definition`` and ``load_debug_definition``, each pre-bound with
    empty ``user_args`` and ``environ``. While the fixture is active,
    ``loaders.isolation_is_supported`` is patched to return ``use_isolation``.
    """
    default_kwargs = {"user_args": (), "environ": {}}
    loader_type, use_isolation = request.param
    if loader_type == "subprocess":
        loader_funcs = SimpleNamespace(
            load_dataset_definition=partial(
                loaders.load_dataset_definition, **default_kwargs
            ),
            load_measure_definitions=partial(
                loaders.load_measure_definitions, **default_kwargs
            ),
            load_test_definition=partial(
                loaders.load_test_definition, **default_kwargs
            ),
            load_debug_definition=partial(
                loaders.load_debug_definition, **default_kwargs
            ),
        )
    elif loader_type == "unsafe":
        loader_funcs = SimpleNamespace(
            load_dataset_definition=partial(
                loaders.load_definition_unsafe, "dataset", **default_kwargs
            ),
            load_measure_definitions=partial(
                loaders.load_definition_unsafe, "measures", **default_kwargs
            ),
            load_test_definition=partial(
                loaders.load_definition_unsafe, "test", **default_kwargs
            ),
            load_debug_definition=partial(
                loaders.load_definition_unsafe, "debug", **default_kwargs
            ),
        )
    else:
        # Raise explicitly rather than `assert False`, which is stripped under
        # `python -O` and would leave `loader_funcs` unbound below
        raise AssertionError(f"Unexpected loader type: {loader_type!r}")
    with patch.object(loaders, "isolation_is_supported", return_value=use_isolation):
        yield loader_funcs
73
74
75
def test_load_dataset_definition(funcs, capsys):
    """Loading `dataset_definition.py` returns a Dataset and a DummyDataConfig."""
    definition_path = FIXTURES_GOOD / "dataset_definition.py"
    loaded = funcs.load_dataset_definition(definition_path)
    dataset, dummy_data_config = loaded
    assert isinstance(dataset, Dataset)
    assert isinstance(dummy_data_config, DummyDataConfig)
    # Check the subprocess doesn't emit warnings
    captured = capsys.readouterr()
    assert captured.err == ""
82
83
84
def test_load_dataset_definition_with_print(funcs, capsys):
    """The text "user stdout" must appear on stderr and never on stdout."""
    definition_path = FIXTURES_GOOD / "dataset_definition_with_print.py"
    loaded = funcs.load_dataset_definition(definition_path)
    dataset, dummy_data_config = loaded
    assert isinstance(dataset, Dataset)
    assert isinstance(dummy_data_config, DummyDataConfig)
    captured = capsys.readouterr()
    assert "user stdout" not in captured.out
    assert "user stdout" in captured.err
92
93
94
def test_load_measure_definitions(funcs, capsys):
    """Loading `measure_definitions.py` returns a list plus its two config objects."""
    definition_path = FIXTURES_GOOD / "measure_definitions.py"
    loaded = funcs.load_measure_definitions(definition_path)
    measures, dummy_data_config, disclosure_control_config = loaded
    assert isinstance(measures, list)
    assert isinstance(dummy_data_config, DummyDataConfig)
    assert isinstance(disclosure_control_config, DisclosureControlConfig)
    # Check the subprocess doesn't emit warnings
    captured = capsys.readouterr()
    assert captured.err == ""
106
107
108
def test_load_test_definition(funcs, capsys):
    """Loading `assurance.py` returns a Dataset together with a dict of test data."""
    definition_path = FIXTURES_GOOD / "assurance.py"
    loaded = funcs.load_test_definition(definition_path)
    dataset, test_data = loaded
    assert isinstance(dataset, Dataset)
    assert isinstance(test_data, dict)
    # Check the subprocess doesn't emit warnings
    captured = capsys.readouterr()
    assert captured.err == ""
115
116
117
def test_load_debug_dataset_definition(funcs, capsys):
    """Loading `debug_definition.py` writes the show() output to stderr only."""
    filename = FIXTURES_GOOD / "debug_definition.py"
    funcs.load_debug_definition(
        filename, dummy_tables_path=FIXTURES_DEBUG, render_format="ascii"
    )
    # readouterr() resets the capture buffers, so both streams must be read in a
    # single call; a second call would always report empty output and make the
    # stdout assertion below vacuous
    out, err = capsys.readouterr()
    # show() messages are sent to stderr during the loading process
    assert (
        err.strip()
        == """
Show line 7:
patient_id
-----------------
""".strip()
    )
    assert out == ""
132
133
134
def test_load_dataset_definition_passes_stderr_through(funcs, capsys):
    """After loading `chatty_dataset_definition.py`, stderr holds exactly its message."""
    funcs.load_dataset_definition(FIXTURES_GOOD / "chatty_dataset_definition.py")
    captured = capsys.readouterr()
    assert captured.err == "I am a bit chatty\n"
138
139
140
def test_load_dataset_definition_no_dataset(funcs):
    """Loading `no_dataset.py` raises a DefinitionError about the missing 'dataset'."""
    expected_message = "Did not find a variable called 'dataset'"
    definition_path = FIXTURES_BAD / "no_dataset.py"
    with pytest.raises(DefinitionError, match=expected_message):
        funcs.load_dataset_definition(definition_path)
146
147
148
def test_load_dataset_definition_not_a_dataset(funcs):
    """Loading `not_a_dataset.py` raises a DefinitionError about the type of 'dataset'."""
    expected_pattern = r"'dataset' must be an instance of .*\.Dataset"
    definition_path = FIXTURES_BAD / "not_a_dataset.py"
    with pytest.raises(DefinitionError, match=expected_pattern):
        funcs.load_dataset_definition(definition_path)
154
155
156
def test_load_dataset_definition_no_population(funcs):
    """Loading `no_population.py` raises a DefinitionError about the population."""
    expected_message = "A population has not been defined"
    definition_path = FIXTURES_BAD / "no_population.py"
    with pytest.raises(DefinitionError, match=expected_message):
        funcs.load_dataset_definition(definition_path)
160
161
162
def test_load_dataset_definition_bad_syntax(funcs):
    """Loading `bad_syntax.py` raises a DefinitionError quoting the offending text."""
    expected_message = "what even is a Python"
    definition_path = FIXTURES_BAD / "bad_syntax.py"
    with pytest.raises(DefinitionError, match=expected_message):
        funcs.load_dataset_definition(definition_path)
166
167
168
def test_load_dataset_definition_operator_error(funcs):
    """Loading `operator_error.py` raises a DefinitionError with the `|` warning."""
    filename = FIXTURES_BAD / "operator_error.py"
    with pytest.raises(
        DefinitionError,
        # `match` is a regular expression, so the literal pipe must be escaped;
        # unescaped it acts as alternation and the pattern would pass on either
        # half of the message alone
        match=(
            r"WARNING: The `\|` operator has different precedence rules from the "
            "normal `or` operator"
        ),
    ):
        funcs.load_dataset_definition(filename)
178
179
180
def test_load_measure_definitions_no_measures(funcs):
    """Loading `no_measures.py` raises a DefinitionError about the missing 'measures'."""
    expected_message = "Did not find a variable called 'measures'"
    definition_path = FIXTURES_BAD / "no_measures.py"
    with pytest.raises(DefinitionError, match=expected_message):
        funcs.load_measure_definitions(definition_path)
186
187
188
def test_load_measure_definitions_not_measures_instance(funcs):
    """Loading `not_measures_instance.py` raises a DefinitionError about the type."""
    expected_pattern = r"'measures' must be an instance of .*\.Measures"
    definition_path = FIXTURES_BAD / "not_measures_instance.py"
    with pytest.raises(DefinitionError, match=expected_pattern):
        funcs.load_measure_definitions(definition_path)
194
195
196
def test_load_measure_definitions_empty_measures(funcs):
    """Loading `empty_measures.py` raises a DefinitionError saying none are defined."""
    expected_message = "No measures defined"
    definition_path = FIXTURES_BAD / "empty_measures.py"
    with pytest.raises(DefinitionError, match=expected_message):
        funcs.load_measure_definitions(definition_path)
200
201
202
@pytest.mark.parametrize(
    ("environ", "expected"),
    [
        ({}, False),
        ({"DATABASE_URL": "foo://bar"}, True),
        ({"EHRQL_ISOLATE_USER_CODE": "always"}, True),
        ({"EHRQL_ISOLATE_USER_CODE": "never"}, False),
        ({"EHRQL_ISOLATE_USER_CODE": "never", "DATABASE_URL": "foo://bar"}, False),
    ],
)
def test_isolation_is_required(environ, expected):
    """`isolation_is_required` returns the expected flag for each environment."""
    is_required = loaders.isolation_is_required(environ)
    assert is_required == expected
214
215
216
def test_isolation_is_required_rejects_unknown_values():
    """An `EHRQL_ISOLATE_USER_CODE` value of "maybe" raises a RuntimeError."""
    environ = {"EHRQL_ISOLATE_USER_CODE": "maybe"}
    with pytest.raises(RuntimeError, match="Invalid value"):
        loaders.isolation_is_required(environ)
219
220
221
@patch.object(loaders, "isolation_is_supported", return_value=False)
def test_load_definition_raises_error_if_isolation_required_but_unavailable(_):
    """With isolation forced on but patched as unsupported, loading must fail."""
    definition_path = FIXTURES_GOOD / "dataset_definition.py"
    environ = {"EHRQL_ISOLATE_USER_CODE": "always"}
    expected_message = "current environment does not support"
    with pytest.raises(RuntimeError, match=expected_message):
        loaders.load_dataset_definition(
            definition_path, user_args=(), environ=environ
        )
230
231
232
def test_load_definition_unsafe_raises_error_if_isolation_required():
    """The unsafe loader raises a RuntimeError when isolation is set to "always"."""
    definition_path = FIXTURES_GOOD / "dataset_definition.py"
    environ = {"EHRQL_ISOLATE_USER_CODE": "always"}
    expected_message = "call to unsafe loader function"
    with pytest.raises(RuntimeError, match=expected_message):
        loaders.load_definition_unsafe(
            "dataset", definition_path, user_args=(), environ=environ
        )
241
242
243
# Confirm that various things we can do in an ordinary subprocess are blocked in an
# isolated subprocess
@pytest.mark.skipif(
    not sys.platform.startswith("linux"),
    reason="Subprocess isolation only works on Linux",
)
def test_isolation_report(tmp_path):
    """Every probed action is ALLOWED via `subprocess.run` and BLOCKED when isolated."""
    probed_actions = ("touch", "open_socket", "exec", "read_env_vars")
    expected_report = {
        "subprocess.run": dict.fromkeys(probed_actions, "ALLOWED"),
        "subprocess_run_isolated": dict.fromkeys(probed_actions, "BLOCKED"),
    }
    assert loaders.isolation_is_supported()
    report = loaders.isolation_report(tmp_path)
    assert report == expected_report