# tests/acceptance/test_external_studies.py (e988c2)

import contextlib
from pathlib import Path

import pytest

from ehrql.main import load_dataset_definition, load_measure_definitions

# These tests specify dataset definitions in other repositories which we want to ensure
# we don't accidentally break. In order to keep tests hermetic and deterministic, we
# copy the study code into the repo and commit it (taking care to copy just the files
# needed to evaluate the dataset definition). The `update_external_studies.py` script
# handles this. It can be invoked as:
#
# python -m tests.acceptance.update_external_studies
#
# Or via just as:
#
# just update-external-studies
#
# This is run automatically by a scheduled action which will create a PR if there are
# any changes to be made.
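#
# Each entry below describes one external study. As used by this module (and, as far as
# the copying goes, by the update script):
#
#  * `repo` / `branch`: where the study code is copied from
#  * `file_globs`: the files copied into `external_studies/<study_name>`
#  * `dataset_definitions`: files loaded below with `load_dataset_definition`
#  * `measure_definitions`: files loaded below with `load_measure_definitions`; an
#    entry may be a `(path, extra_args)` tuple where the definition expects additional
#    command line arguments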
EXTERNAL_STUDIES = {
    "test-age-distribution": dict(
        repo="opensafely/test-age-distribution",
        branch="main",
        file_globs=[
            "analysis/dataset_definition.py",
        ],
        dataset_definitions=["analysis/dataset_definition.py"],
    ),
    "qof-diabetes": dict(
        repo="opensafely/qof-diabetes",
        branch="main",
        file_globs=[
            "analysis/dataset_definition_*.py",
            "analysis/dm_dataset.py",
            "analysis/codelists.py",
            "analysis/variable_lib_helper.py",
            "codelists/*.csv",
        ],
        dataset_definitions=[],
        measure_definitions=[
            "analysis/dataset_definition_dm017.py",
            ("analysis/dataset_definition_dm020.py", ["--ifcchba-cutoff-val", "58"]),
            ("analysis/dataset_definition_dm021.py", ["--ifcchba-cutoff-val", "75"]),
        ],
    ),
    "waiting-list": dict(
        repo="opensafely/waiting-list",
        branch="main",
        file_globs=[
            "analysis/codelists.py",
            "analysis/dataset_definition_clockstops.py",
            "analysis/measures_checks.py",
            "analysis/measures_opioid.py",
            "codelists/*.csv",
        ],
        dataset_definitions=[
            "analysis/dataset_definition_clockstops.py",
        ],
        measure_definitions=[
            "analysis/measures_checks.py",
            ("analysis/measures_opioid.py", ["--codelist", "opioid_codes"]),
        ],
    ),
    "mainroute_cancer": dict(
        repo="opensafely/mainroute_cancer",
        branch="main",
        file_globs=[
            "analysis/codelists.py",
            "analysis/dataset_definition.py",
            "analysis/define_static_dataset.py",
            "analysis/measures_demo.py",
            "codelists/*.csv",
        ],
        dataset_definitions=[
            "analysis/define_static_dataset.py",
        ],
        measure_definitions=[
            (
                "analysis/measures_demo.py",
                ["--start-date", "2018-03-23", "--intervals", "67"],
            ),
        ],
    ),
}

STUDY_DIR = Path(__file__).parent / "external_studies"
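

# Each dataset definition and each measure definition above becomes its own test case,
# paired with the matching loader function.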
@pytest.mark.parametrize(
    "study_name,definition_file,load_function",
    [
        (study_name, dataset_def, load_dataset_definition)
        for (study_name, config) in EXTERNAL_STUDIES.items()
        for dataset_def in config["dataset_definitions"]
    ]
    + [
        (study_name, measure_def, load_measure_definitions)
        for (study_name, config) in EXTERNAL_STUDIES.items()
        for measure_def in config.get("measure_definitions", ())
    ],
)
def test_external_study(study_name, definition_file, load_function):
    if isinstance(definition_file, tuple):
        definition_file, user_args = definition_file
    else:
        user_args = []
    study_path = STUDY_DIR / study_name
    definition_path = study_path / definition_file
    # Studies often use project-relative paths so ensure these resolve correctly
    with contextlib.chdir(study_path):
        # Load the dataset or measure definition. This tests that we can construct a
        # valid query model graph from the definition. I think this is sufficient for
        # these tests which are intended to ensure we don't accidentally break the API.
        # If we're unable to execute a valid query, that's a separate class of problem
        # for which we need separate tests.
        assert load_function(definition_path, user_args=user_args, environ={})
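
# To run just these acceptance tests locally (assuming the project's usual pytest
# setup):
#
#     pytest tests/acceptance/test_external_studies.py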