[e988c2]: / tests / generative / data_strategies.py

Download this file

69 lines (49 with data), 2.6 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import hypothesis.strategies as st
# Data generation strategies are complicated by the need for patient ids in patient tables to
# be unique (see `patient_records()`).
# Key under which every generated record stores its patient id.
patient_id_column = "patient_id"
# Inclusive upper bound for generated patient ids; ids are drawn from 1..max_patient_id.
max_patient_id = 10
# Cap on the number of records generated per patient table. Must stay <= max_patient_id:
# `patient_records()` pops one id per record from a permutation of exactly max_patient_id ids,
# so a larger cap could exhaust that pool. The value itself hasn't been fine-tuned.
max_num_patient_records = max_patient_id
# Cap on the number of records generated per event table. Event records may share patient
# ids, so this could be anything; the value hasn't been fine-tuned.
max_num_event_records = max_patient_id
def record(class_, id_strategy, schema, value_strategies):
    """Return a strategy that generates one record of ``class_`` as a plain dict.

    The generated dict holds ``class_`` under the ``type`` key, a patient id
    drawn from ``id_strategy`` under ``patient_id_column``, and one entry per
    ``(name, type_)`` pair in ``schema.column_types`` whose value is drawn
    from ``value_strategies[type_]``.
    """
    # Plain dicts are generated instead of the actual objects because it is
    # easier to extract stats for the generated data from simple objects.
    column_strategies = {
        patient_id_column: id_strategy,
        **{
            column_name: value_strategies[column_type]
            for column_name, column_type in schema.column_types
        },
    }
    return st.builds(dict, type=st.just(class_), **column_strategies)
@st.composite
def concat(draw, *list_strategies):
    """Draw a list from each given strategy, in order, and return them flattened into one list."""
    return [
        item
        for strategy in list_strategies
        for item in draw(strategy)
    ]
# Strategy for a single patient id in 1..max_patient_id. Used for event records, where each
# record draws its id independently. Note that `patient_records()` shadows this name with a
# local list of ids.
patient_ids = st.integers(min_value=1, max_value=max_patient_id)
def event_records(class_, schema, value_strategies):
    """Return a strategy for a list of 0 to ``max_num_event_records`` records of ``class_``.

    Every record draws its patient id independently from the module-level
    ``patient_ids`` strategy, so several records may carry the same id.
    """
    single_event = record(class_, patient_ids, schema, value_strategies)
    return st.lists(single_event, min_size=0, max_size=max_num_event_records)
@st.composite
def patient_records(draw, class_, schema, value_strategies):
    """Draw a list of 0 to ``max_num_patient_records`` records of ``class_`` with unique patient ids."""
    # The bookkeeping that guarantees unique patient ids is kept inside this strategy so
    # that the tests stay idempotent, as Hypothesis requires. As a consequence this strategy
    # must be called once only for a given table in a given test.
    #
    # The pool of ids is a drawn permutation of every possible patient id
    # (1..max_patient_id). Each generated record pops one id off that pool, so anywhere
    # between none and all of the ids end up being used.
    all_ids = list(range(1, max_patient_id + 1))
    unused_ids = draw(st.permutations(all_ids))

    @st.composite
    def one_patient_record(draw_):
        # Claim an id from the shared pool first, then generate the rest of the record
        # around that fixed id.
        next_id = unused_ids.pop()
        single_record = record(class_, st.just(next_id), schema, value_strategies)
        return draw_(single_record)

    records = st.lists(
        one_patient_record(), min_size=0, max_size=max_num_patient_records
    )
    return draw(records)
def data(patient_classes, event_classes, schema, value_strategies):
    """Return a strategy generating one flat list of records for all given tables.

    Records for each class in ``patient_classes`` come first (unique patient
    ids within each class), followed by records for each class in
    ``event_classes``.
    """
    patient_tables = [
        patient_records(class_, schema, value_strategies)
        for class_ in patient_classes
    ]
    event_tables = [
        event_records(class_, schema, value_strategies)
        for class_ in event_classes
    ]
    return concat(*patient_tables, *event_tables)