a b/tests/generative/variable_strategies.py
1
import datetime
2
from os import environ
3
4
import hypothesis as hyp
5
import hypothesis.strategies as st
6
from hypothesis.control import current_build_context
7
8
from ehrql.query_model.nodes import (
9
    AggregateByPatient,
10
    Case,
11
    Dataset,
12
    Filter,
13
    Function,
14
    InlinePatientTable,
15
    PickOneRowPerPatient,
16
    Position,
17
    SelectColumn,
18
    SelectPatientTable,
19
    SelectTable,
20
    SeriesCollectionFrame,
21
    Sort,
22
    Value,
23
)
24
from ehrql.query_model.population_validation import (
25
    ValidationError,
26
    validate_population_definition,
27
)
28
29
from .generic_strategies import usually
30
from .ignored_errors import get_ignored_error_type
31
32
33
# Max depth
34
#
35
# There are various points at which we generate deeply recursive data
36
# which hits Hypothesis's recursion limits, and we need to stop going deeper
37
# at this point and force generating a terminating node.
38
#
39
# Otherwise, the generated graph can continue forever, and will eventually hit the
40
# hypothesis limit (100) and will be abandoned. This results in too many invalid examples,
41
# which triggers the too-many-filters healthcheck.
42
#
43
# If the max limit is set high - e.g. if we always let it go to 100 and then return our
44
# default terminating node, generating the examples takes a really long time.  Setting it
45
# too low means that hypothesis takes too long to shrink examples.
46
#
47
# The default is therefore set, somewhat arbitrarily, to 15.
48
49
MAX_DEPTH = int(environ.get("GENTEST_MAX_DEPTH", 15))
50
51
52
def depth_exceeded():
    """Return True when the current Hypothesis draw depth is greater than MAX_DEPTH."""
    current_depth = current_build_context().data.depth
    return current_depth > MAX_DEPTH
55
56
57
@st.composite
def _should_stop(draw):
    """Returns True if we need to stop and generate a terminating node."""

    # The answer should normally be False. It becomes True either because the
    # drawn decision says so (which is what the shrinker will steer towards) or
    # because the maximum depth has been exceeded.
    #
    # The decision is always drawn, even when the depth check is going to
    # override it: that gives the shrinker the opportunity to set the drawn
    # decision itself to "stop", after which we no longer depend on hitting
    # the maximum depth to generate a terminating node here.
    drawn_continue = draw(usually)

    return depth_exceeded() or not drawn_continue
77
78
79
should_stop = _should_stop()
80
81
82
@st.composite
def depth_bounded_one_of(draw, *options):
    """Equivalent to `one_of` but if we've got too deep always uses the first option."""
    assert options

    # As with `should_stop`, the index is always drawn. Once the maximum depth
    # has been exceeded the drawn index is discarded in favour of the first
    # option. The shrinker can later turn the drawn index into a real zero, at
    # which point this branch no longer relies on reaching the maximum depth.
    drawn_index = draw(st.integers(0, len(options) - 1))
    chosen = options[0] if depth_exceeded() else options[drawn_index]
    return draw(chosen)
96
97
98
# This module defines a set of recursive Hypothesis strategies for generating query model graphs.
99
#
100
# There are a few points where we deliberately order the types that we choose from, with the
101
# "simplest" first (by some subjective measure). This is to enable Hypothesis to more effectively
102
# explore the query space and to "shrink" examples when it finds errors. These points are commented
103
# below.
104
#
105
# We use several Hypothesis combinators for defining our strategies. Most (`one_of`, `just`,
106
# `sampled_from`) are fairly self-explanatory. A couple are worth clarifying.
107
#     * `st.builds()` is used to construct objects, it takes the class and strategies
108
#       corresponding to the constructor arguments.
109
#     * `@st.composite` allows us to define a strategy by composing other strategies with
110
#       arbitrary Python code; it adds a `draw` argument which is part of the machinery that
111
#       enables this composition but which doesn't form part of the signature of the resulting
112
#       strategy function.
113
114
115
def dataset(patient_tables, event_tables, schema, value_strategies):
116
    # Every inner-function here returns a Hypothesis strategy for creating the thing it is named
117
    # for, not the thing itself.
118
    #
119
    # Several of these strategy functions ignore one or more of their arguments in order to make
120
    # them uniform with other functions that return the same sort of strategy. Such ignored
121
    # arguments are named with a leading underscore.
122
123
    # Series strategies
124
    #
125
    # Whenever a series is needed, we call series() passing the type of the series and frame that
126
    # it should be built on (these are either constrained by the context in which the series is to
127
    # be used or chosen arbitrarily by the caller).
128
    #
129
    # This strategy then chooses an arbitrary concrete series that respects the constraints imposed
130
    # by the passed type and frame.
131
    #
132
    # A note on frames and domains:
133
    #
134
    #     When we pass `frame` as an argument to a series strategy function, the intended semantics
135
    #     are always "construct a series that is _consistent_ with this frame". It's always
136
    #     permitted to return a one-row-per-patient series, because such series can always be
137
    #     composed with a many-rows-per-patient series; so there are series strategy functions that,
138
    #     always or sometimes, ignore the frame argument.
139
140
    COMPARABLE_TYPES = [t for t in value_strategies.keys() if t is not bool]
141
142
    @st.composite
    def series(draw, type_, frame):
        """Draw an arbitrary series of `type_` that is consistent with `frame`."""
        # When told to terminate, fall back to the simplest node: a plain column
        if draw(should_stop):  # pragma: no cover
            return draw(select_column(type_, frame))

        class DomainConstraint:
            # Each constraint lists the acceptable results of
            # `is_one_row_per_patient_frame(frame)`
            PATIENT = (True,)
            NON_PATIENT = (False,)
            ANY = (True, False)

        # Maps each series strategy function to the result types it can produce and
        # the kind of frame it may be requested for.
        # Order matters: "simpler" first (see header comment)
        series_constraints = {
            select_column: (value_strategies.keys(), DomainConstraint.ANY),
            exists: ({bool}, DomainConstraint.PATIENT),
            count: ({int}, DomainConstraint.PATIENT),
            count_distinct: ({int}, DomainConstraint.PATIENT),
            min_: (COMPARABLE_TYPES, DomainConstraint.PATIENT),
            max_: (COMPARABLE_TYPES, DomainConstraint.PATIENT),
            sum_: ({int, float}, DomainConstraint.PATIENT),
            mean: ({float}, DomainConstraint.PATIENT),
            is_null: ({bool}, DomainConstraint.ANY),
            not_: ({bool}, DomainConstraint.ANY),
            year_from_date: ({int}, DomainConstraint.ANY),
            month_from_date: ({int}, DomainConstraint.ANY),
            day_from_date: ({int}, DomainConstraint.ANY),
            to_first_of_year: ({datetime.date}, DomainConstraint.ANY),
            to_first_of_month: ({datetime.date}, DomainConstraint.ANY),
            cast_to_float: ({float}, DomainConstraint.ANY),
            cast_to_int: ({int}, DomainConstraint.ANY),
            negate: ({int, float}, DomainConstraint.ANY),
            eq: ({bool}, DomainConstraint.ANY),
            ne: ({bool}, DomainConstraint.ANY),
            string_contains: ({bool}, DomainConstraint.ANY),
            in_: ({bool}, DomainConstraint.ANY),
            and_: ({bool}, DomainConstraint.ANY),
            or_: ({bool}, DomainConstraint.ANY),
            lt: ({bool}, DomainConstraint.ANY),
            gt: ({bool}, DomainConstraint.ANY),
            le: ({bool}, DomainConstraint.ANY),
            ge: ({bool}, DomainConstraint.ANY),
            add: ({int, float}, DomainConstraint.ANY),
            subtract: ({int, float}, DomainConstraint.ANY),
            multiply: ({int, float}, DomainConstraint.ANY),
            truediv: ({float}, DomainConstraint.ANY),
            floordiv: ({int}, DomainConstraint.ANY),
            date_add_years: ({datetime.date}, DomainConstraint.ANY),
            date_add_months: ({datetime.date}, DomainConstraint.ANY),
            date_add_days: ({datetime.date}, DomainConstraint.ANY),
            date_difference_in_years: ({int}, DomainConstraint.ANY),
            date_difference_in_months: ({int}, DomainConstraint.ANY),
            date_difference_in_days: ({int}, DomainConstraint.ANY),
            count_episodes: ({int}, DomainConstraint.PATIENT),
            case: ({int, float, bool, datetime.date}, DomainConstraint.ANY),
            maximum_of: (COMPARABLE_TYPES, DomainConstraint.ANY),
            minimum_of: (COMPARABLE_TYPES, DomainConstraint.ANY),
        }

        # Keep only the strategies compatible with the requested type and frame,
        # preserving the "simpler first" ordering of the mapping above
        frame_is_patient_level = is_one_row_per_patient_frame(frame)
        possible_series = [
            strategy_fn
            for strategy_fn, (allowed_types, allowed_domains) in (
                series_constraints.items()
            )
            if type_ in allowed_types and frame_is_patient_level in allowed_domains
        ]
        assert possible_series, f"No series matches {type_}, {type(frame)}"

        chosen_strategy_fn = draw(st.sampled_from(possible_series))
        return draw(chosen_strategy_fn(type_, frame))
212
213
    def value(type_, _frame):
        """Strategy for a constant `Value` of `type_`; the frame is ignored."""
        constant_strategy = value_strategies[type_]
        return st.builds(Value, constant_strategy)
215
216
    def select_column(type_, frame):
        """Strategy selecting, from `frame`, any schema column whose type equals `type_`."""
        matching_names = [
            name for name, column_type in schema.column_types if column_type == type_
        ]
        return st.builds(SelectColumn, st.just(frame), st.sampled_from(matching_names))
219
220
    def exists(_type, _frame):
        """Strategy for an `Exists` aggregation over any frame; both arguments are ignored."""
        source_frames = any_frame()
        return st.builds(AggregateByPatient.Exists, source_frames)
222
223
    def count(_type, _frame):
        """Strategy for a `Count` aggregation over any frame; both arguments are ignored."""
        source_frames = any_frame()
        return st.builds(AggregateByPatient.Count, source_frames)
225
226
    @st.composite
    def count_distinct(draw, _type, _frame):
        """Draw a `CountDistinct` over a series of any type on a many-rows-per-patient frame.

        Both arguments are ignored: the input type and frame are chosen here.
        """
        input_type = draw(any_type())
        event_frame = draw(many_rows_per_patient_frame())
        input_series = draw(series(input_type, event_frame))
        return AggregateByPatient.CountDistinct(input_series)
231
232
    @st.composite
    def count_episodes(draw, _type, _frame):
        """Draw a `CountEpisodes` over a date series with a maximum gap of 1 to 5 days.

        Both arguments are ignored: the date series is drawn from a freshly chosen
        many-rows-per-patient frame.
        """
        event_frame = draw(many_rows_per_patient_frame())
        dates = draw(series(datetime.date, event_frame))
        gap = draw(st.integers(1, 5))
        return AggregateByPatient.CountEpisodes(dates, gap)
238
239
    def min_(type_, _frame):
        """Strategy for a `Min` aggregation of a `type_` series; the frame is ignored."""
        aggregation = AggregateByPatient.Min
        return aggregation_operation(type_, aggregation)
241
242
    def max_(type_, _frame):
        """Strategy for a `Max` aggregation of a `type_` series; the frame is ignored."""
        aggregation = AggregateByPatient.Max
        return aggregation_operation(type_, aggregation)
244
245
    def sum_(type_, _frame):
        """Strategy for a `Sum` aggregation of a `type_` series; the frame is ignored."""
        aggregation = AggregateByPatient.Sum
        return aggregation_operation(type_, aggregation)
247
248
    def combine_as_set(type_, _frame):
        """Strategy for a `CombineAsSet` aggregation of a `type_` series; the frame is ignored."""
        aggregation = AggregateByPatient.CombineAsSet
        return aggregation_operation(type_, aggregation)
250
251
    @st.composite
    def mean(draw, _type, _frame):
        """Draw a `Mean` over a numeric series on a many-rows-per-patient frame.

        Both arguments are ignored: the numeric input type and the frame are
        chosen here.
        """
        numeric_type = draw(any_numeric_type())
        event_frame = draw(many_rows_per_patient_frame())
        input_series = draw(series(numeric_type, event_frame))
        return AggregateByPatient.Mean(input_series)
256
257
    @st.composite
    def aggregation_operation(draw, type_, aggregation):
        """Draw `aggregation` applied to a `type_` series on a many-rows-per-patient frame.

        The result is a patient series even though its input comes from a
        many-rows-per-patient frame.
        """
        event_frame = draw(many_rows_per_patient_frame())
        input_series = draw(series(type_, event_frame))
        return aggregation(input_series)
263
264
    @st.composite
    def is_null(draw, _type, frame):
        """Draw an `IsNull` over a series of any type built on `frame`."""
        input_type = draw(any_type())
        input_series = draw(series(input_type, frame))
        return Function.IsNull(input_series)
268
269
    def not_(type_, frame):
        """Strategy for `Not` applied to a `type_` series built on `frame`."""
        operand = series(type_, frame)
        return st.builds(Function.Not, operand)
271
272
    def year_from_date(_type, frame):
        """Strategy for `YearFromDate` applied to a date series built on `frame`."""
        date_series = series(datetime.date, frame)
        return st.builds(Function.YearFromDate, date_series)
274
275
    def month_from_date(_type, frame):
        """Strategy for `MonthFromDate` applied to a date series built on `frame`."""
        date_series = series(datetime.date, frame)
        return st.builds(Function.MonthFromDate, date_series)
277
278
    def day_from_date(_type, frame):
        """Strategy for `DayFromDate` applied to a date series built on `frame`."""
        date_series = series(datetime.date, frame)
        return st.builds(Function.DayFromDate, date_series)
280
281
    def to_first_of_year(_type, frame):
        """Strategy for `ToFirstOfYear` applied to a date series built on `frame`."""
        date_series = series(datetime.date, frame)
        return st.builds(Function.ToFirstOfYear, date_series)
283
284
    def to_first_of_month(_type, frame):
        """Strategy for `ToFirstOfMonth` applied to a date series built on `frame`."""
        date_series = series(datetime.date, frame)
        return st.builds(Function.ToFirstOfMonth, date_series)
286
287
    @st.composite
    def cast_to_float(draw, _type, frame):
        """Draw a `CastToFloat` over a numeric (int or float) series built on `frame`."""
        source_type = draw(any_numeric_type())
        source_series = draw(series(source_type, frame))
        return Function.CastToFloat(source_series)
291
292
    @st.composite
    def cast_to_int(draw, type_, frame):
        """Draw a `CastToInt` over a numeric (int or float) series built on `frame`.

        The `type_` argument is not used: the type of the series being cast is
        drawn independently of it.
        """
        source_type = draw(any_numeric_type())
        source_series = draw(series(source_type, frame))
        return Function.CastToInt(source_series)
296
297
    def negate(type_, frame):
        """Strategy for `Negate` applied to a `type_` series built on `frame`."""
        operand = series(type_, frame)
        return st.builds(Function.Negate, operand)
299
300
    @st.composite
    def eq(draw, _type, frame):
        """Draw an `EQ` comparison between two operands of one arbitrarily chosen type."""
        operand_type = draw(any_type())
        comparison = binary_operation(operand_type, frame, Function.EQ)
        return draw(comparison)
304
305
    @st.composite
    def ne(draw, _type, frame):
        """Draw an `NE` comparison between two operands of one arbitrarily chosen type."""
        operand_type = draw(any_type())
        comparison = binary_operation(operand_type, frame, Function.NE)
        return draw(comparison)
309
310
    def string_contains(_type, frame):
        """Strategy for `StringContains` between two `str` operands."""
        operator = Function.StringContains
        return binary_operation(str, frame, operator)
312
313
    @st.composite
    def in_(draw, _type, frame):
        """Draw an `In` test of a series against a constant set or a `CombineAsSet` series.

        The element type is chosen arbitrarily; the right-hand side is either a
        `Value` wrapping a frozenset of up to five constants or a `CombineAsSet`
        aggregation of that type.
        """
        element_type = draw(any_type())
        use_aggregated_set = draw(st.booleans())
        if use_aggregated_set:
            rhs = draw(combine_as_set(element_type, frame))
        else:
            constants = draw(
                st.sets(value_strategies[element_type], min_size=0, max_size=5)
            )
            rhs = Value(frozenset(constants))
        lhs = draw(series(element_type, frame))
        return Function.In(lhs, rhs)
325
326
    def and_(type_, frame):
        """Strategy for `And` between two `type_` series; constant `Value` operands are excluded."""
        operator = Function.And
        return binary_operation(type_, frame, operator, allow_value=False)
328
329
    def or_(type_, frame):
        """Strategy for `Or` between two `type_` series; constant `Value` operands are excluded."""
        operator = Function.Or
        return binary_operation(type_, frame, operator, allow_value=False)
331
332
    @st.composite
    def lt(draw, _type, frame):
        """Draw an `LT` comparison between two operands of one comparable type."""
        operand_type = draw(any_comparable_type())
        comparison = binary_operation(operand_type, frame, Function.LT)
        return draw(comparison)
336
337
    @st.composite
    def gt(draw, _type, frame):
        """Draw a `GT` comparison between two operands of one comparable type."""
        operand_type = draw(any_comparable_type())
        comparison = binary_operation(operand_type, frame, Function.GT)
        return draw(comparison)
341
342
    @st.composite
    def le(draw, _type, frame):
        """Draw an `LE` comparison between two operands of one comparable type."""
        operand_type = draw(any_comparable_type())
        comparison = binary_operation(operand_type, frame, Function.LE)
        return draw(comparison)
346
347
    @st.composite
    def ge(draw, _type, frame):
        """Draw a `GE` comparison between two operands of one comparable type."""
        operand_type = draw(any_comparable_type())
        comparison = binary_operation(operand_type, frame, Function.GE)
        return draw(comparison)
351
352
    def add(type_, frame):
        """Strategy for `Add` with both operands of `type_`."""
        operator = Function.Add
        return binary_operation(type_, frame, operator)
354
355
    def subtract(type_, frame):
        """Strategy for `Subtract` with both operands of `type_`."""
        operator = Function.Subtract
        return binary_operation(type_, frame, operator)
357
358
    def multiply(type_, frame):
        """Strategy for `Multiply` with both operands of `type_`."""
        operator = Function.Multiply
        return binary_operation(type_, frame, operator)
360
361
    def truediv(type_, frame):
        """Strategy for `TrueDivide` with both operands of `type_`."""
        operator = Function.TrueDivide
        return binary_operation(type_, frame, operator)
363
364
    def floordiv(type_, frame):
        """Strategy for `FloorDivide` with both operands of `type_`."""
        operator = Function.FloorDivide
        return binary_operation(type_, frame, operator)
366
367
    def date_add_years(type_, frame):
        """Strategy for `DateAddYears`: a `type_` left operand and an `int` right operand."""
        operator = Function.DateAddYears
        return binary_operation_with_types(type_, int, frame, operator)
369
370
    def date_add_months(type_, frame):
        """Strategy for `DateAddMonths`: a `type_` left operand and an `int` right operand."""
        operator = Function.DateAddMonths
        return binary_operation_with_types(type_, int, frame, operator)
372
373
    def date_add_days(type_, frame):
        """Strategy for `DateAddDays`: a `type_` left operand and an `int` right operand."""
        operator = Function.DateAddDays
        return binary_operation_with_types(type_, int, frame, operator)
375
376
    def date_difference_in_years(type_, frame):
        """Strategy for `DateDifferenceInYears` between two date operands.

        `type_` is not used: the operands are always dates.
        """
        operator = Function.DateDifferenceInYears
        return binary_operation(datetime.date, frame, operator)
378
379
    def date_difference_in_months(type_, frame):
        """Strategy for `DateDifferenceInMonths` between two date operands.

        `type_` is not used: the operands are always dates.
        """
        operator = Function.DateDifferenceInMonths
        return binary_operation(datetime.date, frame, operator)
381
382
    def date_difference_in_days(type_, frame):
        """Strategy for `DateDifferenceInDays` between two date operands.

        `type_` is not used: the operands are always dates.
        """
        operator = Function.DateDifferenceInDays
        return binary_operation(datetime.date, frame, operator)
384
385
    @st.composite
    def case(draw, type_, frame):
        """Draw a `Case` whose branches and default produce `type_`.

        The mapping has between one and three entries. Each key is a bool series
        on `frame`; each value, like the default, is None, a `Value` of `type_`
        or a series of `type_`.
        """

        def branch_result():
            # Shared shape of the mapping values and the default
            return st.one_of(st.none(), value(type_, frame), series(type_, frame))

        condition_st = series(bool, frame)
        mapping_st = st.dictionaries(
            condition_st, branch_result(), min_size=1, max_size=3
        )
        default_st = branch_result()

        mapping = draw(mapping_st)
        default = draw(default_st)

        # A valid Case needs at least one non-NULL value or a default
        outcomes = [default, *mapping.values()]
        hyp.assume(any(outcome is not None for outcome in outcomes))
        return Case(mapping, default)
400
401
    def binary_operation(type_, frame, operator_func, allow_value=True):
        """Strategy for an operation whose lhs and rhs arguments share the same type."""
        return binary_operation_with_types(
            lhs_type=type_,
            rhs_type=type_,
            frame=frame,
            operator_func=operator_func,
            allow_value=allow_value,
        )
407
408
    @st.composite
    def binary_operation_with_types(
        draw, lhs_type, rhs_type, frame, operator_func, allow_value=True
    ):
        """Draw `operator_func(lhs, rhs)` with independently specified operand types.

        One operand is always a series drawn from `frame`. The other is one of:
          a) a series drawn from `frame`
          b) a series drawn from any one-row-per-patient frame
          c) a constant `Value` (only when `allow_value` is true)

        Values are disallowed for some operations: for and/or, which take two
        boolean series, constant True/False operands are unlikely to be seen in
        the wild and cause particularly nonsensical Case statements in generative
        test examples.
        """
        # Choose how the "other" operand is built: as a value (if allowed) or as
        # a series drawn from a frame
        builders = [series]
        if allow_value:
            builders = [value, series]
        other_series = draw(st.sampled_from(builders))

        # Choose the frame for the "other" operand: a new one-row-per-patient
        # frame or this frame (ignored when the operand turns out to be a value)
        other_frame = draw(st.one_of(one_row_per_patient_frame(), st.just(frame)))

        # Decide which side each operand goes on
        primary = (frame, series)
        secondary = (other_frame, other_series)
        (lhs_frame, lhs_input), (rhs_frame, rhs_input) = draw(
            st.sampled_from([(primary, secondary), (secondary, primary)])
        )

        lhs = draw(lhs_input(lhs_type, lhs_frame))
        rhs = draw(rhs_input(rhs_type, rhs_frame))
        return operator_func(lhs, rhs)
449
450
    @st.composite
    def nary_operation_with_types(draw, frame, operator_func, series_type):
        """Draw `operator_func` applied to a tuple of one to four `series_type` arguments.

        We're testing the logic of the query engines rather than their scaling
        properties, so a handful of arguments is enough.
        """
        num_args = draw(st.integers(1, 4))

        # Indices of the arguments that will NOT come from the supplied frame. The
        # size cap guarantees at least one argument is drawn from the original frame.
        other_frame_args = draw(
            st.lists(
                st.integers(0, num_args - 1),
                max_size=num_args - 1,
                unique=True,
            )
        )

        args = []
        for position in range(num_args):
            if position in other_frame_args:
                # Not from the supplied frame, so either a value or a
                # one-row-per-patient series
                if draw(st.booleans()):
                    patient_frame = draw(one_row_per_patient_frame())
                    arg = draw(series(series_type, patient_frame))
                else:
                    arg = draw(value(series_type, None))
            else:
                arg = draw(series(series_type, frame))
            args.append(arg)
        return operator_func(tuple(args))
483
484
    def maximum_of(type_, frame):
        """Strategy for `MaximumOf` over several `type_` arguments."""
        operator = Function.MaximumOf
        return nary_operation_with_types(frame, operator, type_)
486
487
    def minimum_of(type_, frame):
        """Strategy for `MinimumOf` over several `type_` arguments."""
        operator = Function.MinimumOf
        return nary_operation_with_types(frame, operator, type_)
489
490
    def any_type():
        """Strategy choosing any type that has an entry in `value_strategies`."""
        known_types = list(value_strategies.keys())
        return st.sampled_from(known_types)
492
493
    def any_numeric_type():
        """Strategy choosing between `int` and `float`."""
        numeric_types = [int, float]
        return st.sampled_from(numeric_types)
495
496
    def any_comparable_type():
        """Strategy choosing any of `COMPARABLE_TYPES` (every supported type except bool)."""
        return st.sampled_from(COMPARABLE_TYPES)
498
499
    # Frame strategies
500
    #
501
    # The main concern when choosing a frame is whether it has one or many rows per patient. Some
502
    # callers require one or the other, some don't mind; so we provide strategies for each case.
503
    # And sometimes callers need _either_ the frame they have in their hand _or_ an arbitrary
504
    # patient frame, so we provide a strategy for that too.
505
    #
506
    # At variance with the general approach here, many-rows-per-patient frames are created by
507
    # imperatively building stacks of filters on top of select nodes, rather than relying on
508
    # recursion, because it enormously simplifies the logic needed to keep filter conditions
509
    # consistent with the source.
510
    def any_frame():
        """Strategy for a frame with either one or many rows per patient."""
        # Order matters: "simpler" first (see header comment)
        candidates = (one_row_per_patient_frame(), many_rows_per_patient_frame())
        return st.one_of(*candidates)
516
517
    def one_row_per_patient_frame():
        """Strategy for a one-row-per-patient frame.

        The plain patient table comes first because it is the option used once
        the maximum depth has been exceeded.
        """
        candidates = (
            select_patient_table(),
            pick_one_row_per_patient_frame(),
            inline_patient_table(),
        )
        return depth_bounded_one_of(*candidates)
523
524
    def many_rows_per_patient_frame():
        """Strategy for a many-rows-per-patient frame.

        The plain event table comes first because it is the option used once the
        maximum depth has been exceeded.
        """
        candidates = (select_table(), filtered_table())
        return depth_bounded_one_of(*candidates)
526
527
    @st.composite
    def filtered_table(draw):
        """Draw an event table wrapped in between one and six filters."""
        table = draw(select_table())
        num_filters = draw(st.integers(min_value=1, max_value=6))
        for _ in range(num_filters):
            table = draw(filter_(table))
        return table
533
534
    @st.composite
    def sorted_frame(draw):
        """Draw an event table with a stack of sorts and filters applied on top.

        The stack holds between one and three sorts and at most six filters, in
        an arbitrary order.
        """

        def within_limits(candidate):
            num_sorts = candidate.count(sort)
            num_filters = candidate.count(filter_)
            return (1 <= num_sorts <= 3) and (num_filters <= 6)

        # Decide how many Sorts and Filters (if any) we're going to apply
        operation_lists = st.lists(
            st.sampled_from([sort, filter_]), min_size=1, max_size=9
        )
        operations = draw(operation_lists.filter(within_limits))

        # Pick a table and apply the operations
        current = draw(select_table())
        for apply_operation in operations:
            current = draw(apply_operation(current))
        return current
547
548
    @st.composite
    def pick_one_row_per_patient_frame(draw):
        """Draw a `PickOneRowPerPatient` taking the first or last row of a sorted frame."""
        sorted_source = draw(sorted_frame())
        position = draw(st.sampled_from([Position.FIRST, Position.LAST]))
        return PickOneRowPerPatient(sorted_source, position)
553
554
    def select_table():
        """Strategy for a `SelectTable` on any of `event_tables`, using the shared schema."""
        table_names = st.sampled_from(event_tables)
        return st.builds(SelectTable, table_names, st.just(schema))
556
557
    def select_patient_table():
        """Strategy for a `SelectPatientTable` on any of `patient_tables`, using the shared schema."""
        table_names = st.sampled_from(patient_tables)
        return st.builds(SelectPatientTable, table_names, st.just(schema))
561
562
    @st.composite
    def inline_patient_table(draw):
        """Draw an `InlinePatientTable` with generated rows and the shared schema.

        Each row is a tuple of an integer between 1 and 10 followed by one value
        per schema column; no two rows share the same leading integer.
        """
        # NOTE(review): the leading integer is presumably the patient id — confirm
        column_strategies = [
            value_strategies[column_type] for _name, column_type in schema.column_types
        ]
        row_strategy = st.tuples(st.integers(1, 10), *column_strategies)
        rows = draw(st.lists(row_strategy, unique_by=lambda row: row[0]))
        return InlinePatientTable(rows=tuple(rows), schema=schema)
581
582
    @st.composite
    def filter_(draw, source):
        """Draw a `Filter` on `source` whose condition is a bool series built on `source` or one of its ancestors."""
        condition_frame = draw(ancestor_of(source))
        condition = draw(series(bool, condition_frame))
        return Filter(source, condition)
586
587
    @st.composite
    def sort(draw, source):
        """Draw a `Sort` of `source` by a comparable series built on `source` or one of its ancestors."""
        key_type = draw(any_comparable_type())
        key_frame = draw(ancestor_of(source))
        sort_key = draw(series(key_type, key_frame))
        return Sort(source, sort_key)
592
593
    @st.composite
    def ancestor_of(draw, frame):
        """Draw `frame` itself or a frame reached by following `.source` up to three times.

        The walk stops early at the first frame that has no `source` attribute.
        """
        remaining_steps = draw(st.integers(min_value=0, max_value=3))
        while remaining_steps > 0 and hasattr(frame, "source"):
            frame = frame.source
            remaining_steps -= 1
        return frame
601
602
    # Variable strategy
603
    #
604
    # Puts everything above together to create a variable.
605
    @st.composite
    def valid_patient_variable(draw):
        """Draw a series of any type built on a one-row-per-patient frame."""
        variable_type = draw(any_type())
        patient_frame = draw(one_row_per_patient_frame())
        variable = draw(series(variable_type, patient_frame))
        return variable
610
611
    @st.composite
    def valid_event_series(draw):
        """Draw a series of any type built on a many-rows-per-patient frame."""
        series_type = draw(any_type())
        event_frame = draw(many_rows_per_patient_frame())
        event_series = draw(series(series_type, event_frame))
        return event_series
616
617
    # A population definition is a boolean-typed variable that meets some additional
618
    # criteria enforced by the query model
619
    @st.composite
    def valid_population(draw):
        """Draw a bool patient series that passes `is_valid_population`.

        Candidates that fail the check are discarded via `hyp.assume`.
        """
        patient_frame = draw(one_row_per_patient_frame())
        candidate = draw(series(bool, patient_frame))
        hyp.assume(is_valid_population(candidate))
        return candidate
625
626
    return st.builds(
627
        make_dataset,
628
        valid_population(),
629
        valid_patient_variable(),
630
        # Event series is optional
631
        st.one_of(st.none(), valid_event_series()),
632
    )
633
634
635
def make_dataset(population, patient_variable, event_series):
    """Assemble a `Dataset` from a population, one patient variable and an optional event series.

    The patient variable is stored under the name "v". When `event_series` is
    not None it becomes column "e" of a single event table named "event_table";
    otherwise the dataset has no event tables. Measures are always None.
    """
    if event_series is None:
        events = {}
    else:
        events = {"event_table": SeriesCollectionFrame({"e": event_series})}

    return Dataset(
        population=population,
        variables={"v": patient_variable},
        events=events,
        measures=None,
    )
648
649
650
def is_valid_population(series):
    """Return whether `series` is accepted by `validate_population_definition`.

    A `ValidationError` means the series is not valid. Any other exception also
    yields False when `get_ignored_error_type` recognises it, and is re-raised
    otherwise.
    """
    try:
        validate_population_definition(series)
    except ValidationError:
        return False
    except Exception as e:  # pragma: no cover
        if not get_ignored_error_type(e):
            raise
        return False
    return True
660
661
662
def is_one_row_per_patient_frame(frame):
    """Return True if `frame` is one of the one-row-per-patient frame node types.

    The check covers every node type that the `one_row_per_patient_frame`
    strategy in this module can produce. `InlinePatientTable` is included so
    that series built on an inline patient table are treated as patient-level,
    like those built on a `SelectPatientTable` or `PickOneRowPerPatient`.
    """
    return isinstance(
        frame, SelectPatientTable | PickOneRowPerPatient | InlinePatientTable
    )