[e988c2]: / tests / unit / utils / test_regex_utils.py

Download this file

66 lines (57 with data), 1.8 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import random
import pytest
from ehrql.utils import regex_utils
@pytest.mark.parametrize(
    "re_str,examples",
    [
        # Alternation between literal branches
        ("abc(foo|bar)", ["abcbar", "abcfoo"]),
        # Character ranges
        ("[A-Z][0-9]", ["D1", "V1", "H0", "L9", "E2"]),
        # Bounded repeats (including an optional character)
        ("A{2,4}_?B{2}", ["AAABB", "AABB", "AA_BB", "AABB", "AAA_BB"]),
        # Unbounded repeats
        ("a+b*", ["aaaaaaaab", "ab", "aaaaaaaaaa", "aab", "aaaaaabbb"]),
        # Branches, ranges and repeats combined in one pattern
        (
            "(none|alpha[A-Z]{3,5}|digit[0-9]{3,5})",
            ["alphaCVD", "alphaALT", "alphaFAH", "none", "digit18445"],
        ),
    ],
)
def test_create_regex_generator(re_str, examples):
    """Check the generator built from ``re_str`` reproduces ``examples``.

    The generator is called once per expected example, each time with the
    same ``random.Random(1234)`` instance, and the collected outputs must
    equal ``examples`` in order.
    """
    make_example = regex_utils.create_regex_generator(re_str)
    # A fixed seed makes the sequence of generated strings reproducible.
    rng = random.Random(1234)
    produced = []
    for _ in range(len(examples)):
        produced.append(make_example(rng))
    assert produced == examples
def test_validate_regex():
    """Check ``validate_regex`` returns a truthy value for a supported pattern."""
    outcome = regex_utils.validate_regex("E[A-Z]{3}-(foo|bar)")
    assert outcome
@pytest.mark.parametrize(
    "re_str,error",
    [
        # Parse errors raised by Python's own regex engine are passed through
        ("abc(123", r"missing \), unterminated subpattern at position 3"),
        # A valid regex that relies on an unhandled construct (here a
        # non-greedy match) should produce an "unsupported" error
        ("t+?test", "unsupported"),
        # Subpattern groups are supported, but setting flags inside the
        # group is not
        ("(?i:TEST)", "unsupported"),
        # Unsetting flags inside the group is not supported either
        ("(?-i:TEST)", "unsupported"),
    ],
)
def test_validate_regex_error(re_str, error):
    """Check ``validate_regex`` raises ``RegexError`` matching ``error``.

    ``error`` is handed to ``pytest.raises`` as its ``match`` pattern, so it
    is treated as a regular expression searched for in the exception text.
    """
    expected_failure = pytest.raises(regex_utils.RegexError, match=error)
    with expected_failure:
        regex_utils.validate_regex(re_str)