[1bd6b5]: / tests / test_text_processing.py

Download this file

50 lines (40 with data), 1.3 kB

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from pandas import Series
from pytest import raises
from helpers.text_processing import matches_n_consecutive_words, prefix_remover
def test_matches_n_consecutive_words():
    """Check which phrases `matches_n_consecutive_words` reports for a text.

    Every scenario passes the same two candidate phrases and the value 2 as
    the third argument (presumably the number of consecutive words), and
    compares the returned list against the expected matches.
    """
    candidate_phrases = ('lorem ipsum', 'sit amet')
    scenarios = [
        # (text to scan, expected list of matches)
        ('lorem ipsum dolor', ['lorem ipsum']),  # at the beginning
        ('X lorem ipsum X', ['lorem ipsum']),  # in the middle
        ('X lorem ipsum', ['lorem ipsum']),  # at the end
        ('XloremX XipsumX dolor', []),  # not present
        (
            'X lorem ipsum X lorem ipsum',
            ['lorem ipsum', 'lorem ipsum'],
        ),  # multiple
    ]

    for text, expected in scenarios:
        # Build a fresh set for every call so no state is shared between
        # scenarios.
        found = matches_n_consecutive_words(text, set(candidate_phrases), 2)
        assert found == expected
def test_prefix_remover():
    """Check the callable returned by `prefix_remover('test_')`.

    Applied element-wise to a Series, it must strip the prefix from values
    that carry it and raise ValueError for a value that does not.
    """
    strip_prefix = prefix_remover('test_')

    # Every value carries the prefix: only the remainder is kept.
    all_prefixed = Series(['test_1', 'test_2'])
    stripped = all_prefixed.apply(strip_prefix)
    assert list(stripped) == ['1', '2']

    # The last value lacks the prefix: the error must name both the prefix
    # and the offending value.
    expected_message = "Prefix 'test_' missing in 'te_3'"
    one_unprefixed = Series(['test_1', 'test_2', 'te_3'])
    with raises(ValueError, match=expected_message):
        one_unprefixed.apply(strip_prefix)