# tests/unit/test_lookup_struct.py
1
import io
2
from pathlib import Path
3
from unittest.mock import patch
4
5
import docdeid as dd
6
7
from deduce.lookup_structs import (
8
    cache_lookup_structs,
9
    load_lookup_structs_from_cache,
10
    load_raw_itemset,
11
    load_raw_itemsets,
12
    validate_lookup_struct_cache,
13
)
14
15
# Directory holding the lookup test fixtures, resolved against the current
# working directory at import time.
DATA_PATH = Path.cwd().joinpath("tests", "data", "lookup")
16
17
18
class TestLookupStruct:
    """Tests for loading raw itemsets and for validating, loading and writing
    the lookup struct cache."""

    def test_load_raw_itemset(self):
        """Loading ``src/lst_test`` yields the five expected items."""
        raw_itemset = load_raw_itemset(DATA_PATH / "src" / "lst_test")

        assert len(raw_itemset) == 5
        assert "de Vries" in raw_itemset
        assert "De Vries" in raw_itemset
        assert "Sijbrand" in raw_itemset
        assert "Sybrand" in raw_itemset
        assert "Pieters" in raw_itemset
        assert "Wolter" not in raw_itemset

    def test_load_raw_itemset_nested(self):
        """Loading ``src/lst_test_nested`` yields exactly ``a``, ``b``, ``c``, ``d``."""
        raw_itemset = load_raw_itemset(DATA_PATH / "src" / "lst_test_nested")

        assert raw_itemset == {"a", "b", "c", "d"}

    def test_load_raw_itemsets(self):
        """Loading several subdirs returns one itemset per list, keyed ``test``
        and ``test_nested``."""
        raw_itemsets = load_raw_itemsets(
            base_path=DATA_PATH, subdirs=["lst_test", "lst_test_nested"]
        )

        assert "test" in raw_itemsets
        assert len(raw_itemsets["test"]) == 5
        assert "test_nested" in raw_itemsets
        assert len(raw_itemsets["test_nested"]) == 4

    def test_validate_lookup_struct_cache_valid(self):
        """The cache validates when the version matches and the patched file
        stats report a modification time way in the past."""
        cache = {
            "deduce_version": "2.5.0",
            "saved_datetime": "2023-12-06 10:19:39.198133",
            "lookup_structs": "_",
        }

        class MockStats:
            st_mtime = 1000000000  # way in the past

        # Fake both the file listing and the stat results so the outcome does
        # not depend on the real files on disk.
        with patch("pathlib.Path.glob", return_value=[1, 2, 3]):
            with patch("os.stat", return_value=MockStats()):
                assert validate_lookup_struct_cache(
                    cache=cache, base_path=DATA_PATH, deduce_version="2.5.0"
                )

    def test_validate_lookup_struct_cache_file_changes(self):
        """The cache does not validate when the patched file stats report a
        modification time way in the future."""
        cache = {
            "deduce_version": "2.5.0",
            "saved_datetime": "2023-12-06 10:19:39.198133",
            "lookup_structs": "_",
        }

        class MockStats:
            st_mtime = 2000000000  # way in the future

        # Fake both the file listing and the stat results so the outcome does
        # not depend on the real files on disk.
        with patch("pathlib.Path.glob", return_value=[1, 2, 3]):
            with patch("os.stat", return_value=MockStats()):
                assert not validate_lookup_struct_cache(
                    cache=cache, base_path=DATA_PATH, deduce_version="2.5.0"
                )

    @patch("deduce.lookup_structs.validate_lookup_struct_cache", return_value=True)
    def test_load_lookup_structs_from_cache(self, _):
        """With validation patched to succeed, the cache under ``DATA_PATH``
        loads as a collection holding ``test`` and ``test_nested``."""
        ds_collection = load_lookup_structs_from_cache(
            cache_path=DATA_PATH, deduce_version="_"
        )

        assert len(ds_collection) == 2
        assert "test" in ds_collection
        assert "test_nested" in ds_collection

    @patch("deduce.lookup_structs.validate_lookup_struct_cache", return_value=True)
    def test_load_lookup_structs_from_cache_nofile(self, _):
        """Loading from a non-existing cache directory returns ``None``."""
        ds_collection = load_lookup_structs_from_cache(
            cache_path=DATA_PATH / "non_existing_dir", deduce_version="_"
        )

        assert ds_collection is None

    @patch("deduce.lookup_structs.validate_lookup_struct_cache", return_value=False)
    def test_load_lookup_structs_from_cache_invalid(self, _):
        """With validation patched to fail, loading the cache returns ``None``."""
        ds_collection = load_lookup_structs_from_cache(
            cache_path=DATA_PATH, deduce_version="_"
        )

        assert ds_collection is None

    @patch("builtins.open", return_value=io.BytesIO())
    @patch("pickle.dump")
    def test_cache_lookup_structs(self, mock_pickle_dump, _):
        """Caching lookup structs calls ``pickle.dump`` exactly once.

        ``patch`` decorators are applied bottom-up, so the ``pickle.dump`` mock
        is injected first and the ``builtins.open`` mock second.
        """
        cache_lookup_structs(
            lookup_structs=dd.ds.DsCollection(),
            cache_path=DATA_PATH,
            deduce_version="2.5.0",
        )

        # Use the real Mock assertion: ``called_once()`` is not part of the
        # Mock API, so ``assert mock.called_once()`` can never fail properly.
        mock_pickle_dump.assert_called_once()