a b/src/models/logical_criteria.py
1
# models/logical_criteria.py
2
"""
3
Data Models for Logical Structured Clinical Trial Criteria
4
5
This module defines the Pydantic models used to represent the logical relationships
6
between atomic criteria extracted from clinical trial eligibility criteria.
7
8
Classes prefixed with 'LLM' are used as output formats for LLM processing, while
9
other classes are used for internal data storage.
10
11
Classes:
12
    LLMLogicalAnd: Logical AND relationship between criteria.
13
    LLMLogicalOr: Logical OR relationship between criteria.
14
    LLMLogicalNot: Logical NOT operation on criteria.
15
    LLMLogicalXor: Logical XOR relationship between criteria.
16
    LLMLogicalConditional: Conditional (if-then-else) relationship.
17
    LLMLogicalWrapperResponse: Container for LLM logical structure response.
18
    LogicalLine: Line with identified criteria and logical structure.
19
    LogicalTrial: Complete trial with logically structured criteria.
20
21
Important:
22
    The docstrings of Pydantic models are used as prompts for LLM processing.
23
    Changing these docstrings will alter how the LLM interprets the output format.
24
    Use code comments rather than docstring modifications if documentation changes
25
    are needed without affecting LLM behavior.
26
"""
27
28
from typing import List, Union
29
30
from pydantic import BaseModel, Field
31
32
from src.models.identified_criteria import (
33
    RawTrialData,
34
    Requirement,
35
)
36
37
38
class SingleRequirementCriterion(BaseModel):
    """
    Represents an atomic criterion with a single requirement extracted from the eligibility criteria.

    example:
        input:
        "Tissue from tumor must be available and > 2 cm in diameter.",

        output:
        "exact_snippets": "Tissue from tumor must be available ... > 2 cm in diameter.",
        "criterion": "tumor tissue",
        "requirement": {
            "requirement_type": "availability",
            "expected_value": true
        }
    """

    # NOTE: per the module docstring, the class docstring above doubles as an
    # LLM prompt, so documentation for maintainers lives in `#` comments only.

    exact_snippets: str = Field(
        ...,
        description="Exact text snippets from the eligibility criteria that were used to extract this criterion, using ellipses (...) for non-consecutive text.",
    )

    criterion: str = Field(
        ...,
        description="The specific property, attribute, or condition that is being tested (e.g., 'age', 'lung cancer', 'BMI').",
    )

    requirement: Requirement = Field(
        ..., description="The requirement and its expected value for the criterion."
    )

    # Equality and hashing are both needed so instances can be used in
    # hash-based collections when validating logical structures.
    def __eq__(self, other: object) -> bool:
        """Return True when `other` is a criterion with the same string form."""
        if not isinstance(other, SingleRequirementCriterion):
            return False
        return str(self) == str(other)

    def __hash__(self) -> int:
        """Return a hash derived from the same string form that `__eq__` compares.

        Hashing `str(self)` guarantees that equal instances hash equally, and it
        does not require the nested `requirement` model to be hashable itself
        (Pydantic models are unhashable unless frozen or given a `__hash__`).
        """
        return hash(str(self))
77
78
79
class IdentifiedUnrolledLine(BaseModel):
    """
    Represents a structured line of eligibility criteria.
    """

    # Both fields are required: neither Field() call supplies a default.
    # The docstring above is left untouched because the module docstring
    # states that model docstrings are used as LLM prompts.

    # Raw text of the eligibility-criteria line.
    line: str = Field(description="The original line of eligibility criteria.")

    # Structured criteria associated with `line`.
    criterions: List[SingleRequirementCriterion] = Field(
        description="List of structured criteria."
    )
88
89
90
class LLMLogicalAnd(BaseModel):
    """
    Represents a logical AND relationship between criteria.
    """

    # Operands of the AND node: each item is either an atomic criterion or
    # another logical node. The quoted class names are forward references;
    # they are resolved by the module-level model_rebuild() calls.
    and_criteria: List[
        Union[
            SingleRequirementCriterion,
            "LLMLogicalAnd",
            "LLMLogicalOr",
            "LLMLogicalNot",
            "LLMLogicalXor",
            "LLMLogicalConditional",
        ]
    ] = Field(description="The criteria involved in the relationship.")
105
106
107
class LLMLogicalOr(BaseModel):
    """
    Represents a logical OR relationship between criteria.
    """

    # Operands of the OR node: each item is either an atomic criterion or
    # another logical node. The quoted class names are forward references;
    # they are resolved by the module-level model_rebuild() calls.
    or_criteria: List[
        Union[
            SingleRequirementCriterion,
            "LLMLogicalAnd",
            "LLMLogicalOr",
            "LLMLogicalNot",
            "LLMLogicalXor",
            "LLMLogicalConditional",
        ]
    ] = Field(description="The criteria involved in the relationship.")
122
123
124
class LLMLogicalNot(BaseModel):
    """
    Represents a logical NOT operation on a criterion or logical expression. Ensure that negation isn't already implicit in the criterion's value (e.g., 'must not be older than 18' is already represented by 'age ≤ 18 and no need for additional LLMLogicalNot').
    """

    # Single operand (not a list): the atomic criterion or logical node being
    # negated. The quoted class names are forward references; they are
    # resolved by the module-level model_rebuild() calls.
    not_criteria: Union[
        SingleRequirementCriterion,
        "LLMLogicalAnd",
        "LLMLogicalOr",
        "LLMLogicalNot",
        "LLMLogicalXor",
        "LLMLogicalConditional",
    ] = Field(description="The criteria involved in the relationship.")
137
138
139
class LLMLogicalXor(BaseModel):
    """
    Represents a logical XOR relationship between criteria.
    """

    # Operands of the XOR node: each item is either an atomic criterion or
    # another logical node. The quoted class names are forward references;
    # they are resolved by the module-level model_rebuild() calls.
    xor_criteria: List[
        Union[
            SingleRequirementCriterion,
            "LLMLogicalAnd",
            "LLMLogicalOr",
            "LLMLogicalNot",
            "LLMLogicalXor",
            "LLMLogicalConditional",
        ]
    ] = Field(description="The criteria involved in the relationship.")
154
155
156
class LLMLogicalConditional(BaseModel):
    """
    Represents a conditional relationship between criteria.
    """

    # All three fields keep the explicit `...` (required) marker: the two
    # branch fields accept None as a value, yet must still be supplied.
    # The quoted class names are forward references; they are resolved by
    # the module-level model_rebuild() calls.

    # Antecedent: an atomic criterion or any logical node.
    condition: Union[
        SingleRequirementCriterion,
        "LLMLogicalAnd",
        "LLMLogicalOr",
        "LLMLogicalNot",
        "LLMLogicalXor",
        "LLMLogicalConditional",
    ] = Field(..., description="The condition criterion (antecedent)")

    # Branch for a satisfied condition; None is an accepted value.
    then_criteria: Union[
        SingleRequirementCriterion,
        "LLMLogicalAnd",
        "LLMLogicalOr",
        "LLMLogicalNot",
        "LLMLogicalXor",
        "LLMLogicalConditional",
        None,
    ] = Field(
        ...,
        description="The criteria that apply if the condition (antecedent) is met. (consequent)",
    )

    # Branch for an unsatisfied condition; None is an accepted value.
    else_criteria: Union[
        SingleRequirementCriterion,
        "LLMLogicalAnd",
        "LLMLogicalOr",
        "LLMLogicalNot",
        "LLMLogicalXor",
        "LLMLogicalConditional",
        None,
    ] = Field(
        ...,
        description="The criteria that apply if the condition (antecedent) is not met (optional consequent).",
    )
193
194
195
# The logical models refer to one another through quoted (forward-reference)
# names inside their Union annotations. Now that every class is defined,
# rebuild each schema, in definition order, so those references resolve.
for _logical_model in (
    LLMLogicalAnd,
    LLMLogicalOr,
    LLMLogicalNot,
    LLMLogicalXor,
    LLMLogicalConditional,
):
    _logical_model.model_rebuild()
del _logical_model  # do not leave the loop variable in the module namespace
201
202
203
# The LLM must be handed one concrete type to generate; a bare Union of the
# possible logical relation types cannot serve as that type, hence this wrapper.
class LLMLogicalWrapperResponse(BaseModel):
    """
    Represents the response from the LLM.
    """

    # Root of the logical structure: always one of the five logical node
    # models, never a bare SingleRequirementCriterion.
    logicalRepresentation: Union[
        LLMLogicalAnd,
        LLMLogicalOr,
        LLMLogicalNot,
        LLMLogicalXor,
        LLMLogicalConditional,
    ] = Field(description="The logical representation of the criteria.")
213
214
215
class LogicalLine(BaseModel):
    """
    Represents a line of eligibility criteria that has been logically structured.
    """

    # Design note: there is intentionally no Pydantic validator checking that
    # logical_structure covers every criterion of identified_line, because
    # failed lines must remain storable in this model. That check is performed
    # by the "logify" procedure instead.

    # Source line together with its identified criteria.
    identified_line: IdentifiedUnrolledLine = Field(
        description="The identified line this was made from."
    )

    # Either a lone atomic criterion or the root node of a logical tree.
    logical_structure: Union[
        SingleRequirementCriterion,
        LLMLogicalAnd,
        LLMLogicalOr,
        LLMLogicalNot,
        LLMLogicalXor,
        LLMLogicalConditional,
    ] = Field(description="The logically structured Criteria.")
234
235
236
class LogicalTrial(BaseModel):
    """
    Represents a complete trial with logically structured eligibility criteria.
    """

    # Every field is required: no Field() call below supplies a default.

    info: RawTrialData = Field(description="Raw data of the clinical trial.")

    # --- Lines that were logically structured successfully -----------------
    inclusion_lines: List[LogicalLine] = Field(
        description="List of inclusion lines successfully logically structurized."
    )
    exclusion_lines: List[LogicalLine] = Field(
        description="List of exclusion lines successfully logically structurized."
    )
    miscellaneous_lines: List[LogicalLine] = Field(
        description="List of miscellaneous lines successfully logically structurized."
    )

    # --- Lines for which logical structuring failed ------------------------
    failed_inclusion: List[LogicalLine] = Field(
        description="List of inclusion lines that failed to be logically structurized."
    )
    failed_exclusion: List[LogicalLine] = Field(
        description="List of exclusion lines that failed to be logically structurized."
    )
    failed_miscellaneous: List[LogicalLine] = Field(
        description="List of miscellaneous lines that failed to be logically structurized."
    )