Switch to side-by-side view

--- a
+++ b/src/models/logical_criteria.py
@@ -0,0 +1,263 @@
+# models/logical_criteria.py
+"""
+Data Models for Logical Structured Clinical Trial Criteria
+
+This module defines the Pydantic models used to represent the logical relationships
+between atomic criteria extracted from clinical trial eligibility criteria.
+
+Classes prefixed with 'LLM' are used as output formats for LLM processing, while
+other classes are used for internal data storage.
+
+Classes:
+    LLMLogicalAnd: Logical AND relationship between criteria.
+    LLMLogicalOr: Logical OR relationship between criteria.
+    LLMLogicalNot: Logical NOT operation on criteria.
+    LLMLogicalXor: Logical XOR relationship between criteria.
+    LLMLogicalConditional: Conditional (if-then-else) relationship.
+    LLMLogicalWrapperResponse: Container for LLM logical structure response.
+    LogicalLine: Line with identified criteria and logical structure.
+    LogicalTrial: Complete trial with logically structured criteria.
+
+Important:
+    The docstrings of Pydantic models are used as prompts for LLM processing.
+    Changing these docstrings will alter how the LLM interprets the output format.
+    Use code comments rather than docstring modifications if documentation changes
+    are needed without affecting LLM behavior.
+"""
+
+from typing import List, Union
+
+from pydantic import BaseModel, Field
+
+from src.models.identified_criteria import (
+    RawTrialData,
+    Requirement,
+)
+
+
+class SingleRequirementCriterion(BaseModel):
+    """
+    Represents an atomic criterion with a single requirement extracted from the eligibility criteria.
+
+    example:
+        input:
+        "Tissue from tumor must be available and > 2 cm in diameter.",
+
+        output:
+        "exact_snippets": "Tissue from tumor must be available ... > 2 cm in diameter.",
+        "criterion": "tumor tissue",
+        "requirement": {
+            "requirement_type": "availability",
+            "expected_value": true
+        }
+    """
+
+    exact_snippets: str = Field(
+        ...,
+        description="Exact text snippets from the eligibility criteria that were used to extract this criterion, using ellipses (...) for non-consecutive text.",
+    )
+
+    criterion: str = Field(
+        ...,
+        description="The specific property, attribute, or condition that is being tested (e.g., 'age', 'lung cancer', 'BMI').",
+    )
+
+    requirement: Requirement = Field(
+        ..., description="The requirement and its expected value for the criterion."
+    )
+
+    # needed to make this hashable for validating logical structures
+    def __eq__(self, other):
+        if isinstance(other, SingleRequirementCriterion):
+            return str(self) == str(other)
+        return False
+
+    def __hash__(self):
+        return hash((tuple(self.exact_snippets), self.criterion, self.requirement))
+
+
+class IdentifiedUnrolledLine(BaseModel):  # pairs one raw eligibility line with its list of single-requirement criteria
+    """
+    Represents a structured line of eligibility criteria.
+    """
+
+    line: str = Field(..., description="The original line of eligibility criteria.")
+    criterions: List[SingleRequirementCriterion] = Field(  # each entry carries exactly one requirement
+        ..., description="List of structured criteria."
+    )
+
+
+class LLMLogicalAnd(BaseModel):  # AND node; per the module docstring this docstring is LLM prompt text, so document via comments only
+    """
+    Represents a logical AND relationship between criteria.
+    """
+
+    and_criteria: List[  # operands: leaf criteria and/or nested logical nodes
+        Union[
+            SingleRequirementCriterion,
+            "LLMLogicalAnd",  # quoted names are forward references, resolved by the model_rebuild() calls further down
+            "LLMLogicalOr",
+            "LLMLogicalNot",
+            "LLMLogicalXor",
+            "LLMLogicalConditional",
+        ]
+    ] = Field(..., description="The criteria involved in the relationship.")
+
+
+class LLMLogicalOr(BaseModel):  # OR node; per the module docstring this docstring is LLM prompt text, so document via comments only
+    """
+    Represents a logical OR relationship between criteria.
+    """
+
+    or_criteria: List[  # operands: leaf criteria and/or nested logical nodes
+        Union[
+            SingleRequirementCriterion,
+            "LLMLogicalAnd",  # quoted names are forward references, resolved by the model_rebuild() calls further down
+            "LLMLogicalOr",
+            "LLMLogicalNot",
+            "LLMLogicalXor",
+            "LLMLogicalConditional",
+        ]
+    ] = Field(..., description="The criteria involved in the relationship.")
+
+
+class LLMLogicalNot(BaseModel):  # NOT node; per the module docstring this docstring is LLM prompt text, so document via comments only
+    """
+    Represents a logical NOT operation on a criterion or logical expression. Ensure that negation isn't already implicit in the criterion's value (e.g., 'must not be older than 18' is already represented by 'age ≤ 18 and no need for additional LLMLogicalNot').
+    """
+
+    not_criteria: Union[  # a single operand (not a list): one leaf criterion or one nested logical node
+        SingleRequirementCriterion,
+        "LLMLogicalAnd",  # quoted names are forward references, resolved by the model_rebuild() calls further down
+        "LLMLogicalOr",
+        "LLMLogicalNot",
+        "LLMLogicalXor",
+        "LLMLogicalConditional",
+    ] = Field(..., description="The criteria involved in the relationship.")
+
+
+class LLMLogicalXor(BaseModel):  # XOR node; per the module docstring this docstring is LLM prompt text, so document via comments only
+    """
+    Represents a logical XOR relationship between criteria.
+    """
+
+    xor_criteria: List[  # operands: leaf criteria and/or nested logical nodes
+        Union[
+            SingleRequirementCriterion,
+            "LLMLogicalAnd",  # quoted names are forward references, resolved by the model_rebuild() calls further down
+            "LLMLogicalOr",
+            "LLMLogicalNot",
+            "LLMLogicalXor",
+            "LLMLogicalConditional",
+        ]
+    ] = Field(..., description="The criteria involved in the relationship.")
+
+
+class LLMLogicalConditional(BaseModel):  # if/then/else node; per the module docstring this docstring is LLM prompt text, so document via comments only
+    """
+    Represents a conditional relationship between criteria.
+    """
+
+    condition: Union[  # antecedent: one leaf criterion or one nested logical node, never None
+        SingleRequirementCriterion,
+        "LLMLogicalAnd",  # quoted names are forward references, resolved by the model_rebuild() calls further down
+        "LLMLogicalOr",
+        "LLMLogicalNot",
+        "LLMLogicalXor",
+        "LLMLogicalConditional",
+    ] = Field(..., description="The condition criterion (antecedent)")
+    then_criteria: Union[  # consequent applied when the condition holds
+        SingleRequirementCriterion,
+        "LLMLogicalAnd",
+        "LLMLogicalOr",
+        "LLMLogicalNot",
+        "LLMLogicalXor",
+        "LLMLogicalConditional",
+        None,  # the value may be null, but Field(...) still makes the key itself required
+    ] = Field(
+        ...,
+        description="The criteria that apply if the condition (antecedent) is met. (consequent)",
+    )
+    else_criteria: Union[  # consequent applied when the condition does not hold
+        SingleRequirementCriterion,
+        "LLMLogicalAnd",
+        "LLMLogicalOr",
+        "LLMLogicalNot",
+        "LLMLogicalXor",
+        "LLMLogicalConditional",
+        None,  # the value may be null, but Field(...) still makes the key itself required
+    ] = Field(
+        ...,
+        description="The criteria that apply if the condition (antecedent) is not met (optional consequent).",
+    )
+
+
+# The five node classes name each other as quoted forward references inside their Union types; rebuild each schema now that all of them are defined
+LLMLogicalAnd.model_rebuild()
+LLMLogicalOr.model_rebuild()
+LLMLogicalNot.model_rebuild()
+LLMLogicalXor.model_rebuild()
+LLMLogicalConditional.model_rebuild()
+
+
+# This wrapper exists because the LLM needs one concrete type to generate,
+# not a bare Union of the possible logical relation types.
+class LLMLogicalWrapperResponse(BaseModel):  # per the module docstring this docstring is LLM prompt text, so document via comments only
+    """
+    Represents the response from the LLM.
+    """
+
+    logicalRepresentation: Union[  # root is always an operator node; a bare SingleRequirementCriterion is not part of this Union
+        LLMLogicalAnd, LLMLogicalOr, LLMLogicalNot, LLMLogicalXor, LLMLogicalConditional
+    ] = Field(..., description="The logical representation of the criteria.")
+
+
+class LogicalLine(BaseModel):  # storage model: an identified line together with its logical structure
+    """
+    Represents a line of eligibility criteria that has been logically structured.
+    """
+
+    identified_line: IdentifiedUnrolledLine = Field(  # the source line and its flat list of criteria
+        ..., description="The identified line this was made from."
+    )
+    logical_structure: Union[  # unlike the LLM wrapper response, a lone leaf criterion is allowed as the root here
+        SingleRequirementCriterion,
+        LLMLogicalAnd,
+        LLMLogicalOr,
+        LLMLogicalNot,
+        LLMLogicalXor,
+        LLMLogicalConditional,
+    ] = Field(..., description="The logically structured Criteria.")
+    # Note: there is deliberately no Pydantic validator checking that logical_structure
+    # covers every criterion of identified_line, because failed lines must remain
+    # storable in this model. That check is performed by the "logify" procedure instead.
+
+
+class LogicalTrial(BaseModel):  # storage model: raw trial data plus three successful and three failed line buckets
+    """
+    Represents a complete trial with logically structured eligibility criteria.
+    """
+
+    info: RawTrialData = Field(..., description="Raw data of the clinical trial.")
+    inclusion_lines: List[LogicalLine] = Field(  # successfully structured lines, by category
+        ..., description="List of inclusion lines successfully logically structurized."
+    )
+    exclusion_lines: List[LogicalLine] = Field(
+        ..., description="List of exclusion lines successfully logically structurized."
+    )
+    miscellaneous_lines: List[LogicalLine] = Field(
+        ...,
+        description="List of miscellaneous lines successfully logically structurized.",
+    )
+    failed_inclusion: List[LogicalLine] = Field(  # failed lines reuse LogicalLine, which is why that model carries no coverage validator
+        ...,
+        description="List of inclusion lines that failed to be logically structurized.",
+    )
+    failed_exclusion: List[LogicalLine] = Field(
+        ...,
+        description="List of exclusion lines that failed to be logically structurized.",
+    )
+    failed_miscellaneous: List[LogicalLine] = Field(  # every field is declared with Field(...), so all seven keys are required
+        ...,
+        description="List of miscellaneous lines that failed to be logically structurized.",
+    )