# patient_matching/user_answer_parser.py
import logging
from datetime import datetime
from typing import List, Union

from pydantic import BaseModel, Field

from src.repositories.trial_repository import export_pydantic_to_json
from src.utils.config import TEMPERATURE, TIMEOUT
from src.utils.openai_client import get_openai_client

logger = logging.getLogger(__name__)


# -----------------------------------------------------------------------------
# Pydantic models for parsed user answer history
# -----------------------------------------------------------------------------
class RequirementResponse(BaseModel):
    requirement_type: str = Field(
        ..., description="The type of requirement (e.g., 'minimum', 'status')."
    )
    user_value: Union[str, int, float, bool] = Field(
        ..., description="The value provided by the user."
    )


class ParsedCriterion(BaseModel):
    criterion: str = Field(
        ..., description="The name of the criterion (e.g., 'age', 'lung status')."
    )
    responses: List[RequirementResponse] = Field(
        ..., description="List of responses for this criterion."
    )
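
# For illustration only: a single ParsedCriterion, once serialized, looks roughly
# like the following (the criterion name, requirement type, and value are made-up
# examples, not data from a real patient):
#
#     {
#         "criterion": "age",
#         "responses": [
#             {"requirement_type": "minimum", "user_value": 18}
#         ]
#     }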


class LLMResponse(BaseModel):
    """
    The format that the LLM will return, without any timestamp fields.
    This is used for parsing the OpenAI response.
    """

    question: str = Field(..., description="The question asked to the user.")
    parsed_answers: List[ParsedCriterion] = Field(
        ..., description="The parsed criteria and responses from the user's answer."
    )


class UserAnswerHistory(BaseModel):
    """
    Internal model that includes timestamp information.
    Can be constructed from an LLMResponse.
    """

    question: str = Field(..., description="The question asked to the user.")
    parsed_answers: List[ParsedCriterion] = Field(
        ..., description="The parsed criteria and responses from the user's answer."
    )
    timestamp: datetime = Field(
        default_factory=datetime.now, description="When this answer was recorded"
    )

    @classmethod
    def from_llm_response(cls, llm_response: LLMResponse) -> "UserAnswerHistory":
        """
        Creates a UserAnswerHistory from an LLMResponse, adding the timestamp.
        """
        return cls(
            question=llm_response.question, parsed_answers=llm_response.parsed_answers
        )


class ConversationHistory(BaseModel):
    """
    Stores the complete history of a conversation, including all question-answer pairs
    and their timestamps.
    """

    start_time: datetime = Field(
        default_factory=datetime.now, description="When the conversation started"
    )
    conversation: List[UserAnswerHistory] = Field(
        default_factory=list,
        description="List of all question-answer pairs in the conversation",
    )

    def add_response(
        self, question: str, parsed_answers: List[ParsedCriterion]
    ) -> None:
        """
        Adds a new question-answer pair to the conversation history.
        """
        # pylint flags E1101 here because it treats `conversation` as a Field object
        # rather than a list, so it incorrectly assumes there is no `append` method.
        self.conversation.append(  # pylint: disable=E1101
            UserAnswerHistory(question=question, parsed_answers=parsed_answers)
        )
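
# Illustrative usage of ConversationHistory (placeholder values, not taken from a
# real conversation):
#
#     history = ConversationHistory()
#     history.add_response(
#         question="How old are you?",
#         parsed_answers=[
#             ParsedCriterion(
#                 criterion="age",
#                 responses=[
#                     RequirementResponse(requirement_type="minimum", user_value=52)
#                 ],
#             )
#         ],
#     )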


def parse_user_response(user_input: str, question: str) -> UserAnswerHistory:
    """
    Calls an LLM (using OpenAI) to parse the user's free text response into a structured
    UserAnswerHistory object containing a list of ParsedCriterion objects.
    """
    client = get_openai_client()
    prompt = (
        "You are an expert in parsing patient responses for clinical trial eligibility. "
        "Given the following question and the user's response, extract all the criteria mentioned "
        "along with their requirement types and provided values. The output should be valid JSON corresponding "
        "to a list of objects with the following format:\n\n"
        "{\n"
        ' "criterion": "<criterion_name>",\n'
        ' "responses": [\n'
        ' { "requirement_type": "<type>", "user_value": <value> },\n'
        " ...\n"
        " ]\n"
        "}\n\n"
        "Now, use this format to build a JSON object that also includes the original question. "
        "The final JSON should have the following structure:\n\n"
        "{\n"
        ' "question": "<the question>",\n'
        ' "parsed_answers": [\n'
        ' { "criterion": "...", "responses": [ { "requirement_type": "...", "user_value": ... }, ... ] },\n'
        " ...\n"
        " ]\n"
        "}\n\n"
        "Question: " + question + "\n"
        "User Response: " + user_input + "\n\n"
        "Provide your answer in valid JSON."
    )
    try:
        completion = client.beta.chat.completions.parse(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": "You are a patient response parser."},
                {"role": "user", "content": prompt},
            ],
            temperature=TEMPERATURE,
            response_format=LLMResponse,
            timeout=TIMEOUT,
        )
        if completion.choices[0].message.parsed:
            llm_response = completion.choices[0].message.parsed
            return UserAnswerHistory.from_llm_response(llm_response)
        else:
            logger.error("LLM did not return a parsable response.")
            raise ValueError(
                "LLM response could not be parsed into a UserAnswerHistory object."
            )
    except Exception as e:
        logger.error("Error during user response parsing: %s", e)
        raise ValueError(f"Error during user response parsing: {e}") from e


def save_user_answer_history(history: UserAnswerHistory, file_name: str) -> bool:
    """
    Saves the UserAnswerHistory object to a JSON file using the existing export function.
    """
    # The export_pydantic_to_json function takes model, file_name, and folder as arguments.
    # We'll save to a folder named "user_data".
    folder = "user_data"
    return export_pydantic_to_json(history, file_name, folder)
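

# Minimal manual smoke test, included only as a sketch: it assumes get_openai_client()
# returns a client with access to gpt-4o, and that export_pydantic_to_json can write
# into a "user_data" folder. The question, answer, and file name below are illustrative
# placeholders, not real patient data.
if __name__ == "__main__":
    demo_question = "How old are you, and what is your current smoking status?"
    demo_answer = "I'm 52 and I stopped smoking five years ago."
    demo_history = parse_user_response(demo_answer, demo_question)
    print(demo_history)
    save_user_answer_history(demo_history, "demo_user_answer_history")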