In [1]:
import json
import os
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain, SimpleSequentialChain

from dotenv import load_dotenv

# Load variables from a local .env file (if one exists) into the process
# environment before reading the key. load_dotenv was imported but never
# called, so a key stored only in .env was never picked up.
load_dotenv()

# May be None when the variable is not set anywhere.
openai_api_key = os.getenv("OPENAI_API_KEY")

In [2]:
# Root of the project's data directory. It defaults to the original local
# checkout and can be redirected on another machine by setting the
# PATIENT_TRIALS_DATA_DIR environment variable before running the notebook.
DATA_DIR = os.getenv(
    "PATIENT_TRIALS_DATA_DIR",
    "/Users/bharathbeeravelly/Desktop/patient-trials-matching/data",
)

single_patient_ehr_path = os.path.join(
    DATA_DIR,
    "processed",
    "patients_small",
    "1a654b50-5c1d-ec96-1d56-8d7c12140983_data.json",
)
single_trial_criteria_path = os.path.join(
    DATA_DIR,
    "raw",
    "scraped_small",
    "NCT06576401_criteria.txt",
)

# Read the single patient EHR (JSON) and the trial criteria (plain text).
# The encoding is stated explicitly so the result does not depend on the
# platform's default locale.
with open(single_patient_ehr_path, encoding="utf-8") as f:
    patient_ehr = json.load(f)

with open(single_trial_criteria_path, encoding="utf-8") as f:
    trial_criteria = f.read()

In [3]:
# 1st LLM: Identify keywords for each criterion
def identify_criteria_keywords(trial_criteria):
    """Ask the chat model to name the keyword/attribute behind each trial criterion.

    Args:
        trial_criteria: Text of the trial's inclusion/exclusion criteria. It is
            inserted verbatim into the prompt.

    Returns:
        The response object returned by the chat model (not just its text).
    """
    # Instructions that frame the keyword-identification task for the model
    system_message = """
    You are a clinical trial assistant.
    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, and identify relevant keywords from each criterion.
    
    Common keywords may include: "Gender", "Age", "Race", "Ethnic Group", "Language", "BMI", "BPM", "Height", "Weight", etc.

    For each criterion, respond with the most relevant keyword or attribute it is concerned with.
    """

    # temperature=0 requests the least random output the model offers
    llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini', openai_api_key=openai_api_key)

    # The f-string inlines system_message right away; the doubled braces leave
    # a single-brace {criteria} placeholder for PromptTemplate to fill in.
    prompt_template = PromptTemplate(
        input_variables=["criteria"],
        template=f"""
        {system_message}

        Trial Criteria: {{criteria}}

        For each criterion, identify the relevant keyword or patient attribute.
        """
    )

    # Format the prompt with the actual trial criteria
    prompt = prompt_template.format(criteria=trial_criteria)

    # Use .invoke(): calling the model object directly (llm(prompt)) is the
    # deprecated form and emits a deprecation warning.
    response = llm.invoke(prompt)

    return response

In [4]:
def extract_relevant_patient_data(patient_ehr):
    """Pick the EHR fields that are forwarded to the eligibility prompt.

    Args:
        patient_ehr: Mapping holding the patient's record.

    Returns:
        A new dict containing exactly the fields listed below, in that order.
        A field absent from ``patient_ehr`` maps to ``None``.
    """
    forwarded_fields = (
        "Gender",
        "Age",
        "Race",
        "Ethnic Group",
        "Language",
        "Vital Signs",
        "Medications",
        "Problems",
        "Surgeries",
        "Immunizations",
    )
    # .get() keeps missing fields in the result as None instead of raising
    return {field: patient_ehr.get(field) for field in forwarded_fields}

In [5]:
# 2nd LLM: Evaluate patient eligibility based on keywords
def evaluate_criteria_by_keywords(criteria_keywords, patient_ehr, reference_date="07th October 2024"):
    """Ask the chat model for a Yes/No verdict per criterion keyword.

    In the requested output, "Yes" always means the criterion does not stand
    in the way of eligibility (an inclusion criterion that is met, or an
    exclusion criterion that is not met), and "No" means it does. Missing
    information is to be answered with "Yes".

    Args:
        criteria_keywords: Keywords from the first step. Either plain text or
            a chat-model response object; for the latter only its ``content``
            is placed in the prompt.
        patient_ehr: Mapping holding the patient's record; it is reduced with
            ``extract_relevant_patient_data`` before being sent.
        reference_date: Text stated to the model as today's date. The default
            keeps the date that was previously hard-coded in the prompt.

    Returns:
        The response object returned by the chat model; callers read the text
        from its ``content`` attribute.
    """
    # Instructions that define what "Yes" and "No" mean for each criterion type
    system_message = f"""
    You are a clinical trial assistant. Today's date is {reference_date}.
    Your task is to compare the patient's information (Gender, Age, Race, Ethnic Group, Language, Vital Signs)
    with the clinical trial's inclusion and exclusion criteria using the identified keywords.
    If any medication is used by the patient, then check the last usage of the medication.


    For each inclusion criterion, respond with one of the following:
    - "Yes" if the patient meets the criterion
    - "No" if there is evidence that the criterion is not met
    - "Yes" if there is no information available to determine eligibility.

    For each exclusion criterion, respond with one of the following:
    - "Yes" if the patient does not meet the criterion
    - "No" if there is evidence that the criterion is met
    - "Yes" if there is no information available to determine eligibility.

    """

    # temperature=0 requests the least random output the model offers
    llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini', openai_api_key=openai_api_key)

    # Extract relevant patient data from EHR
    relevant_patient_data = extract_relevant_patient_data(patient_ehr)

    # A response object formatted with str() would put its repr (metadata
    # included) into the prompt; use the text when one is available.
    keywords_text = getattr(criteria_keywords, "content", criteria_keywords)

    # Every variable part is a real template variable, so braces inside any of
    # the inserted values can never be mistaken for placeholders.
    # The prompt used to restate the Yes/No meanings here in a way that
    # contradicted the exclusion rules above and asked for a reason while also
    # forbidding reasoning; it now simply refers back to the rules above.
    prompt_template = PromptTemplate(
        input_variables=["system_message", "criteria_keywords", "patient_data"],
        template="""
        {system_message}

        Criteria Keywords: {criteria_keywords}

        Patient Information: {patient_data}

        For each criterion keyword, respond with only "Yes" or "No", following the rules above.

        While evaluating one criteria, consider only the respective criteria but not any other criteria.
        While rating the criteria, with 'Yes' or 'No', do not give any reasoning

        The format of response should be as below:

        Inclusion Criteria:
        - Keyword Placeholder 1: Yes
        - Keyword Placeholder 2: No
        - Keyword Placeholder 3: No
        - Keyword Placeholder 4: Yes
        .
        .
        .
        - Keyword Placeholder N: Yes

        Exclusion Criteria:
        - Keyword Placeholder 1: No
        - Keyword Placeholder 2: Yes
        - Keyword Placeholder 3: No
        - Keyword Placeholder 4: Yes
        .
        .
        .
        - Keyword Placeholder N: No

        While giving the response, do not output the whole criteria mentioned in the txt file. Instead, just give the keyword and the response.


        """
    )

    # Fill in the instructions, the keywords and the reduced patient record
    prompt = prompt_template.format(
        system_message=system_message,
        criteria_keywords=keywords_text,
        patient_data=relevant_patient_data,
    )

    # Use .invoke(): calling the model object directly (llm(prompt)) is the
    # deprecated form and emits a deprecation warning.
    response = llm.invoke(prompt)

    return response

In [6]:
def process_patient_eligibility(trial_criteria, patient_ehr):
    """Run the two LLM stages back to back and return the final response text.

    Args:
        trial_criteria: Text of the trial's criteria, passed to the keyword stage.
        patient_ehr: Patient record, passed to the evaluation stage.

    Returns:
        The ``content`` attribute of the evaluation stage's response.
    """
    # Stage 1: keywords per criterion
    keyword_response = identify_criteria_keywords(trial_criteria)

    # Stage 2: verdict per keyword for this patient
    verdict_response = evaluate_criteria_by_keywords(keyword_response, patient_ehr)

    return verdict_response.content


In [7]:
# Run the two-stage pipeline once for the loaded trial and patient. As the
# cell's last expression, the returned response text is displayed below.
process_patient_eligibility(trial_criteria, patient_ehr)

  response = llm(prompt)


'Inclusion Criteria:\n- Gender: No \n- Age: No \n- BMI: No \n\nExclusion Criteria:\n- Health Status: Yes\n- Medical History: Yes\n- Medication Use: No \n- Substance Use: Yes\n- Infectious Disease Status: Yes\n- Environmental Exposure: Yes'

In [9]:
# Pretty-print a hard-coded copy of a response string so that its "\n"
# escapes render as line breaks. The text is a literal, not a live result.
print('Inclusion Criteria:\n- Gender: No \n- Age: No \n- BMI: No \n\nExclusion Criteria:\n- Health Status: Yes\n- Medical History: Yes\n- Medication Use: No \n- Substance Use: Yes\n- Infectious Disease Status: Yes\n- Environmental Exposure: Yes'
)

Inclusion Criteria:
- Gender: No 
- Age: No 
- BMI: No 

Exclusion Criteria:
- Health Status: Yes
- Medical History: Yes
- Medication Use: No 
- Substance Use: Yes
- Infectious Disease Status: Yes
- Environmental Exposure: Yes


In [10]:
def determine_overall_eligibility(eligibility_results):
    """Reduce the model's per-criterion verdict text to one overall "Yes"/"No".

    Only bullet lines (those starting with "-") are verdicts; section headers
    such as "Inclusion Criteria:" and blank lines are ignored. Previously the
    header line was checked as if it were a verdict, so the result could
    never be "Yes".

    Following the evaluation prompt, "Yes" on any line, inclusion or
    exclusion, means that criterion does not block eligibility. The patient
    is therefore eligible only when every verdict is "Yes".

    Args:
        eligibility_results: Response text made of "- Keyword: Yes|No" lines.

    Returns:
        "Yes" when at least one verdict was found and all of them are "Yes",
        otherwise "No". Text without any verdict line yields "No", because
        eligibility cannot be established from it.
    """
    verdicts = []
    for raw_line in eligibility_results.splitlines():
        line = raw_line.strip()
        if not line.startswith('-'):
            continue  # header, blank line or filler such as "."
        # The verdict is whatever follows the last colon. A bullet without a
        # colon yields the whole line, which is simply not "Yes".
        verdicts.append(line.rpartition(':')[2].strip())

    overall_eligible = bool(verdicts) and all(verdict == 'Yes' for verdict in verdicts)

    return "Yes" if overall_eligible else "No"

# Run the pipeline again (this issues new LLM requests) and keep the response
# text in eligibility_results; later cells reuse that variable.
eligibility_results = process_patient_eligibility(trial_criteria, patient_ehr)
# Collapse the per-criterion verdicts into a single overall answer.
final_eligibility = determine_overall_eligibility(eligibility_results)

# Show the raw per-criterion text first, then the overall verdict.
print(f"Patient Eligibility Details:\n{eligibility_results}")
print(f"\nOverall Patient Eligibility: {final_eligibility}")

Patient Eligibility Details:
Inclusion Criteria:
- Gender: No 
- Age: No 
- BMI: No 
- Weight: Yes 

Exclusion Criteria:
- Health Status: Yes
- Medical History: Yes
- Medication Use: No
- Substance Use: Yes
- Infectious Disease Status: Yes
- Environmental Exposure: Yes

Overall Patient Eligibility: No


In [17]:
def parse_eligibility_results(eligibility_results):
    """Turn the model's verdict text into a ``{keyword: verdict}`` dict.

    Every bullet line of the form "- Keyword: Verdict" contributes one entry.
    Inclusion and exclusion bullets are handled the same way and share one
    flat dict. Section headers, blank lines and bullets without a colon are
    skipped instead of raising.

    Args:
        eligibility_results: Response text made of "- Keyword: Yes|No" lines.

    Returns:
        Dict mapping each keyword to its verdict text, in order of first
        appearance. The verdict is everything after the first colon, so any
        trailing explanation is kept rather than causing an error. When a
        keyword occurs more than once, a verdict other than "Yes" is never
        overwritten, so a failed criterion cannot be masked by a later
        "Yes" for the same keyword.
    """
    eligibility_dict = {}

    for raw_line in eligibility_results.splitlines():
        line = raw_line.strip()
        if not line.startswith('-'):
            continue  # header, blank line or filler such as "."

        key, separator, value = line.strip('- ').partition(':')
        if not separator:
            continue  # bullet without a "keyword: verdict" shape

        key, value = key.strip(), value.strip()
        # Store first occurrences; replace an existing entry only when it is
        # "Yes", so the least favourable verdict for a keyword wins.
        if eligibility_dict.get(key, 'Yes') == 'Yes':
            eligibility_dict[key] = value

    return eligibility_dict

def determine_overall_eligibility(eligibility_dict):
    """Return the overall verdict for a parsed ``{keyword: verdict}`` dict.

    Note: this definition shares its name with the earlier text-based
    function in this notebook and replaces it once this cell has run.

    Args:
        eligibility_dict: Mapping of criterion keyword to verdict text.

    Returns:
        "Yes" when the dict is non-empty and every verdict is exactly "Yes",
        otherwise "No". An empty dict yields "No": with no parsed verdicts
        there is nothing to support eligibility.
    """
    if not eligibility_dict:
        return "No"
    return "Yes" if all(value == 'Yes' for value in eligibility_dict.values()) else "No"

# Parse the response text kept from the earlier cell (no new LLM call here),
# then derive the overall verdict from the parsed dict.
eligibility_dict = parse_eligibility_results(eligibility_results)
final_eligibility = determine_overall_eligibility(eligibility_dict)

# List each keyword with its verdict, one per line.
print("Patient Eligibility Details:")
for criterion, status in eligibility_dict.items():
    print(f"{criterion}: {status}")

print(f"\nOverall Patient Eligibility: {final_eligibility}")

Patient Eligibility Details:
Gender: No
Age: No
BMI: No
Weight: Yes
Health Status: Yes
Medical History: Yes
Medication Use: No
Substance Use: Yes
Infectious Disease Status: Yes
Environmental Exposure: Yes

Overall Patient Eligibility: No


In [18]:
def extract_ids(patient_ehr_path, trial_criteria_path):
    """Derive the patient and trial identifiers from the two file names.

    Each identifier is the part of the file's base name that precedes the
    first underscore (the whole base name when it has no underscore).

    Args:
        patient_ehr_path: Path of the patient EHR file.
        trial_criteria_path: Path of the trial criteria file.

    Returns:
        Tuple ``(patient_id, trial_id)``.
    """
    def leading_token(path):
        # Directory part is dropped first, then everything from the first "_" on
        return os.path.basename(path).partition('_')[0]

    return leading_token(patient_ehr_path), leading_token(trial_criteria_path)

def extract_study_title(trial_criteria_path):
    """Read the study title from the first line of a trial criteria file.

    The first line is expected to look like "Study Title: {title}".

    Args:
        trial_criteria_path: Path of the trial criteria text file.

    Returns:
        The title with surrounding whitespace removed, or ``None`` when the
        first line does not start with "Study Title:".
    """
    prefix = "Study Title:"
    # The encoding is stated explicitly so reading does not depend on the
    # platform's default locale.
    with open(trial_criteria_path, 'r', encoding='utf-8') as f:
        first_line = f.readline().strip()

    if first_line.startswith(prefix):
        # Cut off only the leading marker. str.replace would also delete any
        # later "Study Title:" occurring inside the title itself.
        return first_line[len(prefix):].strip()
    return None

def create_eligibility_json(patient_id, trial_id, study_title, eligibility_dict):
    """Assemble the result record for one patient/trial pair.

    Args:
        patient_id: Identifier stored under "patientId".
        trial_id: Identifier stored under "trialId".
        study_title: Title stored under "studyTitle".
        eligibility_dict: Mapping of criterion keyword to verdict text.

    Returns:
        Dict with the patient id and a one-element "eligibleTrials" list whose
        entry names the keywords whose verdict is exactly "Yes".
    """
    # NOTE(review): the trial is listed under "eligibleTrials" whatever the
    # overall verdict is; confirm that this is the intended output schema.
    criteria_met = [
        keyword for keyword, verdict in eligibility_dict.items() if verdict == "Yes"
    ]
    trial_entry = {
        "trialId": trial_id,
        "studyTitle": study_title,
        "eligibilityCriteriaMet": criteria_met,
    }
    return {
        "patientId": patient_id,
        "eligibleTrials": [trial_entry],
    }

# Main process: derive both identifiers and the study title from the input files
patient_id, trial_id = extract_ids(single_patient_ehr_path, single_trial_criteria_path)
study_title = extract_study_title(single_trial_criteria_path)

# Process eligibility
# The LLM pipeline is not re-run here (call left disabled below); the
# eligibility_results text produced by an earlier cell is parsed again.
# eligibility_results = process_patient_eligibility(trial_criteria, patient_ehr)
eligibility_dict = parse_eligibility_results(eligibility_results)

# Create the JSON structure
eligibility_json = create_eligibility_json(patient_id, trial_id, study_title, eligibility_dict)

# Save the JSON file; the name is relative, so it is written to the current
# working directory.
output_filename = f"{patient_id}_eligibility.json"
with open(output_filename, 'w') as f:
    json.dump(eligibility_json, f, indent=2)

print(f"Eligibility results saved to {output_filename}")

# Print overall eligibility
final_eligibility = determine_overall_eligibility(eligibility_dict)
print(f"\nOverall Patient Eligibility: {final_eligibility}")

Eligibility results saved to 1a654b50-5c1d-ec96-1d56-8d7c12140983_eligibility.json

Overall Patient Eligibility: No
