In [1]:
import json
import os
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain, SimpleSequentialChain

from dotenv import load_dotenv

In [2]:
openai_api_key = os.getenv("OPENAI_API_KEY")

In [3]:
single_patient_ehr_path = '/Users/bharathbeeravelly/Desktop/patient-trials-matching/data/processed/patients_small/1a654b50-5c1d-ec96-1d56-8d7c12140983_data.json'
single_trial_criteria_path = '/Users/bharathbeeravelly/Desktop/patient-trials-matching/data/raw/scraped_small/NCT06576401_criteria.txt'

In [4]:
# Read the single patient EHR and trial criteria
with open(single_patient_ehr_path) as f:
    patient_ehr = json.load(f)

with open(single_trial_criteria_path) as f:
    trial_criteria = f.read()

In [5]:
print(patient_ehr)

{'Patient ID': '1a654b50-5c1d-ec96-1d56-8d7c12140983', 'Given Name': 'Andra606', 'Gender': 'M', 'Birth Time': '20100325105620', 'Age': 22, 'Race': 'white', 'Ethnic Group': 'non-hispanic', 'Language': 'en-US', 'Medications': [{'Start': '2015-03-12T13:27:59Z', 'Stop': '2015-03-12T13:27:59Z', 'Description': 'sodium fluoride 0.0272 MG/MG Oral Gel', 'Duration of Usage': '1 days', 'Last Usage': '3495 days ago'}, {'Start': '2015-04-04T10:56:20Z', 'Stop': '2015-04-18T10:56:20Z', 'Description': 'Ibuprofen 100 MG Oral Tablet', 'Duration of Usage': '15 days', 'Last Usage': '3459 days ago'}, {'Start': '2016-03-17T13:56:46Z', 'Stop': '2016-03-17T13:56:46Z', 'Description': 'sodium fluoride 0.0272 MG/MG Oral Gel', 'Duration of Usage': '1 days', 'Last Usage': '3124 days ago'}, {'Start': '2017-12-18T08:56:20Z', 'Stop': '2017-12-28T16:56:20Z', 'Description': 'Penicillin V Potassium 250 MG Oral Tablet', 'Duration of Usage': '11 days', 'Last Usage': '2473 days ago'}, {'Start': '2019-11-05T09:56:20Z', 'Sto

In [6]:
print(trial_criteria)

Inclusion/Exclusion Criteria:
Description

Inclusion Criteria:
Male, healthy adult;
Age of 18 to 45 years old (both inclusive);
Body mass index (BMI) between 19.0- 32.0 kg/m2 (both inclusive) and body weight no less than 50.0 kg.


Exclusion Criteria:
Clinically significant diseases at the time of screening;
History or family history of medullary thyroid carcinoma, thyroid C-cell hyperplasia, or multiple endocrine neoplasia type 2 (MEN2), or calcitonin ≥ 35 ng/L during the screening period;
History of chronic pancreatitis or acute pancreatitis within 3 months prior to screening;
History of acute cholecystitis attack within 3 months prior to screening;
Participant judged by investigator has dysphagia, diseases or conditions that affect gastric emptying or affect the absorption of nutrients in the gastrointestinal tract, such as bariatric surgery or other gastrectomy, irritable bowel syndrome, dyspepsia, etc.;
Any of the following: habitual constipation or diarrhea, hemorrhoids or accomp

In [7]:
# 1st LLM: Identify keywords for each criterion
def identify_criteria_keywords(trial_criteria):
    # Define the system message for the LLM to identify relevant keywords
    system_message = """
    You are a clinical trial assistant.
    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, and identify relevant keywords from each criterion.
    
    Common keywords may include: "Gender", "Age", "Race", "Ethnic Group", "Language", "BMI", "BPM", "Height", "Weight", etc.

    For each criterion, respond with the most relevant keyword or attribute it is concerned with.
    """

    # Initialize the OpenAI LLM model
    llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini', openai_api_key=openai_api_key)

    # Create the prompt for keyword identification
    prompt_template = PromptTemplate(
        input_variables=["criteria"],
        template=f"""
        {system_message}

        Trial Criteria: {{criteria}}

        For each criterion, identify the relevant keyword or patient attribute.
        """
    )
    
    # Format the prompt with the actual trial criteria
    prompt = prompt_template.format(criteria=trial_criteria)
    
    # Send the prompt to the LLM for processing
    response = llm(prompt)
    
    # Return the keywords identified by the LLM
    return response

In [8]:
# Function to extract only the relevant information from Patient_EHR for LLM processing
def extract_relevant_patient_data(patient_ehr):
    # Extracting necessary attributes from the patient EHR
    relevant_data = {
        "Gender": patient_ehr.get("Gender"),
        "Age": patient_ehr.get("Age"),
        "Race": patient_ehr.get("Race"),
        "Ethnic Group": patient_ehr.get("Ethnic Group"),
        "Language": patient_ehr.get("Language"),
        "Vital Signs": patient_ehr.get("Vital Signs"),
    }
    return relevant_data

In [9]:
# 2nd LLM: Evaluate patient eligibility based on keywords
def evaluate_criteria_by_keywords(criteria_keywords, patient_ehr):
    # Define the system message for the LLM to evaluate eligibility
    system_message = """
    You are a clinical trial assistant.
    Your task is to compare the patient's information (Gender, Age, Race, Ethnic Group, Language, Vital Signs) 
    with the clinical trial's inclusion and exclusion criteria using the identified keywords.
    
    For each criterion, respond with one of the following:
    - "Yes" if the patient meets the criterion
    - "No" if the patient does not meet the criterion
    - "No Information" if the necessary patient information is missing to assess this criterion
    """

    # Initialize the OpenAI LLM model
    llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini', openai_api_key=openai_api_key)

    # Extract relevant patient data from EHR
    relevant_patient_data = extract_relevant_patient_data(patient_ehr)

    # Create the prompt for evaluating eligibility
    prompt_template = PromptTemplate(
        input_variables=["criteria_keywords", "patient_data"],
        template=f"""
        {system_message}

        Criteria Keywords: {{criteria_keywords}}

        Patient Information: {{patient_data}}

        For each criterion keyword, respond with:
        - "Yes" if the patient meets the criterion
        - "No" if the patient does not meet the criterion
        - "No Information" if the necessary patient information is missing.
        
        While evaluating one criteria, consider only the respective criteria but not any other criteria.
        While rating the criteria, with 'Yes' or 'No' or 'No Information', do not give any reasoning
        
      
        
        """
    )

    # Format the prompt with the criteria keywords and patient data
    prompt = prompt_template.format(
        criteria_keywords=criteria_keywords,
        patient_data=relevant_patient_data
    )
    
    # Send the prompt to the LLM for processing
    response = llm(prompt)
    
    return response

In [10]:
def process_patient_eligibility(trial_criteria, patient_ehr):
    # Step 1: Identify keywords from trial criteria
    criteria_keywords = identify_criteria_keywords(trial_criteria)
    
    # Step 2: Evaluate patient eligibility based on identified keywords
    eligibility_results = evaluate_criteria_by_keywords(criteria_keywords, patient_ehr)
    
    return eligibility_results


In [11]:
process_patient_eligibility(trial_criteria, patient_ehr)

  response = llm(prompt)


AIMessage(content='1. Gender: Yes  \n2. Age: Yes  \n3. BMI: Yes  \n4. Weight: Yes  \n5. Health Status: No Information  \n6. Medical History: No Information  \n7. Medication Use: No Information  \n8. Substance Use: No Information  \n9. Infectious Disease Status: No Information  \n10. Environmental Exposure: No Information  ', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 74, 'prompt_tokens': 4625, 'total_tokens': 4699, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-bc60d403-a0c5-4569-9463-31da557a10e1-0', usage_metadata={'input_tokens': 4625, 'output_tokens': 74, 'total_tokens': 4699, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}})

In [12]:
# Define LLM model (you can adjust the model parameters as needed)
llm = ChatOpenAI(temperature=0, model='gpt-4o-mini', openai_api_key=openai_api_key)

### Step 1: First Chain - Identify Keywords from Criteria ###

# Create the prompt template for identifying keywords
keyword_template = """
You are a clinical trial assistant.
Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, 
and identify relevant keywords from each criterion.

Common keywords may include: "Gender", "Age", "Race", "Ethnic Group", "Language", BMI, BPM, Weight, Height etc.

For each criterion, respond with the most relevant keyword or patient attribute it is concerned with.

Trial Criteria: {criteria}
"""

# Create the PromptTemplate
keyword_prompt = PromptTemplate(
    input_variables=["criteria"],
    template=keyword_template
)

# Create the first chain that identifies keywords
keyword_chain = LLMChain(
    llm=llm,
    prompt=keyword_prompt
)

### Step 2: Second Chain - Evaluate Criteria Based on Keywords ###

# Create the prompt template for evaluating eligibility
evaluation_template = """
You are a clinical trial assistant.
Your task is to compare the patient's information (Gender, Age, Race, Ethnic Group, Language, Vital Signs)
with the clinical trial's inclusion and exclusion criteria using the identified keywords.

For each criterion keyword, respond with:
- "Yes" if the patient meets the criterion
- "No" if the patient does not meet the criterion
- "No Information" if the necessary patient information is missing to assess this criterion.

Do not give any reasoning for your response. Only respond with "Yes", "No", or "No Information".

Criteria Keywords: {criteria_keywords}
Patient Information: {patient_data}
"""

# Create the PromptTemplate for the second LLM
evaluation_prompt = PromptTemplate(
    input_variables=["criteria_keywords", "patient_data"],
    template=evaluation_template
)

# Create the second chain that evaluates criteria eligibility
evaluation_chain = LLMChain(
    llm=llm,
    prompt=evaluation_prompt
)

### Step 3: Combine Both Chains ###

# Now, we'll combine both chains into a sequential chain
def process_patient_eligibility(trial_criteria, patient_ehr):
    # Step 1: Get the keywords from the criteria using the first chain
    criteria_keywords = keyword_chain.run(criteria=trial_criteria)
   
    # Extract the relevant patient information from the EHR for the second chain
    relevant_patient_data = {
        "Gender": patient_ehr.get("Gender"),
        "Age": patient_ehr.get("Age"),
        "Race": patient_ehr.get("Race"),
        "Ethnic Group": patient_ehr.get("Ethnic Group"),
        "Language": patient_ehr.get("Language"),
        "Vital Signs": patient_ehr.get("Vital Signs"),
        "Medications": patient_ehr.get("Medications"),
        "Problems": patient_ehr.get("Problems"),
        "Surgeries": patient_ehr.get("Surgeries"),
        "Immunizations": patient_ehr.get("Immunizations"),
    }
    
    # Step 2: Pass the keywords and patient data into the second chain for evaluation
    eligibility_results = evaluation_chain.run(
        criteria_keywords=criteria_keywords,
        patient_data=relevant_patient_data
    )
    
    # Return the final eligibility results
    return eligibility_results

  keyword_chain = LLMChain(


In [13]:
process_patient_eligibility(trial_criteria, patient_ehr)

  criteria_keywords = keyword_chain.run(criteria=trial_criteria)


'1. **Gender**: Yes  \n2. **Age**: Yes  \n3. **BMI**: Yes  \n4. **Weight**: Yes  \n5. **Health Status**: No Information  \n6. **Medical History**: No Information  \n7. **Biomarker**: No Information  \n8. **Medical History**: No Information  \n9. **Medical History**: No Information  \n10. **Gastrointestinal Conditions**: No Information  \n11. **Gastrointestinal Conditions**: No Information  \n12. **Medication Use**: No  \n13. **Health Status**: No Information  \n14. **Substance Abuse**: No Information  \n15. **Infectious Disease**: No Information  \n16. **Occupational Exposure**: No Information  \n17. **Age**: Yes  \n18. **Gender**: Yes  \n19. **Healthy Volunteers**: No Information  '

In [14]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain_openai import ChatOpenAI

# Define LLM model
llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini')

# Step 1: Extract Keywords from Trial Criteria
def extract_keywords_chain():
    """
    First chain to extract keywords from trial criteria.
    """
    keyword_template = """
    You are a clinical trial assistant.
    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, 
    and identify relevant keywords from each criterion.

    Common keywords may include: "Gender", "Age", "Race", "Ethnic Group", "Language", BMI, BPM, Weight, Height, etc.

    Trial Criteria: {criteria}
    """
    
    keyword_prompt = PromptTemplate(
        input_variables=["criteria"],
        template=keyword_template
    )
    
    return LLMChain(llm=llm, prompt=keyword_prompt, output_key="criteria_keywords")

# Step 2: Evaluate Criteria Based on Patient Data
def evaluate_patient_chain():
    """
    Second chain to evaluate each keyword against the patient's data.
    """
    evaluation_template = """
    You are a clinical trial assistant.
    Your task is to compare the patient's information with the clinical trial's inclusion and exclusion criteria.

    For each criterion keyword, respond with:
    - "Yes" if the patient meets the criterion
    - "No" if the patient does not meet the criterion
    - "No Information" if the necessary patient information is missing to assess this criterion.

    Criteria Keywords: {criteria_keywords}
    Patient Information: {patient_data}
    """
    
    evaluation_prompt = PromptTemplate(
        input_variables=["criteria_keywords", "patient_data"],
        template=evaluation_template
    )
    
    chain = LLMChain(llm=llm, prompt=evaluation_prompt, output_key="criteria_results")
    

    
    return chain

# Step 3: Make Final Eligibility Decision
def final_decision_chain():
    """
    Third chain to make the final eligibility decision based on the evaluated criteria.
    """
    decision_template = """
    You are an eligibility checker.
    Your task is to evaluate the results of a patient's eligibility for a clinical trial.

    Eligibility criteria results: {criteria_results}

    Rules:
    - If there is at least one "No", the final eligibility is "No".
    - If there are only "Yes" and "No Information", the final eligibility is "Yes".

    Provide the final eligibility decision. Just give a straight yes or no. Do not give any reasoning.
    """
    
    decision_prompt = PromptTemplate(
        input_variables=["criteria_results"],
        template=decision_template
    )
    
    return LLMChain(llm=llm, prompt=decision_prompt, output_key="final_decision")


# Now, create a Sequential Chain combining all three
def create_sequential_chain():
    """
    Create the entire pipeline chain that runs the three steps sequentially.
    """
    # Define the three chains
    extract_keywords = extract_keywords_chain()
    evaluate_patient = evaluate_patient_chain()
    final_decision = final_decision_chain()
    
    # Create the sequential chain
    sequential_chain = SequentialChain(
        chains=[extract_keywords, evaluate_patient, final_decision],
        input_variables=["criteria", "patient_data"],
        output_variables=["final_decision"]
    )
    
    return sequential_chain

# Example usage
def process_patient_eligibility(trial_criteria, patient_data):
    """
    Main function to process eligibility using a sequential chain.
    
    Args:
    trial_criteria (str): The inclusion/exclusion criteria text of the trial.
    patient_data (dict): A dictionary containing patient information.
    
    Returns:
    str: The final eligibility decision ('Yes' or 'No').
    """
    # Create the sequential chain
    sequential_chain = create_sequential_chain()
    
    # Run the chain with trial criteria and patient data
    final_result = sequential_chain.run({
        "criteria": trial_criteria,
        "patient_data": patient_data
    })
    
    return final_result


In [15]:
process_patient_eligibility(trial_criteria, patient_ehr)

'Yes\n\nReasoning: The patient meets all the inclusion criteria and there are no definitive "No" responses in the exclusion criteria, only "No Information." According to the rules, if there is at least one "No," the eligibility would be "No," but since there are only "Yes" and "No Information," the final eligibility is "Yes."'

In [43]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
import json

# Define LLM model
llm = ChatOpenAI(temperature=0, model='gpt-4o-mini')

# Step 1: Extract Keywords from Trial Criteria
def extract_keywords_chain():
    """
    First chain to extract keywords from trial criteria.
    """
    keyword_template = """
    You are a clinical trial assistant.
    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, 
    and identify relevant keywords from each criterion.

    Common keywords may include: "Gender", "Age", "Race", "Ethnic Group", "Language", BMI, BPM, Weight, Height, etc.

    Trial Criteria: {criteria}
    """
    
    keyword_prompt = PromptTemplate(
        input_variables=["criteria"],
        template=keyword_template
    )
    
    return LLMChain(llm=llm, prompt=keyword_prompt, output_key="criteria_keywords")

# Step 2: Evaluate Criteria Based on Patient Data
def evaluate_patient_chain():
    """
    Second chain to evaluate each keyword against the patient's data.
    """
    evaluation_template = """
    You are a clinical trial assistant.
    Your task is to compare the patient's information with the clinical trial's inclusion and exclusion criteria.

    For each criterion keyword, respond with:
    - "Yes" if the patient meets the criterion
    - "No" if the patient does not meet the criterion
    - "No Information" if the necessary patient information is missing to assess this criterion.

    Criteria Keywords: {criteria_keywords}
    Patient Information: {patient_data}
    """
    
    evaluation_prompt = PromptTemplate(
        input_variables=["criteria_keywords", "patient_data"],
        template=evaluation_template
    )
    
    return LLMChain(llm=llm, prompt=evaluation_prompt, output_key="criteria_results")

# Step 3: Make Final Eligibility Decision
def final_decision_chain():
    decision_template = """
    You are an eligibility checker.
    Your task is to evaluate the results of a patient's eligibility for a clinical trial.

    Eligibility criteria results: {criteria_results}

    Rules:
    - If there is at least one "No", the final eligibility is "No".
    - If there are only "Yes" and "No Information", the final eligibility is "Yes".

    Provide only the final eligibility decision as 'Yes' or 'No'. Do not include any other text or explanation.
    """
    
    decision_prompt = PromptTemplate(
        input_variables=["criteria_results"],
        template=decision_template
    )
    
    return LLMChain(llm=llm, prompt=decision_prompt, output_key="final_decision")

def json_output_parser_chain():
    json_template = """
    Format the following information into a JSON structure:

    Patient ID: {patient_id}
    Trial ID: {trial_id}
    Final Decision: {final_decision}

    The JSON should have the following structure:
    {{
      "patientId": "{patient_id}",
      "eligibleTrials": [
        {{
          "trialId": "{trial_id}",
          "eligibilityCriteriaMet": [],
          "moreInformationNeeded": [],
          "finalEligibility": "{final_decision}"
        }}
      ]
    }}

    Ensure that the output is a valid JSON string.
    """

    json_prompt = PromptTemplate(
        input_variables=["patient_id", "trial_id", "final_decision"],
        template=json_template
    )

    return LLMChain(llm=llm, prompt=json_prompt, output_key="json_output")

    


# Now, create a Sequential Chain combining all four
def create_sequential_chain():
    """
    Create the entire pipeline chain that runs the three steps sequentially.
    """
    # Define the three chains
    extract_keywords = extract_keywords_chain()
    evaluate_patient = evaluate_patient_chain()
    final_decision = final_decision_chain()
    json_output = json_output_chain()  # Add the new JSON output chain

    # Create the sequential chain
    sequential_chain = SequentialChain(
        chains=[
            extract_keywords,
            evaluate_patient,
            final_decision,
            json_output
        ],
        input_variables=["criteria", "patient_data", "patient_id", "trial_id"],
        output_variables=["json_output"],
        # Collect necessary outputs from earlier chains for JSON output
        return_only_final_output=False  # Return all outputs for further processing
    )
    
    return sequential_chain

def process_patient_eligibility(trial_criteria, patient_data, trial_filename):
    trial_id = trial_filename.split('_')[0]
    patient_id = patient_data.get("Patient ID")
    
    sequential_chain = create_sequential_chain()

    final_result = sequential_chain.run({
        "criteria": trial_criteria,
        "patient_data": json.dumps(patient_data),
        "patient_id": patient_id,
        "trial_id": trial_id
    })
    
    print("Final result:", final_result)  # Add this line to inspect the output
    
    try:
        json_result = json.loads(final_result)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        print(f"Raw output: {final_result}")
        json_result = {
            "patientId": patient_id,
            "eligibleTrials": [{
                "trialId": trial_id,
                "eligibilityCriteriaMet": [],
                "moreInformationNeeded": [],
                "finalEligibility": "Error"
            }]
        }
    
    output_filename = f"{patient_id}_qualifies.json"
    with open(output_filename, 'w') as json_file:
        json.dump(json_result, json_file, indent=4)

    return json_result


In [46]:
process_patient_eligibility(trial_criteria, patient_ehr, 'NCT06576401_criteria.txt')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [45]:
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain, SequentialChain
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
import json

# Define LLM model
llm = ChatOpenAI(temperature=0, model='gpt-4o-mini')

# Step 1: Extract Keywords from Trial Criteria
def extract_keywords_chain():
    keyword_template = """
    You are a clinical trial assistant.
    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, 
    and identify relevant keywords from each criterion.

    Common keywords may include: "Gender", "Age", "Race", "Ethnic Group", "Language", BMI, BPM, Weight, Height, etc.

    Trial Criteria: {criteria}

    Provide the keywords as a comma-separated list.
    """
    
    keyword_prompt = PromptTemplate(
        input_variables=["criteria"],
        template=keyword_template
    )
    
    return LLMChain(llm=llm, prompt=keyword_prompt, output_key="criteria_keywords")

# Step 2: Evaluate Criteria Based on Patient Data
def evaluate_patient_chain():
    evaluation_template = """
    You are a clinical trial assistant.
    Your task is to compare the patient's information with the clinical trial's inclusion and exclusion criteria.

    For each criterion keyword, respond with:
    - "Yes" if the patient meets the criterion
    - "No" if the patient does not meet the criterion
    - "No Information" if the necessary patient information is missing to assess this criterion.

    Criteria Keywords: {criteria_keywords}
    Patient Information: {patient_data}

    Provide your response as a list of dictionaries, each containing 'criterion' and 'result' keys.
    """
    
    evaluation_prompt = PromptTemplate(
        input_variables=["criteria_keywords", "patient_data"],
        template=evaluation_template
    )
    
    return LLMChain(llm=llm, prompt=evaluation_prompt, output_key="criteria_results")

# Step 3: Make Final Eligibility Decision
def final_decision_chain():
    decision_template = """
    You are an eligibility checker.
    Your task is to evaluate the results of a patient's eligibility for a clinical trial.

    Eligibility criteria results: {criteria_results}

    Rules:
    - If there is at least one "No", the final eligibility is "No".
    - If there are only "Yes" and "No Information", the final eligibility is "Yes".

    Provide only the final eligibility decision as 'Yes' or 'No'. Do not include any other text or explanation.
    """
    
    decision_prompt = PromptTemplate(
        input_variables=["criteria_results"],
        template=decision_template
    )
    
    return LLMChain(llm=llm, prompt=decision_prompt, output_key="final_decision")

# Step 4: Format results into JSON
def json_output_parser_chain():
    json_template = """
    Format the following information into a JSON structure:

    Patient ID: {patient_id}
    Trial ID: {trial_id}
    Final Decision: {final_decision}

    The JSON should have the following structure:
    {{
      "patientId": "{patient_id}",
      "eligibleTrials": [
        {{
          "trialId": "{trial_id}",
          "eligibilityCriteriaMet": [],
          "moreInformationNeeded": [],
          "finalEligibility": "{final_decision}"
        }}
      ]
    }}

    Ensure that the output is a valid JSON string.
    """

    json_prompt = PromptTemplate(
        input_variables=["patient_id", "trial_id", "final_decision"],
        template=json_template
    )

    return LLMChain(llm=llm, prompt=json_prompt, output_key="json_output")

# Create Sequential Chain
def create_sequential_chain():
    extract_keywords = extract_keywords_chain()
    evaluate_patient = evaluate_patient_chain()
    final_decision = final_decision_chain()
    json_output = json_output_parser_chain()

    sequential_chain = SequentialChain(
        chains=[extract_keywords, evaluate_patient, final_decision, json_output],
        input_variables=["criteria", "patient_data", "patient_id", "trial_id"],
        output_variables=["json_output"]
    )

    return sequential_chain

# Main Processing Function
def process_patient_eligibility(trial_criteria, patient_data, trial_filename):
    trial_id = trial_filename.split('_')[0]
    patient_id = patient_data.get("Patient ID")
    
    sequential_chain = create_sequential_chain()

    final_result = sequential_chain.run({
        "criteria": trial_criteria,
        "patient_data": json.dumps(patient_data),
        "patient_id": patient_id,
        "trial_id": trial_id
    })
    
    print("Final result:", final_result)  # Add this line to inspect the output
    
    try:
        json_result = json.loads(final_result)
    except json.JSONDecodeError as e:
        print(f"Error decoding JSON: {e}")
        print(f"Raw output: {final_result}")
        json_result = {
            "patientId": patient_id,
            "eligibleTrials": [{
                "trialId": trial_id,
                "eligibilityCriteriaMet": [],
                "moreInformationNeeded": [],
                "finalEligibility": "Error"
            }]
        }
    
    output_filename = f"{patient_id}_qualifies.json"
    with open(output_filename, 'w') as json_file:
        json.dump(json_result, json_file, indent=4)

    return json_result

In [47]:
process_patient_eligibility(trial_criteria, patient_ehr, 'NCT06576401_criteria.txt')

JSONDecodeError: Expecting value: line 1 column 1 (char 0)