Patient-Trials-Matching / Git / [8c54ae] /notebooks/ai

Models:
joseph-gordon/
Patient-Trials-Matching
Downloads: 1
[8c54ae]: / notebooks / ai_finetuning.ipynb
History
Download this file
1000 lines (999 with data), 78.5 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.chains import LLMChain\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain.chains import LLMChain, SimpleSequentialChain\n",
    "\n",
    "from dotenv import load_dotenv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "openai_api_key = os.getenv(\"OPENAI_API_KEY\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "single_patient_ehr_path = '/Users/bharathbeeravelly/Desktop/patient-trials-matching/data/processed/patients_small/1a654b50-5c1d-ec96-1d56-8d7c12140983_data.json'\n",
    "single_trial_criteria_path = '/Users/bharathbeeravelly/Desktop/patient-trials-matching/data/raw/scraped_small/NCT06576401_criteria.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the single patient EHR and trial criteria\n",
    "with open(single_patient_ehr_path) as f:\n",
    "    patient_ehr = json.load(f)\n",
    "\n",
    "with open(single_trial_criteria_path) as f:\n",
    "    trial_criteria = f.read()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'Patient ID': '1a654b50-5c1d-ec96-1d56-8d7c12140983', 'Given Name': 'Andra606', 'Gender': 'M', 'Birth Time': '20100325105620', 'Age': 22, 'Race': 'white', 'Ethnic Group': 'non-hispanic', 'Language': 'en-US', 'Medications': [{'Start': '2015-03-12T13:27:59Z', 'Stop': '2015-03-12T13:27:59Z', 'Description': 'sodium fluoride 0.0272 MG/MG Oral Gel', 'Duration of Usage': '1 days', 'Last Usage': '3495 days ago'}, {'Start': '2015-04-04T10:56:20Z', 'Stop': '2015-04-18T10:56:20Z', 'Description': 'Ibuprofen 100 MG Oral Tablet', 'Duration of Usage': '15 days', 'Last Usage': '3459 days ago'}, {'Start': '2016-03-17T13:56:46Z', 'Stop': '2016-03-17T13:56:46Z', 'Description': 'sodium fluoride 0.0272 MG/MG Oral Gel', 'Duration of Usage': '1 days', 'Last Usage': '3124 days ago'}, {'Start': '2017-12-18T08:56:20Z', 'Stop': '2017-12-28T16:56:20Z', 'Description': 'Penicillin V Potassium 250 MG Oral Tablet', 'Duration of Usage': '11 days', 'Last Usage': '2473 days ago'}, {'Start': '2019-11-05T09:56:20Z', 'Stop': '2019-11-16T09:56:20Z', 'Description': 'Amoxicillin 250 MG / Clavulanate 125 MG Oral Tablet', 'Duration of Usage': '12 days', 'Last Usage': '1786 days ago'}, {'Start': '2023-05-04T14:08:25Z', 'Stop': '2023-05-04T14:08:25Z', 'Description': 'sodium fluoride 0.0272 MG/MG Oral Gel', 'Duration of Usage': '1 days', 'Last Usage': '520 days ago'}, {'Start': '2023-05-17T23:13:37Z', 'Stop': '2023-06-02T23:13:37Z', 'Description': 'Acetaminophen 325 MG Oral Tablet', 'Duration of Usage': '17 days', 'Last Usage': '491 days ago'}, {'Start': '2024-03-04T11:59:32Z', 'Stop': '2024-05-27T11:59:32Z', 'Description': 'Ibuprofen 200 MG Oral Tablet', 'Duration of Usage': '85 days', 'Last Usage': '131 days ago'}], 'Diagnostic Results': [{'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'CBC panel - Blood by Automated count'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'CBC panel - Blood by Automated count'}, {'Start': '2022-04-14T11:32:31Z', 'Stop': None, 'Description': 'Patient Health Questionnaire-9: Modified for Teens [Reported.PHQ.Teen]'}, {'Start': '2023-04-20T11:31:03Z', 'Stop': None, 'Description': 'Patient Health Questionnaire-9: Modified for Teens [Reported.PHQ.Teen]'}, {'Start': '2024-04-25T11:25:54Z', 'Stop': None, 'Description': 'Morse Fall Scale panel'}, {'Start': '2024-04-25T12:00:06Z', 'Stop': None, 'Description': 'Patient Health Questionnaire-9: Modified for Teens [Reported.PHQ.Teen]'}], 'Problems': [{'Start': '2014-02-27T10:56:20Z', 'Stop': '2016-03-10T10:56:20Z', 'Description': 'Medication review due (situation)'}, {'Start': '2014-08-31T10:56:20Z', 'Stop': '2015-03-05T10:56:20Z', 'Description': 'Otitis media'}, {'Start': '2015-04-04T10:56:20Z', 'Stop': '2016-03-10T10:56:20Z', 'Description': 'Otitis media'}, {'Start': '2016-03-17T13:21:07Z', 'Stop': '2016-03-17T13:56:46Z', 'Description': 'Primary dental caries (disorder)'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': '2018-03-22T10:56:20Z', 'Description': 'Medication review due (situation)'}, {'Start': '2017-12-18T08:56:20Z', 'Stop': '2017-12-28T16:56:20Z', 'Description': 'Streptococcal sore throat (disorder)'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': '2021-04-08T10:56:20Z', 'Description': 'Medication review due (situation)'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': '2019-04-04T13:22:01Z', 'Description': 'Gingivitis (disorder)'}, {'Start': '2019-11-05T09:56:20Z', 'Stop': '2019-11-16T09:56:20Z', 'Description': 'Viral sinusitis (disorder)'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': '2020-04-09T13:08:19Z', 'Description': 'Gingivitis (disorder)'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': '2024-04-25T10:56:20Z', 'Description': 'Medication review due (situation)'}, {'Start': '2023-05-17T22:56:20Z', 'Stop': '2023-06-02T23:13:37Z', 'Description': 'Acute bronchitis (disorder)'}, {'Start': '2024-03-04T11:20:36Z', 'Stop': '2024-05-27T11:59:32Z', 'Description': 'Fracture of bone (disorder)'}, {'Start': '2024-03-04T11:20:36Z', 'Stop': '2024-05-27T11:59:32Z', 'Description': 'Fracture of forearm'}], 'Surgeries': [{'Start': '2015-03-05T10:56:20Z', 'Stop': '2015-03-05T11:16:12Z', 'Description': 'Patient referral for dental care (procedure)'}, {'Start': '2015-03-12T10:56:20Z', 'Stop': '2015-03-12T11:26:24Z', 'Description': 'Dental consultation and report (procedure)'}, {'Start': '2015-03-12T11:26:24Z', 'Stop': '2015-03-12T11:56:02Z', 'Description': 'Dental care (regime/therapy)'}, {'Start': '2015-03-12T11:56:02Z', 'Stop': '2015-03-12T12:33:23Z', 'Description': 'Removal of supragingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2015-03-12T12:33:23Z', 'Stop': '2015-03-12T12:46:04Z', 'Description': 'Removal of subgingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2015-03-12T12:46:04Z', 'Stop': '2015-03-12T13:16:04Z', 'Description': 'Dental X-ray bitewing (procedure)'}, {'Start': '2015-03-12T12:46:04Z', 'Stop': '2015-03-12T13:27:59Z', 'Description': 'Examination of gingivae (procedure)'}, {'Start': '2015-03-12T13:27:59Z', 'Stop': '2015-03-12T13:52:32Z', 'Description': 'Dental fluoride treatment (procedure)'}, {'Start': '2015-03-12T13:52:32Z', 'Stop': '2015-03-12T14:07:14Z', 'Description': 'Oral health education (procedure)'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': '2016-03-10T11:11:20Z', 'Description': 'Medication Reconciliation (procedure)'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': '2016-03-10T11:11:28Z', 'Description': 'Patient referral for dental care (procedure)'}, {'Start': '2016-03-17T10:56:20Z', 'Stop': '2016-03-17T11:12:24Z', 'Description': 'Dental consultation and report (procedure)'}, {'Start': '2016-03-17T11:12:24Z', 'Stop': '2016-03-17T11:46:14Z', 'Description': 'Dental care (regime/therapy)'}, {'Start': '2016-03-17T11:46:14Z', 'Stop': '2016-03-17T12:10:36Z', 'Description': 'Removal of supragingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2016-03-17T12:10:36Z', 'Stop': '2016-03-17T12:38:48Z', 'Description': 'Removal of subgingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2016-03-17T12:38:48Z', 'Stop': '2016-03-17T13:21:07Z', 'Description': 'Examination of gingivae (procedure)'}, {'Start': '2016-03-17T13:21:07Z', 'Stop': '2016-03-17T13:56:46Z', 'Description': 'Application of composite dental filling material to dentin of tooth following fracture of tooth (procedure)'}, {'Start': '2016-03-17T13:56:46Z', 'Stop': '2016-03-17T14:27:29Z', 'Description': 'Dental fluoride treatment (procedure)'}, {'Start': '2016-03-17T14:27:29Z', 'Stop': '2016-03-17T14:43:17Z', 'Description': 'Oral health education (procedure)'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': '2018-03-22T11:11:20Z', 'Description': 'Medication Reconciliation (procedure)'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': '2019-03-28T11:06:06Z', 'Description': 'Patient referral for dental care (procedure)'}, {'Start': '2019-04-04T10:56:20Z', 'Stop': '2019-04-04T11:22:51Z', 'Description': 'Dental consultation and report (procedure)'}, {'Start': '2019-04-04T11:22:51Z', 'Stop': '2019-04-04T11:53:47Z', 'Description': 'Dental care (regime/therapy)'}, {'Start': '2019-04-04T11:53:47Z', 'Stop': '2019-04-04T12:18:05Z', 'Description': 'Removal of supragingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2019-04-04T12:18:05Z', 'Stop': '2019-04-04T12:35:56Z', 'Description': 'Removal of subgingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2019-04-04T12:35:56Z', 'Stop': '2019-04-04T13:05:56Z', 'Description': 'Dental X-ray bitewing (procedure)'}, {'Start': '2019-04-04T12:35:56Z', 'Stop': '2019-04-04T13:22:01Z', 'Description': 'Examination of gingivae (procedure)'}, {'Start': '2019-04-04T13:22:01Z', 'Stop': '2019-04-04T13:35:13Z', 'Description': 'Oral health education (procedure)'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': '2020-04-02T11:09:57Z', 'Description': 'Patient referral for dental care (procedure)'}, {'Start': '2020-04-09T10:56:20Z', 'Stop': '2020-04-09T11:01:20Z', 'Description': 'Dental consultation and report (procedure)'}, {'Start': '2020-04-09T11:01:20Z', 'Stop': '2020-04-09T11:33:55Z', 'Description': 'Dental care (regime/therapy)'}, {'Start': '2020-04-09T11:33:55Z', 'Stop': '2020-04-09T12:13:11Z', 'Description': 'Removal of supragingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2020-04-09T12:13:11Z', 'Stop': '2020-04-09T12:38:17Z', 'Description': 'Removal of subgingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2020-04-09T12:38:17Z', 'Stop': '2020-04-09T13:08:17Z', 'Description': 'Dental X-ray bitewing (procedure)'}, {'Start': '2020-04-09T12:38:17Z', 'Stop': '2020-04-09T13:08:19Z', 'Description': 'Examination of gingivae (procedure)'}, {'Start': '2020-04-09T13:08:19Z', 'Stop': '2020-04-09T13:26:04Z', 'Description': 'Oral health education (procedure)'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': '2021-04-08T11:11:20Z', 'Description': 'Medication Reconciliation (procedure)'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': '2022-04-14T11:09:14Z', 'Description': 'Depression screening (procedure)'}, {'Start': '2022-04-14T11:09:14Z', 'Stop': '2022-04-14T11:32:31Z', 'Description': 'Depression screening using Patient Health Questionnaire Nine Item score (procedure)'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': '2023-04-20T11:06:31Z', 'Description': 'Depression screening (procedure)'}, {'Start': '2023-04-20T11:06:31Z', 'Stop': '2023-04-20T11:31:03Z', 'Description': 'Depression screening using Patient Health Questionnaire Nine Item score (procedure)'}, {'Start': '2023-04-20T11:31:03Z', 'Stop': '2023-04-20T11:43:45Z', 'Description': 'Assessment of substance use (procedure)'}, {'Start': '2023-04-20T11:43:45Z', 'Stop': '2023-04-20T12:02:29Z', 'Description': 'Assessment using Car, Relax, Alone, Forget, Friends, Trouble Screening Test (procedure)'}, {'Start': '2023-04-20T12:02:29Z', 'Stop': '2023-04-20T12:17:08Z', 'Description': 'Anticipatory guidance (procedure)'}, {'Start': '2023-04-20T12:17:08Z', 'Stop': '2023-04-20T12:23:08Z', 'Description': 'Patient referral for dental care (procedure)'}, {'Start': '2023-05-04T10:56:20Z', 'Stop': '2023-05-04T11:32:36Z', 'Description': 'Dental consultation and report (procedure)'}, {'Start': '2023-05-04T11:32:36Z', 'Stop': '2023-05-04T12:03:58Z', 'Description': 'Dental care (regime/therapy)'}, {'Start': '2023-05-04T12:03:58Z', 'Stop': '2023-05-04T12:44:47Z', 'Description': 'Removal of supragingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2023-05-04T12:44:47Z', 'Stop': '2023-05-04T13:11:26Z', 'Description': 'Removal of subgingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2023-05-04T13:11:26Z', 'Stop': '2023-05-04T13:41:26Z', 'Description': 'Dental X-ray bitewing (procedure)'}, {'Start': '2023-05-04T13:11:26Z', 'Stop': '2023-05-04T14:08:25Z', 'Description': 'Examination of gingivae (procedure)'}, {'Start': '2023-05-04T14:08:25Z', 'Stop': '2023-05-04T14:32:38Z', 'Description': 'Dental fluoride treatment (procedure)'}, {'Start': '2023-05-04T14:32:38Z', 'Stop': '2023-05-04T14:47:12Z', 'Description': 'Oral health education (procedure)'}, {'Start': '2023-05-17T22:56:20Z', 'Stop': '2023-05-17T23:13:37Z', 'Description': 'Measurement of respiratory function (procedure)'}, {'Start': '2024-03-04T11:20:36Z', 'Stop': '2024-03-04T11:50:36Z', 'Description': 'Upper arm X-ray'}, {'Start': '2024-03-04T11:20:36Z', 'Stop': '2024-03-04T11:59:32Z', 'Description': 'Bone immobilization'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': '2024-04-25T11:11:20Z', 'Description': 'Medication Reconciliation (procedure)'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': '2024-04-25T11:25:54Z', 'Description': 'Assessment using Morse Fall Scale (procedure)'}, {'Start': '2024-04-25T11:25:54Z', 'Stop': '2024-04-25T11:36:16Z', 'Description': 'Depression screening (procedure)'}, {'Start': '2024-04-25T11:36:16Z', 'Stop': '2024-04-25T12:00:06Z', 'Description': 'Depression screening using Patient Health Questionnaire Nine Item score (procedure)'}, {'Start': '2024-04-25T12:00:06Z', 'Stop': '2024-04-25T12:13:38Z', 'Description': 'Assessment of substance use (procedure)'}, {'Start': '2024-04-25T12:13:38Z', 'Stop': '2024-04-25T12:40:26Z', 'Description': 'Assessment using Car, Relax, Alone, Forget, Friends, Trouble Screening Test (procedure)'}, {'Start': '2024-04-25T12:40:26Z', 'Stop': '2024-04-25T12:54:16Z', 'Description': 'Anticipatory guidance (procedure)'}, {'Start': '2024-04-25T12:54:16Z', 'Stop': '2024-04-25T13:07:53Z', 'Description': 'Patient referral for dental care (procedure)'}, {'Start': '2024-05-09T10:56:20Z', 'Stop': '2024-05-09T11:30:07Z', 'Description': 'Dental consultation and report (procedure)'}, {'Start': '2024-05-09T11:30:07Z', 'Stop': '2024-05-09T12:14:31Z', 'Description': 'Dental care (regime/therapy)'}, {'Start': '2024-05-09T12:14:31Z', 'Stop': '2024-05-09T12:43:09Z', 'Description': 'Removal of supragingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2024-05-09T12:43:09Z', 'Stop': '2024-05-09T13:30:21Z', 'Description': 'Removal of subgingival plaque and calculus from all teeth using dental instrument (procedure)'}, {'Start': '2024-05-09T13:30:21Z', 'Stop': '2024-05-09T14:00:21Z', 'Description': 'Dental X-ray bitewing (procedure)'}, {'Start': '2024-05-09T13:30:21Z', 'Stop': '2024-05-09T14:17:29Z', 'Description': 'Examination of gingivae (procedure)'}, {'Start': '2024-05-09T14:17:29Z', 'Stop': '2024-05-09T14:31:02Z', 'Description': 'Oral health education (procedure)'}], 'Vital Signs': [{'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '106.8 cm'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '3 {score}'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '50.1 kg'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '20.62 kg/m2'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '92.13623365376648 %'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '82 /min'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '13 /min'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '113.8 cm'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '0 {score}'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '24.3 kg'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '18.75 kg/m2'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '94.90994653589968 %'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '82 /min'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '16 /min'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '120.5 cm'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '0 {score}'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '26.7 kg'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '18.36 kg/m2'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '90.38332648597121 %'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '77 /min'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '13 /min'}, {'Start': '2017-12-18T08:56:20Z', 'Stop': None, 'Description': 'Body temperature', 'Value': '38.671689933626276 Cel'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '126.6 cm'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '2 {score}'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '30.4 kg'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '18.95 kg/m2'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '89.36099657742018 %'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '96 /min'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '13 /min'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '132.3 cm'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '2 {score}'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '35.4 kg'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '20.22 kg/m2'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '90.61941316580473 %'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '89 /min'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '16 /min'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '137.4 cm'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '4 {score}'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '40.7 kg'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '21.57 kg/m2'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '91.79078263856259 %'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '66 /min'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '16 /min'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '143.3 cm'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '4 {score}'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '47.4 kg'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '23.07 kg/m2'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '92.97208216027344 %'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '78 /min'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '14 /min'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '150.6 cm'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '1 {score}'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '55.5 kg'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '24.47 kg/m2'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '93.71114207459172 %'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '73 /min'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '14 /min'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '156.5 cm'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '1 {score}'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '60.8 kg'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '24.81 kg/m2'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '92.42356008307917 %'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '63 /min'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '13 /min'}, {'Start': '2023-05-25T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '157.2 cm'}, {'Start': '2023-05-25T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '3 {score}'}, {'Start': '2023-05-25T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '62.5 kg'}, {'Start': '2023-05-25T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '25.28 kg/m2'}, {'Start': '2023-05-25T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '93.07621611126119 %'}, {'Start': '2023-05-25T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '66 /min'}, {'Start': '2023-05-25T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '15 /min'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Body Height', 'Value': '159.8 cm'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Pain severity - 0-10 verbal numeric rating [Score] - Reported', 'Value': '1 {score}'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Body Weight', 'Value': '67.8 kg'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Ratio]', 'Value': '26.53 kg/m2'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Body mass index (BMI) [Percentile] Per age and sex', 'Value': '93.84712204904622 %'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Heart rate', 'Value': '81 /min'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Respiratory rate', 'Value': '16 /min'}], 'Immunizations': [{'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'varicella'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'IPV'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'DTaP'}, {'Start': '2015-03-05T10:56:20Z', 'Stop': None, 'Description': 'MMR'}, {'Start': '2016-03-10T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2017-03-16T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2018-03-22T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2019-03-28T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2020-04-02T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Tdap'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'HPV, quadrivalent'}, {'Start': '2021-04-08T10:56:20Z', 'Stop': None, 'Description': 'meningococcal MCV4P'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2022-04-14T10:56:20Z', 'Stop': None, 'Description': 'HPV, quadrivalent'}, {'Start': '2022-05-05T10:56:20Z', 'Stop': None, 'Description': 'COVID-19, mRNA, LNP-S, PF, 30 mcg/0.3 mL dose'}, {'Start': '2022-05-26T10:56:20Z', 'Stop': None, 'Description': 'COVID-19, mRNA, LNP-S, PF, 30 mcg/0.3 mL dose'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}, {'Start': '2023-04-20T10:56:20Z', 'Stop': None, 'Description': 'HPV, quadrivalent'}, {'Start': '2024-04-25T10:56:20Z', 'Stop': None, 'Description': 'Influenza, seasonal, injectable, preservative free'}]}\n"
     ]
    }
   ],
   "source": [
    "print(patient_ehr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Inclusion/Exclusion Criteria:\n",
      "Description\n",
      "\n",
      "Inclusion Criteria:\n",
      "Male, healthy adult;\n",
      "Age of 18 to 45 years old (both inclusive);\n",
      "Body mass index (BMI) between 19.0- 32.0 kg/m2 (both inclusive) and body weight no less than 50.0 kg.\n",
      "\n",
      "\n",
      "Exclusion Criteria:\n",
      "Clinically significant diseases at the time of screening;\n",
      "History or family history of medullary thyroid carcinoma, thyroid C-cell hyperplasia, or multiple endocrine neoplasia type 2 (MEN2), or calcitonin ≥ 35 ng/L during the screening period;\n",
      "History of chronic pancreatitis or acute pancreatitis within 3 months prior to screening;\n",
      "History of acute cholecystitis attack within 3 months prior to screening;\n",
      "Participant judged by investigator has dysphagia, diseases or conditions that affect gastric emptying or affect the absorption of nutrients in the gastrointestinal tract, such as bariatric surgery or other gastrectomy, irritable bowel syndrome, dyspepsia, etc.;\n",
      "Any of the following: habitual constipation or diarrhea, hemorrhoids or accompanied by perianal disease, irritable bowel syndrome, inflammatory bowel disease, etc;\n",
      "Use of any prescription medication, over-the-counter medication, Chinese herbal medicine or food supplement within 14 days or 5 half-lives (whichever is longer) prior to the screening period;\n",
      "Patients with any abnormal and clinically significant examinations in given comprehensive physical examination;\n",
      "History of drug abuse (e.g., morphine, ketamine, tetrahydrocannabinolic acid, metham phetamine, methylenedioxyamphetamine, cocaine) or positive urine drug test during the screening period;\n",
      "Positive for hepatitis B surface antigen or E antigen, hepatitis C virus antibody IgG (Anti-HCV IgG), human immunodeficiency virus antigen/antibody combination test (HIV-Ag/Ab) and Treponema pallidum antibody during the screening period.\n",
      "Engaged in working conditions requiring long-term exposure to radioactivity; or significant radioactive exposure ( ≥ 2 CT chest/abdomen tests, or ≥ 3 all other types of X-rays test) within 1 year prior to the trail, or participated in radio-label trails;\n",
      "\n",
      "\n",
      "Other Criteria:\n",
      "Ages Eligible for Study\n",
      "18 Years to 45 Years (Adult )\n",
      "Sexes Eligible for Study\n",
      "Male\n",
      "Accepts Healthy Volunteers\n",
      "Yes\n"
     ]
    }
   ],
   "source": [
    "print(trial_criteria)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 1st LLM: Identify keywords for each criterion\n",
    "def identify_criteria_keywords(trial_criteria):\n",
    "    # Define the system message for the LLM to identify relevant keywords\n",
    "    system_message = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, and identify relevant keywords from each criterion.\n",
    "    \n",
    "    Common keywords may include: \"Gender\", \"Age\", \"Race\", \"Ethnic Group\", \"Language\", \"BMI\", \"BPM\", \"Height\", \"Weight\", etc.\n",
    "\n",
    "    For each criterion, respond with the most relevant keyword or attribute it is concerned with.\n",
    "    \"\"\"\n",
    "\n",
    "    # Initialize the OpenAI LLM model\n",
    "    llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini', openai_api_key=openai_api_key)\n",
    "\n",
    "    # Create the prompt for keyword identification\n",
    "    prompt_template = PromptTemplate(\n",
    "        input_variables=[\"criteria\"],\n",
    "        template=f\"\"\"\n",
    "        {system_message}\n",
    "\n",
    "        Trial Criteria: {{criteria}}\n",
    "\n",
    "        For each criterion, identify the relevant keyword or patient attribute.\n",
    "        \"\"\"\n",
    "    )\n",
    "    \n",
    "    # Format the prompt with the actual trial criteria\n",
    "    prompt = prompt_template.format(criteria=trial_criteria)\n",
    "    \n",
    "    # Send the prompt to the LLM for processing\n",
    "    response = llm(prompt)\n",
    "    \n",
    "    # Return the keywords identified by the LLM\n",
    "    return response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Function to extract only the relevant information from Patient_EHR for LLM processing\n",
    "def extract_relevant_patient_data(patient_ehr):\n",
    "    # Extracting necessary attributes from the patient EHR\n",
    "    relevant_data = {\n",
    "        \"Gender\": patient_ehr.get(\"Gender\"),\n",
    "        \"Age\": patient_ehr.get(\"Age\"),\n",
    "        \"Race\": patient_ehr.get(\"Race\"),\n",
    "        \"Ethnic Group\": patient_ehr.get(\"Ethnic Group\"),\n",
    "        \"Language\": patient_ehr.get(\"Language\"),\n",
    "        \"Vital Signs\": patient_ehr.get(\"Vital Signs\"),\n",
    "    }\n",
    "    return relevant_data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 2nd LLM: Evaluate patient eligibility based on keywords\n",
    "def evaluate_criteria_by_keywords(criteria_keywords, patient_ehr):\n",
    "    # Define the system message for the LLM to evaluate eligibility\n",
    "    system_message = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to compare the patient's information (Gender, Age, Race, Ethnic Group, Language, Vital Signs) \n",
    "    with the clinical trial's inclusion and exclusion criteria using the identified keywords.\n",
    "    \n",
    "    For each criterion, respond with one of the following:\n",
    "    - \"Yes\" if the patient meets the criterion\n",
    "    - \"No\" if the patient does not meet the criterion\n",
    "    - \"No Information\" if the necessary patient information is missing to assess this criterion\n",
    "    \"\"\"\n",
    "\n",
    "    # Initialize the OpenAI LLM model\n",
    "    llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini', openai_api_key=openai_api_key)\n",
    "\n",
    "    # Extract relevant patient data from EHR\n",
    "    relevant_patient_data = extract_relevant_patient_data(patient_ehr)\n",
    "\n",
    "    # Create the prompt for evaluating eligibility\n",
    "    prompt_template = PromptTemplate(\n",
    "        input_variables=[\"criteria_keywords\", \"patient_data\"],\n",
    "        template=f\"\"\"\n",
    "        {system_message}\n",
    "\n",
    "        Criteria Keywords: {{criteria_keywords}}\n",
    "\n",
    "        Patient Information: {{patient_data}}\n",
    "\n",
    "        For each criterion keyword, respond with:\n",
    "        - \"Yes\" if the patient meets the criterion\n",
    "        - \"No\" if the patient does not meet the criterion\n",
    "        - \"No Information\" if the necessary patient information is missing.\n",
    "        \n",
    "        While evaluating one criteria, consider only the respective criteria but not any other criteria.\n",
    "        While rating the criteria, with 'Yes' or 'No' or 'No Information', do not give any reasoning\n",
    "        \n",
    "      \n",
    "        \n",
    "        \"\"\"\n",
    "    )\n",
    "\n",
    "    # Format the prompt with the criteria keywords and patient data\n",
    "    prompt = prompt_template.format(\n",
    "        criteria_keywords=criteria_keywords,\n",
    "        patient_data=relevant_patient_data\n",
    "    )\n",
    "    \n",
    "    # Send the prompt to the LLM for processing\n",
    "    response = llm(prompt)\n",
    "    \n",
    "    return response"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_patient_eligibility(trial_criteria, patient_ehr):\n",
    "    # Step 1: Identify keywords from trial criteria\n",
    "    criteria_keywords = identify_criteria_keywords(trial_criteria)\n",
    "    \n",
    "    # Step 2: Evaluate patient eligibility based on identified keywords\n",
    "    eligibility_results = evaluate_criteria_by_keywords(criteria_keywords, patient_ehr)\n",
    "    \n",
    "    return eligibility_results\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/vh/y8k1dgkj76s01krzn8qtn3nw0000gn/T/ipykernel_37847/552846339.py:32: LangChainDeprecationWarning: The method `BaseChatModel.__call__` was deprecated in langchain-core 0.1.7 and will be removed in 1.0. Use :meth:`~invoke` instead.\n",
      "  response = llm(prompt)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "AIMessage(content='1. Gender: Yes  \\n2. Age: Yes  \\n3. BMI: Yes  \\n4. Weight: Yes  \\n5. Health Status: No Information  \\n6. Medical History: No Information  \\n7. Medication Use: No Information  \\n8. Substance Use: No Information  \\n9. Infectious Disease Status: No Information  \\n10. Environmental Exposure: No Information  ', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 74, 'prompt_tokens': 4625, 'total_tokens': 4699, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f85bea6784', 'finish_reason': 'stop', 'logprobs': None}, id='run-bc60d403-a0c5-4569-9463-31da557a10e1-0', usage_metadata={'input_tokens': 4625, 'output_tokens': 74, 'total_tokens': 4699, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}})"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "process_patient_eligibility(trial_criteria, patient_ehr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/vh/y8k1dgkj76s01krzn8qtn3nw0000gn/T/ipykernel_37847/805543201.py:26: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 1.0. Use :meth:`~RunnableSequence, e.g., `prompt | llm`` instead.\n",
      "  keyword_chain = LLMChain(\n"
     ]
    }
   ],
   "source": [
    "# Define LLM model (you can adjust the model parameters as needed)\n",
    "llm = ChatOpenAI(temperature=0, model='gpt-4o-mini', openai_api_key=openai_api_key)\n",
    "\n",
    "### Step 1: First Chain - Identify Keywords from Criteria ###\n",
    "\n",
    "# Create the prompt template for identifying keywords\n",
    "keyword_template = \"\"\"\n",
    "You are a clinical trial assistant.\n",
    "Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, \n",
    "and identify relevant keywords from each criterion.\n",
    "\n",
    "Common keywords may include: \"Gender\", \"Age\", \"Race\", \"Ethnic Group\", \"Language\", BMI, BPM, Weight, Height etc.\n",
    "\n",
    "For each criterion, respond with the most relevant keyword or patient attribute it is concerned with.\n",
    "\n",
    "Trial Criteria: {criteria}\n",
    "\"\"\"\n",
    "\n",
    "# Create the PromptTemplate\n",
    "keyword_prompt = PromptTemplate(\n",
    "    input_variables=[\"criteria\"],\n",
    "    template=keyword_template\n",
    ")\n",
    "\n",
    "# Create the first chain that identifies keywords\n",
    "keyword_chain = LLMChain(\n",
    "    llm=llm,\n",
    "    prompt=keyword_prompt\n",
    ")\n",
    "\n",
    "### Step 2: Second Chain - Evaluate Criteria Based on Keywords ###\n",
    "\n",
    "# Create the prompt template for evaluating eligibility\n",
    "evaluation_template = \"\"\"\n",
    "You are a clinical trial assistant.\n",
    "Your task is to compare the patient's information (Gender, Age, Race, Ethnic Group, Language, Vital Signs)\n",
    "with the clinical trial's inclusion and exclusion criteria using the identified keywords.\n",
    "\n",
    "For each criterion keyword, respond with:\n",
    "- \"Yes\" if the patient meets the criterion\n",
    "- \"No\" if the patient does not meet the criterion\n",
    "- \"No Information\" if the necessary patient information is missing to assess this criterion.\n",
    "\n",
    "Do not give any reasoning for your response. Only respond with \"Yes\", \"No\", or \"No Information\".\n",
    "\n",
    "Criteria Keywords: {criteria_keywords}\n",
    "Patient Information: {patient_data}\n",
    "\"\"\"\n",
    "\n",
    "# Create the PromptTemplate for the second LLM\n",
    "evaluation_prompt = PromptTemplate(\n",
    "    input_variables=[\"criteria_keywords\", \"patient_data\"],\n",
    "    template=evaluation_template\n",
    ")\n",
    "\n",
    "# Create the second chain that evaluates criteria eligibility\n",
    "evaluation_chain = LLMChain(\n",
    "    llm=llm,\n",
    "    prompt=evaluation_prompt\n",
    ")\n",
    "\n",
    "### Step 3: Combine Both Chains ###\n",
    "\n",
    "# Now, we'll combine both chains into a sequential chain\n",
    "def process_patient_eligibility(trial_criteria, patient_ehr):\n",
    "    # Step 1: Get the keywords from the criteria using the first chain\n",
    "    criteria_keywords = keyword_chain.run(criteria=trial_criteria)\n",
    "   \n",
    "    # Extract the relevant patient information from the EHR for the second chain\n",
    "    relevant_patient_data = {\n",
    "        \"Gender\": patient_ehr.get(\"Gender\"),\n",
    "        \"Age\": patient_ehr.get(\"Age\"),\n",
    "        \"Race\": patient_ehr.get(\"Race\"),\n",
    "        \"Ethnic Group\": patient_ehr.get(\"Ethnic Group\"),\n",
    "        \"Language\": patient_ehr.get(\"Language\"),\n",
    "        \"Vital Signs\": patient_ehr.get(\"Vital Signs\"),\n",
    "        \"Medications\": patient_ehr.get(\"Medications\"),\n",
    "        \"Problems\": patient_ehr.get(\"Problems\"),\n",
    "        \"Surgeries\": patient_ehr.get(\"Surgeries\"),\n",
    "        \"Immunizations\": patient_ehr.get(\"Immunizations\"),\n",
    "    }\n",
    "    \n",
    "    # Step 2: Pass the keywords and patient data into the second chain for evaluation\n",
    "    eligibility_results = evaluation_chain.run(\n",
    "        criteria_keywords=criteria_keywords,\n",
    "        patient_data=relevant_patient_data\n",
    "    )\n",
    "    \n",
    "    # Return the final eligibility results\n",
    "    return eligibility_results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/vh/y8k1dgkj76s01krzn8qtn3nw0000gn/T/ipykernel_37847/805543201.py:67: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 1.0. Use :meth:`~invoke` instead.\n",
      "  criteria_keywords = keyword_chain.run(criteria=trial_criteria)\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'1. **Gender**: Yes  \\n2. **Age**: Yes  \\n3. **BMI**: Yes  \\n4. **Weight**: Yes  \\n5. **Health Status**: No Information  \\n6. **Medical History**: No Information  \\n7. **Biomarker**: No Information  \\n8. **Medical History**: No Information  \\n9. **Medical History**: No Information  \\n10. **Gastrointestinal Conditions**: No Information  \\n11. **Gastrointestinal Conditions**: No Information  \\n12. **Medication Use**: No  \\n13. **Health Status**: No Information  \\n14. **Substance Abuse**: No Information  \\n15. **Infectious Disease**: No Information  \\n16. **Occupational Exposure**: No Information  \\n17. **Age**: Yes  \\n18. **Gender**: Yes  \\n19. **Healthy Volunteers**: No Information  '"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "process_patient_eligibility(trial_criteria, patient_ehr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.chains import LLMChain, SequentialChain\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "# Define LLM model\n",
    "llm = ChatOpenAI(temperature=0, model = 'gpt-4o-mini')\n",
    "\n",
    "# Step 1: Extract Keywords from Trial Criteria\n",
    "def extract_keywords_chain():\n",
    "    \"\"\"\n",
    "    First chain to extract keywords from trial criteria.\n",
    "    \"\"\"\n",
    "    keyword_template = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, \n",
    "    and identify relevant keywords from each criterion.\n",
    "\n",
    "    Common keywords may include: \"Gender\", \"Age\", \"Race\", \"Ethnic Group\", \"Language\", BMI, BPM, Weight, Height, etc.\n",
    "\n",
    "    Trial Criteria: {criteria}\n",
    "    \"\"\"\n",
    "    \n",
    "    keyword_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria\"],\n",
    "        template=keyword_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=keyword_prompt, output_key=\"criteria_keywords\")\n",
    "\n",
    "# Step 2: Evaluate Criteria Based on Patient Data\n",
    "def evaluate_patient_chain():\n",
    "    \"\"\"\n",
    "    Second chain to evaluate each keyword against the patient's data.\n",
    "    \"\"\"\n",
    "    evaluation_template = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to compare the patient's information with the clinical trial's inclusion and exclusion criteria.\n",
    "\n",
    "    For each criterion keyword, respond with:\n",
    "    - \"Yes\" if the patient meets the criterion\n",
    "    - \"No\" if the patient does not meet the criterion\n",
    "    - \"No Information\" if the necessary patient information is missing to assess this criterion.\n",
    "\n",
    "    Criteria Keywords: {criteria_keywords}\n",
    "    Patient Information: {patient_data}\n",
    "    \"\"\"\n",
    "    \n",
    "    evaluation_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria_keywords\", \"patient_data\"],\n",
    "        template=evaluation_template\n",
    "    )\n",
    "    \n",
    "    chain = LLMChain(llm=llm, prompt=evaluation_prompt, output_key=\"criteria_results\")\n",
    "    \n",
    "\n",
    "    \n",
    "    return chain\n",
    "\n",
    "# Step 3: Make Final Eligibility Decision\n",
    "def final_decision_chain():\n",
    "    \"\"\"\n",
    "    Third chain to make the final eligibility decision based on the evaluated criteria.\n",
    "    \"\"\"\n",
    "    decision_template = \"\"\"\n",
    "    You are an eligibility checker.\n",
    "    Your task is to evaluate the results of a patient's eligibility for a clinical trial.\n",
    "\n",
    "    Eligibility criteria results: {criteria_results}\n",
    "\n",
    "    Rules:\n",
    "    - If there is at least one \"No\", the final eligibility is \"No\".\n",
    "    - If there are only \"Yes\" and \"No Information\", the final eligibility is \"Yes\".\n",
    "\n",
    "    Provide the final eligibility decision. Just give a straight yes or no. Do not give any reasoning.\n",
    "    \"\"\"\n",
    "    \n",
    "    decision_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria_results\"],\n",
    "        template=decision_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=decision_prompt, output_key=\"final_decision\")\n",
    "\n",
    "\n",
    "# Now, create a Sequential Chain combining all three\n",
    "def create_sequential_chain():\n",
    "    \"\"\"\n",
    "    Create the entire pipeline chain that runs the three steps sequentially.\n",
    "    \"\"\"\n",
    "    # Define the three chains\n",
    "    extract_keywords = extract_keywords_chain()\n",
    "    evaluate_patient = evaluate_patient_chain()\n",
    "    final_decision = final_decision_chain()\n",
    "    \n",
    "    # Create the sequential chain\n",
    "    sequential_chain = SequentialChain(\n",
    "        chains=[extract_keywords, evaluate_patient, final_decision],\n",
    "        input_variables=[\"criteria\", \"patient_data\"],\n",
    "        output_variables=[\"final_decision\"]\n",
    "    )\n",
    "    \n",
    "    return sequential_chain\n",
    "\n",
    "# Example usage\n",
    "def process_patient_eligibility(trial_criteria, patient_data):\n",
    "    \"\"\"\n",
    "    Main function to process eligibility using a sequential chain.\n",
    "    \n",
    "    Args:\n",
    "    trial_criteria (str): The inclusion/exclusion criteria text of the trial.\n",
    "    patient_data (dict): A dictionary containing patient information.\n",
    "    \n",
    "    Returns:\n",
    "    str: The final eligibility decision ('Yes' or 'No').\n",
    "    \"\"\"\n",
    "    # Create the sequential chain\n",
    "    sequential_chain = create_sequential_chain()\n",
    "    \n",
    "    # Run the chain with trial criteria and patient data\n",
    "    final_result = sequential_chain.run({\n",
    "        \"criteria\": trial_criteria,\n",
    "        \"patient_data\": patient_data\n",
    "    })\n",
    "    \n",
    "    return final_result\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'Yes\\n\\nReasoning: The patient meets all the inclusion criteria and there are no definitive \"No\" responses in the exclusion criteria, only \"No Information.\" According to the rules, if there is at least one \"No,\" the eligibility would be \"No,\" but since there are only \"Yes\" and \"No Information,\" the final eligibility is \"Yes.\"'"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "process_patient_eligibility(trial_criteria, patient_ehr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.chains import LLMChain, SequentialChain\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_core.output_parsers import JsonOutputParser\n",
    "import json\n",
    "\n",
    "# Define LLM model\n",
    "llm = ChatOpenAI(temperature=0, model='gpt-4o-mini')\n",
    "\n",
    "# Step 1: Extract Keywords from Trial Criteria\n",
    "def extract_keywords_chain():\n",
    "    \"\"\"\n",
    "    First chain to extract keywords from trial criteria.\n",
    "    \"\"\"\n",
    "    keyword_template = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, \n",
    "    and identify relevant keywords from each criterion.\n",
    "\n",
    "    Common keywords may include: \"Gender\", \"Age\", \"Race\", \"Ethnic Group\", \"Language\", BMI, BPM, Weight, Height, etc.\n",
    "\n",
    "    Trial Criteria: {criteria}\n",
    "    \"\"\"\n",
    "    \n",
    "    keyword_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria\"],\n",
    "        template=keyword_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=keyword_prompt, output_key=\"criteria_keywords\")\n",
    "\n",
    "# Step 2: Evaluate Criteria Based on Patient Data\n",
    "def evaluate_patient_chain():\n",
    "    \"\"\"\n",
    "    Second chain to evaluate each keyword against the patient's data.\n",
    "    \"\"\"\n",
    "    evaluation_template = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to compare the patient's information with the clinical trial's inclusion and exclusion criteria.\n",
    "\n",
    "    For each criterion keyword, respond with:\n",
    "    - \"Yes\" if the patient meets the criterion\n",
    "    - \"No\" if the patient does not meet the criterion\n",
    "    - \"No Information\" if the necessary patient information is missing to assess this criterion.\n",
    "\n",
    "    Criteria Keywords: {criteria_keywords}\n",
    "    Patient Information: {patient_data}\n",
    "    \"\"\"\n",
    "    \n",
    "    evaluation_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria_keywords\", \"patient_data\"],\n",
    "        template=evaluation_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=evaluation_prompt, output_key=\"criteria_results\")\n",
    "\n",
    "# Step 3: Make Final Eligibility Decision\n",
    "def final_decision_chain():\n",
    "    decision_template = \"\"\"\n",
    "    You are an eligibility checker.\n",
    "    Your task is to evaluate the results of a patient's eligibility for a clinical trial.\n",
    "\n",
    "    Eligibility criteria results: {criteria_results}\n",
    "\n",
    "    Rules:\n",
    "    - If there is at least one \"No\", the final eligibility is \"No\".\n",
    "    - If there are only \"Yes\" and \"No Information\", the final eligibility is \"Yes\".\n",
    "\n",
    "    Provide only the final eligibility decision as 'Yes' or 'No'. Do not include any other text or explanation.\n",
    "    \"\"\"\n",
    "    \n",
    "    decision_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria_results\"],\n",
    "        template=decision_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=decision_prompt, output_key=\"final_decision\")\n",
    "\n",
    "def json_output_parser_chain():\n",
    "    json_template = \"\"\"\n",
    "    Format the following information into a JSON structure:\n",
    "\n",
    "    Patient ID: {patient_id}\n",
    "    Trial ID: {trial_id}\n",
    "    Final Decision: {final_decision}\n",
    "\n",
    "    The JSON should have the following structure:\n",
    "    {{\n",
    "      \"patientId\": \"{patient_id}\",\n",
    "      \"eligibleTrials\": [\n",
    "        {{\n",
    "          \"trialId\": \"{trial_id}\",\n",
    "          \"eligibilityCriteriaMet\": [],\n",
    "          \"moreInformationNeeded\": [],\n",
    "          \"finalEligibility\": \"{final_decision}\"\n",
    "        }}\n",
    "      ]\n",
    "    }}\n",
    "\n",
    "    Ensure that the output is a valid JSON string.\n",
    "    \"\"\"\n",
    "\n",
    "    json_prompt = PromptTemplate(\n",
    "        input_variables=[\"patient_id\", \"trial_id\", \"final_decision\"],\n",
    "        template=json_template\n",
    "    )\n",
    "\n",
    "    return LLMChain(llm=llm, prompt=json_prompt, output_key=\"json_output\")\n",
    "\n",
    "    \n",
    "\n",
    "\n",
    "# Now, create a Sequential Chain combining all four\n",
    "def create_sequential_chain():\n",
    "    \"\"\"\n",
    "    Create the entire pipeline chain that runs the three steps sequentially.\n",
    "    \"\"\"\n",
    "    # Define the three chains\n",
    "    extract_keywords = extract_keywords_chain()\n",
    "    evaluate_patient = evaluate_patient_chain()\n",
    "    final_decision = final_decision_chain()\n",
    "    json_output = json_output_chain()  # Add the new JSON output chain\n",
    "\n",
    "    # Create the sequential chain\n",
    "    sequential_chain = SequentialChain(\n",
    "        chains=[\n",
    "            extract_keywords,\n",
    "            evaluate_patient,\n",
    "            final_decision,\n",
    "            json_output\n",
    "        ],\n",
    "        input_variables=[\"criteria\", \"patient_data\", \"patient_id\", \"trial_id\"],\n",
    "        output_variables=[\"json_output\"],\n",
    "        # Collect necessary outputs from earlier chains for JSON output\n",
    "        return_only_final_output=False  # Return all outputs for further processing\n",
    "    )\n",
    "    \n",
    "    return sequential_chain\n",
    "\n",
    "def process_patient_eligibility(trial_criteria, patient_data, trial_filename):\n",
    "    trial_id = trial_filename.split('_')[0]\n",
    "    patient_id = patient_data.get(\"Patient ID\")\n",
    "    \n",
    "    sequential_chain = create_sequential_chain()\n",
    "\n",
    "    final_result = sequential_chain.run({\n",
    "        \"criteria\": trial_criteria,\n",
    "        \"patient_data\": json.dumps(patient_data),\n",
    "        \"patient_id\": patient_id,\n",
    "        \"trial_id\": trial_id\n",
    "    })\n",
    "    \n",
    "    print(\"Final result:\", final_result)  # Add this line to inspect the output\n",
    "    \n",
    "    try:\n",
    "        json_result = json.loads(final_result)\n",
    "    except json.JSONDecodeError as e:\n",
    "        print(f\"Error decoding JSON: {e}\")\n",
    "        print(f\"Raw output: {final_result}\")\n",
    "        json_result = {\n",
    "            \"patientId\": patient_id,\n",
    "            \"eligibleTrials\": [{\n",
    "                \"trialId\": trial_id,\n",
    "                \"eligibilityCriteriaMet\": [],\n",
    "                \"moreInformationNeeded\": [],\n",
    "                \"finalEligibility\": \"Error\"\n",
    "            }]\n",
    "        }\n",
    "    \n",
    "    output_filename = f\"{patient_id}_qualifies.json\"\n",
    "    with open(output_filename, 'w') as json_file:\n",
    "        json.dump(json_result, json_file, indent=4)\n",
    "\n",
    "    return json_result\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "ename": "JSONDecodeError",
     "evalue": "Expecting value: line 1 column 1 (char 0)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mJSONDecodeError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[46], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mprocess_patient_eligibility\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrial_criteria\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpatient_ehr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNCT06576401_criteria.txt\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[45], line 137\u001b[0m, in \u001b[0;36mprocess_patient_eligibility\u001b[0;34m(trial_criteria, patient_data, trial_filename)\u001b[0m\n\u001b[1;32m    128\u001b[0m sequential_chain \u001b[38;5;241m=\u001b[39m create_sequential_chain()\n\u001b[1;32m    130\u001b[0m final_result \u001b[38;5;241m=\u001b[39m sequential_chain\u001b[38;5;241m.\u001b[39mrun({\n\u001b[1;32m    131\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcriteria\u001b[39m\u001b[38;5;124m\"\u001b[39m: trial_criteria,\n\u001b[1;32m    132\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpatient_data\u001b[39m\u001b[38;5;124m\"\u001b[39m: json\u001b[38;5;241m.\u001b[39mdumps(patient_data),\n\u001b[1;32m    133\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpatient_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: patient_id,\n\u001b[1;32m    134\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrial_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: trial_id\n\u001b[1;32m    135\u001b[0m })\n\u001b[0;32m--> 137\u001b[0m json_result \u001b[38;5;241m=\u001b[39m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_result\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    139\u001b[0m output_filename \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpatient_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_qualifies.json\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    140\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(output_filename, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m json_file:\n",
      "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m    341\u001b[0m     s \u001b[38;5;241m=\u001b[39m s\u001b[38;5;241m.\u001b[39mdecode(detect_encoding(s), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msurrogatepass\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m    343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m    344\u001b[0m         parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m    345\u001b[0m         parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    348\u001b[0m     \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONDecoder\n",
      "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m    332\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecode\u001b[39m(\u001b[38;5;28mself\u001b[39m, s, _w\u001b[38;5;241m=\u001b[39mWHITESPACE\u001b[38;5;241m.\u001b[39mmatch):\n\u001b[1;32m    333\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m    334\u001b[0m \u001b[38;5;124;03m    containing a JSON document).\u001b[39;00m\n\u001b[1;32m    335\u001b[0m \n\u001b[1;32m    336\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m     obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    338\u001b[0m     end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n\u001b[1;32m    339\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m end \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(s):\n",
      "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m    353\u001b[0m     obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscan_once(s, idx)\n\u001b[1;32m    354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n",
      "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)"
     ]
    }
   ],
   "source": [
    "process_patient_eligibility(trial_criteria, patient_ehr, 'NCT06576401_criteria.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain.prompts import PromptTemplate\n",
    "from langchain.chains import LLMChain, SequentialChain\n",
    "from langchain_openai import ChatOpenAI\n",
    "from langchain_core.output_parsers import JsonOutputParser\n",
    "import json\n",
    "\n",
    "# Define LLM model\n",
    "llm = ChatOpenAI(temperature=0, model='gpt-4o-mini')\n",
    "\n",
    "# Step 1: Extract Keywords from Trial Criteria\n",
    "def extract_keywords_chain():\n",
    "    keyword_template = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to read the inclusion, exclusion, and other criteria of a clinical trial, \n",
    "    and identify relevant keywords from each criterion.\n",
    "\n",
    "    Common keywords may include: \"Gender\", \"Age\", \"Race\", \"Ethnic Group\", \"Language\", BMI, BPM, Weight, Height, etc.\n",
    "\n",
    "    Trial Criteria: {criteria}\n",
    "\n",
    "    Provide the keywords as a comma-separated list.\n",
    "    \"\"\"\n",
    "    \n",
    "    keyword_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria\"],\n",
    "        template=keyword_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=keyword_prompt, output_key=\"criteria_keywords\")\n",
    "\n",
    "# Step 2: Evaluate Criteria Based on Patient Data\n",
    "def evaluate_patient_chain():\n",
    "    evaluation_template = \"\"\"\n",
    "    You are a clinical trial assistant.\n",
    "    Your task is to compare the patient's information with the clinical trial's inclusion and exclusion criteria.\n",
    "\n",
    "    For each criterion keyword, respond with:\n",
    "    - \"Yes\" if the patient meets the criterion\n",
    "    - \"No\" if the patient does not meet the criterion\n",
    "    - \"No Information\" if the necessary patient information is missing to assess this criterion.\n",
    "\n",
    "    Criteria Keywords: {criteria_keywords}\n",
    "    Patient Information: {patient_data}\n",
    "\n",
    "    Provide your response as a list of dictionaries, each containing 'criterion' and 'result' keys.\n",
    "    \"\"\"\n",
    "    \n",
    "    evaluation_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria_keywords\", \"patient_data\"],\n",
    "        template=evaluation_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=evaluation_prompt, output_key=\"criteria_results\")\n",
    "\n",
    "# Step 3: Make Final Eligibility Decision\n",
    "def final_decision_chain():\n",
    "    decision_template = \"\"\"\n",
    "    You are an eligibility checker.\n",
    "    Your task is to evaluate the results of a patient's eligibility for a clinical trial.\n",
    "\n",
    "    Eligibility criteria results: {criteria_results}\n",
    "\n",
    "    Rules:\n",
    "    - If there is at least one \"No\", the final eligibility is \"No\".\n",
    "    - If there are only \"Yes\" and \"No Information\", the final eligibility is \"Yes\".\n",
    "\n",
    "    Provide only the final eligibility decision as 'Yes' or 'No'. Do not include any other text or explanation.\n",
    "    \"\"\"\n",
    "    \n",
    "    decision_prompt = PromptTemplate(\n",
    "        input_variables=[\"criteria_results\"],\n",
    "        template=decision_template\n",
    "    )\n",
    "    \n",
    "    return LLMChain(llm=llm, prompt=decision_prompt, output_key=\"final_decision\")\n",
    "\n",
    "# Step 4: Format results into JSON\n",
    "def json_output_parser_chain():\n",
    "    json_template = \"\"\"\n",
    "    Format the following information into a JSON structure:\n",
    "\n",
    "    Patient ID: {patient_id}\n",
    "    Trial ID: {trial_id}\n",
    "    Final Decision: {final_decision}\n",
    "\n",
    "    The JSON should have the following structure:\n",
    "    {{\n",
    "      \"patientId\": \"{patient_id}\",\n",
    "      \"eligibleTrials\": [\n",
    "        {{\n",
    "          \"trialId\": \"{trial_id}\",\n",
    "          \"eligibilityCriteriaMet\": [],\n",
    "          \"moreInformationNeeded\": [],\n",
    "          \"finalEligibility\": \"{final_decision}\"\n",
    "        }}\n",
    "      ]\n",
    "    }}\n",
    "\n",
    "    Ensure that the output is a valid JSON string.\n",
    "    \"\"\"\n",
    "\n",
    "    json_prompt = PromptTemplate(\n",
    "        input_variables=[\"patient_id\", \"trial_id\", \"final_decision\"],\n",
    "        template=json_template\n",
    "    )\n",
    "\n",
    "    return LLMChain(llm=llm, prompt=json_prompt, output_key=\"json_output\")\n",
    "\n",
    "# Create Sequential Chain\n",
    "def create_sequential_chain():\n",
    "    extract_keywords = extract_keywords_chain()\n",
    "    evaluate_patient = evaluate_patient_chain()\n",
    "    final_decision = final_decision_chain()\n",
    "    json_output = json_output_parser_chain()\n",
    "\n",
    "    sequential_chain = SequentialChain(\n",
    "        chains=[extract_keywords, evaluate_patient, final_decision, json_output],\n",
    "        input_variables=[\"criteria\", \"patient_data\", \"patient_id\", \"trial_id\"],\n",
    "        output_variables=[\"json_output\"]\n",
    "    )\n",
    "\n",
    "    return sequential_chain\n",
    "\n",
    "# Main Processing Function\n",
    "def process_patient_eligibility(trial_criteria, patient_data, trial_filename):\n",
    "    trial_id = trial_filename.split('_')[0]\n",
    "    patient_id = patient_data.get(\"Patient ID\")\n",
    "    \n",
    "    sequential_chain = create_sequential_chain()\n",
    "\n",
    "    final_result = sequential_chain.run({\n",
    "        \"criteria\": trial_criteria,\n",
    "        \"patient_data\": json.dumps(patient_data),\n",
    "        \"patient_id\": patient_id,\n",
    "        \"trial_id\": trial_id\n",
    "    })\n",
    "    \n",
    "    print(\"Final result:\", final_result)  # Add this line to inspect the output\n",
    "    \n",
    "    try:\n",
    "        json_result = json.loads(final_result)\n",
    "    except json.JSONDecodeError as e:\n",
    "        print(f\"Error decoding JSON: {e}\")\n",
    "        print(f\"Raw output: {final_result}\")\n",
    "        json_result = {\n",
    "            \"patientId\": patient_id,\n",
    "            \"eligibleTrials\": [{\n",
    "                \"trialId\": trial_id,\n",
    "                \"eligibilityCriteriaMet\": [],\n",
    "                \"moreInformationNeeded\": [],\n",
    "                \"finalEligibility\": \"Error\"\n",
    "            }]\n",
    "        }\n",
    "    \n",
    "    output_filename = f\"{patient_id}_qualifies.json\"\n",
    "    with open(output_filename, 'w') as json_file:\n",
    "        json.dump(json_result, json_file, indent=4)\n",
    "\n",
    "    return json_result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "ename": "JSONDecodeError",
     "evalue": "Expecting value: line 1 column 1 (char 0)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mJSONDecodeError\u001b[0m                           Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[47], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mprocess_patient_eligibility\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrial_criteria\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpatient_ehr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mNCT06576401_criteria.txt\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n",
      "Cell \u001b[0;32mIn[45], line 137\u001b[0m, in \u001b[0;36mprocess_patient_eligibility\u001b[0;34m(trial_criteria, patient_data, trial_filename)\u001b[0m\n\u001b[1;32m    128\u001b[0m sequential_chain \u001b[38;5;241m=\u001b[39m create_sequential_chain()\n\u001b[1;32m    130\u001b[0m final_result \u001b[38;5;241m=\u001b[39m sequential_chain\u001b[38;5;241m.\u001b[39mrun({\n\u001b[1;32m    131\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcriteria\u001b[39m\u001b[38;5;124m\"\u001b[39m: trial_criteria,\n\u001b[1;32m    132\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpatient_data\u001b[39m\u001b[38;5;124m\"\u001b[39m: json\u001b[38;5;241m.\u001b[39mdumps(patient_data),\n\u001b[1;32m    133\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpatient_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: patient_id,\n\u001b[1;32m    134\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtrial_id\u001b[39m\u001b[38;5;124m\"\u001b[39m: trial_id\n\u001b[1;32m    135\u001b[0m })\n\u001b[0;32m--> 137\u001b[0m json_result \u001b[38;5;241m=\u001b[39m \u001b[43mjson\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfinal_result\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    139\u001b[0m output_filename \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpatient_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_qualifies.json\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    140\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(output_filename, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m json_file:\n",
      "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/json/__init__.py:346\u001b[0m, in \u001b[0;36mloads\u001b[0;34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[0m\n\u001b[1;32m    341\u001b[0m     s \u001b[38;5;241m=\u001b[39m s\u001b[38;5;241m.\u001b[39mdecode(detect_encoding(s), \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msurrogatepass\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m    343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m    344\u001b[0m         parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[1;32m    345\u001b[0m         parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[0;32m--> 346\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    347\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    348\u001b[0m     \u001b[38;5;28mcls\u001b[39m \u001b[38;5;241m=\u001b[39m JSONDecoder\n",
      "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/json/decoder.py:337\u001b[0m, in \u001b[0;36mJSONDecoder.decode\u001b[0;34m(self, s, _w)\u001b[0m\n\u001b[1;32m    332\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdecode\u001b[39m(\u001b[38;5;28mself\u001b[39m, s, _w\u001b[38;5;241m=\u001b[39mWHITESPACE\u001b[38;5;241m.\u001b[39mmatch):\n\u001b[1;32m    333\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[1;32m    334\u001b[0m \u001b[38;5;124;03m    containing a JSON document).\u001b[39;00m\n\u001b[1;32m    335\u001b[0m \n\u001b[1;32m    336\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 337\u001b[0m     obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    338\u001b[0m     end \u001b[38;5;241m=\u001b[39m _w(s, end)\u001b[38;5;241m.\u001b[39mend()\n\u001b[1;32m    339\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m end \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(s):\n",
      "File \u001b[0;32m~/.pyenv/versions/3.9.19/lib/python3.9/json/decoder.py:355\u001b[0m, in \u001b[0;36mJSONDecoder.raw_decode\u001b[0;34m(self, s, idx)\u001b[0m\n\u001b[1;32m    353\u001b[0m     obj, end \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscan_once(s, idx)\n\u001b[1;32m    354\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[0;32m--> 355\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mExpecting value\u001b[39m\u001b[38;5;124m\"\u001b[39m, s, err\u001b[38;5;241m.\u001b[39mvalue) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m    356\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n",
      "\u001b[0;31mJSONDecodeError\u001b[0m: Expecting value: line 1 column 1 (char 0)"
     ]
    }
   ],
   "source": [
    "process_patient_eligibility(trial_criteria, patient_ehr, 'NCT06576401_criteria.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scraping Environment",
   "language": "python",
   "name": "scraping_env"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}