--- a +++ b/clinical-trial-main.ipynb @@ -0,0 +1 @@ +{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.12","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":10768325,"sourceType":"datasetVersion","datasetId":6680079}],"dockerImageVersionId":30886,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import os\nos.environ[\"OPENAI_API_KEY\"] = \"you API key\" \nimport xml.etree.ElementTree as ET\nimport requests\nimport pandas as pd\nfrom datetime import datetime\nfrom openai import OpenAI\n\nclass MedicalHistoryAnalyzer:\n def __init__(self, api_key):\n self.medical_llm = OpenAI(api_key=api_key)\n \n def analyze_medical_text(self, prompt):\n \"\"\"Process medical criteria with AI\"\"\"\n try:\n response = self.medical_llm.chat.completions.create(\n model=\"gpt-3.5-turbo\",\n messages=[{\"role\": \"user\", \"content\": prompt}],\n temperature=0.2\n )\n return response.choices[0].message.content.strip()\n except Exception as e:\n print(f\"AI analysis error: {e}\")\n return \"\"\n\nclass TrialMatchFinder:\n def __init__(self, api_key):\n self.active_trials = []\n self.patient_data = []\n self.ai_helper = MedicalHistoryAnalyzer(api_key)\n \n def fetch_active_trials(self):\n \"\"\"Get recruiting trials from clinicaltrials.gov\"\"\"\n trials_url = \"https://clinicaltrials.gov/api/query/full_studies\"\n params = {\n \"expr\": \"RECRUITING\",\n \"min_rnk\": 1,\n \"max_rnk\": 30,\n \"fmt\": \"json\"\n }\n \n response = requests.get(trials_url, params=params)\n studies = response.json().get(\"FullStudiesResponse\", {}).get(\"FullStudies\", [])\n \n for study in studies:\n protocol = study[\"Study\"][\"ProtocolSection\"]\n eligibility = protocol[\"EligibilityModule\"]\n \n self.active_trials.append({\n \"trial_id\": protocol[\"IdentificationModule\"][\"NCTId\"],\n \"title\": protocol[\"IdentificationModule\"].get(\"OfficialTitle\", \"Unnamed Trial\"),\n \"age_range\": (\n int(eligibility[\"MinimumAge\"].split()[0]),\n int(eligibility[\"MaximumAge\"].split()[0])\n ),\n \"gender\": eligibility.get(\"Gender\", \"All\"),\n \"conditions\": [c.lower() for c in eligibility.get(\"ConditionList\", [])],\n \"inclusion\": protocol.get(\"DescriptionModule\", {}).get(\"DetailedDescription\", \"\"),\n \"exclusions\": eligibility.get(\"ExclusionCriteria\", {}).get(\"TextBlock\", \"\")\n })\n\n def process_patient_file(self, xml_path):\n \"\"\"Read and parse patient XML data\"\"\"\n tree = ET.parse(xml_path)\n root = tree.getroot()\n \n for patient in root.findall(\"Patient\"):\n record = {\n \"id\": patient.find(\"ID\").text,\n \"dob\": datetime.strptime(patient.find(\"Demographics/DOB\").text, \"%Y-%m-%d\"),\n \"gender\": patient.find(\"Demographics/Gender\").text,\n \"conditions\": self._get_patient_conditions(patient),\n \"medications\": [d.find(\"Name\").text.lower() \n for d in patient.findall(\"Treatments/CurrentMedications/Drug\")],\n \"labs\": {t.find(\"Name\").text: t.find(\"Value\").text \n for t in patient.findall(\"LabResults/Test\")}\n }\n self.patient_data.append(record)\n \n def _get_patient_conditions(self, patient):\n \"\"\"Extract all medical conditions\"\"\"\n conditions = []\n primary = patient.find(\"MedicalConditions/PrimaryDiagnosis/Condition\")\n if primary is not None:\n conditions.append(primary.text.lower())\n \n for cond in patient.findall(\"MedicalConditions/Comorbidities/Condition\"):\n conditions.append(cond.text.lower())\n return conditions\n\n def calculate_current_age(self, birth_date):\n \"\"\"Determine patient's age\"\"\"\n today = datetime(2024, 2, 15) # Use current date in real implementation\n return today.year - birth_date.year - ((today.month, today.day) < \n (birth_date.month, birth_date.day))\n\n def check_eligibility(self, patient, trial):\n \"\"\"Evaluate match between patient and trial\"\"\"\n reasons = []\n age = self.calculate_current_age(patient[\"dob\"])\n \n # Basic checks\n if trial[\"age_range\"][0] <= age <= trial[\"age_range\"][1]:\n reasons.append(f\"Age {age} matches range {trial['age_range']}\")\n \n if patient[\"gender\"].lower() in trial[\"gender\"].lower():\n reasons.append(\"Gender matches trial requirements\")\n \n # Condition matching\n common_conditions = set(patient[\"conditions\"]) & set(trial[\"conditions\"])\n if common_conditions:\n reasons.append(f\"Shared conditions: {', '.join(common_conditions)}\")\n \n # AI analysis\n ai_analysis = self.ai_criteria_check(patient, trial)\n if ai_analysis:\n reasons.extend(ai_analysis)\n \n return reasons\n \n def ai_criteria_check(self, patient, trial):\n \"\"\"Use LLM for complex criteria evaluation\"\"\"\n prompt = f\"\"\"Analyze patient-trial compatibility:\n \n Patient:\n - Age: {self.calculate_current_age(patient['dob'])}\n - Gender: {patient['gender']}\n - Conditions: {', '.join(patient['conditions'])}\n - Medications: {', '.join(patient['medications'])}\n - Lab Results: {patient['labs']}\n \n Trial Requirements:\n {trial['inclusion']}\n \n Exclusions:\n {trial['exclusions']}\n \n Output format:\n Matches: [list]\n Conflicts: [list]\"\"\"\n \n response = self.ai_helper.analyze_medical_text(prompt)\n return self._parse_ai_response(response)\n\n def _parse_ai_response(self, text):\n \"\"\"Convert LLM output to structured format\"\"\"\n try:\n matches_line = [line for line in text.split(\"\\n\") if \"Matches:\" in line][0]\n conflicts_line = [line for line in text.split(\"\\n\") if \"Conflicts:\" in line][0]\n \n matches = matches_line.split(\":\")[1].strip().split(\", \")\n conflicts = conflicts_line.split(\":\")[1].strip().split(\", \")\n \n results = []\n if matches and matches[0] != \"None\":\n results.append(f\"AI Matches: {', '.join(matches)}\")\n if conflicts and conflicts[0] != \"None\":\n results.append(f\"AI Warnings: {', '.join(conflicts)}\")\n \n return results\n except:\n return [\"AI analysis incomplete\"]\n\n def run_matching(self, input_xml, output_file):\n \"\"\"Main execution flow\"\"\"\n self.fetch_active_trials()\n self.process_patient_file(input_xml)\n \n results = []\n for patient in self.patient_data:\n matches = []\n for trial in self.active_trials:\n eligibility = self.check_eligibility(patient, trial)\n if eligibility:\n matches.append({\n \"trialId\": trial[\"trial_id\"],\n \"trialName\": trial[\"title\"],\n \"eligibilityReasons\": eligibility\n })\n \n results.append({\n \"patientId\": patient[\"id\"],\n \"eligibleTrials\": matches\n })\n \n pd.DataFrame(results).to_excel(output_file, index=False)\n print(f\"Success! Results saved to {output_file}\")\n return results\n\nif __name__ == \"__main__\":\n matcher = TrialMatchFinder(os.getenv(\"OPENAI_API_KEY\"))\n \n matcher.run_matching(\n input_xml=\"add input path as xml file\",\n output_file=\"add output path as xlsv file\"\n )","metadata":{"trusted":true},"outputs":[],"execution_count":null}]} \ No newline at end of file