168 lines (167 with data), 6.5 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from openai import OpenAI\n",
"from dotenv import load_dotenv\n",
"import openai\n",
"import json\n",
"import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"load_dotenv()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def generate_adverse_event_report(prompt, model=\"gpt-4-1106-preview\"):\n",
"    \"\"\"Request adverse event reports from the OpenAI chat completions API in JSON mode.\n",
"\n",
"    Args:\n",
"        prompt: Text sent to the model as the user message.\n",
"        model: Name of the chat model to query. Defaults to the model this\n",
"            function previously had hard-coded.\n",
"\n",
"    Returns:\n",
"        The content of the first returned choice, with surrounding whitespace removed.\n",
"\n",
"    Raises:\n",
"        ValueError: If the first choice carries no text content, or if generation\n",
"            stopped because max_tokens was reached (the JSON would be cut off).\n",
"    \"\"\"\n",
"    # OpenAI Client\n",
"    client = OpenAI(api_key=os.getenv(\"OPENAI_API_KEY\"))\n",
"\n",
"    # OpenAI Completion API\n",
"    response = client.chat.completions.create(\n",
"        model=model,\n",
"        messages=[\n",
"            {\"role\": \"system\", \"content\": \"Act as an expert Analyst with 20+ years of experience in Pharma and Healthcare industry. You have to generate Adverse Event Reports in properly formatted JSON\"},\n",
"            {\"role\": \"user\", \"content\": prompt},\n",
"        ],\n",
"        response_format={\"type\": \"json_object\"},\n",
"        temperature=1,\n",
"        max_tokens=3500,\n",
"        top_p=1,\n",
"        frequency_penalty=0,\n",
"        presence_penalty=0,\n",
"    )\n",
"\n",
"    choice = response.choices[0]\n",
"    content = choice.message.content\n",
"\n",
"    # content is optional in the API response; fail with a clear message instead of\n",
"    # an AttributeError on None.strip()\n",
"    if content is None:\n",
"        raise ValueError(\"OpenAI response contained no message content\")\n",
"\n",
"    # In JSON mode a completion that hit max_tokens is partial JSON and cannot be parsed\n",
"    if choice.finish_reason == \"length\":\n",
"        raise ValueError(\"OpenAI response was truncated at max_tokens; the JSON is incomplete\")\n",
"\n",
"    return content.strip()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"folder_path = '../data/raw_drug_info/'  # Replace with the path to your folder of text files\n",
"output_dir = '../data/entity_extraction_reports/'  # One generated file per drug is written here\n",
"\n",
"# Create the destination up front so the first write does not fail on a fresh checkout\n",
"os.makedirs(output_dir, exist_ok=True)\n",
"\n",
"# Iterate through the files in the folder (sorted so reruns process drugs in a stable order)\n",
"for filename in sorted(os.listdir(folder_path)):\n",
"    if not filename.endswith('.txt'):\n",
"        continue\n",
"\n",
"    file_path = os.path.join(folder_path, filename)\n",
"\n",
"    # Read the information about the drug. The encoding is explicit because the text may\n",
"    # contain non-ASCII characters; undecodable bytes are replaced rather than raising.\n",
"    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:\n",
"        drug_report = file.read()\n",
"\n",
"    # Get the name of the drug from the filename; splitext only removes the final\n",
"    # extension, so names that contain dots are kept intact\n",
"    drug_name = os.path.splitext(filename)[0]\n",
"\n",
"    prompt = f\"\"\"Sample Adverse Event reports:\n",
"[\n",
"    {{\n",
"        \"input\": \"Nicole Moore\n",
"        moore123nicole@hotmail.com\n",
"        32 McMurray Court, Columbia, SC 41250\n",
"        9840105113, United States \n",
"        \n",
"        Relationship to XYZ Pharma Inc.: Patient or Caregiver\n",
"        Reason for contacting: Adverse Event\n",
"        \n",
"        Message: Yes, I have been taking Mylan’s brand of Metroprolol for two years now and with no problem. I recently had my prescription refilled with the same Mylan Metoprolol and I’m having a hard time sleeping at night along with running nose. Did you possibly change something with the pill...possibly different fillers? The pharmacist at CVS didn’t have any information for me. Thank you, Nicole Moore\", \n",
"        \"output\": {{\n",
"            \"drug_name\":\"Metroprolol\",\n",
"            \"adverse_events\": [\"hard time sleeping at night\", \"running nose\"]\n",
"        }}\n",
"    }},\n",
"    {{\n",
"        \"input\": \"Jack Ryan,\n",
"        jack3rayan@gmail.com\n",
"        120 Erwin RD, Canonsburg, PA 21391,\n",
"        2133681441, United States\n",
"        \n",
"        Relationship to XYZ Pharma Inc.: Patient\n",
"        Reason for contacting: Defective Product\n",
"        \n",
"        Message: I recently purchased a Wixela inhub 250/50 at my local CVS pharmacy and the inhaler appears to be defective. When I try and activate it, the yellow knob only goes down halfway. I just removed this one from the wrapper so I know it's not empty. The pharmacy wouldn't exchange it so I am contacting you to get a replacement. Thank you for your time and consideration in this matter\",\n",
"        \"output\": {{\n",
"            \"drug_name\":\"Wixela inhub 250/50\",\n",
"            \"adverse_events\": [\"defective inhaler\"]\n",
"        }}\n",
"    }},\n",
"]\n",
"\n",
"Now create Adverse Event Reports in a similar way for the Drug - {drug_name}. \n",
"\n",
"You have more information about the drug's use and its side effects below:\n",
"{drug_report}\n",
"\n",
"Generate 15 different reports each with different side effects. Mention one or two side effects in each report at max. You have to prepare data for Entity Extraction of 2 entities: \"drug_name\" and \"adverse_events\" only.\n",
"Followng the following format for the final output:\n",
"\n",
"[\n",
"    {{\n",
"        \"input\":\"## Generated Report Here\",\n",
"        \"output\": {{ \"drug_name\":\"## Name of Drug\", \"adverse_events\": [\"side effect 1\", \"side effect 2\"] }}\n",
"    }},\n",
"    {{\n",
"        \"input\":\"## Generated Report Here\",\n",
"        \"output\": {{ \"drug_name\":\"## Name of Drug\", \"adverse_events\": [\"side effect 1\", \"side effect 2\"] }}\n",
"    }},\n",
"]\n",
"\"\"\"\n",
"    # Generate Adverse Event Reports for the Drug\n",
"    reports = generate_adverse_event_report(prompt)\n",
"\n",
"    # Convert the string response to a Python object. json.JSONDecodeError is a\n",
"    # ValueError; skip this drug instead of aborting the remaining (paid) requests.\n",
"    try:\n",
"        output_data = json.loads(reports)\n",
"    except ValueError as error:\n",
"        print(f\"Skipping {drug_name}: response is not valid JSON ({error})\")\n",
"        continue\n",
"\n",
"    # Save the generated data as JSON text. file.write() only accepts str, so the parsed\n",
"    # object is serialised with json.dump. The .txt extension is kept so existing\n",
"    # consumers of this folder keep working.\n",
"    output_path = os.path.join(output_dir, f\"{drug_name}.txt\")\n",
"    with open(output_path, 'w', encoding='utf-8') as text_file:\n",
"        json.dump(output_data, text_file, indent=2, ensure_ascii=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "scrape",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}