[0beccf]: / open_ai_med_text_to_pandas_df.ipynb

Download this file

172 lines (171 with data), 6.9 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import openai\n",
    "import pandas as pd\n",
    "\n",
    "# Take the API key from the OPENAI_API_KEY environment variable so a real key never\n",
    "# has to be saved in the notebook; the original placeholder stays as the fallback.\n",
    "openai.api_key = os.environ.get(\"OPENAI_API_KEY\", \"API-KEY\")\n",
    "# Query the model list (the return value is not used below).\n",
    "openai.Model.list()\n",
    "# Sample clinical note; this text was generated by ChatGPT.\n",
    "# A trailing backslash inside the literal joins the next source line, including its\n",
    "# leading spaces, into the same string.\n",
    "text = \"John Doe is a 35-year-old male with a reported weight of 80 kilograms and height of 185 centimeters.\\\n",
    "        His blood pressure was measured at 120/80 mmHg, while his glucose levels were found to be at 90 mg/dL and cholesterol levels at 200 mg/dL.\\\n",
    "        Mr. Doe has a history of elevated cholesterol levels, which may increase his risk of developing heart disease.\\\n",
    "        It is important for him to maintain a healthy lifestyle, including a balanced diet and regular physical activity,\\\n",
    "        to help manage his cholesterol levels. In addition, it is recommended that he have regular check-ups with his healthcare\\\n",
    "        provider to monitor his blood pressure, glucose levels, and cholesterol, and to discuss any necessary changes to his treatment plan.\\\n",
    "        Overall, Mr. Doe's current health status suggests a need for lifestyle modifications and close monitoring by a healthcare professional.\\\n",
    "        Further evaluation and assessments may be necessary to develop an individualized treatment plan and ensure his ongoing health and well-being.\"\n",
    "# Fields the model is asked to extract from the note.\n",
    "search = \"Name, Age, Gender, Weight, Height, Blood Pressure, Glucose, Cholesterol\"\n",
    "# Prompt for generation: instructions, then the note itself, then the target language.\n",
    "prompt = (f\"Convert the following data with {search} items,\\\n",
    "            then predict patient risk score then add data dictionary\\\n",
    "            then give suggestion then add data dictionary\\\n",
    "            into a pandas dataframe:\\n\"\n",
    "          f\"{text}\\n\"\n",
    "          f\"Language: Python\")\n",
    "# Ask the OpenAI completion endpoint for several candidate answers.\n",
    "response = openai.Completion.create(engine=\"text-davinci-002\", # Define model\n",
    "                                    prompt=prompt, # Prompt\n",
    "                                    max_tokens=1000, # Tokens\n",
    "                                    n=10) # Results\n",
    "# Keep the first completion that contains DataFrame-building code. The remaining\n",
    "# candidates are ignored; relevant_text is None when no completion qualifies.\n",
    "relevant_text = next(\n",
    "    (choice.text for choice in response.choices if \"pd.DataFrame\" in choice.text),\n",
    "    None,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Age</th>\n",
       "      <th>Gender</th>\n",
       "      <th>Weight</th>\n",
       "      <th>Height</th>\n",
       "      <th>Blood Pressure</th>\n",
       "      <th>Blood Sugar</th>\n",
       "      <th>Cholesterol</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>John Doe</td>\n",
       "      <td>35</td>\n",
       "      <td>Male</td>\n",
       "      <td>80</td>\n",
       "      <td>185</td>\n",
       "      <td>120/80 mmHg</td>\n",
       "      <td>90</td>\n",
       "      <td>200</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Name  Age Gender  Weight  Height Blood Pressure  Blood Sugar  \\\n",
       "0  John Doe   35   Male      80     185    120/80 mmHg           90   \n",
       "\n",
       "   Cholesterol  \n",
       "0          200  "
      ]
     },
     "execution_count": 99,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Raw completion text, kept for reference (a \"\\n\" in it is a line break):\n",
    "# \"\\n\\n\\nimport pandas as pd\\n\\nraw_data = {'Name': ['John Doe'], \\n        'Age': [35], \\n        'Gender': ['Male'], \\n        'Weight': [80], \\n        'Height': [185], \\n        'Blood Pressure': ['120/80 mmHg'], \\n        'Blood Sugar': [90], \\n        'Cholesterol': [200]}\\n\\ndf = pd.DataFrame(raw_data, columns = ['Name', 'Age', 'Gender', 'Weight', 'Height', 'Blood Pressure', 'Blood Sugar', 'Cholesterol'])\\n\\n#Predict patient risk score\\ndf['Risk Score'] = df['Age'] * df['Weight'] / (df['Height'] * df['Blood Pressure'])\\n\\n#Suggestion\\ndf['Suggestion'] = 'None'\\ndf.loc[df['Risk Score'] >= 0.5, 'Suggestion'] = 'Talk to your doctor about starting cholesterol medication'\\n\\nprint(df)\"\n",
    "# The statements below are that completion pasted in so it can be executed.\n",
    "import pandas as pd\n",
    "\n",
    "# One-row table of the values the model extracted from the note.\n",
    "raw_data = {'Name': ['John Doe'],\n",
    "            'Age': [35],\n",
    "            'Gender': ['Male'],\n",
    "            'Weight': [80],\n",
    "            'Height': [185],\n",
    "            'Blood Pressure': ['120/80 mmHg'],\n",
    "            'Blood Sugar': [90],\n",
    "            'Cholesterol': [200]}\n",
    "\n",
    "df = pd.DataFrame(raw_data, columns = ['Name', 'Age', 'Gender', 'Weight', 'Height', 'Blood Pressure', 'Blood Sugar', 'Cholesterol'])\n",
    "\n",
    "try:\n",
    "    # Model-generated \"risk score\" formula, kept verbatim to show that it does not run:\n",
    "    # 'Blood Pressure' holds a string ('120/80 mmHg'), so the arithmetic raises TypeError.\n",
    "    df['Risk Score'] = df['Age'] * df['Weight'] / (df['Height'] * df['Blood Pressure'])\n",
    "    # Model-generated suggestion rule; it is only reached if the line above succeeds.\n",
    "    df['Suggestion'] = 'None'\n",
    "    df.loc[df['Risk Score'] >= 0.5, 'Suggestion'] = 'Talk to your doctor about starting cholesterol medication'\n",
    "except TypeError as err:\n",
    "    # Report the failure instead of hiding it, and carry on with the extracted columns.\n",
    "    print(f\"Generated risk-score code failed: {err}\")\n",
    "\n",
    "df"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "datascience",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.15 (default, Nov 24 2022, 08:57:44) \n[Clang 14.0.6 ]"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "628b5b8989eb9200ebfd0cfe658896bfa508e7b699b0af19da59d2b988268b09"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}