[7620a5]: / clinicalTrial_EDA_3rd.ipynb

Download this file

4737 lines (4736 with data), 478.9 kB

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import json\n",
    "import sys\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "\n",
    "import xml.etree.ElementTree as et\n",
    "import io\n",
    "import glob\n",
    "# tqdm.auto is the public entry point; it replaces the private, deprecated\n",
    "# tqdm._tqdm_notebook path and falls back to a text bar outside notebooks.\n",
    "from tqdm.auto import tqdm\n",
    "\n",
    "# pd.options.display.max_rows = 9999\n",
    "# pd.options.display.max_columns = 9999\n",
    "# pd.set_option('display.max_colwidth', -1)\n",
    "\n",
    "# tqdm.pandas()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the trial table from the CSV sitting next to the notebook.\n",
    "df = pd.read_csv(filepath_or_buffer='cancerTrials.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Strip '[', ']' and single quotes from intervention_name so only the\n",
    "# comma-separated names remain (the column appears to hold the text form\n",
    "# of a list -- TODO confirm against the CSV export).\n",
    "# regex=True is explicit: pandas >= 2.0 treats the pattern as a literal by\n",
    "# default, which would silently leave the column unchanged. The raw string\n",
    "# avoids the invalid-escape warning raised by a plain '\\[' literal.\n",
    "# Note: astype(str) turns missing values into the text 'nan'.\n",
    "df['intervention_name'] = df['intervention_name'].astype(str).str.replace(r\"[\\[\\]']\", '', regex=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "61777\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nct_id</th>\n",
       "      <th>brief_title</th>\n",
       "      <th>official_title</th>\n",
       "      <th>overall_status</th>\n",
       "      <th>start_date</th>\n",
       "      <th>completion_date</th>\n",
       "      <th>phase</th>\n",
       "      <th>study_type</th>\n",
       "      <th>brief_summary</th>\n",
       "      <th>detailed_description</th>\n",
       "      <th>enrollment</th>\n",
       "      <th>condition</th>\n",
       "      <th>intervention_name</th>\n",
       "      <th>eligibility</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NCT00000124</td>\n",
       "      <td>Collaborative Ocular Melanoma Study (COMS)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Unknown status</td>\n",
       "      <td>November 1986</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      To evaluate therapeutic interventions ...</td>\n",
       "      <td>\\n      For more than 100 years, removal of th...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Uveitis</td>\n",
       "      <td>Brachytherapy, Eye Removal</td>\n",
       "      <td>\\n        Men and women eligible for the study...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NCT00000136</td>\n",
       "      <td>Studies of the Ocular Complications of AIDS (S...</td>\n",
       "      <td>Foscarnet-Ganciclovir CMV Retinitis Trial</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1990</td>\n",
       "      <td>October 1991</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      To evaluate the relative safety and ef...</td>\n",
       "      <td>\\n      CMV retinitis is the most common intra...</td>\n",
       "      <td>234.0</td>\n",
       "      <td>Cytomegalovirus Retinitis</td>\n",
       "      <td>Ganciclovir, Foscarnet, Phosphonoacetic Acid, ...</td>\n",
       "      <td>\\n        Inclusion criteria:\\n\\n          -  ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NCT00000395</td>\n",
       "      <td>Antifolate Effectiveness in Arthritis</td>\n",
       "      <td>Mechanisms of Antifolate Efficacy in Arthritis</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1996</td>\n",
       "      <td>August 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study looks at how the arthritis ...</td>\n",
       "      <td>\\n      Low-dose methotrexate therapy suppress...</td>\n",
       "      <td>40.0</td>\n",
       "      <td>Adjuvant Arthritis</td>\n",
       "      <td>Methotrexate, Folic Acid Antagonists, Folic Ac...</td>\n",
       "      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        nct_id                                        brief_title  \\\n",
       "0  NCT00000124         Collaborative Ocular Melanoma Study (COMS)   \n",
       "1  NCT00000136  Studies of the Ocular Complications of AIDS (S...   \n",
       "2  NCT00000395              Antifolate Effectiveness in Arthritis   \n",
       "\n",
       "                                   official_title  overall_status  \\\n",
       "0                                             NaN  Unknown status   \n",
       "1       Foscarnet-Ganciclovir CMV Retinitis Trial       Completed   \n",
       "2  Mechanisms of Antifolate Efficacy in Arthritis       Completed   \n",
       "\n",
       "       start_date completion_date    phase      study_type  \\\n",
       "0   November 1986             NaN  Phase 3  Interventional   \n",
       "1      March 1990    October 1991  Phase 3  Interventional   \n",
       "2  September 1996     August 2002  Phase 2  Interventional   \n",
       "\n",
       "                                       brief_summary  \\\n",
       "0  \\n      To evaluate therapeutic interventions ...   \n",
       "1  \\n      To evaluate the relative safety and ef...   \n",
       "2  \\n      This study looks at how the arthritis ...   \n",
       "\n",
       "                                detailed_description  enrollment  \\\n",
       "0  \\n      For more than 100 years, removal of th...         NaN   \n",
       "1  \\n      CMV retinitis is the most common intra...       234.0   \n",
       "2  \\n      Low-dose methotrexate therapy suppress...        40.0   \n",
       "\n",
       "                   condition  \\\n",
       "0                    Uveitis   \n",
       "1  Cytomegalovirus Retinitis   \n",
       "2         Adjuvant Arthritis   \n",
       "\n",
       "                                   intervention_name  \\\n",
       "0                         Brachytherapy, Eye Removal   \n",
       "1  Ganciclovir, Foscarnet, Phosphonoacetic Acid, ...   \n",
       "2  Methotrexate, Folic Acid Antagonists, Folic Ac...   \n",
       "\n",
       "                                         eligibility  \n",
       "0  \\n        Men and women eligible for the study...  \n",
       "1  \\n        Inclusion criteria:\\n\\n          -  ...  \n",
       "2  \\n        Inclusion Criteria:\\n\\n          -  ...  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Row count first, then a three-row preview of the table.\n",
    "print(df.shape[0])\n",
    "df.head(n=3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 61777 entries, 0 to 61776\n",
      "Data columns (total 14 columns):\n",
      "nct_id                  61777 non-null object\n",
      "brief_title             61777 non-null object\n",
      "official_title          60936 non-null object\n",
      "overall_status          61777 non-null object\n",
      "start_date              61777 non-null object\n",
      "completion_date         56975 non-null object\n",
      "phase                   47919 non-null object\n",
      "study_type              61777 non-null object\n",
      "brief_summary           61776 non-null object\n",
      "detailed_description    43084 non-null object\n",
      "enrollment              60236 non-null float64\n",
      "condition               61777 non-null object\n",
      "intervention_name       61777 non-null object\n",
      "eligibility             61777 non-null object\n",
      "dtypes: float64(1), object(13)\n",
      "memory usage: 6.6+ MB\n"
     ]
    }
   ],
   "source": [
    "# Column dtypes and non-null counts, to see which fields have gaps.\n",
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Phase 3', 'Phase 2', nan, 'Phase 1', 'Phase 1/Phase 2', 'Phase 4',\n",
       "       'Phase 2/Phase 3', 'Early Phase 1'], dtype=object)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Distinct phase labels present in the data (NaN included).\n",
    "df['phase'].unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Phase 2            20198\n",
       "Phase 1            10738\n",
       "Phase 3             7428\n",
       "Phase 1/Phase 2     5273\n",
       "Phase 4             2350\n",
       "Phase 2/Phase 3     1060\n",
       "Early Phase 1        872\n",
       "Name: phase, dtype: int64"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Number of trials per phase, most frequent first (NaN rows are not counted).\n",
    "df['phase'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>phase</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Phase 2</td>\n",
       "      <td>20198</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Phase 1</td>\n",
       "      <td>10738</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Phase 3</td>\n",
       "      <td>7428</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Phase 1/Phase 2</td>\n",
       "      <td>5273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Phase 4</td>\n",
       "      <td>2350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Phase 2/Phase 3</td>\n",
       "      <td>1060</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Early Phase 1</td>\n",
       "      <td>872</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             index  phase\n",
       "0          Phase 2  20198\n",
       "1          Phase 1  10738\n",
       "2          Phase 3   7428\n",
       "3  Phase 1/Phase 2   5273\n",
       "4          Phase 4   2350\n",
       "5  Phase 2/Phase 3   1060\n",
       "6    Early Phase 1    872"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tabulate trials per phase, most frequent first.\n",
    "# rename_axis + reset_index(name=...) pins the column labels to\n",
    "# 'phase' / 'count'. A bare reset_index() labels the counts 'phase' on older\n",
    "# pandas and produces different column names on pandas >= 2.0.\n",
    "phase = (\n",
    "    df['phase']\n",
    "    .value_counts(ascending=False, sort=True)\n",
    "    .rename_axis('phase')\n",
    "    .reset_index(name='count')\n",
    ")\n",
    "phase"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x432 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Horizontal bar chart of trial counts per phase; ascending order puts the\n",
    "# largest bar at the top of a barh plot.\n",
    "phase_counts_asc = df['phase'].value_counts(ascending=True, sort=True)\n",
    "os.makedirs('./image', exist_ok=True)  # savefig does not create missing directories\n",
    "plt.figure(figsize=(6, 6))\n",
    "phase_counts_asc.plot(\n",
    "    kind='barh', width=0.9, alpha=0.8,\n",
    "    color=sns.color_palette(\"bright\", len(phase_counts_asc)),  # one colour per phase\n",
    ")\n",
    "# The chart shows the phase column, so the title names phase rather than status.\n",
    "plt.title('Cancer Clinical Trials by Phase')\n",
    "plt.xlabel('Number of Occurrences', fontsize=12)\n",
    "plt.savefig('./image/phase1.png', bbox_inches=\"tight\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAncAAAE/CAYAAAAkKeX+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xu8JFV57//PN6ACCgIyEuTiIAxG5KdERsRoDNGIiEY0EYWooBJRI/GuoBHlGPXg/ehROXKZgIoCCghRDBKiQQ0oM4hcRYaLMjKB4Q6CKPD8/qi1Q7vZl973PT2f9+vVr9391Kqqp6p79jx7rVrVqSokSZI0GP5orhOQJEnS9LG4kyRJGiAWd5IkSQPE4k6SJGmAWNxJkiQNEIs7SZKkAWJxJ2lgJdk1yYqe15ck2bXPdftuO8Y2Dk3ylalsY9j2Lk/y5320WztJJVk4XfuWtPqwuJMEQJK/S7I0yZ1JVib5TpJnznVe40myc5LTk9ya5OYkP0nympHaVtUTq+r7/Wx3Im0nKskr2nm+M8ndSe7veX3nGDk9vqp+MBM5tbweluT/JPl1kjuSXJ3kEz3LV0yk4E3ylSSHzkSukkZncSeJJG8H/g/wEWBTYCvgC8Cec5lXryRrjxB7OvAfwH8C2wKPAt4IPH92s5uYqjquqh5RVY+gy/W6odct9gdGOvYZ8j7gScBOwAbAs4ELZmnfkqaJxZ20hkvySOCDwJuq6uSq+k1V/b6q/rWq3tXa7JzknNY7tjLJ55I8tGcbleQNSa5IckuSzydJz/LXJbms9QZdmuQpLf6YJCclWdV6id7cs86hSb7Ren9uB149QvofB46tqo9W1Y3VWVZVLxvlWK9J8lc92z8xyZdaXpckWTxK27WSvDfJla3tsiRbtmWfSXJtkttbfNxh0360XrJ3JbkIuKsntmt7/vQk5/a8J59N8pBRtvXCnvO/IsnbRtntU4GTq+q/27m8uqq+0rbxNeAxwHdaD+Pbk/xRe4/+u+Xx/SRPaO3/AXg58N7W/pSRhot7e/eSPHpYL+zZUz6R0hrI4k7S04F1gFPGaHMf8DZgk9b+OcA/DGvzQrri4MnAy4DnASTZCzgU2JeuN+hFwE1J/gj4V+BnwOZtm29N8ryebe4JfAPYEDiud2dJ1mu5fGMiBzvMi4Dj2/ZPAz43Sru3A/sAe7RjeC2t4ALOA3YENga+Cnw9yTpTyKnX3nQ9e48cYdm9wFvo3pNnALsDrx9lO/8C7F9V69P1zP3nKO3OBd6V5I1Jdugt0KtqH+A64Pmth/FTbdG3gEXAHwMXA19u7b8AnAB8pLV/SR/H+y7gKmBB294hfawjaRiLO0mPAm6sqntHa9B6w86tqnur6hrgi8BfDGt2WFXdWlW/Ar5HV/AA/D3wsao6r/UGLa+qX9IVgguq6oNV9buqugo4kq6gGXJOVX2zqu6vqruH7W8jut9hKyd53AA/rKrTq+o+uqLkyaO0+3vgfVV1eTuGn1XVTQBV9ZWquqmdm08CDwMeP4Wcen2mqlaMcOy08/njtt+rgCN48Hsy5PfA9knWr6qbq+r8Udp9CPgE8CpgGbAiyStHS669L8dU1R1V9Vu6In6nJA/v/xAflOdjgK3aZ2K0IlTSGCzuJN0EbDLWdV1JtkvyrTb8djvdtXmbDGv23z3P7wKGrh3bErhyhM0+FnhMG4K7NcmtwHvprvkbcu0Yed8C3A9sNkab8QzPeZ1RzsNox0CSd7Qhz9vaMTySB5+byRr1+JP8SZJv97wnHxxjvy+h66X8VRs6fdpIjVqh+H+r6s/oejM/BhyTZLtRclgryceSXNVyWN4WTfb4DwN+CZzVhsDfNcntSGs0iztJ5wC/BV48RpvDgZ8Di6pqA7oiLGO073UtsM0o8aurasOex/pVtUdPmxpto1V1V8v9b/vMYypGPIZ2fd1BdMPQG1XVhsBt9H9u
xjPq8dP1nl4MbNvek/ePtt/Ww/ci4NF0w6jHj7vjqrur6jPAncATRslnX7qh6mfTFbXbtnhGat96h+8B1usJ/3HP8tur6m1VtZDu83hQktF6IyWNwuJOWsNV1W10hcHnk7w4yXpJHpLk+Uk+1pqtD9wO3JnkT+hmpPbrKOCdSXZKZ9skjwV+Atye5KAk67ZeoB2SPHUC23438Oo28eBRAEmenGTc4mWCjgL+OcmidgxPavtbn+7at1XA2kneT3dN3mxYn66Q/E2bxDDi9Xbt3P5dkg2q6vfAHXTXUI7U9m1JntXWWTvJa+muxxyaMXs98LhhOdxD1/u7HvDhYZsc3h66ayxf0d7vFwD/c7udJH+dZJt2rd9tLc8Rc5U0Oos7SbSL499OdyuMVXQ9VQcC32xN3gn8HV1hcCTdhfL9bvvrdP/pf7Wt/01g43ad21/TXZt3NXAjXRE10uSB0bb9X3S9Rs8GrkpyM921Z6f3u40+fQo4EfguXZF7NLAucAbwHeAXdMOJv2XsoeTp9A5gP7pz+kXGfk/2A37Zhk73p7umbiS/pbslzvV078frgb9p10hCNxz/v9ow+lvpJmpc1x6XAP81bHtHAU9ON4N6aOLLm+mGiW8F9qKbyDLk8XS3trkT+BHdNYc/HOO4JI0gVWP1+kuSJGl1Ys+dJEnSALG4kyRJGiAWd5IkSQPE4k6SJGmAWNxJkiQNkFHvSL8m2GSTTWrhwoVznYYkSdK4li1bdmNVLRiv3Rpd3C1cuJClS5fOdRqSJEnjSvLL8Vs5LCtJkjRQLO4kSZIGiMWdJEnSALG4kyRJGiAWd5IkSQPE4k6SJGmAWNxJkiQNEIs7SZKkAWJxJ0mSNEAs7iRJkgbIrBR3SbZM8r0klyW5JMlbWnzjJGcmuaL93KjFk+SzSZYnuTDJU3q2tV9rf0WS/XriOyW5qK3z2SSZjWOTJEmaT2bru2XvBd5RVecnWR9YluRM4NXAWVV1WJKDgYOBg4DnA4va42nA4cDTkmwMfABYDFTbzmlVdUtrcwBwLnA6sDvwnakk/crPfHsqq6/2vvKWF8x1CpIkaYJmpeeuqlZW1fnt+R3AZcDmwJ7Asa3ZscCL2/M9gS9V51xgwySbAc8Dzqyqm1tBdyawe1u2QVWdU1UFfKlnW5IkSWuMWb/mLslC4E+BHwObVtVK6ApA4NGt2ebAtT2rrWixseIrRoiPtP8DkixNsnTVqlVTPRxJkqR5ZVaLuySPAE4C3lpVt4/VdIRYTSL+4GDVEVW1uKoWL1iwYLyUJUmSViuzVtwleQhdYXdcVZ3cwte3IVXazxtafAWwZc/qWwDXjRPfYoS4JEnSGmW2ZssGOBq4rKo+1bPoNGBoxut+wKk98X3brNldgNvasO0ZwG5JNmoza3cDzmjL7kiyS9vXvj3bkiRJWmPM1mzZZwCvAi5KckGLvRc4DDgxyf7Ar4C92rLTgT2A5cBdwGsAqurmJP8MnNfafbCqbm7P3wgcA6xLN0t2SjNlJUmSVkezUtxV1Q8Z+bo4gOeM0L6AN42yrSXAkhHiS4EdppCmJEnSas9vqJAkSRogFneSJEkDxOJOkiRpgFjcSZIkDRCLO0mSpAFicSdJkjRALO4kSZIGiMWdJEnSALG4kyRJGiAWd5IkSQPE4k6SJGmAWNxJkiQNEIs7SZKkAWJxJ0mSNEAs7iRJkgaIxZ0kSdIAsbiTJEkaIBZ3kiRJA8TiTpIkaYBY3EmSJA0QiztJkqQBYnEnSZI0QCzuJEmSBsisFHdJliS5IcnFPbETklzQHtckuaDFFya5u2fZ/+tZZ6ckFyVZnuSzSdLiGyc5M8kV7edGs3FckiRJ881s9dwdA+zeG6iql1fVjlW1I3AScHLP4iuHllXVG3rihwMHAIvaY2ibBwNnVdUi4Kz2WpIkaY0zK8VdVZ0N3DzSstb79jLga2NtI8lmwAZVdU5VFfAl4MVt8Z7Ase35sT1xSZKkNcp8uObuz4Hr
q+qKntjWSX6a5D+T/HmLbQ6s6GmzosUANq2qlQDt56NnOmlJkqT5aO25TgDYhz/stVsJbFVVNyXZCfhmkicCGWHdmujOkhxAN7TLVlttNYl0JUmS5q857blLsjbwN8AJQ7GquqeqbmrPlwFXAtvR9dRt0bP6FsB17fn1bdh2aPj2htH2WVVHVNXiqlq8YMGC6TwcSZKkOTfXw7J/Bfy8qv5nuDXJgiRrteePo5s4cVUbbr0jyS7tOr19gVPbaqcB+7Xn+/XEJUmS1iizdSuUrwHnAI9PsiLJ/m3R3jx4IsWzgAuT/Az4BvCGqhqajPFG4ChgOV2P3nda/DDguUmuAJ7bXkuSJK1xZuWau6raZ5T4q0eInUR3a5SR2i8FdhghfhPwnKllKUmStPqb62FZSZIkTSOLO0mSpAFicSdJkjRALO4kSZIGiMWdJEnSALG4kyRJGiAWd5IkSQPE4k6SJGmAWNxJkiQNEIs7SZKkAWJxJ0mSNEAs7iRJkgbIpIq7JOsmeeh0JyNJkqSp6au4S/KJJDu35y8AbgZuTfLXM5mcJEmSJqbfnrtXABe35+8HXgm8CPjITCQlSZKkyVm7z3brVdVdSR4FPK6qTgJI8tiZS02SJEkT1W9x94skrwC2Bc4ESLIJcPdMJSZJkqSJ67e4+wfgM8Dvgde22POA785EUpIkSZqcvoq7qjoP+LNhseOA42YiKUmSJE1O37dCSfLcJEcn+df2enGSZ89capIkSZqofm+F8o/A4cAVwLNa+G7gQzOUlyRJkiah3567twJ/VVWHAfe32M+Bx89IVpIkSZqUfou79YFr2/NqPx8C/G7aM5IkSdKk9VvcnQ0cPCz2ZuB7/aycZEmSG5Jc3BM7NMmvk1zQHnv0LHtPkuVJLk/yvJ747i22PMnBPfGtk/w4yRVJTvCr0SRJ0pqq3+LuH4GXJLkGWD/J5cBewNv7XP8YYPcR4p+uqh3b43SAJNsDewNPbOt8IclaSdYCPg88H9ge2Ke1Bfho29Yi4BZg/z7zkiRJGij93gplZZKnAk8FHks3RPuTqrp/7DX/Z/2zkyzsM6c9geOr6h7g6iTLgZ3bsuVVdRVAkuOBPZNcBjwb+LvW5ljgULoJIJIkSWuUfmfL7ghsUVU/qaqvV9W5wOZJnjzF/R+Y5MI2bLtRi23OA9f3AaxosdHijwJurap7h8UlSZLWOP0Oy36FbgJFr4cCX57Cvg8HtgF2BFYCn2zxjNC2JhEfUZIDkixNsnTVqlUTy1iSJGme67e422poOHRIVV0JLJzsjqvq+qq6rw3tHskDQ68rgC17mm4BXDdG/EZgwyRrD4uPtt8jqmpxVS1esGDBZNOXJEmal/ot7lYkeUpvoL0etYgaT5LNel6+BBiaSXsasHeShyXZGlgE/AQ4D1jUZsY+lG7SxWlVVXSzdl/a1t8POHWyeUmSJK3O+ppQAXwaODXJx4Ar6YZT3wl8uJ+Vk3wN2BXYJMkK4APAru1avgKuAV4PUFWXJDkRuBS4F3hTVd3XtnMgcAawFrCkqi5puzgIOD7Jh4CfAkf3eVySJEkDpd/ZskcmuZXuFiNb0k1seEdVfaPP9fcZITxqAVZVH2aEwrHdLuX0EeJX8cCwriRJ0hqr3547qurrwNdnMBdJkiRNUd/FXZLd6Ga2PqI3XlXvn+6kJEmSNDl9FXdJPge8jG7iwl09i0a95YgkSZJmX789d/sAO1bVteO2lCRJ0pzp91YoNwG3zmQikiRJmrp+e+4+CRyX5H8D1/cuGH5zY0mSJM2dfou7w9vPFw6LF9095yRJkjQP9Hufu36HbyVJkjSHJlS0JdkyyS4zlYwkSZKmpq/iLslWSX4E/Bz49xZ7aZKjZjI5SZIkTUy/PXdfBL4NrA/8vsXOBJ47E0lJkiRpcvqdULEz8IKquj9JAVTVbUkeOXOpSZIkaaL67bm7Hti2N5Bke+BX056RJEmSJq3f4u4TwLeSvAZYO8k+wAnAR2cs
M0mSJE1Yv7dCWZLkZuAA4FpgX+CQqvrmTCYnSZKkiRm3uEuyFvAB4MMWc5IkSfPbuMOyVXUf8CYemCUrSZKkearfa+6OBd4wk4lIkiRp6iZyK5R/TPJuumvuamhBVT1rJhKTJEnSxPVb3B3ZHpIkSZrH+p1QsQ3dhIp7Zj4lSZIkTZYTKiRJkgaIEyokSZIGSL/F3c7AZ5Jck+QHSc4eevSzcpIlSW5IcnFP7ONJfp7kwiSnJNmwxRcmuTvJBe3x/3rW2SnJRUmWJ/lskrT4xknOTHJF+7lR/6dAkiRpcPRb3B0J/D3dzYyPAo7uefTjGGD3YbEzgR2q6knAL4D39Cy7sqp2bI/eHsPD6b4lY1F7DG3zYOCsqloEnNVeS5IkrXH6/fqxY6eyk6o6O8nCYbHv9rw8F3jpWNtIshmwQVWd015/CXgx8B1gT2DX1vRY4PvAQVPJWZIkaXXUV3GX5LWjLauqJdOQx2uBE3peb53kp8DtwPuq6gfA5sCKnjYrWgxg06pa2fJZmeTR05CTJEnSaqff+9y9atjrP6a7PcqPgCkVd0n+CbgXOK6FVgJbVdVNSXYCvpnkiUBGWL1GiI23vwPohnbZaqutJpe0JEnSPNXvsOxfDo+13rwnTGXnSfYDXgg8p6qq7ese4J72fFmSK4Ht6HrqtuhZfQvguvb8+iSbtV67zYAbxjiWI4AjABYvXjzh4lCSJGk+63dCxUiOAfaf7MpJdqe7Lu5FVXVXT3xBu3EySR5HN3HiqjbsekeSXdos2X2BU9tqpwH7tef79cQlSZLWKP1ecze8CFwPeCVwa5/rf41uwsMmSVbQzbp9D/Aw4Mx2R5Nz28zYZwEfTHIvcB/whqq6uW3qjXRF5bp0Eym+0+KHAScm2R/4FbBXP3lJkiQNmn6vubuXB1/f9mvatWvjqap9RgiPeBuVqjoJOGmUZUuBHUaI3wQ8p59cJEmSBlm/xd3Ww17/pqpunO5kJEmSNDUT6bm7q6puGQq0b4FYt6quG301SZIkzaZ+J1R8kz+cqUp7fcr0piNJkqSp6Le4e3xVXdQbaK//ZPpTkiRJ0mT1W9zdkGTb3kB7fdP0pyRJkqTJ6re4WwKclOSFSbZP8tfAN4CjZi41SZIkTVS/EyoOA34PfALYku5eckcDn5qhvCRJkjQJ/X792P3Ax9tDkiRJ81Rfw7JJDk7y1GGxnZO8e2bSkiRJ0mT0e83dW4BLh8UuBd46velIkiRpKvot7h5Kd81dr98B60xvOpIkSZqKfou7ZcA/DIu9ATh/etORJEnSVPQ7W/ZtwJlJXgVcCWwLbAo8d6YSkyRJ0sT1O1v2kiTbAS+kuxXKycC3qurOmUxOkiRJE9Nvzx3AZsAvgWVVdcUM5SNJkqQpGPeauyR/k+Qa4HLgR8DPk1yT5KUznZwkSZImZsziLskLgH8BvgA8DlgX2AY4HDgqyQtnPENJkiT1bbxh2UOA11fV8T2xa4CPJvlVW/6tGcpNq7n/PvLlc53CnPnj150w1ylIktZQ4w3LPhE4ZZRlJwPbT286kiRJmorxirt7gA1GWbYh3Y2MJUmSNE+MV9z9G/C/R1n2EeCM6U1HkiRJUzHeNXcHAT9MciFwErCS7pYof0vXo/fMmU1PkiRJEzFmcVdVv07yFODtwO7AJsCNwKnAp6vq5plPUZIkSf0a9z53VXVLVR1SVU+vqkXt5yETLeySLElyQ5KLe2IbJzkzyRXt50YtniSfTbI8yYWtwBxaZ7/W/ook+/XEd0pyUVvns0kykfwkSZIGwbjF3TQ6hq73r9fBwFlVtQg4q70GeD6wqD0OoLuvHkk2Bj4APA3YGfjAUEHY2hzQs97wfUmSJA28WSvuqupsYHhv357Ase35scCLe+Jfqs65wIZJNgOeB5xZVTdX1S3AmcDubdkGVXVOVRXwpZ5tSZIkrTFms+duJJtW1UqA9vPRLb45cG1PuxUtNlZ8xQhxSZKkNcqoxV2Sc3uef2B2
0nlg9yPEahLxB284OSDJ0iRLV61aNYUUJUmS5p+xeu62S7JOe/6OGdr/9W1IlfbzhhZfAWzZ024L4Lpx4luMEH+QqjqiqhZX1eIFCxZMy0FIkiTNF2MVd6cCv0hyNrBukrNHekxx/6cBQzNe92v7HIrv22bN7gLc1oZtzwB2S7JRm0ixG3BGW3ZHkl3aLNl9e7YlSZK0xhj1PndV9ZokzwQWAk8Fjp7KjpJ8DdgV2CTJCrpZr4cBJybZH/gVsFdrfjqwB7AcuAt4Tcvp5iT/DJzX2n2w55Ysb6Sbkbsu8J32kCRJWqOMdxPjH9J9Q8VDq+rYsdqOp6r2GWXRc0ZoW8CbRtnOEmDJCPGlwA5TyVGSJGl1N97XjwFdQZXkL4FX0c1C/TXwlar6j5lMTpIkSRPT161Qkvw9cALw38DJdN8x+9Ukr5vB3CRJkjRBffXcAe8GnltVPxsKJDkBOAk4ciYSkyRJ0sT1exPjRwGXDotdDmw8velIkiRpKvot7n4IfCrJegBJHg58HPivmUpMkiRJE9dvcfcG4EnAbUmuB24Fngy8fqYSkyRJ0sT1O1t2JfAXSbYAHgNcV1UrxllNkiRJs6zfCRUAtILOok6SJGme6ndYVpIkSasBiztJkqQBMm5xl+SPkjw7yUNnIyFJkiRN3rjFXVXdD5xaVb+bhXwkSZI0Bf0Oy56dZJcZzUSSJElT1u9s2V8C30lyKnAtUEMLqur9M5GYJEmSJq7f4m5d4Jvt+RYzlIskSZKmqN+bGL9mphORJEnS1PV9E+MkTwBeCmxaVQcmeTzwsKq6cMaykyRJ0oT0NaEiyV7A2cDmwL4tvD7wqRnKS5IkSZPQ72zZDwLPrao3APe12M+AJ89IVpIkSZqUfou7R9MVc/DATNnqeS5JkqR5oN/ibhnwqmGxvYGfTG86kiRJmop+J1S8Gfhukv2Bhyc5A9gO2G3GMpMkSdKE9XsrlJ8n+RPghcC36G5k/K2qunMmk5MkSdLE9DssS1XdBfwI+D7wg+ko7JI8PskFPY/bk7w1yaFJft0T36NnnfckWZ7k8iTP64nv3mLLkxw81dwkSZJWR3313CXZCjgO2AW4BdgoyY+BV1TVLye786q6HNix7WMt4NfAKcBrgE9X1SeG5bE93bV+TwQeA/x7ku3a4s8DzwVWAOclOa2qLp1sbpIkSaujfnvujqWbVLFhVT0a2Ag4r8Wny3OAK8cpFvcEjq+qe6rqamA5sHN7LK+qq6rqd8Dxra0kSdIapd/ibifgXVX1G4A2JHtQi0+XvYGv9bw+MMmFSZYk2ajFNqe73m/IihYbLS5JkrRG6be4O5eud6zXYuCc6UgiyUOBFwFfb6HDgW3ohmxXAp8cajrC6jVGfKR9HZBkaZKlq1atmlLekiRJ882o19wl+WDPyyuB05N8m66HbEtgD+Cr05TH84Hzq+p6gKGfLY8j6WboQtcjt2XPelsA17Xno8X/QFUdARwBsHjxYm/CrHnptSe8dq5TmFNLXr5krlOQpNXWWD13W/Y81gFOBu6h+7aKe+gmPqwzTXnsQ8+QbJLNepa9BLi4PT8N2DvJw5JsDSyiu5HyecCiJFu3XsC9W1tJkqQ1yqg9d1X1mtlIIMl6dLNcX98T/liSHemGVq8ZWlZVlyQ5EbgUuBd4U1Xd17ZzIHAGsBawpKoumY38JUmS5pN+v6FiqAjbFnhEb7yq/msqCbT75z1qWGz4V531Lvsw8OER4qcDp08lF0mSpNVdv/e52xf4HPA74O6eRQVsNQN5SZIkaRL67bn7GPC3VXXmTCYjSZKkqen3Vii/o/vaMUmSJM1j/RZ3hwCfSrLJTCYjSZKkqem3uPsF3U2Gr09yX3vcn+S+GcxNkiRJE9TvNXdfBr4EnMAfTqiQJEnSPNJvcfco4P1V5Tc6SJIkzWP9Dsv+CzDqveckSZI0P/Tbc7czcGCSfwKu711QVc+a9qwkSZI0Kf0Wd0e2hyRJkuaxvoq7qjp2
phORJEnS1PX79WOvHW1ZVS2ZvnQkSZI0Ff0Oyw6fTPHHwDbAjwCLO0mSpHmi32HZvxwea715T5j2jCRJkjRp/d4KZSTHAPtPUx6SJEmaBv1ecze8CFwPeCVw67RnJEmSpEnr95q7e4Hh307xa+B105uOJEmSpqLf4m7rYa9/U1U3TncykiRJmpp+J1T8cqYTkSRJ0tSNWdwl+R4PHo7tVVX1nOlNSZIkSZM1Xs/dV0aJbw68mW5ihSRJkuaJMYu7qjq693WSRwHvoZtIcQLwwZlLTZIkSRPV133ukmyQ5J+B5cCmwFOq6oCqWjGj2UmSJGlCxrvmbl3grcA7gO8Dz6yqS6Y7iSTXAHcA9wH3VtXiJBvT9Q4uBK4BXlZVtyQJ8BlgD+Au4NVVdX7bzn7A+9pmP1RVx053rpLmv2WvO2CuU5hTOx15xFynIGkOjXfN3dXAWsDHgKXApkk27W1QVf8xTbn85bDbqxwMnFVVhyU5uL0+CHg+sKg9ngYcDjytFYMfABbTTQJZluS0qrplmvKTJEma98Yr7n5LVyi9cZTlBTxuWjN6wJ7Aru35sXQ9hwe1+JeqqoBzk2yYZLPW9syquhkgyZnA7sDXZig/SZKkeWe8CRULZymPAr6bpIAvVtURwKZVtbLlsTLJo1vbzYFre9Zd0WKjxSVJktYY/X5DxUx7RlVd1wq4M5P8fIy2GSFWY8T/cOXkAOAAgK222moyuUqSJM1bfc2WnWlVdV37eQNwCrAzcH0bbqX9vKE1XwFs2bP6FsB1Y8SH7+uIqlpcVYsXLFgw3YciSZI0p+a8uEvy8CTrDz0HdgMuBk4D9mvN9gNObc9PA/ZNZxfgtjZ8ewawW5KNkmzUtnPGLB6KJEnSnJsPw7KbAqd0dzhhbeCrVfVvSc4DTkyyP/ArYK/W/nS626Asp7sVymsAqurmdi++81q7Dw5NrpAkSVpTzHlxV1VXAU8eIX4T8KDvrW2zZN80yraWAEumO0dJkqTVxZwPy0qSJGn6WNxJkiQNEIs7SZKkAWJxJ0mSNEAs7iRJkgaIxZ0kSdIAsbiTJEkaIBZ3kiRJA8TiTpIkaYBY3EmSJA0QiztJkqQBYnEnSZI0QCzuJEmSBojFnSRJ0gCxuJMkSRrPmHGKAAAMV0lEQVQgFneSJEkDxOJOkiRpgFjcSZIkDRCLO0mSpAFicSdJkjRALO4kSZIGiMWdJEnSALG4kyRJGiBzWtwl2TLJ95JcluSSJG9p8UOT/DrJBe2xR88670myPMnlSZ7XE9+9xZYnOXgujkeSJGmurT3H+78XeEdVnZ9kfWBZkjPbsk9X1Sd6GyfZHtgbeCLwGODfk2zXFn8eeC6wAjgvyWlVdemsHIUkSdI8MafFXVWtBFa253ckuQzYfIxV9gSOr6p7gKuTLAd2bsuWV9VVAEmOb20t7iRJ0hpl3lxzl2Qh8KfAj1vowCQXJlmSZKMW2xy4tme1FS02WlySJGmNMi+KuySPAE4C3lpVtwOHA9sAO9L17H1yqOkIq9cY8ZH2dUCSpUmWrlq1asq5S5IkzSdzXtwleQhdYXdcVZ0MUFXXV9V9VXU/cCQPDL2uALbsWX0L4Lox4g9SVUdU1eKqWrxgwYLpPRhJkqQ5NqfX3CUJcDRwWVV9qie+WbseD+AlwMXt+WnAV5N8im5CxSLgJ3Q9d4uSbA38mm7Sxd/NzlFI0mA54dNnz3UKc+blb3vWXKcgTdlcz5Z9BvAq4KIkF7TYe4F9kuxIN7R6DfB6gKq6JMmJdBMl7gXeVFX3ASQ5EDgDWAtYUlWXzOaBSJIkzQdzPVv2h4x8vdzpY6zzYeDDI8RPH2s9SZKkNcGcX3MnSZKk6WNxJ0mSNEAs7iRJkgaIxZ0kSdIAsbiTJEkaIBZ3kiRJA8TiTpIkaYBY3EmSJA0QiztJkqQBYnEnSZI0QCzuJEmSBojFnSRJ0gCxuJMkSRoga891ApIkDYqjD3n3XKcwp/b/54/NdQrCnjtJ
kqSBYnEnSZI0QByWlSRJ88LVR5831ynMqa33f+q0bMeeO0mSpAFicSdJkjRALO4kSZIGiMWdJEnSALG4kyRJGiAWd5IkSQNkoIq7JLsnuTzJ8iQHz3U+kiRJs21girskawGfB54PbA/sk2T7uc1KkiRpdg1McQfsDCyvqquq6nfA8cCec5yTJEnSrBqk4m5z4Nqe1ytaTJIkaY2RqprrHKZFkr2A51XV37fXrwJ2rqp/HNbuAOCA9vLxwOWzmujEbALcONdJrKY8d1Pj+Zsaz9/UeP4mz3M3NfP9/D22qhaM12iQvlt2BbBlz+stgOuGN6qqI4AjZiupqUiytKoWz3UeqyPP3dR4/qbG8zc1nr/J89xNzaCcv0Ealj0PWJRk6yQPBfYGTpvjnCRJkmbVwPTcVdW9SQ4EzgDWApZU1SVznJYkSdKsGpjiDqCqTgdOn+s8ptFqMXw8T3nupsbzNzWev6nx/E2e525qBuL8DcyECkmSJA3WNXeSJElrPIu7aZbkviQXJLk4ydeTrJdkYZKLZ2n/Wyb5XpLLklyS5C2zsd/pMtfnr+WwJMkNs7nP6TDX5y7JOkl+kuRn7bP3v2ZwX/P6c5Lk6UmOTLJrktuS/LT9m/xAW/7qJJ+bpTyfm2RZkovaz2fP0H7m/D1peazVzve3Zmj7c/3vbMzf8fPss7dzO1cXtN8LLxmn/X097S+Y6NeIJjk0yTsn0P6YJFe3fZ2f5Okt/v0kszJjNsle7X28fzr3aXE3/e6uqh2ragfgd8AbZnn/9wLvqKonALsAb1rNvoZtrs8fwDHA7nOw36ma63N3D/DsqnoysCOwe5JdZmhfc32sMPbnZHfg39rzH1TVnwKLgVcm2WkWcut1I/DXVfX/AfsBX56h/cyH9wTgLcBlM7j9uT7O8X7Hz6fP3sXA4qraseX1xSRjXes/dG6HHof1u6NxtjuWd7X8Dga+OMltTMXFwN8AZ0/nRi3uZtYPgG3b87XaX1OXJPluknUBkrwuyXntr5qTkqzX4nu1vwx/luTsFlsrycdb+wuTvH74DqtqZVWd357fQfdLbnX9po5ZP38AVXU2cPMsHN9MmovPXlXVne3lQ9pjNi7qnY+fk+cA/z6s/W+AZcA2LfSYJP+W5IokHxtql+TwJEszrPczyWFJLm05faLFFrTjOa89njFCnj+tqqF7fl4CrJPkYaOfzmkxJ+9Jki2AFwBHzfDxDZmPv+Pn02fvrqq6t71ch0n+Pkjy/raPi5MckSQt/v0kH0nyn3RF/VD7bZKc3/N6UZJl4+zmbB54LwH2SjcS8Yskf962szDJD9L18p2f5M9afLMkZ+eBHt2h9rslOae1/XqSRwzfaVVdVlXT/2UKVeVjGh/Ane3n2sCpwBuBhXR/be3Ylp0IvLI9f1TPuh8C/rE9vwjYvD3fsP08AHhfe/4wYCmw9Ri5LAR+BWww1+dldTt/bZ8Xz/X5WN3OHd1tiC4A7gQ+OsjHOtrnhO4O999rz3cFvjWUA3AN8ETg1cBVwCPp/tP7JbBla7dxz7n8PvAkYGO6b9PJsFy/CjyzPd8KuGyc8/ZS4N8H9T0BvgHs1HveB/E4h33+/ud3/Hz87AFPo/uj4k7gJeOc2/vofn8MPV7em1d7/mW6nmhajl/oWXYo8M72/Hs978dHhs77sP0dA7y0Pd8L+HHPdj/Znu9B+zcDrAes054vApa25+8A/qnn3K3f3ouzgYe3+EHA+8c49u/T9XJOy+d0oG6FMk+sm+SC9vwHwNHAY4Crq2oovozuHyXADkk+BGwIPILuPn0APwKOSXIicHKL7QY8KclL2+tH0n3Arh6eRPsL4STgrVV1+zQd22yYF+dvNTXn566q7gN2TLIhcEqSHapqJq5FmvNjHcNuwHd7Xv95kp8C9wOHVdUlSZ4KnFVVtwEkuRR4LN33Y78s3dckrg1sBmwPXAr8FjgqybeBoevJ/grYvnVkAGyQZP3qenT+QJInAh9t+c2E
OX1PkrwQuKGqliXZdZqPrde8+OyN8jt+3n32qurHwBOTPAE4Nsl3quq3DzqrnburGyId7i+TvJuuuNqYrlj817bshFG2dRTwmiRvB14O7DxKu48neR+wCti/Jz70nvS+lw8BPpdkR7pCdLsWPw9YkuQhwDer6oIkf0F3/n7UztFDgXNGyWHaWdxNvwd9ONsbe09P6D5g3fb8GODFVfWzJK+m+2uLqnpDkqfRDTFc0D5Mofvr4wzG0D5gJwHHVdXJY7Wdh+b8/K3G5s25q6pbk3yf7jqbmSju5s2xjuD5wKd6Xv+gql44Qrvhua6dZGvgncBTq+qWJMfQ9RTcm2RnuiG3vYEDgWfTXVrz9Kq6e6yE0g1XngLsW1VXTvK4xjPX78kzgBcl2YOuR2qDJF+pqldO9cCGmevjHOt3/Lz77A2pqsuS/AbYga5Hsi9J1gG+QNerdW2SQ+ne3yG/GWXVk4APAP8BLKuqm0Zp966q+sYI8aFzdB8P1EpvA64Hnkx3/L+F7hKNJM+iey+/nOTjwC3AmVW1z/hHOf285m7urQ+sbP9YXzEUTLJNVf24qt5Pd0H0lnR/8b2xtSXJdkke3ruxdL9ljqbrIu/9Rz6opvX8rWGm+7O3oPXYke56o78Cfj47hzKuWfmctH9/T6IbUpqMDej+s7otyaZ0/1kP9dI8srobtb+VbsIKdL00B/bs/0G9Hu09+Tbwnqr60STzmgnT+p5U1XuqaouqWkhXhPzHDBR2kzErv+Pn6Wdv67SJDkkeCzyebnh4IoYKuRtbLi8dq/GQ1jt4BnA48C8T3OdoHgmsrKr7gVfRDcEOHdsNVXUk3XvzFOBc4BlJtm1t1kuy3cibnX723M29Q4Af0133cBHdLwLouooX0f0ldxbwM+BCuu7h89s/5FXAi4dt7xl0H7qLeoYO3tv+YQ6i6T5/JPka3V/XmyRZAXygqo6e2cOYE9N97jajG3ZZi+4PxxOrakZuRzEJs/I5aev/tNpFNBPVend+SjfsdBXd0B0t31NbL0boehAA3gx8PsmFdL/Pz+bBszcPpLtQ/JAkh7TYblV1w2RynEbT/p7MU7PyOx64gfn32XsmcHCS39MNDf9DVd04Rhq9Q94A/1ZVByc5ku7cXUM3BNqv4+hmon53vIZ9+gJwUpK96K7pG+o13BV4VzvOO+l6yFe1ntqv5YEJTO8DftG7wXS3h/m/wALg20kuqKrnTTVRv6FCkqZJumt3llfV8XOdi9YsfvYeLN097x5ZVYeM23jAWNxJkqSBkuQUulu/PHuc3sKBZHEnSZI0QJxQIUmSNEAs7iRJkgaIxZ0kSdIAsbiTJEkaIBZ3kiRJA8TiTpIkaYD8/w3p2/MPN9/EAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 720x360 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Vertical seaborn bar chart of trial counts per phase, most frequent first.\n",
    "phase = df['phase'].value_counts(ascending=False, sort=True)\n",
    "os.makedirs('./image', exist_ok=True)  # savefig does not create missing directories\n",
    "plt.figure(figsize=(10, 5))\n",
    "# x and y are passed by keyword: seaborn >= 0.12 rejects them positionally.\n",
    "sns.barplot(x=phase.index, y=phase.values, alpha=0.8)\n",
    "# The chart shows the phase column, so the title names phase rather than status.\n",
    "plt.title('Cancer Clinical Trials by Phase')\n",
    "plt.ylabel('Number of Occurrences', fontsize=12)\n",
    "plt.savefig('./image/phase2.png')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nct_id</th>\n",
       "      <th>brief_title</th>\n",
       "      <th>official_title</th>\n",
       "      <th>overall_status</th>\n",
       "      <th>start_date</th>\n",
       "      <th>completion_date</th>\n",
       "      <th>phase</th>\n",
       "      <th>study_type</th>\n",
       "      <th>brief_summary</th>\n",
       "      <th>detailed_description</th>\n",
       "      <th>enrollment</th>\n",
       "      <th>condition</th>\n",
       "      <th>intervention_name</th>\n",
       "      <th>eligibility</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NCT00000124</td>\n",
       "      <td>Collaborative Ocular Melanoma Study (COMS)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Unknown status</td>\n",
       "      <td>November 1986</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      To evaluate therapeutic interventions ...</td>\n",
       "      <td>\\n      For more than 100 years, removal of th...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Uveitis</td>\n",
       "      <td>Brachytherapy, Eye Removal</td>\n",
       "      <td>\\n        Men and women eligible for the study...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        nct_id                                 brief_title official_title  \\\n",
       "0  NCT00000124  Collaborative Ocular Melanoma Study (COMS)            NaN   \n",
       "\n",
       "   overall_status     start_date completion_date    phase      study_type  \\\n",
       "0  Unknown status  November 1986             NaN  Phase 3  Interventional   \n",
       "\n",
       "                                       brief_summary  \\\n",
       "0  \\n      To evaluate therapeutic interventions ...   \n",
       "\n",
       "                                detailed_description  enrollment condition  \\\n",
       "0  \\n      For more than 100 years, removal of th...         NaN   Uveitis   \n",
       "\n",
       "            intervention_name  \\\n",
       "0  Brachytherapy, Eye Removal   \n",
       "\n",
       "                                         eligibility  \n",
       "0  \\n        Men and women eligible for the study...  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Amrit\\Anaconda3\\envs\\ML\\lib\\site-packages\\ipykernel_launcher.py:2: FutureWarning: using a dict on a Series for aggregation\n",
      "is deprecated and will be removed in a future version\n",
      "  \n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>phase</th>\n",
       "      <th>enrolment</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Phase 3</td>\n",
       "      <td>626</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Phase 4</td>\n",
       "      <td>570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Phase 2/Phase 3</td>\n",
       "      <td>350</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Phase 2</td>\n",
       "      <td>91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Early Phase 1</td>\n",
       "      <td>87</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Phase 1/Phase 2</td>\n",
       "      <td>64</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Phase 1</td>\n",
       "      <td>44</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             phase  enrolment\n",
       "0          Phase 3        626\n",
       "1          Phase 4        570\n",
       "2  Phase 2/Phase 3        350\n",
       "3          Phase 2         91\n",
       "4    Early Phase 1         87\n",
       "5  Phase 1/Phase 2         64\n",
       "6          Phase 1         44"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "<Figure size 432x432 with 0 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(6,6))\n",
    "patient = df.groupby('phase')['enrollment'].aggregate({'enrolment':'mean'}).sort_values('enrolment',ascending=False).astype(int).reset_index()#.plot(kind='barh', width=0.9, color=sns.color_palette(\"bright\", 7), alpha=0.8)\n",
    "patient"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     44\n",
       "1     64\n",
       "2     87\n",
       "3     91\n",
       "4    350\n",
       "5    570\n",
       "6    626\n",
       "Name: enrolment, dtype: int32"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "patient.enrolment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 648x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(9,6))\n",
    "sns.barplot(patient.phase, patient.enrolment, alpha=0.8)\n",
    "plt.title('Average Patient Enrollment', fontsize=14)\n",
    "plt.xlabel('', fontsize=12)\n",
    "plt.ylabel('Patient number', fontsize=14)\n",
    "#plt.xlabel('city', fontsize=12)\n",
    "plt.savefig('./image/patient_number.png')\n",
    "plt.show()\n",
    "\n",
    "# plt.plot(x = 'phase', y='mean', data=patient, kind='barh', width=0.9, color=sns.color_palette(\"bright\", 7), alpha=0.8)\n",
    "# plt.title('Average Patient Participation for ')\n",
    "# plt.xlabel('Average Patient', fontsize=12)\n",
    "# plt.savefig('./image/phase1.png', bbox_inches = \"tight\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# grouped = data.groupby('month').agg(\"duration\": [min, max, mean])\n",
    "# grouped.columns = grouped.columns.droplevel(level=0)\n",
    "# grouped.rename(columns={\n",
    "#     \"min\": \"min_duration\", \"max\": \"max_duration\", \"mean\": \"mean_duration\"\n",
    "# })\n",
    "# grouped.head()\n",
    "#df['enrollment'] = df['enrollment'].astype(int)\n",
    "# patient = df.groupby('phase')['enrollment'].describe().unstack()\n",
    "# patient = patient.astype(int)\n",
    "# patient\n",
    "# patient = df.groupby('phase')['enrollment'].aggregate(['min', np.median, max])\n",
    "# patient = patient.astype(int)\n",
    "#patient.columns = patient.columns.droplevel(level=0)\n",
    "#grouped.rename(columns={\n",
    "#    \"min\": \"min_duration\", \"max\": \"max_duration\", \"mean\": \"mean_duration\"\n",
    "#3})\n",
    "ax = sns.boxplot(x=\"phase\", y=\"enrollment\", hue='phase',data=df, palette='bright')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 84,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 648x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "#sns.set(style=\"darkgrid\", rc={'figure.figsize':(20,15)},color_codes=True,font_scale=3)\n",
    "plt.figure(figsize=(9,6))\n",
    "sns.set_style(\"ticks\")\n",
    "b = sns.boxplot(x='phase', y='enrollment',hue='phase', data=df)\n",
    "#b.set(ylim=(0, 1000))\n",
    "#plt.yticks(list(range(10000, 0, -1000)))\n",
    "plt.title('Patient distribution', fontsize=14)\n",
    "plt.xlabel('')           \n",
    "plt.ylabel('Patient number', fontsize=14)\n",
    "plt.savefig('./image/patient_stat.png');\n",
    "\n",
    "#plt.savefig('character length of reviews.png');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Completed                  26678\n",
       "Recruiting                 12889\n",
       "Active, not recruiting      5803\n",
       "Terminated                  5695\n",
       "Unknown status              5593\n",
       "Not yet recruiting          2683\n",
       "Withdrawn                   1749\n",
       "Enrolling by invitation      363\n",
       "Suspended                    324\n",
       "Name: overall_status, dtype: int64"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.overall_status.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 432x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.figure(figsize=(6,6))\n",
    "df.overall_status.value_counts(ascending=True, sort=True).plot(kind='barh', width=0.9, color=sns.color_palette(\"bright\", 7), alpha=0.8)\n",
    "plt.title('Overall Status of Cancer Trials')\n",
    "plt.xlabel('Number of Occurrences', fontsize=12)\n",
    "plt.savefig('./image/status1.png', bbox_inches = \"tight\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 720x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "status = df.overall_status.value_counts(ascending=False, sort=True)#.reset_index()\n",
    "plt.figure(figsize=(10,5))\n",
    "sns.barplot(status.index, status.values, alpha=0.8)\n",
    "plt.title('Overall Status of Cancer Trials')\n",
    "plt.ylabel('Number of Occurrences', fontsize=12)\n",
    "#plt.xlabel('city', fontsize=12)\n",
    "plt.savefig('./image/status2.png')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array(['Unknown status', 'Completed', 'Recruiting', 'Terminated',\n",
       "       'Active, not recruiting', 'Withdrawn', 'Suspended',\n",
       "       'Enrolling by invitation', 'Not yet recruiting'], dtype=object)"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.overall_status.unique()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0                                                  Uveitis\n",
       "1                                Cytomegalovirus Retinitis\n",
       "2                                       Adjuvant Arthritis\n",
       "3                                  Spondylitis, Ankylosing\n",
       "4                                        Vascular Diseases\n",
       "5                                            Postmenopause\n",
       "6                                      Myocardial Ischemia\n",
       "7                                      Myocardial Ischemia\n",
       "8                                      Myocardial Ischemia\n",
       "9                                        Blood Transfusion\n",
       "10                               Myelodysplastic Syndromes\n",
       "11                               Myelodysplastic Syndromes\n",
       "12                                          HIV Infections\n",
       "13                                          HIV Infections\n",
       "14                                   Graft vs Host Disease\n",
       "15                            Immunoproliferative Disorder\n",
       "16                              Zollinger Ellison Syndrome\n",
       "17                                                 Sarcoma\n",
       "18                                               Neoplasms\n",
       "19                              Zollinger Ellison Syndrome\n",
       "20                                     Neoplasm Metastasis\n",
       "21                                        Sarcoma, Ewing's\n",
       "22                                            Osteosarcoma\n",
       "23                              Zollinger Ellison Syndrome\n",
       "24                                       Gaucher's Disease\n",
       "25                         Lymphoma, Small Noncleaved-Cell\n",
       "26                                        Breast Neoplasms\n",
       "27                             Lymphoma, T-Cell, Cutaneous\n",
       "28                                     Neoplasm Metastasis\n",
       "29                                      Meningeal Neoplasm\n",
       "                               ...                        \n",
       "61747    Secondary Malignant Neoplasm of Brain and Cere...\n",
       "61748                                      Raw Corn Starch\n",
       "61749                                      Health Behavior\n",
       "61750                   Recurrent Nasopharyngeal Carcinoma\n",
       "61751                                              Healthy\n",
       "61752                         Recurrent Pituitary Adenomas\n",
       "61753                                        Breast Cancer\n",
       "61754                                Lymphoma, Non-Hodgkin\n",
       "61755                               Secondary Osteoporosis\n",
       "61756                                  Sentinel Lymph Node\n",
       "61757                                           AML or MDS\n",
       "61758                                     Multiple Myeloma\n",
       "61759                               Advanced Breast Cancer\n",
       "61760                                     Richter Syndrome\n",
       "61761                                        Breast Cancer\n",
       "61762                                                PHA1A\n",
       "61763                                Advanced Solid Tumors\n",
       "61764                                      Malignant Tumor\n",
       "61765                                                 DCIS\n",
       "61766                                        Rectal Cancer\n",
       "61767                                       Tumor Necrosis\n",
       "61768                                                Aging\n",
       "61769                                             Melanoma\n",
       "61770                             Nasopharyngeal Carcinoma\n",
       "61771                      Oral Mucositis Due to Radiation\n",
       "61772                                   Relapsed Adult AML\n",
       "61773    Phase II: Relapsed or Refractory Mantle Cell L...\n",
       "61774                                   Endometrial Cancer\n",
       "61775                        Triple-negative Breast Cancer\n",
       "61776      Nectin4-positive Advanced Malignant Solid Tumor\n",
       "Name: condition, Length: 61777, dtype: object"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.condition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Searching for conditions with related to cancer only\n",
    "df.condition = df.condition.str.lower()\n",
    "\n",
    "cancer = [\"cancer\" , \"neoplasm\" , \"oma\", \"tumor\"]\n",
    "\n",
    "pattern = '|'.join(cancer)\n",
    "#pattern\n",
    "\n",
    "df['condition_cancer'] = df.condition.str.contains(pattern)\n",
    "\n",
    "#df['condition_cancer'] = df.loc[df.condition.isin]\n",
    "\n",
    "# def conditionCancerOnly(conditions):\n",
    "#     filteredList = []\n",
    "#     for c in cancer:\n",
    "#         if isCancer(c):\n",
    "#             filteredList.append(c )\n",
    "\n",
    "#     return filteredList\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nct_id</th>\n",
       "      <th>brief_title</th>\n",
       "      <th>official_title</th>\n",
       "      <th>overall_status</th>\n",
       "      <th>start_date</th>\n",
       "      <th>completion_date</th>\n",
       "      <th>phase</th>\n",
       "      <th>study_type</th>\n",
       "      <th>brief_summary</th>\n",
       "      <th>detailed_description</th>\n",
       "      <th>enrollment</th>\n",
       "      <th>condition</th>\n",
       "      <th>intervention_name</th>\n",
       "      <th>eligibility</th>\n",
       "      <th>condition_cancer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>NCT00000124</td>\n",
       "      <td>Collaborative Ocular Melanoma Study (COMS)</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Unknown status</td>\n",
       "      <td>November 1986</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      To evaluate therapeutic interventions ...</td>\n",
       "      <td>\\n      For more than 100 years, removal of th...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>uveitis</td>\n",
       "      <td>Brachytherapy, Eye Removal</td>\n",
       "      <td>\\n        Men and women eligible for the study...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>NCT00000136</td>\n",
       "      <td>Studies of the Ocular Complications of AIDS (S...</td>\n",
       "      <td>Foscarnet-Ganciclovir CMV Retinitis Trial</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1990</td>\n",
       "      <td>October 1991</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      To evaluate the relative safety and ef...</td>\n",
       "      <td>\\n      CMV retinitis is the most common intra...</td>\n",
       "      <td>234.0</td>\n",
       "      <td>cytomegalovirus retinitis</td>\n",
       "      <td>Ganciclovir, Foscarnet, Phosphonoacetic Acid, ...</td>\n",
       "      <td>\\n        Inclusion criteria:\\n\\n          -  ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NCT00000395</td>\n",
       "      <td>Antifolate Effectiveness in Arthritis</td>\n",
       "      <td>Mechanisms of Antifolate Efficacy in Arthritis</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1996</td>\n",
       "      <td>August 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study looks at how the arthritis ...</td>\n",
       "      <td>\\n      Low-dose methotrexate therapy suppress...</td>\n",
       "      <td>40.0</td>\n",
       "      <td>adjuvant arthritis</td>\n",
       "      <td>Methotrexate, Folic Acid Antagonists, Folic Ac...</td>\n",
       "      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>NCT00000433</td>\n",
       "      <td>Blocking Tumor Necrosis Factor in Ankylosing S...</td>\n",
       "      <td>Anti-Tumor Necrosis Factor (TNFR:Fc) in Ankylo...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>October 1999</td>\n",
       "      <td>March 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The Division of Rheumatology at Univer...</td>\n",
       "      <td>\\n      In this Phase II clinical trial we wil...</td>\n",
       "      <td>42.0</td>\n",
       "      <td>spondylitis, ankylosing</td>\n",
       "      <td>Anti-Tumor Necrosis Factor</td>\n",
       "      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>NCT00000479</td>\n",
       "      <td>Women's Health Study (WHS): A Randomized Trial...</td>\n",
       "      <td>Women's Health Study of Low-dose Aspirin and V...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1992</td>\n",
       "      <td>February 2005</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The purpose of this study is to evalua...</td>\n",
       "      <td>\\n      BACKGROUND:\\n\\n      Various doses of ...</td>\n",
       "      <td>39876.0</td>\n",
       "      <td>vascular diseases</td>\n",
       "      <td>Vitamins, Vitamin E, Tocopherols, Tocotrienols...</td>\n",
       "      <td>\\n        Inclusion Criteria:\\n\\n          -  ...</td>\n",
       "      <td>False</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        nct_id                                        brief_title  \\\n",
       "0  NCT00000124         Collaborative Ocular Melanoma Study (COMS)   \n",
       "1  NCT00000136  Studies of the Ocular Complications of AIDS (S...   \n",
       "2  NCT00000395              Antifolate Effectiveness in Arthritis   \n",
       "3  NCT00000433  Blocking Tumor Necrosis Factor in Ankylosing S...   \n",
       "4  NCT00000479  Women's Health Study (WHS): A Randomized Trial...   \n",
       "\n",
       "                                      official_title  overall_status  \\\n",
       "0                                                NaN  Unknown status   \n",
       "1          Foscarnet-Ganciclovir CMV Retinitis Trial       Completed   \n",
       "2     Mechanisms of Antifolate Efficacy in Arthritis       Completed   \n",
       "3  Anti-Tumor Necrosis Factor (TNFR:Fc) in Ankylo...       Completed   \n",
       "4  Women's Health Study of Low-dose Aspirin and V...       Completed   \n",
       "\n",
       "       start_date completion_date    phase      study_type  \\\n",
       "0   November 1986             NaN  Phase 3  Interventional   \n",
       "1      March 1990    October 1991  Phase 3  Interventional   \n",
       "2  September 1996     August 2002  Phase 2  Interventional   \n",
       "3    October 1999      March 2002  Phase 2  Interventional   \n",
       "4  September 1992   February 2005  Phase 3  Interventional   \n",
       "\n",
       "                                       brief_summary  \\\n",
       "0  \\n      To evaluate therapeutic interventions ...   \n",
       "1  \\n      To evaluate the relative safety and ef...   \n",
       "2  \\n      This study looks at how the arthritis ...   \n",
       "3  \\n      The Division of Rheumatology at Univer...   \n",
       "4  \\n      The purpose of this study is to evalua...   \n",
       "\n",
       "                                detailed_description  enrollment  \\\n",
       "0  \\n      For more than 100 years, removal of th...         NaN   \n",
       "1  \\n      CMV retinitis is the most common intra...       234.0   \n",
       "2  \\n      Low-dose methotrexate therapy suppress...        40.0   \n",
       "3  \\n      In this Phase II clinical trial we wil...        42.0   \n",
       "4  \\n      BACKGROUND:\\n\\n      Various doses of ...     39876.0   \n",
       "\n",
       "                   condition  \\\n",
       "0                    uveitis   \n",
       "1  cytomegalovirus retinitis   \n",
       "2         adjuvant arthritis   \n",
       "3    spondylitis, ankylosing   \n",
       "4          vascular diseases   \n",
       "\n",
       "                                   intervention_name  \\\n",
       "0                         Brachytherapy, Eye Removal   \n",
       "1  Ganciclovir, Foscarnet, Phosphonoacetic Acid, ...   \n",
       "2  Methotrexate, Folic Acid Antagonists, Folic Ac...   \n",
       "3                         Anti-Tumor Necrosis Factor   \n",
       "4  Vitamins, Vitamin E, Tocopherols, Tocotrienols...   \n",
       "\n",
       "                                         eligibility  condition_cancer  \n",
       "0  \\n        Men and women eligible for the study...             False  \n",
       "1  \\n        Inclusion criteria:\\n\\n          -  ...             False  \n",
       "2  \\n        Inclusion Criteria:\\n\\n          -  ...             False  \n",
       "3  \\n        Inclusion Criteria:\\n\\n          -  ...             False  \n",
       "4  \\n        Inclusion Criteria:\\n\\n          -  ...             False  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.loc[df['condition_cancer'] ==True,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(42725, 15)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "breast cancer                                                                                            2919\n",
       "prostate cancer                                                                                          1888\n",
       "colorectal cancer                                                                                        1012\n",
       "lung cancer                                                                                              1009\n",
       "multiple myeloma                                                                                          996\n",
       "cancer                                                                                                    972\n",
       "lymphoma                                                                                                  855\n",
       "non-small cell lung cancer                                                                                731\n",
       "pancreatic cancer                                                                                         659\n",
       "unspecified adult solid tumor, protocol specific                                                          656\n",
       "head and neck cancer                                                                                      524\n",
       "hepatocellular carcinoma                                                                                  496\n",
       "melanoma                                                                                                  481\n",
       "gastric cancer                                                                                            460\n",
       "ovarian cancer                                                                                            441\n",
       "neoplasms                                                                                                 424\n",
       "solid tumors                                                                                              375\n",
       "metastatic breast cancer                                                                                  370\n",
       "breast neoplasms                                                                                          317\n",
       "carcinoma, non-small-cell lung                                                                            315\n",
       "cervical cancer                                                                                           289\n",
       "non small cell lung cancer                                                                                289\n",
       "sarcoma                                                                                                   288\n",
       "metastatic colorectal cancer                                                                              285\n",
       "bladder cancer                                                                                            285\n",
       "rectal cancer                                                                                             282\n",
       "brain and central nervous system tumors                                                                   271\n",
       "esophageal cancer                                                                                         260\n",
       "advanced solid tumors                                                                                     249\n",
       "glioblastoma                                                                                              223\n",
       "                                                                                                         ... \n",
       "intraabdominal cancers (various types)                                                                      1\n",
       "carcinoma of renal pelvis                                                                                   1\n",
       "recurrent idhwt gliomas with fgfr1-tacc1 fusion                                                             1\n",
       "cancer of the larynx                                                                                        1\n",
       "respiratory cancer                                                                                          1\n",
       "urothelial bladder carcinoma                                                                                1\n",
       "locally advanced and unresectable, but non-metastatic pancreatic adenocarcinoma or cholangiocarcinoma       1\n",
       "tumor with alterations of the fgf-r                                                                         1\n",
       "nasal cancer                                                                                                1\n",
       "macroprolactinoma                                                                                           1\n",
       "ovarian, fallopian tube, and primary peritoneal cancer                                                      1\n",
       "non-squamous cell lung cancer with wild-type kras                                                           1\n",
       "stage ib1 cervical cancer ajcc v6 and v7                                                                    1\n",
       "refractory transformed non-hodgkin lymphoma                                                                 1\n",
       "malt-lymphoma                                                                                               1\n",
       "progressive metastatic prostate cancer                                                                      1\n",
       "stage i prostate cancer                                                                                     1\n",
       "mandibular neoplasms                                                                                        1\n",
       "glioblastoma, who grade iv                                                                                  1\n",
       "advanced or metastatic biliary tract cancer                                                                 1\n",
       "non-small cell lung cancer - completely resectable                                                          1\n",
       "phase ii: relapsed or refractory mantle cell lymphoma                                                       1\n",
       "related distress among cancer caregivers                                                                    1\n",
       "left colonic adenocarcinoma                                                                                 1\n",
       "crohn disease-associated colorectal adenocarcinoma                                                          1\n",
       "localized hepatocellular carcinoma                                                                          1\n",
       "pancreatic intraductal papillary mucinous neoplasm                                                          1\n",
       "non-hodgkin's lymphomas                                                                                     1\n",
       "children cancer, solid tumor                                                                                1\n",
       "cancer-related cognitive difficulties                                                                       1\n",
       "Name: condition, Length: 5865, dtype: int64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many trials carry each distinct condition label (most frequent first).\n",
    "df['condition'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['breast cancer', 'prostate cancer', 'colorectal cancer', 'lung cancer',\n",
       "       'multiple myeloma', 'cancer', 'lymphoma', 'non-small cell lung cancer',\n",
       "       'pancreatic cancer',\n",
       "       'unspecified adult solid tumor, protocol specific'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tally trials per condition in descending order, then show the ten most frequent labels.\n",
    "condition = df['condition'].value_counts(sort=True, ascending=False)\n",
    "condition.head(10).index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 1080x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the 20 most frequent condition labels; the figure is also written to ./image/condition.png.\n",
    "condition = df['condition'].value_counts(sort=True, ascending=False)\n",
    "top_conditions = condition.head(20)\n",
    "\n",
    "plt.figure(figsize=(15, 5))\n",
    "# x/y are passed by keyword: seaborn >= 0.12 no longer accepts the data vectors positionally.\n",
    "sns.barplot(x=top_conditions.index, y=top_conditions.values, alpha=0.8)\n",
    "plt.title('Patient Condition')\n",
    "plt.ylabel('Number of Occurrences', fontsize=12)\n",
    "plt.xticks(rotation=90)\n",
    "\n",
    "# Make sure the output folder exists so savefig does not fail on a fresh checkout.\n",
    "os.makedirs('./image', exist_ok=True)\n",
    "plt.savefig('./image/condition.png', bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Vaccines                                                                                                                                                                                                                                                                                                                                                                                               492\n",
       "Pembrolizumab                                                                                                                                                                                                                                                                                                                                                                                          392\n",
       "Paclitaxel, Albumin-Bound Paclitaxel                                                                                                                                                                                                                                                                                                                                                                   284\n",
       "Gemcitabine                                                                                                                                                                                                                                                                                                                                                                                            239\n",
       "Docetaxel                                                                                                                                                                                                                                                                                                                                                                                              229\n",
       "Everolimus, Sirolimus                                                                                                                                                                                                                                                                                                                                                                                  226\n",
       "Sorafenib                                                                                                                                                                                                                                                                                                                                                                                              219\n",
       "Nivolumab                                                                                                                                                                                                                                                                                                                                                                                              219\n",
       "Temozolomide                                                                                                                                                                                                                                                                                                                                                                                           217\n",
       "Antibodies, Monoclonal                                                                                                                                                                                                                                                                                                                                                                                 204\n",
       "Cisplatin                                                                                                                                                                                                                                                                                                                                                                                              198\n",
       "Erlotinib Hydrochloride                                                                                                                                                                                                                                                                                                                                                                                194\n",
       "Capecitabine                                                                                                                                                                                                                                                                                                                                                                                           185\n",
       "Sunitinib                                                                                                                                                                                                                                                                                                                                                                                              177\n",
       "Paclitaxel, Albumin-Bound Paclitaxel, Carboplatin                                                                                                                                                                                                                                                                                                                                                      175\n",
       "Nivolumab, Ipilimumab                                                                                                                                                                                                                                                                                                                                                                                  163\n",
       "Bevacizumab                                                                                                                                                                                                                                                                                                                                                                                            159\n",
       "Rituximab                                                                                                                                                                                                                                                                                                                                                                                              158\n",
       "Cetuximab                                                                                                                                                                                                                                                                                                                                                                                              143\n",
       "Cisplatin, Gemcitabine                                                                                                                                                                                                                                                                                                                                                                                 132\n",
       "Doxorubicin, Liposomal doxorubicin                                                                                                                                                                                                                                                                                                                                                                     127\n",
       "Irinotecan                                                                                                                                                                                                                                                                                                                                                                                             119\n",
       "Gefitinib                                                                                                                                                                                                                                                                                                                                                                                              116\n",
       "Apatinib                                                                                                                                                                                                                                                                                                                                                                                               114\n",
       "Paclitaxel, Albumin-Bound Paclitaxel, Gemcitabine                                                                                                                                                                                                                                                                                                                                                      109\n",
       "Imatinib Mesylate                                                                                                                                                                                                                                                                                                                                                                                      107\n",
       "Oxaliplatin                                                                                                                                                                                                                                                                                                                                                                                            104\n",
       "Trastuzumab                                                                                                                                                                                                                                                                                                                                                                                            100\n",
       "Fluorodeoxyglucose F18                                                                                                                                                                                                                                                                                                                                                                                  99\n",
       "Capecitabine, Oxaliplatin                                                                                                                                                                                                                                                                                                                                                                               98\n",
       "                                                                                                                                                                                                                                                                                                                                                                                                      ... \n",
       "Dermal Autograft, AlloDerm                                                                                                                                                                                                                                                                                                                                                                               1\n",
       "RevM10 gene, RevM10/polAS gene, in vitro-treated peripheral blood stem cell transplantation, peripheral blood stem cell transplantation                                                                                                                                                                                                                                                                  1\n",
       "SHR3680, SHR2554                                                                                                                                                                                                                                                                                                                                                                                         1\n",
       "Bevacizumab, Carboplatin, Liposomal doxorubicin, Doxorubicin                                                                                                                                                                                                                                                                                                                                             1\n",
       "Benzocaine, Estrogens                                                                                                                                                                                                                                                                                                                                                                                    1\n",
       "Paclitaxel, Albumin-Bound Paclitaxel, Semaxinib, Angiogenesis Inhibitors                                                                                                                                                                                                                                                                                                                                 1\n",
       "adjuvant therapy, 3-dimensional conformal radiation therapy, brachytherapy                                                                                                                                                                                                                                                                                                                               1\n",
       "Telerehabilitation group                                                                                                                                                                                                                                                                                                                                                                                 1\n",
       "FLOT regimen chemotherapy, D2 gastric and imaging metastases resection                                                                                                                                                                                                                                                                                                                                   1\n",
       "Antigen-specific cytotoxic T lymphocytes induced by dendritic cells infected by recombinant adeno-associated virus with CEA gene                                                                                                                                                                                                                                                                         1\n",
       "Cyclophosphamide, Methotrexate, Cytarabine, Rituximab, Ifosfamide, Isophosphamide mustard, Doxorubicin, Liposomal doxorubicin, Prednisone, Etoposide, Etoposide phosphate, Vincristine, Prednisolone, Methylprednisolone Hemisuccinate, Bleomycin, Vindesine, Lenograstim, Methylprednisolone Acetate, Methylprednisolone, Prednisolone acetate, Prednisolone hemisuccinate, Prednisolone phosphate      1\n",
       "Doxorubicin, Lenalidomide, Vinblastine                                                                                                                                                                                                                                                                                                                                                                   1\n",
       "Partial Breast Irradiation (PBI)                                                                                                                                                                                                                                                                                                                                                                         1\n",
       "Axitinib, Selenium                                                                                                                                                                                                                                                                                                                                                                                       1\n",
       "TroVax                                                                                                                                                                                                                                                                                                                                                                                                   1\n",
       "Camptothecin, Namitecan                                                                                                                                                                                                                                                                                                                                                                                  1\n",
       "Oxaliplatin, Fluorouracil, Eniluracil                                                                                                                                                                                                                                                                                                                                                                    1\n",
       "Crotoxin                                                                                                                                                                                                                                                                                                                                                                                                 1\n",
       "custom work endurance, dietary management adapted to the nutritional status                                                                                                                                                                                                                                                                                                                              1\n",
       "CT-scan, CEA, X-ray of lungs, CT-scan, CEA, X-ray of lungs                                                                                                                                                                                                                                                                                                                                               1\n",
       "Paclitaxel, Albumin-Bound Paclitaxel, Bavituximab, Antibodies, Monoclonal                                                                                                                                                                                                                                                                                                                                1\n",
       "Interactive Voice Response Symptom Management, Tailored Newsletters                                                                                                                                                                                                                                                                                                                                      1\n",
       "CMB305, Placebo                                                                                                                                                                                                                                                                                                                                                                                          1\n",
       "Doxorubicin, Liposomal doxorubicin, Ifosfamide, Semaxinib, Angiogenesis Inhibitors                                                                                                                                                                                                                                                                                                                       1\n",
       "injection of the cell therapy product                                                                                                                                                                                                                                                                                                                                                                    1\n",
       "ETAF, Usual care                                                                                                                                                                                                                                                                                                                                                                                         1\n",
       "Gemcitabine, Capecitabine, Fluorouracil, Irinotecan, Oxaliplatin                                                                                                                                                                                                                                                                                                                                         1\n",
       "Immunologic Factors, Fluorodeoxyglucose F18                                                                                                                                                                                                                                                                                                                                                              1\n",
       "Football                                                                                                                                                                                                                                                                                                                                                                                                 1\n",
       "Paclitaxel, Albumin-Bound Paclitaxel, Carboplatin, Atezolizumab, Antibodies, Immunoglobulins, Antibodies, Monoclonal                                                                                                                                                                                                                                                                                     1\n",
       "Name: intervention_name, Length: 22902, dtype: int64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Count how many trials use each distinct intervention string (most frequent first).\n",
    "df['intervention_name'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 1080x360 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Bar chart of the 20 most frequent intervention strings; the figure is also written to ./image/intervention.png.\n",
    "intervention = df['intervention_name'].value_counts(sort=True, ascending=False)\n",
    "top_interventions = intervention.head(20)\n",
    "\n",
    "plt.figure(figsize=(15, 5))\n",
    "# x/y are passed by keyword: seaborn >= 0.12 no longer accepts the data vectors positionally.\n",
    "sns.barplot(x=top_interventions.index, y=top_interventions.values, alpha=0.8)\n",
    "plt.title('Treatment procedure')\n",
    "plt.ylabel('Number of Occurrences', fontsize=12)\n",
    "plt.xticks(rotation=90)\n",
    "\n",
    "# Make sure the output folder exists so savefig does not fail on a fresh checkout.\n",
    "os.makedirs('./image', exist_ok=True)\n",
    "plt.savefig('./image/intervention.png', bbox_inches='tight')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nct_id</th>\n",
       "      <th>brief_title</th>\n",
       "      <th>official_title</th>\n",
       "      <th>overall_status</th>\n",
       "      <th>start_date</th>\n",
       "      <th>completion_date</th>\n",
       "      <th>phase</th>\n",
       "      <th>study_type</th>\n",
       "      <th>brief_summary</th>\n",
       "      <th>detailed_description</th>\n",
       "      <th>enrollment</th>\n",
       "      <th>condition</th>\n",
       "      <th>intervention_name</th>\n",
       "      <th>eligibility</th>\n",
       "      <th>condition_cancer</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>NCT00001188</td>\n",
       "      <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
       "      <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1983</td>\n",
       "      <td>September 2000</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with Grade II and III soft ti...</td>\n",
       "      <td>\\n      Patients with Grade II and III soft ti...</td>\n",
       "      <td>100.0</td>\n",
       "      <td>sarcoma</td>\n",
       "      <td>radiation therapy following surgery</td>\n",
       "      <td>\\n        Patients must have biopsy-proven sof...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>NCT00001189</td>\n",
       "      <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
       "      <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1983</td>\n",
       "      <td>April 2001</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with Grade I soft tissue sarc...</td>\n",
       "      <td>\\n      This is a randomized study. Patients u...</td>\n",
       "      <td>150.0</td>\n",
       "      <td>neoplasms</td>\n",
       "      <td>radiotherapy</td>\n",
       "      <td>\\n        DISEASE CHARACTERISTICS:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>NCT00001193</td>\n",
       "      <td>A Multimodality Treatment Approach to Patients...</td>\n",
       "      <td>A Multimodality Treatment Approach to Patients...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>November 1984</td>\n",
       "      <td>September 2000</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study is designed to evaluate the...</td>\n",
       "      <td>\\n      This study is designed to evaluate the...</td>\n",
       "      <td>200.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Melphalan</td>\n",
       "      <td>\\n        Patients must have a histologically ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         nct_id                                        brief_title  \\\n",
       "17  NCT00001188  The Role of Multi-Modality Therapy for the Tre...   \n",
       "18  NCT00001189  The Treatment of Grade I Sarcomas and Benign, ...   \n",
       "20  NCT00001193  A Multimodality Treatment Approach to Patients...   \n",
       "\n",
       "                                       official_title overall_status  \\\n",
       "17  The Role of Multi-Modality Therapy for the Tre...      Completed   \n",
       "18  The Treatment of Grade I Sarcomas and Benign, ...      Completed   \n",
       "20  A Multimodality Treatment Approach to Patients...      Completed   \n",
       "\n",
       "       start_date completion_date    phase      study_type  \\\n",
       "17  December 1983  September 2000  Phase 2  Interventional   \n",
       "18  December 1983      April 2001  Phase 2  Interventional   \n",
       "20  November 1984  September 2000  Phase 2  Interventional   \n",
       "\n",
       "                                        brief_summary  \\\n",
       "17  \\n      Patients with Grade II and III soft ti...   \n",
       "18  \\n      Patients with Grade I soft tissue sarc...   \n",
       "20  \\n      This study is designed to evaluate the...   \n",
       "\n",
       "                                 detailed_description  enrollment  \\\n",
       "17  \\n      Patients with Grade II and III soft ti...       100.0   \n",
       "18  \\n      This is a randomized study. Patients u...       150.0   \n",
       "20  \\n      This study is designed to evaluate the...       200.0   \n",
       "\n",
       "              condition                    intervention_name  \\\n",
       "17              sarcoma  radiation therapy following surgery   \n",
       "18            neoplasms                         radiotherapy   \n",
       "20  neoplasm metastasis                            Melphalan   \n",
       "\n",
       "                                          eligibility  condition_cancer  \n",
       "17  \\n        Patients must have biopsy-proven sof...              True  \n",
       "18  \\n        DISEASE CHARACTERISTICS:\\n\\n        ...              True  \n",
       "20  \\n        Patients must have a histologically ...              True  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract the inclusion / exclusion criteria text from the eligibility column.\n",
    "# The text is lower-cased first so the section markers match regardless of the\n",
    "# capitalisation used in the original record.\n",
    "df['eligibility'] = df['eligibility'].str.lower()\n",
    "\n",
    "# Element [1] of the first split is the text that follows the first occurrence\n",
    "# of the marker; rows without that marker yield NaN. Splitting that result on\n",
    "# the opposite marker and keeping element [0] ends each section where the other\n",
    "# one begins, so `eligible` does not carry the exclusion text (and vice versa).\n",
    "# The set of non-null rows is the same as with a plain split on one marker.\n",
    "df['eligible'] = (\n",
    "    df['eligibility'].str.split('inclusion criteria:').str[1]\n",
    "    .str.split('exclusion criteria:').str[0]\n",
    ")\n",
    "df['ineligible'] = (\n",
    "    df['eligibility'].str.split('exclusion criteria:').str[1]\n",
    "    .str.split('inclusion criteria:').str[0]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 42725 entries, 17 to 61776\n",
      "Data columns (total 17 columns):\n",
      "nct_id                  42725 non-null object\n",
      "brief_title             42725 non-null object\n",
      "official_title          42169 non-null object\n",
      "overall_status          42725 non-null object\n",
      "start_date              42725 non-null object\n",
      "completion_date         38927 non-null object\n",
      "phase                   34821 non-null object\n",
      "study_type              42725 non-null object\n",
      "brief_summary           42724 non-null object\n",
      "detailed_description    29941 non-null object\n",
      "enrollment              41413 non-null float64\n",
      "condition               42725 non-null object\n",
      "intervention_name       42725 non-null object\n",
      "eligibility             42725 non-null object\n",
      "condition_cancer        42725 non-null bool\n",
      "eligible                36042 non-null object\n",
      "ineligible              34798 non-null object\n",
      "dtypes: bool(1), float64(1), object(15)\n",
      "memory usage: 5.6+ MB\n"
     ]
    }
   ],
   "source": [
    "df.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>nct_id</th>\n",
       "      <th>brief_title</th>\n",
       "      <th>official_title</th>\n",
       "      <th>overall_status</th>\n",
       "      <th>start_date</th>\n",
       "      <th>completion_date</th>\n",
       "      <th>phase</th>\n",
       "      <th>study_type</th>\n",
       "      <th>brief_summary</th>\n",
       "      <th>detailed_description</th>\n",
       "      <th>enrollment</th>\n",
       "      <th>condition</th>\n",
       "      <th>intervention_name</th>\n",
       "      <th>eligibility</th>\n",
       "      <th>condition_cancer</th>\n",
       "      <th>eligible</th>\n",
       "      <th>ineligible</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>NCT00001188</td>\n",
       "      <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
       "      <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1983</td>\n",
       "      <td>September 2000</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with Grade II and III soft ti...</td>\n",
       "      <td>\\n      Patients with Grade II and III soft ti...</td>\n",
       "      <td>100.0</td>\n",
       "      <td>sarcoma</td>\n",
       "      <td>radiation therapy following surgery</td>\n",
       "      <td>\\n        patients must have biopsy-proven sof...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>NCT00001189</td>\n",
       "      <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
       "      <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1983</td>\n",
       "      <td>April 2001</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with Grade I soft tissue sarc...</td>\n",
       "      <td>\\n      This is a randomized study. Patients u...</td>\n",
       "      <td>150.0</td>\n",
       "      <td>neoplasms</td>\n",
       "      <td>radiotherapy</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>NCT00001193</td>\n",
       "      <td>A Multimodality Treatment Approach to Patients...</td>\n",
       "      <td>A Multimodality Treatment Approach to Patients...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>November 1984</td>\n",
       "      <td>September 2000</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study is designed to evaluate the...</td>\n",
       "      <td>\\n      This study is designed to evaluate the...</td>\n",
       "      <td>200.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Melphalan</td>\n",
       "      <td>\\n        patients must have a histologically ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>NCT00001209</td>\n",
       "      <td>A Pilot Study for the Treatment of Patients Wi...</td>\n",
       "      <td>A Pilot Study for the Treatment of Patients Wi...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>October 1986</td>\n",
       "      <td>August 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This protocol is designed to test the ...</td>\n",
       "      <td>\\n      This protocol is designed to test the ...</td>\n",
       "      <td>120.0</td>\n",
       "      <td>sarcoma, ewing's</td>\n",
       "      <td>Vincristine, Doxorubicin, Ifosfamide, Cyclopho...</td>\n",
       "      <td>\\n        patients with high grade soft tissue...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>NCT00001217</td>\n",
       "      <td>Osteosarcoma Study #2: A Randomized Trial of P...</td>\n",
       "      <td>Osteosarcoma Study #2: A Randomized Trial of P...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>May 1987</td>\n",
       "      <td>December 2000</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The study is designed to determine if ...</td>\n",
       "      <td>\\n      The study is designed to determine if ...</td>\n",
       "      <td>260.0</td>\n",
       "      <td>osteosarcoma</td>\n",
       "      <td>pre-surgical chemotherapy</td>\n",
       "      <td>\\n        must be less than or equal to 30 yea...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>NCT00001237</td>\n",
       "      <td>Pilot Protocol for the Treatment of Patients W...</td>\n",
       "      <td>Pilot Protocol for the Treatment of Patients W...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1989</td>\n",
       "      <td>April 2000</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Major improvements in the treatment of...</td>\n",
       "      <td>\\n      Major improvements in the treatment of...</td>\n",
       "      <td>120.0</td>\n",
       "      <td>lymphoma, small noncleaved-cell</td>\n",
       "      <td>Sargramostim</td>\n",
       "      <td>\\n        high risk protocol: patients with sm...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>NCT00001239</td>\n",
       "      <td>Combination Chemotherapy (FLAC) Combined With ...</td>\n",
       "      <td>Combination Chemotherapy (FLAC) Combined With ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>July 1989</td>\n",
       "      <td>January 2001</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      To evaluate a dose intensive chemother...</td>\n",
       "      <td>\\n      To evaluate a dose intensive chemother...</td>\n",
       "      <td>100.0</td>\n",
       "      <td>breast neoplasms</td>\n",
       "      <td>Sargramostim</td>\n",
       "      <td>\\n        all stage iii or clinical t3n0 or tx...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>NCT00001249</td>\n",
       "      <td>Treatment of Tac-Expressing Cutaneous T-Cell L...</td>\n",
       "      <td>Treatment of Tac-Expressing Cutaneous T-Cell L...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1989</td>\n",
       "      <td>October 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The study purpose is to evaluate the c...</td>\n",
       "      <td>\\n      The study purpose is to evaluate the c...</td>\n",
       "      <td>30.0</td>\n",
       "      <td>lymphoma, t-cell, cutaneous</td>\n",
       "      <td>Antibodies, Daclizumab</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>NCT00001250</td>\n",
       "      <td>Effect of Preoperative Chemotherapy on Axillar...</td>\n",
       "      <td>Effect of Preoperative Chemotherapy on Axillar...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1989</td>\n",
       "      <td>October 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with untreated clinical stage...</td>\n",
       "      <td>\\n      A prospective randomized trial evaluat...</td>\n",
       "      <td>130.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>preoperative dose intense chemotherapy (FLAC/G...</td>\n",
       "      <td>\\n        inclusion criteria\\n\\n        women ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>NCT00001251</td>\n",
       "      <td>Phase I Study of Intrathecal Mafosfamide</td>\n",
       "      <td>Phase I Study of Intrathecal Mafosfamide</td>\n",
       "      <td>Completed</td>\n",
       "      <td>November 1989</td>\n",
       "      <td>November 2003</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The purpose of this study is to determ...</td>\n",
       "      <td>\\n      The purpose of this study is to determ...</td>\n",
       "      <td>65.0</td>\n",
       "      <td>meningeal neoplasm</td>\n",
       "      <td>Mafosfamide, Cyclophosphamide</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n        all p...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        all patients over 3 years of age w...</td>\n",
       "      <td>\\n\\n        patients receiving other therapy (...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>30</th>\n",
       "      <td>NCT00001256</td>\n",
       "      <td>Steroids and Methotrexate to Treat Systemic Va...</td>\n",
       "      <td>An Open Trial of the Efficacy of Glucocorticoi...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1990</td>\n",
       "      <td>February 2004</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study will evaluate the safety an...</td>\n",
       "      <td>\\n      Previous studies at the NIH have demon...</td>\n",
       "      <td>100.0</td>\n",
       "      <td>wegener's granulomatosis</td>\n",
       "      <td>Methotrexate, Prednisone</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n        diagn...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        diagnosis: wegener's granulomatosi...</td>\n",
       "      <td>\\n\\n        evidence of infection by gram stai...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>31</th>\n",
       "      <td>NCT00001266</td>\n",
       "      <td>A Phase II Trial of Leuprolide + Flutamide + S...</td>\n",
       "      <td>A Phase II Trial of Leuprolide + Flutamide + S...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>October 1990</td>\n",
       "      <td>August 2003</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      One current hypothesis as to what limi...</td>\n",
       "      <td>\\n      The purpose of this study is to assess...</td>\n",
       "      <td>70.0</td>\n",
       "      <td>prostatic neoplasm</td>\n",
       "      <td>Leuprolide, Flutamide, Suramin</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n        patie...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        patients must have a histologic di...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>33</th>\n",
       "      <td>NCT00001269</td>\n",
       "      <td>Phase I Trial of FLAC (5-Fluorouracil, Leucovo...</td>\n",
       "      <td>Phase I Trial of FLAC (5-Fluorouracil, Leucovo...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>May 1991</td>\n",
       "      <td>February 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This is a phase I study to determine t...</td>\n",
       "      <td>\\n      Phase I study to determine the maximal...</td>\n",
       "      <td>100.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Fluorouracil, Cyclophosphamide, Doxorubicin, L...</td>\n",
       "      <td>\\n        patients with stage iv (metastatic) ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>34</th>\n",
       "      <td>NCT00001270</td>\n",
       "      <td>Feasibility Study of Interleukin 1-Alpha With ...</td>\n",
       "      <td>Feasibility Study of Interleukin 1-Alpha With ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>June 1991</td>\n",
       "      <td>March 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This is a phase I/II study of interleu...</td>\n",
       "      <td>\\n      This is a phase I/II study of interleu...</td>\n",
       "      <td>85.0</td>\n",
       "      <td>testicular neoplasms</td>\n",
       "      <td>Etoposide, Ifosfamide, Isophosphamide mustard</td>\n",
       "      <td>\\n        a history of pathologically document...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>NCT00001271</td>\n",
       "      <td>A Phase I Study of Continuous Infusion Immunot...</td>\n",
       "      <td>A Phase I Study of Continuous Infusion Immunot...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>July 1991</td>\n",
       "      <td>April 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with CD22(+) B-cell lymphomas...</td>\n",
       "      <td>\\n      Patients with CD22(+) B-cell lymphomas...</td>\n",
       "      <td>24.0</td>\n",
       "      <td>b cell lymphoma</td>\n",
       "      <td>Immunotoxins</td>\n",
       "      <td>\\n        patients with a histologic diagnosis...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>36</th>\n",
       "      <td>NCT00001272</td>\n",
       "      <td>A Phase I Study of Taxol, Cisplatin, Cyclophos...</td>\n",
       "      <td>A Phase I Study of Taxol, Cisplatin, Cyclophos...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1991</td>\n",
       "      <td>May 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This is a Phase I study which addresse...</td>\n",
       "      <td>\\n      This is a Phase I study which addresse...</td>\n",
       "      <td>60.0</td>\n",
       "      <td>ovarian neoplasms</td>\n",
       "      <td>Cisplatin, Cyclophosphamide, Paclitaxel, Album...</td>\n",
       "      <td>\\n        all patients must have biopsy proven...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>37</th>\n",
       "      <td>NCT00001296</td>\n",
       "      <td>A Randomized Phase III Trial of Hyperthermic I...</td>\n",
       "      <td>A Randomized Phase III Trial of Hyperthermic I...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>February 1992</td>\n",
       "      <td>October 2000</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Randomized study. Initially, 3 patient...</td>\n",
       "      <td>\\n      Patients with locally advanced melanom...</td>\n",
       "      <td>122.0</td>\n",
       "      <td>melanoma</td>\n",
       "      <td>Interferons, Melphalan, Interferon-gamma</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>39</th>\n",
       "      <td>NCT00001300</td>\n",
       "      <td>A Randomized Study of the Effect of Adjuvant C...</td>\n",
       "      <td>A Randomized Study of the Effect of Adjuvant C...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>June 1992</td>\n",
       "      <td>March 2001</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Randomized study. All patients must be...</td>\n",
       "      <td>\\n      Patients with primary, high-grade soft...</td>\n",
       "      <td>150.0</td>\n",
       "      <td>sarcoma</td>\n",
       "      <td>Doxorubicin, Liposomal doxorubicin, Ifosfamide...</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40</th>\n",
       "      <td>NCT00001302</td>\n",
       "      <td>A Phase I Study of Infusional Chemotherapy Wit...</td>\n",
       "      <td>A Phase I Study of Infusional Chemotherapy Wit...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1992</td>\n",
       "      <td>June 2002</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The clinical study entitled \"A Phase I...</td>\n",
       "      <td>\\n      The clinical study entitled \"A Phase I...</td>\n",
       "      <td>80.0</td>\n",
       "      <td>ovarian cancer</td>\n",
       "      <td>polysaccharide-K</td>\n",
       "      <td>\\n        biopsy proven metastatic cancer, for...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>41</th>\n",
       "      <td>NCT00001328</td>\n",
       "      <td>Gene Therapy for the Treatment of Brain Tumors</td>\n",
       "      <td>Gene Therapy for the Treatment of Brain Tumors...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>August 21, 1992</td>\n",
       "      <td>April 30, 2010</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Malignant brain tumors are responsible...</td>\n",
       "      <td>\\n      Malignant brain tumors are responsible...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Ganciclovir, Ganciclovir triphosphate</td>\n",
       "      <td>\\n        -  inclusion criteria:\\n\\n        al...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        all adults, greater than 18 years ...</td>\n",
       "      <td>\\n\\n        no pregnant women will be entered ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>42</th>\n",
       "      <td>NCT00001332</td>\n",
       "      <td>Phase I Study of Continuous Hyperthermic Perit...</td>\n",
       "      <td>Phase I Study of Continuous Hyperthermic Perit...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1992</td>\n",
       "      <td>October 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with gastric adenocarcinoma a...</td>\n",
       "      <td>\\n      Patients with gastric adenocarcinoma a...</td>\n",
       "      <td>50.0</td>\n",
       "      <td>stomach neoplasms</td>\n",
       "      <td>CHPP with cisplatin</td>\n",
       "      <td>\\n        patients age greater than or equal t...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>43</th>\n",
       "      <td>NCT00001333</td>\n",
       "      <td>Phase I Study of Intrathecal Topotecan</td>\n",
       "      <td>Phase I Study of Intrathecal Topotecan</td>\n",
       "      <td>Completed</td>\n",
       "      <td>February 1993</td>\n",
       "      <td>December 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The purpose of this study is to determ...</td>\n",
       "      <td>\\n      The purpose of this study is to determ...</td>\n",
       "      <td>30.0</td>\n",
       "      <td>meningeal neoplasms</td>\n",
       "      <td>Topotecan</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>NCT00001335</td>\n",
       "      <td>New Therapeutic Strategies for Patients With E...</td>\n",
       "      <td>New Therapeutic Strategies for Patients With E...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>April 1993</td>\n",
       "      <td>January 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The prognosis for patients with metast...</td>\n",
       "      <td>\\n      The prognosis for patients with metast...</td>\n",
       "      <td>90.0</td>\n",
       "      <td>rhabdomyosarcoma</td>\n",
       "      <td>Topotecan, Dexrazoxane, Razoxane</td>\n",
       "      <td>\\n        the patient must fall into one of th...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>NCT00001337</td>\n",
       "      <td>Dose-Adjusted EPOCH Chemotherapy and Rituximab...</td>\n",
       "      <td>Dose-Adjusted EPOCH Chemotherapy and Rituximab...</td>\n",
       "      <td>Recruiting</td>\n",
       "      <td>May 8, 1993</td>\n",
       "      <td>March 31, 2022</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      5-Drug Combination Chemotherapy with H...</td>\n",
       "      <td>\\n      Background:\\n\\n      The treatment of ...</td>\n",
       "      <td>348.0</td>\n",
       "      <td>gray zone lymphoma</td>\n",
       "      <td>Rituximab</td>\n",
       "      <td>\\n        -  inclusion criteria:\\n\\n        no...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        non-hodgkin's lymphomas in the fol...</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>NCT00001339</td>\n",
       "      <td>A Study of Combination Chemotherapy and Surgic...</td>\n",
       "      <td>A Study of Combination Chemotherapy and Surgic...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>August 1993</td>\n",
       "      <td>August 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients who have no response to preop...</td>\n",
       "      <td>\\n      This is a study of infusional doxorubi...</td>\n",
       "      <td>42.0</td>\n",
       "      <td>adrenal cortical carcinoma</td>\n",
       "      <td>Doxorubicin, Liposomal doxorubicin, Etoposide,...</td>\n",
       "      <td>\\n        biopsy-proven primary or recurrent a...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>NCT00001341</td>\n",
       "      <td>A Phase I Trial of ZD1694 (TOMUDEX), an Inhibi...</td>\n",
       "      <td>A Phase I Trial of ZD1694 (TOMUDEX® (Registere...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1993</td>\n",
       "      <td>June 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Thymidylate synthase (TS), an enzyme w...</td>\n",
       "      <td>\\n      Thymidylate synthase (TS), an enzyme w...</td>\n",
       "      <td>60.0</td>\n",
       "      <td>neoplasm</td>\n",
       "      <td>Raltitrexed</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>NCT00001378</td>\n",
       "      <td>A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr...</td>\n",
       "      <td>A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>January 1994</td>\n",
       "      <td>November 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This is a pilot, chemoprevention study...</td>\n",
       "      <td>\\n      This is a pilot chemo-prevention study...</td>\n",
       "      <td>75.0</td>\n",
       "      <td>breast neoplasms</td>\n",
       "      <td>Tamoxifen, Retinamide</td>\n",
       "      <td>\\n        population characteristics:\\n\\n     ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>NCT00001381</td>\n",
       "      <td>A Phase I Trial Using Suramin to Treat Superfi...</td>\n",
       "      <td>A Phase I Trial Using Suramin to Treat Superfi...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1994</td>\n",
       "      <td>December 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with superficial transitional...</td>\n",
       "      <td>\\n      Patients with superficial transitional...</td>\n",
       "      <td>18.0</td>\n",
       "      <td>carcinoma, transitional cell</td>\n",
       "      <td>Suramin</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>NCT00001382</td>\n",
       "      <td>A Phase I Study of Recombinant Vaccinia Virus ...</td>\n",
       "      <td>A Phase I Study of Recombinant Vaccinia Virus ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1994</td>\n",
       "      <td>March 2000</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This trial will evaluate, in patients ...</td>\n",
       "      <td>\\n      This trial will evaluate, in patients ...</td>\n",
       "      <td>75.0</td>\n",
       "      <td>prostatic neoplasms</td>\n",
       "      <td>PROSTVAC</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>NCT00001383</td>\n",
       "      <td>A Phase I Study of Infusional Paclitaxel With ...</td>\n",
       "      <td>A Phase I Study of Infusional Paclitaxel With ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1994</td>\n",
       "      <td>January 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This is a dosage escalation study to e...</td>\n",
       "      <td>\\n      The clinical study entitled \"A Phase I...</td>\n",
       "      <td>52.0</td>\n",
       "      <td>ovarian cancer</td>\n",
       "      <td>Paclitaxel, Albumin-Bound Paclitaxel, polysacc...</td>\n",
       "      <td>\\n        biopsy proven advanced cancer, for w...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>109</th>\n",
       "      <td>NCT00001587</td>\n",
       "      <td>A Phase I Study of Isolated Hepatic Portal and...</td>\n",
       "      <td>A Phase I Study of Isolated Hepatic Portal and...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1997</td>\n",
       "      <td>March 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with unresectable primary or ...</td>\n",
       "      <td>\\n      Patients with unresectable primary or ...</td>\n",
       "      <td>30.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Melphalan</td>\n",
       "      <td>\\n        histologically or cytologically prov...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>115</th>\n",
       "      <td>NCT00001683</td>\n",
       "      <td>A Phase I Study of Oral COL-3 (NSC-683551), a ...</td>\n",
       "      <td>A Phase I Study of Oral COL-3 (NSC-683551), a ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>October 1997</td>\n",
       "      <td>August 2003</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Matrix metalloproteinases (MMPs) are a...</td>\n",
       "      <td>\\n      Matrix metalloproteinases (MMPs) are a...</td>\n",
       "      <td>35.0</td>\n",
       "      <td>renal cell carcinoma</td>\n",
       "      <td>Tissue Inhibitor of Metalloproteinases, Matrix...</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n        all p...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        all patients with refractory solid...</td>\n",
       "      <td>\\n\\n        active infection, including positi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>116</th>\n",
       "      <td>NCT00001685</td>\n",
       "      <td>Immunization of HLA-A201 Patients With Metasta...</td>\n",
       "      <td>Immunization of HLA-A201 Patients With Metasta...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>November 1997</td>\n",
       "      <td>September 2000</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This is a study of a melanoma tumor an...</td>\n",
       "      <td>\\n      This is a study of a melanoma tumor an...</td>\n",
       "      <td>114.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Vaccines</td>\n",
       "      <td>\\n        any patient 16 years of age or older...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>120</th>\n",
       "      <td>NCT00001696</td>\n",
       "      <td>A Pharmacokinetic Study of Genistein, a Tyrosi...</td>\n",
       "      <td>A Pharmacokinetic Study of Genistein, a Tyrosi...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>April 1998</td>\n",
       "      <td>March 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Genistein is a natural product found i...</td>\n",
       "      <td>\\n      Genistein is a natural product found i...</td>\n",
       "      <td>15.0</td>\n",
       "      <td>cancer</td>\n",
       "      <td>Genistein</td>\n",
       "      <td>\\n        must be 18 years old or greater.\\n\\n...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>122</th>\n",
       "      <td>NCT00001703</td>\n",
       "      <td>Vaccine Therapy With Tumor Specific Mutated VH...</td>\n",
       "      <td>Vaccine Therapy With Tumor Specific Mutated VH...</td>\n",
       "      <td>Terminated</td>\n",
       "      <td>August 1998</td>\n",
       "      <td>November 2008</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      About 27,000 new cases of renal cell c...</td>\n",
       "      <td>\\n      About 27,000 new cases of renal cell c...</td>\n",
       "      <td>6.0</td>\n",
       "      <td>renal cell carcinoma</td>\n",
       "      <td>Vaccines, \"Freunds Adjuvant\"</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n          -  ...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n          -  patients must be 18 years of ...</td>\n",
       "      <td>\\n\\n          -  any condition that does not f...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>123</th>\n",
       "      <td>NCT00001705</td>\n",
       "      <td>Immunization of Patients With Metastatic Melan...</td>\n",
       "      <td>Immunization of Patients With Metastatic Melan...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>July 1998</td>\n",
       "      <td>June 2001</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Patients with metastatic melanoma who ...</td>\n",
       "      <td>\\n      Patients with metastatic melanoma who ...</td>\n",
       "      <td>141.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Vaccines</td>\n",
       "      <td>\\n        any patient age greater than or equa...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>125</th>\n",
       "      <td>NCT00001730</td>\n",
       "      <td>Study of Radioiodine (131-I) Uptake Following ...</td>\n",
       "      <td>A Dosimetry Study of Radioiodine (131-I) Uptak...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1997</td>\n",
       "      <td>April 2000</td>\n",
       "      <td>Phase 4</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Thyroid cancer is typically treated wi...</td>\n",
       "      <td>\\n      This is a multi-centered, open-labeled...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>thyroid neoplasms</td>\n",
       "      <td>Hormones</td>\n",
       "      <td>\\n        patients greater than or equal to 18...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>127</th>\n",
       "      <td>NCT00001750</td>\n",
       "      <td>Comparing Treatments for Multiple Myeloma</td>\n",
       "      <td>Randomized Trial of Autologous Transplantation...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1998</td>\n",
       "      <td>August 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Some drugs have the ability to push st...</td>\n",
       "      <td>\\n      Some drugs, such as hematopoietic cyto...</td>\n",
       "      <td>32.0</td>\n",
       "      <td>multiple myeloma</td>\n",
       "      <td>Stemgen</td>\n",
       "      <td>\\n        inclusion criteria\\n\\n        age 70...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>128</th>\n",
       "      <td>NCT00001765</td>\n",
       "      <td>Stem Cell Transplant Following Low-Intensity C...</td>\n",
       "      <td>Low Intensity Preparative Regimen Followed by ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>April 1998</td>\n",
       "      <td>February 2005</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study will investigate the safety...</td>\n",
       "      <td>\\n      Chronic Granulomatous Disease (CGD) is...</td>\n",
       "      <td>60.0</td>\n",
       "      <td>chronic granulomatous disease</td>\n",
       "      <td>Nexell Isolex with T-cell Depletion, Baxter is...</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n        patie...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        patient criteria:\\n\\n        ages ...</td>\n",
       "      <td>\\n\\n        patient or donor pregnant.\\n\\n    ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>131</th>\n",
       "      <td>NCT00001805</td>\n",
       "      <td>A Phase II Clinical Trial of Suppression of Hu...</td>\n",
       "      <td>A Phase II Clinical Trial of Suppression of Hu...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 1999</td>\n",
       "      <td>June 2000</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This is a phase II clinical and pharma...</td>\n",
       "      <td>\\n      This is a phase II clinical and pharma...</td>\n",
       "      <td>20.0</td>\n",
       "      <td>stomach neoplasms</td>\n",
       "      <td>Antibodies, Rituximab, Immunotoxins, Antitoxins</td>\n",
       "      <td>\\n        patients must have advanced stage so...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>132</th>\n",
       "      <td>NCT00001806</td>\n",
       "      <td>Methods in Education for Breast Cancer Genetics</td>\n",
       "      <td>Methods in Education for Breast Cancer Genetics</td>\n",
       "      <td>Completed</td>\n",
       "      <td>April 6, 1999</td>\n",
       "      <td>December 6, 2017</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      In 1997, the Genetics Department of th...</td>\n",
       "      <td>\\n      In October 1995 the National Naval Med...</td>\n",
       "      <td>170.0</td>\n",
       "      <td>ovarian cancer</td>\n",
       "      <td>Genetic Education and Counseling, Genetic Educ...</td>\n",
       "      <td>\\n        -  inclusion criteria:\\n\\n        at...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        at least one of the following:\\n\\n...</td>\n",
       "      <td>\\n\\n        patients will be considered inelig...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>NCT00001812</td>\n",
       "      <td>A Randomized, Double-Blind, Placebo Controlled...</td>\n",
       "      <td>A Randomized, Double-Blind, Placebo Controlled...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>April 1999</td>\n",
       "      <td>August 2000</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      In patients who are receiving intraven...</td>\n",
       "      <td>\\n      In patients who are receiving intraven...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>stomatitis</td>\n",
       "      <td>Interleukin-2, Nystatin</td>\n",
       "      <td>\\n        all patients enrolled on high dose i...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>NCT00001827</td>\n",
       "      <td>p53 Vaccine for Ovarian Cancer</td>\n",
       "      <td>Vaccine Therapy With Tumor Specific p53 Peptid...</td>\n",
       "      <td>Terminated</td>\n",
       "      <td>July 26, 1999</td>\n",
       "      <td>January 25, 2013</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study will examine whether vaccin...</td>\n",
       "      <td>\\n      P53 is the most commonly mutated gene ...</td>\n",
       "      <td>21.0</td>\n",
       "      <td>ovarian neoplasm</td>\n",
       "      <td>Vaccines, Sargramostim, \"Freunds Adjuvant\", Al...</td>\n",
       "      <td>\\n        -  inclusion criteria:\\n\\n        pa...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        patients must be 18 years of age o...</td>\n",
       "      <td>\\n\\n        any condition that does not fit wi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>137</th>\n",
       "      <td>NCT00001830</td>\n",
       "      <td>Donor Th2 Cells to Prevent Graft-Versus-Host D...</td>\n",
       "      <td>Pilot Study of Donor Th2 Cells for the Prevent...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>July 20, 1999</td>\n",
       "      <td>May 19, 2015</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Allogeneic peripheral blood stem cell ...</td>\n",
       "      <td>\\n      Allogeneic peripheral blood stem cell ...</td>\n",
       "      <td>110.0</td>\n",
       "      <td>non hodgkin's lymphoma</td>\n",
       "      <td>Th2 cells in allo HSCTT, Th2 Cells</td>\n",
       "      <td>\\n        -  inclusion criteria - patient:\\n\\n...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>138</th>\n",
       "      <td>NCT00001832</td>\n",
       "      <td>Lymphocyte Re-infusion During Immune Suppressi...</td>\n",
       "      <td>Treatment of Patients With Metastatic Melanoma...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>August 1999</td>\n",
       "      <td>May 2010</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This experiment will test the safety a...</td>\n",
       "      <td>\\n      Patients with metastatic melanoma who ...</td>\n",
       "      <td>170.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Cyclophosphamide, Fludarabine phosphate, Fluda...</td>\n",
       "      <td>\\n        -  inclusion criteria\\n\\n          -...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>139</th>\n",
       "      <td>NCT00001835</td>\n",
       "      <td>Oxaliplatin in Cancer Patients With Impaired K...</td>\n",
       "      <td>A Phase I Study of Oxaliplatin in Adult Cancer...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>September 1999</td>\n",
       "      <td>December 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Oxaliplatin is an experimental anti-ca...</td>\n",
       "      <td>\\n      Oxaliplatin is a diaminocyclohexane pl...</td>\n",
       "      <td>60.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Oxaliplatin</td>\n",
       "      <td>\\n        patients must have histologically co...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>144</th>\n",
       "      <td>NCT00001860</td>\n",
       "      <td>Sandostatin LAR Depot vs. Surgery for Treating...</td>\n",
       "      <td>Sandostatin LAR vs. Surgery in Acromegalics Wi...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>August 1999</td>\n",
       "      <td>July 2002</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The purpose of this study is to compar...</td>\n",
       "      <td>\\n      The purpose of this study is to compar...</td>\n",
       "      <td>5.0</td>\n",
       "      <td>pituitary neoplasm</td>\n",
       "      <td>Octreotide</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n        male ...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        male or female patients, 18 years ...</td>\n",
       "      <td>\\n\\n        patients demonstrating intolerance...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>148</th>\n",
       "      <td>NCT00001880</td>\n",
       "      <td>Stem Cell Transplantation for Metastatic Solid...</td>\n",
       "      <td>Exploratory Study of Non-Myeloablative Allogen...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>March 12, 1999</td>\n",
       "      <td>September 23, 2008</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      The goal of this research study is to ...</td>\n",
       "      <td>\\n      The main objective of this study is to...</td>\n",
       "      <td>84.0</td>\n",
       "      <td>neoplasm metastasis</td>\n",
       "      <td>Methotrexate, Cyclosporine, Cyclosporins</td>\n",
       "      <td>\\n        -  inclusion criteria:\\n\\n        pa...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        patients:\\n\\n        patients with...</td>\n",
       "      <td>\\n\\n        patient:\\n\\n        pregnant or la...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>150</th>\n",
       "      <td>NCT00001901</td>\n",
       "      <td>Etanercept to Treat Wegener's Granulomatosis</td>\n",
       "      <td>Phase I/II Trial of TNFR:Fc (Etanercept) in Pa...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>February 1999</td>\n",
       "      <td>March 2005</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This study will examine the use of eta...</td>\n",
       "      <td>\\n      The purpose of the study is to assess ...</td>\n",
       "      <td>60.0</td>\n",
       "      <td>wegener's granulomatosis</td>\n",
       "      <td>Etanercept</td>\n",
       "      <td>\\n        inclusion criteria:\\n\\n        docum...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        documentation of wegener's granulo...</td>\n",
       "      <td>\\n\\n        patients with evidence of bacteria...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>155</th>\n",
       "      <td>NCT00001944</td>\n",
       "      <td>Vinorelbine and XR9576 to Treat Cancer</td>\n",
       "      <td>A Clinical Trial of the P-Glycoprotein Antagon...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1999</td>\n",
       "      <td>June 2001</td>\n",
       "      <td>Phase 1</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      Tumor resistance to anti-cancer drugs ...</td>\n",
       "      <td>\\n      Intrinsic and acquired drug resistance...</td>\n",
       "      <td>30.0</td>\n",
       "      <td>ovarian cancer</td>\n",
       "      <td>Vinorelbine</td>\n",
       "      <td>\\n        age greater than or equal to 18 year...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>156</th>\n",
       "      <td>NCT00001955</td>\n",
       "      <td>Study of Etanercept and Celecoxib to Treat Tem...</td>\n",
       "      <td>The Role of Cytokines as Inflammatory Mediator...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>December 1999</td>\n",
       "      <td>February 2004</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      This 2-part study will evaluate the ef...</td>\n",
       "      <td>\\n      The proposed clinical trial will consi...</td>\n",
       "      <td>150.0</td>\n",
       "      <td>temporomandibular joint disorder</td>\n",
       "      <td>Celecoxib, Etanercept</td>\n",
       "      <td>\\n        celecoxib study:\\n\\n        inclusio...</td>\n",
       "      <td>True</td>\n",
       "      <td>\\n\\n        recruitment will include patients ...</td>\n",
       "      <td>\\n\\n        subjects who had undergone any tmj...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>158</th>\n",
       "      <td>NCT00002454</td>\n",
       "      <td>Papilloma Virus Vaccine Therapy in Treating Yo...</td>\n",
       "      <td>Phase II Study of Immunotherapy With Autogenou...</td>\n",
       "      <td>Unknown status</td>\n",
       "      <td>December 1971</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Vaccines made from papillom...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Determine the immune re...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>precancerous condition</td>\n",
       "      <td>Vaccines</td>\n",
       "      <td>\\n        disease characteristics: diagnosis o...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>159</th>\n",
       "      <td>NCT00002455</td>\n",
       "      <td>Immunotherapy After Surgery in Treating Patien...</td>\n",
       "      <td>Immunotherapy of Colon Cancer With Autologous ...</td>\n",
       "      <td>Unknown status</td>\n",
       "      <td>April 1971</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Immunotherapy uses differen...</td>\n",
       "      <td>\\n      OBJECTIVES:\\n\\n        -  Determine th...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>melanoma (skin)</td>\n",
       "      <td>Corynebacterium granulosum P40, adjuvant therapy</td>\n",
       "      <td>\\n        disease characteristics:\\n\\n        ...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>160</th>\n",
       "      <td>NCT00002456</td>\n",
       "      <td>Graft-Versus-Host Disease Prevention in Treati...</td>\n",
       "      <td>Postgrafting Methotrexate and Cyclosporine for...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>May 1986</td>\n",
       "      <td>April 2002</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Bone marrow transplantation...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Determine the efficacy ...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>lymphoma</td>\n",
       "      <td>Methotrexate, Cyclosporine, Cyclosporins</td>\n",
       "      <td>\\n        disease characteristics: ongoing bon...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>161</th>\n",
       "      <td>NCT00002458</td>\n",
       "      <td>Monoclonal Antibody Therapy in Treating Childr...</td>\n",
       "      <td>Phase II Study of Adjuvant Therapy With Antiga...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>November 1987</td>\n",
       "      <td>September 2001</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Monoclonal antibodies can l...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Evaluate the efficacy o...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>neuroblastoma</td>\n",
       "      <td>Antibodies, Immunoglobulins, Antibodies, Monoc...</td>\n",
       "      <td>\\n        disease characteristics: histologica...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>162</th>\n",
       "      <td>NCT00002459</td>\n",
       "      <td>Radiation Therapy or No Further Treatment Foll...</td>\n",
       "      <td>Phase III Randomized Study of Adjuvant Pelvic ...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>April 1988</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Radiation therapy uses high...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Compare the rates of pe...</td>\n",
       "      <td>224.0</td>\n",
       "      <td>sarcoma</td>\n",
       "      <td>radiation therapy</td>\n",
       "      <td>\\n        disease characteristics: histologica...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>163</th>\n",
       "      <td>NCT00002460</td>\n",
       "      <td>Adjuvant Hormone Therapy in Treating Women Wit...</td>\n",
       "      <td>Phase III Randomized Study of Adjuvant Therapy...</td>\n",
       "      <td>Unknown status</td>\n",
       "      <td>September 1987</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Estrogen can stimulate the ...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Determine, in a prospec...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>breast cancer</td>\n",
       "      <td>Tamoxifen, Goserelin</td>\n",
       "      <td>\\n        disease characteristics: operable, c...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>164</th>\n",
       "      <td>NCT00002461</td>\n",
       "      <td>Combination Chemotherapy Followed by Bone Marr...</td>\n",
       "      <td>Phase II Study of Intensive Carmustine and Eto...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>April 1988</td>\n",
       "      <td>July 1991</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Drugs used in chemotherapy ...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Determine the antitumor...</td>\n",
       "      <td>35.0</td>\n",
       "      <td>lymphoma</td>\n",
       "      <td>Cisplatin, Cyclophosphamide, Etoposide, Etopos...</td>\n",
       "      <td>\\n        disease characteristics: diagnosis o...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>165</th>\n",
       "      <td>NCT00002462</td>\n",
       "      <td>RT or No RT Following Chemotherapy in Treating...</td>\n",
       "      <td>Phase III Randomized Trial of Adjuvant Involve...</td>\n",
       "      <td>Active, not recruiting</td>\n",
       "      <td>September 1989</td>\n",
       "      <td>NaN</td>\n",
       "      <td>Phase 3</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Drugs used in chemotherapy ...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Compare relapse-free su...</td>\n",
       "      <td>615.0</td>\n",
       "      <td>lymphoma</td>\n",
       "      <td>Doxorubicin, Liposomal doxorubicin, Bleomycin,...</td>\n",
       "      <td>\\n        disease characteristics: histologica...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>166</th>\n",
       "      <td>NCT00002463</td>\n",
       "      <td>Combination Chemotherapy in Treating Children ...</td>\n",
       "      <td>Phase II Study of Methotrexate, Mechlorethamin...</td>\n",
       "      <td>Completed</td>\n",
       "      <td>February 1989</td>\n",
       "      <td>January 2008</td>\n",
       "      <td>Phase 2</td>\n",
       "      <td>Interventional</td>\n",
       "      <td>\\n      RATIONALE: Drugs used in chemotherapy ...</td>\n",
       "      <td>\\n      OBJECTIVES: I. Determine the efficacy ...</td>\n",
       "      <td>4.0</td>\n",
       "      <td>brain and central nervous system tumors</td>\n",
       "      <td>Methotrexate, Prednisone, Vincristine, Procarb...</td>\n",
       "      <td>\\n        disease characteristics: histologica...</td>\n",
       "      <td>True</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 17 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          nct_id                                        brief_title  \\\n",
       "17   NCT00001188  The Role of Multi-Modality Therapy for the Tre...   \n",
       "18   NCT00001189  The Treatment of Grade I Sarcomas and Benign, ...   \n",
       "20   NCT00001193  A Multimodality Treatment Approach to Patients...   \n",
       "21   NCT00001209  A Pilot Study for the Treatment of Patients Wi...   \n",
       "22   NCT00001217  Osteosarcoma Study #2: A Randomized Trial of P...   \n",
       "25   NCT00001237  Pilot Protocol for the Treatment of Patients W...   \n",
       "26   NCT00001239  Combination Chemotherapy (FLAC) Combined With ...   \n",
       "27   NCT00001249  Treatment of Tac-Expressing Cutaneous T-Cell L...   \n",
       "28   NCT00001250  Effect of Preoperative Chemotherapy on Axillar...   \n",
       "29   NCT00001251           Phase I Study of Intrathecal Mafosfamide   \n",
       "30   NCT00001256  Steroids and Methotrexate to Treat Systemic Va...   \n",
       "31   NCT00001266  A Phase II Trial of Leuprolide + Flutamide + S...   \n",
       "33   NCT00001269  Phase I Trial of FLAC (5-Fluorouracil, Leucovo...   \n",
       "34   NCT00001270  Feasibility Study of Interleukin 1-Alpha With ...   \n",
       "35   NCT00001271  A Phase I Study of Continuous Infusion Immunot...   \n",
       "36   NCT00001272  A Phase I Study of Taxol, Cisplatin, Cyclophos...   \n",
       "37   NCT00001296  A Randomized Phase III Trial of Hyperthermic I...   \n",
       "39   NCT00001300  A Randomized Study of the Effect of Adjuvant C...   \n",
       "40   NCT00001302  A Phase I Study of Infusional Chemotherapy Wit...   \n",
       "41   NCT00001328     Gene Therapy for the Treatment of Brain Tumors   \n",
       "42   NCT00001332  Phase I Study of Continuous Hyperthermic Perit...   \n",
       "43   NCT00001333             Phase I Study of Intrathecal Topotecan   \n",
       "44   NCT00001335  New Therapeutic Strategies for Patients With E...   \n",
       "45   NCT00001337  Dose-Adjusted EPOCH Chemotherapy and Rituximab...   \n",
       "47   NCT00001339  A Study of Combination Chemotherapy and Surgic...   \n",
       "48   NCT00001341  A Phase I Trial of ZD1694 (TOMUDEX), an Inhibi...   \n",
       "49   NCT00001378  A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr...   \n",
       "51   NCT00001381  A Phase I Trial Using Suramin to Treat Superfi...   \n",
       "52   NCT00001382  A Phase I Study of Recombinant Vaccinia Virus ...   \n",
       "53   NCT00001383  A Phase I Study of Infusional Paclitaxel With ...   \n",
       "..           ...                                                ...   \n",
       "109  NCT00001587  A Phase I Study of Isolated Hepatic Portal and...   \n",
       "115  NCT00001683  A Phase I Study of Oral COL-3 (NSC-683551), a ...   \n",
       "116  NCT00001685  Immunization of HLA-A201 Patients With Metasta...   \n",
       "120  NCT00001696  A Pharmacokinetic Study of Genistein, a Tyrosi...   \n",
       "122  NCT00001703  Vaccine Therapy With Tumor Specific Mutated VH...   \n",
       "123  NCT00001705  Immunization of Patients With Metastatic Melan...   \n",
       "125  NCT00001730  Study of Radioiodine (131-I) Uptake Following ...   \n",
       "127  NCT00001750          Comparing Treatments for Multiple Myeloma   \n",
       "128  NCT00001765  Stem Cell Transplant Following Low-Intensity C...   \n",
       "131  NCT00001805  A Phase II Clinical Trial of Suppression of Hu...   \n",
       "132  NCT00001806    Methods in Education for Breast Cancer Genetics   \n",
       "134  NCT00001812  A Randomized, Double-Blind, Placebo Controlled...   \n",
       "136  NCT00001827                     p53 Vaccine for Ovarian Cancer   \n",
       "137  NCT00001830  Donor Th2 Cells to Prevent Graft-Versus-Host D...   \n",
       "138  NCT00001832  Lymphocyte Re-infusion During Immune Suppressi...   \n",
       "139  NCT00001835  Oxaliplatin in Cancer Patients With Impaired K...   \n",
       "144  NCT00001860  Sandostatin LAR Depot vs. Surgery for Treating...   \n",
       "148  NCT00001880  Stem Cell Transplantation for Metastatic Solid...   \n",
       "150  NCT00001901       Etanercept to Treat Wegener's Granulomatosis   \n",
       "155  NCT00001944             Vinorelbine and XR9576 to Treat Cancer   \n",
       "156  NCT00001955  Study of Etanercept and Celecoxib to Treat Tem...   \n",
       "158  NCT00002454  Papilloma Virus Vaccine Therapy in Treating Yo...   \n",
       "159  NCT00002455  Immunotherapy After Surgery in Treating Patien...   \n",
       "160  NCT00002456  Graft-Versus-Host Disease Prevention in Treati...   \n",
       "161  NCT00002458  Monoclonal Antibody Therapy in Treating Childr...   \n",
       "162  NCT00002459  Radiation Therapy or No Further Treatment Foll...   \n",
       "163  NCT00002460  Adjuvant Hormone Therapy in Treating Women Wit...   \n",
       "164  NCT00002461  Combination Chemotherapy Followed by Bone Marr...   \n",
       "165  NCT00002462  RT or No RT Following Chemotherapy in Treating...   \n",
       "166  NCT00002463  Combination Chemotherapy in Treating Children ...   \n",
       "\n",
       "                                        official_title  \\\n",
       "17   The Role of Multi-Modality Therapy for the Tre...   \n",
       "18   The Treatment of Grade I Sarcomas and Benign, ...   \n",
       "20   A Multimodality Treatment Approach to Patients...   \n",
       "21   A Pilot Study for the Treatment of Patients Wi...   \n",
       "22   Osteosarcoma Study #2: A Randomized Trial of P...   \n",
       "25   Pilot Protocol for the Treatment of Patients W...   \n",
       "26   Combination Chemotherapy (FLAC) Combined With ...   \n",
       "27   Treatment of Tac-Expressing Cutaneous T-Cell L...   \n",
       "28   Effect of Preoperative Chemotherapy on Axillar...   \n",
       "29            Phase I Study of Intrathecal Mafosfamide   \n",
       "30   An Open Trial of the Efficacy of Glucocorticoi...   \n",
       "31   A Phase II Trial of Leuprolide + Flutamide + S...   \n",
       "33   Phase I Trial of FLAC (5-Fluorouracil, Leucovo...   \n",
       "34   Feasibility Study of Interleukin 1-Alpha With ...   \n",
       "35   A Phase I Study of Continuous Infusion Immunot...   \n",
       "36   A Phase I Study of Taxol, Cisplatin, Cyclophos...   \n",
       "37   A Randomized Phase III Trial of Hyperthermic I...   \n",
       "39   A Randomized Study of the Effect of Adjuvant C...   \n",
       "40   A Phase I Study of Infusional Chemotherapy Wit...   \n",
       "41   Gene Therapy for the Treatment of Brain Tumors...   \n",
       "42   Phase I Study of Continuous Hyperthermic Perit...   \n",
       "43              Phase I Study of Intrathecal Topotecan   \n",
       "44   New Therapeutic Strategies for Patients With E...   \n",
       "45   Dose-Adjusted EPOCH Chemotherapy and Rituximab...   \n",
       "47   A Study of Combination Chemotherapy and Surgic...   \n",
       "48   A Phase I Trial of ZD1694 (TOMUDEX® (Registere...   \n",
       "49   A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr...   \n",
       "51   A Phase I Trial Using Suramin to Treat Superfi...   \n",
       "52   A Phase I Study of Recombinant Vaccinia Virus ...   \n",
       "53   A Phase I Study of Infusional Paclitaxel With ...   \n",
       "..                                                 ...   \n",
       "109  A Phase I Study of Isolated Hepatic Portal and...   \n",
       "115  A Phase I Study of Oral COL-3 (NSC-683551), a ...   \n",
       "116  Immunization of HLA-A201 Patients With Metasta...   \n",
       "120  A Pharmacokinetic Study of Genistein, a Tyrosi...   \n",
       "122  Vaccine Therapy With Tumor Specific Mutated VH...   \n",
       "123  Immunization of Patients With Metastatic Melan...   \n",
       "125  A Dosimetry Study of Radioiodine (131-I) Uptak...   \n",
       "127  Randomized Trial of Autologous Transplantation...   \n",
       "128  Low Intensity Preparative Regimen Followed by ...   \n",
       "131  A Phase II Clinical Trial of Suppression of Hu...   \n",
       "132    Methods in Education for Breast Cancer Genetics   \n",
       "134  A Randomized, Double-Blind, Placebo Controlled...   \n",
       "136  Vaccine Therapy With Tumor Specific p53 Peptid...   \n",
       "137  Pilot Study of Donor Th2 Cells for the Prevent...   \n",
       "138  Treatment of Patients With Metastatic Melanoma...   \n",
       "139  A Phase I Study of Oxaliplatin in Adult Cancer...   \n",
       "144  Sandostatin LAR vs. Surgery in Acromegalics Wi...   \n",
       "148  Exploratory Study of Non-Myeloablative Allogen...   \n",
       "150  Phase I/II Trial of TNFR:Fc (Etanercept) in Pa...   \n",
       "155  A Clinical Trial of the P-Glycoprotein Antagon...   \n",
       "156  The Role of Cytokines as Inflammatory Mediator...   \n",
       "158  Phase II Study of Immunotherapy With Autogenou...   \n",
       "159  Immunotherapy of Colon Cancer With Autologous ...   \n",
       "160  Postgrafting Methotrexate and Cyclosporine for...   \n",
       "161  Phase II Study of Adjuvant Therapy With Antiga...   \n",
       "162  Phase III Randomized Study of Adjuvant Pelvic ...   \n",
       "163  Phase III Randomized Study of Adjuvant Therapy...   \n",
       "164  Phase II Study of Intensive Carmustine and Eto...   \n",
       "165  Phase III Randomized Trial of Adjuvant Involve...   \n",
       "166  Phase II Study of Methotrexate, Mechlorethamin...   \n",
       "\n",
       "             overall_status       start_date     completion_date    phase  \\\n",
       "17                Completed    December 1983      September 2000  Phase 2   \n",
       "18                Completed    December 1983          April 2001  Phase 2   \n",
       "20                Completed    November 1984      September 2000  Phase 2   \n",
       "21                Completed     October 1986         August 2000  Phase 1   \n",
       "22                Completed         May 1987       December 2000  Phase 3   \n",
       "25                Completed       March 1989          April 2000  Phase 2   \n",
       "26                Completed        July 1989        January 2001  Phase 2   \n",
       "27                Completed    December 1989        October 2000  Phase 1   \n",
       "28                Completed    December 1989        October 2002  Phase 2   \n",
       "29                Completed    November 1989       November 2003  Phase 1   \n",
       "30                Completed       March 1990       February 2004  Phase 2   \n",
       "31                Completed     October 1990         August 2003  Phase 2   \n",
       "33                Completed         May 1991       February 2001  Phase 1   \n",
       "34                Completed        June 1991          March 2000  Phase 1   \n",
       "35                Completed        July 1991          April 2001  Phase 1   \n",
       "36                Completed   September 1991            May 2000  Phase 1   \n",
       "37                Completed    February 1992        October 2000  Phase 3   \n",
       "39                Completed        June 1992          March 2001  Phase 3   \n",
       "40                Completed   September 1992           June 2002  Phase 1   \n",
       "41                Completed  August 21, 1992      April 30, 2010  Phase 1   \n",
       "42                Completed    December 1992        October 2000  Phase 1   \n",
       "43                Completed    February 1993       December 2000  Phase 1   \n",
       "44                Completed       April 1993        January 2002  Phase 2   \n",
       "45               Recruiting      May 8, 1993      March 31, 2022  Phase 2   \n",
       "47                Completed      August 1993         August 2002  Phase 2   \n",
       "48                Completed   September 1993           June 2001  Phase 1   \n",
       "49                Completed     January 1994       November 2000  Phase 1   \n",
       "51                Completed       March 1994       December 2000  Phase 1   \n",
       "52                Completed       March 1994          March 2000  Phase 1   \n",
       "53                Completed       March 1994        January 2001  Phase 1   \n",
       "..                      ...              ...                 ...      ...   \n",
       "109               Completed   September 1997          March 2001  Phase 1   \n",
       "115               Completed     October 1997         August 2003  Phase 1   \n",
       "116               Completed    November 1997      September 2000  Phase 2   \n",
       "120               Completed       April 1998          March 2001  Phase 1   \n",
       "122              Terminated      August 1998       November 2008  Phase 2   \n",
       "123               Completed        July 1998           June 2001  Phase 2   \n",
       "125               Completed    December 1997          April 2000  Phase 4   \n",
       "127               Completed   September 1998         August 2002  Phase 2   \n",
       "128               Completed       April 1998       February 2005  Phase 1   \n",
       "131               Completed       March 1999           June 2000  Phase 2   \n",
       "132               Completed    April 6, 1999    December 6, 2017  Phase 3   \n",
       "134               Completed       April 1999         August 2000  Phase 3   \n",
       "136              Terminated    July 26, 1999    January 25, 2013  Phase 2   \n",
       "137               Completed    July 20, 1999        May 19, 2015  Phase 1   \n",
       "138               Completed      August 1999            May 2010  Phase 2   \n",
       "139               Completed   September 1999       December 2001  Phase 1   \n",
       "144               Completed      August 1999           July 2002  Phase 2   \n",
       "148               Completed   March 12, 1999  September 23, 2008  Phase 2   \n",
       "150               Completed    February 1999          March 2005  Phase 2   \n",
       "155               Completed    December 1999           June 2001  Phase 1   \n",
       "156               Completed    December 1999       February 2004  Phase 2   \n",
       "158          Unknown status    December 1971                 NaN  Phase 2   \n",
       "159          Unknown status       April 1971                 NaN  Phase 3   \n",
       "160               Completed         May 1986          April 2002  Phase 3   \n",
       "161               Completed    November 1987      September 2001  Phase 2   \n",
       "162               Completed       April 1988                 NaN  Phase 3   \n",
       "163          Unknown status   September 1987                 NaN  Phase 3   \n",
       "164               Completed       April 1988           July 1991  Phase 2   \n",
       "165  Active, not recruiting   September 1989                 NaN  Phase 3   \n",
       "166               Completed    February 1989        January 2008  Phase 2   \n",
       "\n",
       "         study_type                                      brief_summary  \\\n",
       "17   Interventional  \\n      Patients with Grade II and III soft ti...   \n",
       "18   Interventional  \\n      Patients with Grade I soft tissue sarc...   \n",
       "20   Interventional  \\n      This study is designed to evaluate the...   \n",
       "21   Interventional  \\n      This protocol is designed to test the ...   \n",
       "22   Interventional  \\n      The study is designed to determine if ...   \n",
       "25   Interventional  \\n      Major improvements in the treatment of...   \n",
       "26   Interventional  \\n      To evaluate a dose intensive chemother...   \n",
       "27   Interventional  \\n      The study purpose is to evaluate the c...   \n",
       "28   Interventional  \\n      Patients with untreated clinical stage...   \n",
       "29   Interventional  \\n      The purpose of this study is to determ...   \n",
       "30   Interventional  \\n      This study will evaluate the safety an...   \n",
       "31   Interventional  \\n      One current hypothesis as to what limi...   \n",
       "33   Interventional  \\n      This is a phase I study to determine t...   \n",
       "34   Interventional  \\n      This is a phase I/II study of interleu...   \n",
       "35   Interventional  \\n      Patients with CD22(+) B-cell lymphomas...   \n",
       "36   Interventional  \\n      This is a Phase I study which addresse...   \n",
       "37   Interventional  \\n      Randomized study. Initially, 3 patient...   \n",
       "39   Interventional  \\n      Randomized study. All patients must be...   \n",
       "40   Interventional  \\n      The clinical study entitled \"A Phase I...   \n",
       "41   Interventional  \\n      Malignant brain tumors are responsible...   \n",
       "42   Interventional  \\n      Patients with gastric adenocarcinoma a...   \n",
       "43   Interventional  \\n      The purpose of this study is to determ...   \n",
       "44   Interventional  \\n      The prognosis for patients with metast...   \n",
       "45   Interventional  \\n      5-Drug Combination Chemotherapy with H...   \n",
       "47   Interventional  \\n      Patients who have no response to preop...   \n",
       "48   Interventional  \\n      Thymidylate synthase (TS), an enzyme w...   \n",
       "49   Interventional  \\n      This is a pilot, chemoprevention study...   \n",
       "51   Interventional  \\n      Patients with superficial transitional...   \n",
       "52   Interventional  \\n      This trial will evaluate, in patients ...   \n",
       "53   Interventional  \\n      This is a dosage escalation study to e...   \n",
       "..              ...                                                ...   \n",
       "109  Interventional  \\n      Patients with unresectable primary or ...   \n",
       "115  Interventional  \\n      Matrix metalloproteinases (MMPs) are a...   \n",
       "116  Interventional  \\n      This is a study of a melanoma tumor an...   \n",
       "120  Interventional  \\n      Genistein is a natural product found i...   \n",
       "122  Interventional  \\n      About 27,000 new cases of renal cell c...   \n",
       "123  Interventional  \\n      Patients with metastatic melanoma who ...   \n",
       "125  Interventional  \\n      Thyroid cancer is typically treated wi...   \n",
       "127  Interventional  \\n      Some drugs have the ability to push st...   \n",
       "128  Interventional  \\n      This study will investigate the safety...   \n",
       "131  Interventional  \\n      This is a phase II clinical and pharma...   \n",
       "132  Interventional  \\n      In 1997, the Genetics Department of th...   \n",
       "134  Interventional  \\n      In patients who are receiving intraven...   \n",
       "136  Interventional  \\n      This study will examine whether vaccin...   \n",
       "137  Interventional  \\n      Allogeneic peripheral blood stem cell ...   \n",
       "138  Interventional  \\n      This experiment will test the safety a...   \n",
       "139  Interventional  \\n      Oxaliplatin is an experimental anti-ca...   \n",
       "144  Interventional  \\n      The purpose of this study is to compar...   \n",
       "148  Interventional  \\n      The goal of this research study is to ...   \n",
       "150  Interventional  \\n      This study will examine the use of eta...   \n",
       "155  Interventional  \\n      Tumor resistance to anti-cancer drugs ...   \n",
       "156  Interventional  \\n      This 2-part study will evaluate the ef...   \n",
       "158  Interventional  \\n      RATIONALE: Vaccines made from papillom...   \n",
       "159  Interventional  \\n      RATIONALE: Immunotherapy uses differen...   \n",
       "160  Interventional  \\n      RATIONALE: Bone marrow transplantation...   \n",
       "161  Interventional  \\n      RATIONALE: Monoclonal antibodies can l...   \n",
       "162  Interventional  \\n      RATIONALE: Radiation therapy uses high...   \n",
       "163  Interventional  \\n      RATIONALE: Estrogen can stimulate the ...   \n",
       "164  Interventional  \\n      RATIONALE: Drugs used in chemotherapy ...   \n",
       "165  Interventional  \\n      RATIONALE: Drugs used in chemotherapy ...   \n",
       "166  Interventional  \\n      RATIONALE: Drugs used in chemotherapy ...   \n",
       "\n",
       "                                  detailed_description  enrollment  \\\n",
       "17   \\n      Patients with Grade II and III soft ti...       100.0   \n",
       "18   \\n      This is a randomized study. Patients u...       150.0   \n",
       "20   \\n      This study is designed to evaluate the...       200.0   \n",
       "21   \\n      This protocol is designed to test the ...       120.0   \n",
       "22   \\n      The study is designed to determine if ...       260.0   \n",
       "25   \\n      Major improvements in the treatment of...       120.0   \n",
       "26   \\n      To evaluate a dose intensive chemother...       100.0   \n",
       "27   \\n      The study purpose is to evaluate the c...        30.0   \n",
       "28   \\n      A prospective randomized trial evaluat...       130.0   \n",
       "29   \\n      The purpose of this study is to determ...        65.0   \n",
       "30   \\n      Previous studies at the NIH have demon...       100.0   \n",
       "31   \\n      The purpose of this study is to assess...        70.0   \n",
       "33   \\n      Phase I study to determine the maximal...       100.0   \n",
       "34   \\n      This is a phase I/II study of interleu...        85.0   \n",
       "35   \\n      Patients with CD22(+) B-cell lymphomas...        24.0   \n",
       "36   \\n      This is a Phase I study which addresse...        60.0   \n",
       "37   \\n      Patients with locally advanced melanom...       122.0   \n",
       "39   \\n      Patients with primary, high-grade soft...       150.0   \n",
       "40   \\n      The clinical study entitled \"A Phase I...        80.0   \n",
       "41   \\n      Malignant brain tumors are responsible...        15.0   \n",
       "42   \\n      Patients with gastric adenocarcinoma a...        50.0   \n",
       "43   \\n      The purpose of this study is to determ...        30.0   \n",
       "44   \\n      The prognosis for patients with metast...        90.0   \n",
       "45   \\n      Background:\\n\\n      The treatment of ...       348.0   \n",
       "47   \\n      This is a study of infusional doxorubi...        42.0   \n",
       "48   \\n      Thymidylate synthase (TS), an enzyme w...        60.0   \n",
       "49   \\n      This is a pilot chemo-prevention study...        75.0   \n",
       "51   \\n      Patients with superficial transitional...        18.0   \n",
       "52   \\n      This trial will evaluate, in patients ...        75.0   \n",
       "53   \\n      The clinical study entitled \"A Phase I...        52.0   \n",
       "..                                                 ...         ...   \n",
       "109  \\n      Patients with unresectable primary or ...        30.0   \n",
       "115  \\n      Matrix metalloproteinases (MMPs) are a...        35.0   \n",
       "116  \\n      This is a study of a melanoma tumor an...       114.0   \n",
       "120  \\n      Genistein is a natural product found i...        15.0   \n",
       "122  \\n      About 27,000 new cases of renal cell c...         6.0   \n",
       "123  \\n      Patients with metastatic melanoma who ...       141.0   \n",
       "125  \\n      This is a multi-centered, open-labeled...        20.0   \n",
       "127  \\n      Some drugs, such as hematopoietic cyto...        32.0   \n",
       "128  \\n      Chronic Granulomatous Disease (CGD) is...        60.0   \n",
       "131  \\n      This is a phase II clinical and pharma...        20.0   \n",
       "132  \\n      In October 1995 the National Naval Med...       170.0   \n",
       "134  \\n      In patients who are receiving intraven...        84.0   \n",
       "136  \\n      P53 is the most commonly mutated gene ...        21.0   \n",
       "137  \\n      Allogeneic peripheral blood stem cell ...       110.0   \n",
       "138  \\n      Patients with metastatic melanoma who ...       170.0   \n",
       "139  \\n      Oxaliplatin is a diaminocyclohexane pl...        60.0   \n",
       "144  \\n      The purpose of this study is to compar...         5.0   \n",
       "148  \\n      The main objective of this study is to...        84.0   \n",
       "150  \\n      The purpose of the study is to assess ...        60.0   \n",
       "155  \\n      Intrinsic and acquired drug resistance...        30.0   \n",
       "156  \\n      The proposed clinical trial will consi...       150.0   \n",
       "158  \\n      OBJECTIVES: I. Determine the immune re...         NaN   \n",
       "159  \\n      OBJECTIVES:\\n\\n        -  Determine th...         NaN   \n",
       "160  \\n      OBJECTIVES: I. Determine the efficacy ...         NaN   \n",
       "161  \\n      OBJECTIVES: I. Evaluate the efficacy o...         NaN   \n",
       "162  \\n      OBJECTIVES: I. Compare the rates of pe...       224.0   \n",
       "163  \\n      OBJECTIVES: I. Determine, in a prospec...         NaN   \n",
       "164  \\n      OBJECTIVES: I. Determine the antitumor...        35.0   \n",
       "165  \\n      OBJECTIVES: I. Compare relapse-free su...       615.0   \n",
       "166  \\n      OBJECTIVES: I. Determine the efficacy ...         4.0   \n",
       "\n",
       "                                   condition  \\\n",
       "17                                   sarcoma   \n",
       "18                                 neoplasms   \n",
       "20                       neoplasm metastasis   \n",
       "21                          sarcoma, ewing's   \n",
       "22                              osteosarcoma   \n",
       "25           lymphoma, small noncleaved-cell   \n",
       "26                          breast neoplasms   \n",
       "27               lymphoma, t-cell, cutaneous   \n",
       "28                       neoplasm metastasis   \n",
       "29                        meningeal neoplasm   \n",
       "30                  wegener's granulomatosis   \n",
       "31                        prostatic neoplasm   \n",
       "33                       neoplasm metastasis   \n",
       "34                      testicular neoplasms   \n",
       "35                           b cell lymphoma   \n",
       "36                         ovarian neoplasms   \n",
       "37                                  melanoma   \n",
       "39                                   sarcoma   \n",
       "40                            ovarian cancer   \n",
       "41                       neoplasm metastasis   \n",
       "42                         stomach neoplasms   \n",
       "43                       meningeal neoplasms   \n",
       "44                          rhabdomyosarcoma   \n",
       "45                        gray zone lymphoma   \n",
       "47                adrenal cortical carcinoma   \n",
       "48                                  neoplasm   \n",
       "49                          breast neoplasms   \n",
       "51              carcinoma, transitional cell   \n",
       "52                       prostatic neoplasms   \n",
       "53                            ovarian cancer   \n",
       "..                                       ...   \n",
       "109                      neoplasm metastasis   \n",
       "115                     renal cell carcinoma   \n",
       "116                      neoplasm metastasis   \n",
       "120                                   cancer   \n",
       "122                     renal cell carcinoma   \n",
       "123                      neoplasm metastasis   \n",
       "125                        thyroid neoplasms   \n",
       "127                         multiple myeloma   \n",
       "128            chronic granulomatous disease   \n",
       "131                        stomach neoplasms   \n",
       "132                           ovarian cancer   \n",
       "134                               stomatitis   \n",
       "136                         ovarian neoplasm   \n",
       "137                   non hodgkin's lymphoma   \n",
       "138                      neoplasm metastasis   \n",
       "139                      neoplasm metastasis   \n",
       "144                       pituitary neoplasm   \n",
       "148                      neoplasm metastasis   \n",
       "150                 wegener's granulomatosis   \n",
       "155                           ovarian cancer   \n",
       "156         temporomandibular joint disorder   \n",
       "158                   precancerous condition   \n",
       "159                          melanoma (skin)   \n",
       "160                                 lymphoma   \n",
       "161                            neuroblastoma   \n",
       "162                                  sarcoma   \n",
       "163                            breast cancer   \n",
       "164                                 lymphoma   \n",
       "165                                 lymphoma   \n",
       "166  brain and central nervous system tumors   \n",
       "\n",
       "                                     intervention_name  \\\n",
       "17                 radiation therapy following surgery   \n",
       "18                                        radiotherapy   \n",
       "20                                           Melphalan   \n",
       "21   Vincristine, Doxorubicin, Ifosfamide, Cyclopho...   \n",
       "22                           pre-surgical chemotherapy   \n",
       "25                                        Sargramostim   \n",
       "26                                        Sargramostim   \n",
       "27                              Antibodies, Daclizumab   \n",
       "28   preoperative dose intense chemotherapy (FLAC/G...   \n",
       "29                       Mafosfamide, Cyclophosphamide   \n",
       "30                            Methotrexate, Prednisone   \n",
       "31                      Leuprolide, Flutamide, Suramin   \n",
       "33   Fluorouracil, Cyclophosphamide, Doxorubicin, L...   \n",
       "34       Etoposide, Ifosfamide, Isophosphamide mustard   \n",
       "35                                        Immunotoxins   \n",
       "36   Cisplatin, Cyclophosphamide, Paclitaxel, Album...   \n",
       "37            Interferons, Melphalan, Interferon-gamma   \n",
       "39   Doxorubicin, Liposomal doxorubicin, Ifosfamide...   \n",
       "40                                    polysaccharide-K   \n",
       "41               Ganciclovir, Ganciclovir triphosphate   \n",
       "42                                 CHPP with cisplatin   \n",
       "43                                           Topotecan   \n",
       "44                    Topotecan, Dexrazoxane, Razoxane   \n",
       "45                                           Rituximab   \n",
       "47   Doxorubicin, Liposomal doxorubicin, Etoposide,...   \n",
       "48                                         Raltitrexed   \n",
       "49                               Tamoxifen, Retinamide   \n",
       "51                                             Suramin   \n",
       "52                                            PROSTVAC   \n",
       "53   Paclitaxel, Albumin-Bound Paclitaxel, polysacc...   \n",
       "..                                                 ...   \n",
       "109                                          Melphalan   \n",
       "115  Tissue Inhibitor of Metalloproteinases, Matrix...   \n",
       "116                                           Vaccines   \n",
       "120                                          Genistein   \n",
       "122                       Vaccines, \"Freunds Adjuvant\"   \n",
       "123                                           Vaccines   \n",
       "125                                           Hormones   \n",
       "127                                            Stemgen   \n",
       "128  Nexell Isolex with T-cell Depletion, Baxter is...   \n",
       "131    Antibodies, Rituximab, Immunotoxins, Antitoxins   \n",
       "132  Genetic Education and Counseling, Genetic Educ...   \n",
       "134                            Interleukin-2, Nystatin   \n",
       "136  Vaccines, Sargramostim, \"Freunds Adjuvant\", Al...   \n",
       "137                 Th2 cells in allo HSCTT, Th2 Cells   \n",
       "138  Cyclophosphamide, Fludarabine phosphate, Fluda...   \n",
       "139                                        Oxaliplatin   \n",
       "144                                         Octreotide   \n",
       "148           Methotrexate, Cyclosporine, Cyclosporins   \n",
       "150                                         Etanercept   \n",
       "155                                        Vinorelbine   \n",
       "156                              Celecoxib, Etanercept   \n",
       "158                                           Vaccines   \n",
       "159   Corynebacterium granulosum P40, adjuvant therapy   \n",
       "160           Methotrexate, Cyclosporine, Cyclosporins   \n",
       "161  Antibodies, Immunoglobulins, Antibodies, Monoc...   \n",
       "162                                  radiation therapy   \n",
       "163                               Tamoxifen, Goserelin   \n",
       "164  Cisplatin, Cyclophosphamide, Etoposide, Etopos...   \n",
       "165  Doxorubicin, Liposomal doxorubicin, Bleomycin,...   \n",
       "166  Methotrexate, Prednisone, Vincristine, Procarb...   \n",
       "\n",
       "                                           eligibility  condition_cancer  \\\n",
       "17   \\n        patients must have biopsy-proven sof...              True   \n",
       "18   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "20   \\n        patients must have a histologically ...              True   \n",
       "21   \\n        patients with high grade soft tissue...              True   \n",
       "22   \\n        must be less than or equal to 30 yea...              True   \n",
       "25   \\n        high risk protocol: patients with sm...              True   \n",
       "26   \\n        all stage iii or clinical t3n0 or tx...              True   \n",
       "27   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "28   \\n        inclusion criteria\\n\\n        women ...              True   \n",
       "29   \\n        inclusion criteria:\\n\\n        all p...              True   \n",
       "30   \\n        inclusion criteria:\\n\\n        diagn...              True   \n",
       "31   \\n        inclusion criteria:\\n\\n        patie...              True   \n",
       "33   \\n        patients with stage iv (metastatic) ...              True   \n",
       "34   \\n        a history of pathologically document...              True   \n",
       "35   \\n        patients with a histologic diagnosis...              True   \n",
       "36   \\n        all patients must have biopsy proven...              True   \n",
       "37   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "39   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "40   \\n        biopsy proven metastatic cancer, for...              True   \n",
       "41   \\n        -  inclusion criteria:\\n\\n        al...              True   \n",
       "42   \\n        patients age greater than or equal t...              True   \n",
       "43   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "44   \\n        the patient must fall into one of th...              True   \n",
       "45   \\n        -  inclusion criteria:\\n\\n        no...              True   \n",
       "47   \\n        biopsy-proven primary or recurrent a...              True   \n",
       "48   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "49   \\n        population characteristics:\\n\\n     ...              True   \n",
       "51   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "52   \\n        disease characteristics:\\n\\n        ...              True   \n",
       "53   \\n        biopsy proven advanced cancer, for w...              True   \n",
       "..                                                 ...               ...   \n",
       "109  \\n        histologically or cytologically prov...              True   \n",
       "115  \\n        inclusion criteria:\\n\\n        all p...              True   \n",
       "116  \\n        any patient 16 years of age or older...              True   \n",
       "120  \\n        must be 18 years old or greater.\\n\\n...              True   \n",
       "122  \\n        inclusion criteria:\\n\\n          -  ...              True   \n",
       "123  \\n        any patient age greater than or equa...              True   \n",
       "125  \\n        patients greater than or equal to 18...              True   \n",
       "127  \\n        inclusion criteria\\n\\n        age 70...              True   \n",
       "128  \\n        inclusion criteria:\\n\\n        patie...              True   \n",
       "131  \\n        patients must have advanced stage so...              True   \n",
       "132  \\n        -  inclusion criteria:\\n\\n        at...              True   \n",
       "134  \\n        all patients enrolled on high dose i...              True   \n",
       "136  \\n        -  inclusion criteria:\\n\\n        pa...              True   \n",
       "137  \\n        -  inclusion criteria - patient:\\n\\n...              True   \n",
       "138  \\n        -  inclusion criteria\\n\\n          -...              True   \n",
       "139  \\n        patients must have histologically co...              True   \n",
       "144  \\n        inclusion criteria:\\n\\n        male ...              True   \n",
       "148  \\n        -  inclusion criteria:\\n\\n        pa...              True   \n",
       "150  \\n        inclusion criteria:\\n\\n        docum...              True   \n",
       "155  \\n        age greater than or equal to 18 year...              True   \n",
       "156  \\n        celecoxib study:\\n\\n        inclusio...              True   \n",
       "158  \\n        disease characteristics: diagnosis o...              True   \n",
       "159  \\n        disease characteristics:\\n\\n        ...              True   \n",
       "160  \\n        disease characteristics: ongoing bon...              True   \n",
       "161  \\n        disease characteristics: histologica...              True   \n",
       "162  \\n        disease characteristics: histologica...              True   \n",
       "163  \\n        disease characteristics: operable, c...              True   \n",
       "164  \\n        disease characteristics: diagnosis o...              True   \n",
       "165  \\n        disease characteristics: histologica...              True   \n",
       "166  \\n        disease characteristics: histologica...              True   \n",
       "\n",
       "                                              eligible  \\\n",
       "17                                                 NaN   \n",
       "18                                                 NaN   \n",
       "20                                                 NaN   \n",
       "21                                                 NaN   \n",
       "22                                                 NaN   \n",
       "25                                                 NaN   \n",
       "26                                                 NaN   \n",
       "27                                                 NaN   \n",
       "28                                                 NaN   \n",
       "29   \\n\\n        all patients over 3 years of age w...   \n",
       "30   \\n\\n        diagnosis: wegener's granulomatosi...   \n",
       "31   \\n\\n        patients must have a histologic di...   \n",
       "33                                                 NaN   \n",
       "34                                                 NaN   \n",
       "35                                                 NaN   \n",
       "36                                                 NaN   \n",
       "37                                                 NaN   \n",
       "39                                                 NaN   \n",
       "40                                                 NaN   \n",
       "41   \\n\\n        all adults, greater than 18 years ...   \n",
       "42                                                 NaN   \n",
       "43                                                 NaN   \n",
       "44                                                 NaN   \n",
       "45   \\n\\n        non-hodgkin's lymphomas in the fol...   \n",
       "47                                                 NaN   \n",
       "48                                                 NaN   \n",
       "49                                                 NaN   \n",
       "51                                                 NaN   \n",
       "52                                                 NaN   \n",
       "53                                                 NaN   \n",
       "..                                                 ...   \n",
       "109                                                NaN   \n",
       "115  \\n\\n        all patients with refractory solid...   \n",
       "116                                                NaN   \n",
       "120                                                NaN   \n",
       "122  \\n\\n          -  patients must be 18 years of ...   \n",
       "123                                                NaN   \n",
       "125                                                NaN   \n",
       "127                                                NaN   \n",
       "128  \\n\\n        patient criteria:\\n\\n        ages ...   \n",
       "131                                                NaN   \n",
       "132  \\n\\n        at least one of the following:\\n\\n...   \n",
       "134                                                NaN   \n",
       "136  \\n\\n        patients must be 18 years of age o...   \n",
       "137                                                NaN   \n",
       "138                                                NaN   \n",
       "139                                                NaN   \n",
       "144  \\n\\n        male or female patients, 18 years ...   \n",
       "148  \\n\\n        patients:\\n\\n        patients with...   \n",
       "150  \\n\\n        documentation of wegener's granulo...   \n",
       "155                                                NaN   \n",
       "156  \\n\\n        recruitment will include patients ...   \n",
       "158                                                NaN   \n",
       "159                                                NaN   \n",
       "160                                                NaN   \n",
       "161                                                NaN   \n",
       "162                                                NaN   \n",
       "163                                                NaN   \n",
       "164                                                NaN   \n",
       "165                                                NaN   \n",
       "166                                                NaN   \n",
       "\n",
       "                                            ineligible  \n",
       "17                                                 NaN  \n",
       "18                                                 NaN  \n",
       "20                                                 NaN  \n",
       "21                                                 NaN  \n",
       "22                                                 NaN  \n",
       "25                                                 NaN  \n",
       "26                                                 NaN  \n",
       "27                                                 NaN  \n",
       "28                                                 NaN  \n",
       "29   \\n\\n        patients receiving other therapy (...  \n",
       "30   \\n\\n        evidence of infection by gram stai...  \n",
       "31                                                 NaN  \n",
       "33                                                 NaN  \n",
       "34                                                 NaN  \n",
       "35                                                 NaN  \n",
       "36                                                 NaN  \n",
       "37                                                 NaN  \n",
       "39                                                 NaN  \n",
       "40                                                 NaN  \n",
       "41   \\n\\n        no pregnant women will be entered ...  \n",
       "42                                                 NaN  \n",
       "43                                                 NaN  \n",
       "44                                                 NaN  \n",
       "45                                                 NaN  \n",
       "47                                                 NaN  \n",
       "48                                                 NaN  \n",
       "49                                                 NaN  \n",
       "51                                                 NaN  \n",
       "52                                                 NaN  \n",
       "53                                                 NaN  \n",
       "..                                                 ...  \n",
       "109                                                NaN  \n",
       "115  \\n\\n        active infection, including positi...  \n",
       "116                                                NaN  \n",
       "120                                                NaN  \n",
       "122  \\n\\n          -  any condition that does not f...  \n",
       "123                                                NaN  \n",
       "125                                                NaN  \n",
       "127                                                NaN  \n",
       "128  \\n\\n        patient or donor pregnant.\\n\\n    ...  \n",
       "131                                                NaN  \n",
       "132  \\n\\n        patients will be considered inelig...  \n",
       "134                                                NaN  \n",
       "136  \\n\\n        any condition that does not fit wi...  \n",
       "137                                                NaN  \n",
       "138                                                NaN  \n",
       "139                                                NaN  \n",
       "144  \\n\\n        patients demonstrating intolerance...  \n",
       "148  \\n\\n        patient:\\n\\n        pregnant or la...  \n",
       "150  \\n\\n        patients with evidence of bacteria...  \n",
       "155                                                NaN  \n",
       "156  \\n\\n        subjects who had undergone any tmj...  \n",
       "158                                                NaN  \n",
       "159                                                NaN  \n",
       "160                                                NaN  \n",
       "161                                                NaN  \n",
       "162                                                NaN  \n",
       "163                                                NaN  \n",
       "164                                                NaN  \n",
       "165                                                NaN  \n",
       "166                                                NaN  \n",
       "\n",
       "[100 rows x 17 columns]"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.head(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(34494, 17)"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only trials where both the inclusion and the exclusion text were extracted.\n",
    "df2 = df[df['eligible'].notnull()]\n",
    "# Build the mask from df2 itself: a mask taken from `df` has a different index and\n",
    "# makes pandas emit 'Boolean Series key will be reindexed to match DataFrame index'.\n",
    "df3 = df2[df2['ineligible'].notnull()]\n",
    "df3.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "Int64Index: 34494 entries, 29 to 61776\n",
      "Data columns (total 17 columns):\n",
      "nct_id                  34494 non-null object\n",
      "brief_title             34494 non-null object\n",
      "official_title          33991 non-null object\n",
      "overall_status          34494 non-null object\n",
      "start_date              34494 non-null object\n",
      "completion_date         32854 non-null object\n",
      "phase                   27400 non-null object\n",
      "study_type              34494 non-null object\n",
      "brief_summary           34493 non-null object\n",
      "detailed_description    22562 non-null object\n",
      "enrollment              34382 non-null float64\n",
      "condition               34494 non-null object\n",
      "intervention_name       34494 non-null object\n",
      "eligibility             34494 non-null object\n",
      "condition_cancer        34494 non-null bool\n",
      "eligible                34494 non-null object\n",
      "ineligible              34494 non-null object\n",
      "dtypes: bool(1), float64(1), object(15)\n",
      "memory usage: 4.5+ MB\n"
     ]
    }
   ],
   "source": [
    "df3.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       "<Figure size 648x432 with 1 Axes>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Distribution of the inclusion-criteria text length, in characters.\n",
    "eligible_length = df3['eligible'].apply(len)\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(9, 6))\n",
    "ax.hist(eligible_length)\n",
    "ax.set_xlabel('Length (characters)', fontsize=16)\n",
    "ax.set_ylabel('Inclusion', fontsize=16)\n",
    "fig.savefig('./image/inclusion.png', bbox_inches=\"tight\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# df3 keeps the original row labels (its first label is 29), so the label lookup\n",
    "# df3.eligible[0] raises KeyError: 0. Select the first row by position instead.\n",
    "Text = df3['eligible'].iloc[0]\n",
    "Text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import preprocessing\n",
    "from wordcloud import WordCloud, STOPWORDS\n",
    "\n",
    "# WordCloud.generate() needs a single string. The previous df3['eligible'][:0]\n",
    "# is an empty Series slice, so take the first inclusion text by position.\n",
    "Text = df3['eligible'].iloc[0]\n",
    "\n",
    "wc = WordCloud().generate(Text)\n",
    "plt.figure(figsize=(16,12))\n",
    "plt.imshow(wc, interpolation='bilinear')\n",
    "plt.axis('off')  # pixel ticks carry no meaning for a word cloud\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Distribution of the exclusion-criteria text length, in characters.\n",
    "# Read the 'ineligible' column: the earlier version measured 'eligible' again,\n",
    "# so the 'Exclusion' figure was a duplicate of the inclusion histogram.\n",
    "ineligible_length = df3['ineligible'].map(len)\n",
    "\n",
    "plt.figure(figsize=(9,6))\n",
    "plt.hist(ineligible_length)\n",
    "plt.xlabel('Length (characters)', fontsize=16)\n",
    "plt.ylabel('Exclusion', fontsize=16)\n",
    "plt.savefig('./image/Exclusion.png', bbox_inches = \"tight\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Take an explicit copy: later cells assign new columns to df4, and doing that on a\n",
    "# slice of df3 triggers SettingWithCopyWarning and may not behave as intended.\n",
    "df4 = df3[['condition','intervention_name', 'eligible', 'ineligible']].copy()\n",
    "df4.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Normalise the condition label: lower-case, '/' and '-' become spaces, trim the ends.\n",
    "df4['condition'] = (\n",
    "    df4['condition']\n",
    "    .str.lower()\n",
    "    .str.replace('/', ' ')\n",
    "    .str.replace('-', ' ')\n",
    "    .str.strip()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.condition.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cancer types kept for the per-type analysis, written in the same normalised form\n",
    "# as df4.condition (lower-case, '-' and '/' already replaced by spaces).\n",
    "# Every entry needs its own trailing comma: adjacent string literals are silently\n",
    "# concatenated, which previously produced 'prostate cancercolorectal cancer' and\n",
    "# 'non-small cell lung cancerpancreatic cancer' and dropped four categories.\n",
    "condition_select = ['breast cancer', 'prostate cancer', 'colorectal cancer', 'lung cancer',\n",
    "                    'multiple myeloma', 'lymphoma', 'non small cell lung cancer',\n",
    "                    'pancreatic cancer', 'head and neck cancer', 'hepatocellular carcinoma',\n",
    "                    'melanoma', 'gastric cancer', 'ovarian cancer', 'solid tumors',\n",
    "                    'metastatic breast cancer']\n",
    "\n",
    "df10 = df4\n",
    "def add_condition(condition):\n",
    "    \"\"\"Return `condition` if it is one of `condition_select`, otherwise None.\"\"\"\n",
    "    return condition if condition in condition_select else None\n",
    "\n",
    "df10['cancer_type'] = df10.condition.apply(add_condition)\n",
    "df10.head(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df11 = df10[['cancer_type', 'eligible', 'ineligible']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df11 = df11.loc[df11.cancer_type.notnull()]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df11.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df11.to_csv('df11.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df12 = df11.groupby('cancer_type')['eligible'].apply(' '.join).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df12.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df12.cancer_type.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df12.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df12.to_csv('df12.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.eligible[30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Word counts for the eligible / ineligible text.\n",
    "# .str.split() without a separator splits on any run of whitespace, so the\n",
    "# indentation and blank lines in the criteria text are not counted as words\n",
    "# (split(\" \") yielded one empty token for every extra space).\n",
    "df4['word_count_eligible'] = df4['eligible'].astype(str).str.split().str.len()\n",
    "df4['word_count_ineligible'] = df4['ineligible'].astype(str).str.split().str.len()\n",
    "df4[['word_count_eligible','word_count_ineligible']].head().sort_values('word_count_eligible', ascending=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.word_count_eligible.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.word_count_ineligible.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Identify common words\n",
    "freq_eli = pd.Series(' '.join(df4['eligible']).split()).value_counts()[:20]\n",
    "freq_eli"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Identify uncommon words\n",
    "least_eli = pd.Series(' '.join(df4['eligible']).split()).value_counts()[-20:]\n",
    "least_eli"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "freq_in = pd.Series(' '.join(df4['ineligible']).split()).value_counts()[:20]\n",
    "freq_in"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def clean_eligibility_text(raw):\n",
    "    \"\"\"Lower-case `raw`, replace every non-letter with a space and collapse whitespace.\n",
    "\n",
    "    `raw` is passed through str() first, so non-string values do not raise.\n",
    "    Returns a single space-separated string.\n",
    "    \"\"\"\n",
    "    letters_only = re.sub('[^a-zA-Z]', ' ', str(raw))\n",
    "    return ' '.join(letters_only.lower().split())\n",
    "\n",
    "# Clean the eligibility text itself. The earlier loop applied the regex to\n",
    "# str(index) (the loop counter), which made every clean_text entry an empty string.\n",
    "# The old '&lt;...&gt;' tag regex is dropped: once non-letters are stripped it can\n",
    "# never match. Stemming / lemmatisation were commented out before and stay disabled.\n",
    "clean_text = [clean_eligibility_text(raw) for raw in df4['eligible']]\n",
    "\n",
    "df4['clean_text'] = clean_text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "\n",
    "# TF-IDF over 1- to 3-grams of the inclusion text; terms seen in fewer than\n",
    "# 10 documents are ignored and term frequency is scaled sub-linearly.\n",
    "tfidf = TfidfVectorizer(\n",
    "    sublinear_tf=True,\n",
    "    min_df=10,\n",
    "    ngram_range=(1, 3),\n",
    ")\n",
    "X = tfidf.fit_transform(df4['eligible'].values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y = df4.condition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(X.shape)\n",
    "print(y.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import re\n",
    "import string\n",
    "import matplotlib.pyplot as plt\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.naive_bayes import MultinomialNB\n",
    "from sklearn.feature_extraction.text import TfidfTransformer\n",
    "from sklearn.pipeline import Pipeline\n",
    "from sklearn.metrics import confusion_matrix,classification_report\n",
    "from sklearn.feature_extraction.text import CountVectorizer\n",
    "from sklearn.svm import SVC\n",
    "from sklearn import metrics\n",
    "from sklearn.feature_extraction.text import TfidfTransformer\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.svm import LinearSVC\n",
    "import nltk\n",
    "from nltk.corpus import stopwords\n",
    "from nltk.tokenize import word_tokenize\n",
    "from nltk.tokenize import sent_tokenize\n",
    "from nltk.stem import WordNetLemmatizer \n",
    "from nltk.corpus import stopwords\n",
    "from os import path\n",
    "from PIL import Image\n",
    "#from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator\n",
    "#from imblearn.over_sampling import SMOTE\n",
    "#from imblearn.combine import SMOTEENN\n",
    "from sklearn.utils import class_weight\n",
    "#import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.30, random_state = 42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mnb = MultinomialNB(alpha = 0.001,fit_prior=True)\n",
    "mnb.fit(X_train,y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mnb_predictions = mnb.predict(X_test)\n",
    "\n",
    "# Raw confusion matrix first, then each summary under its own heading.\n",
    "print(confusion_matrix(y_test, mnb_predictions))\n",
    "\n",
    "summaries = (\n",
    "    ('Classification report', classification_report(y_test, mnb_predictions)),\n",
    "    ('accuracy Score', metrics.accuracy_score(y_test, mnb_predictions)),\n",
    "    ('f1 Score', metrics.f1_score(y_test, mnb_predictions, average='weighted')),\n",
    ")\n",
    "for heading, summary in summaries:\n",
    "    print('\\n', heading)\n",
    "    print(summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import string\n",
    "\n",
    "# Translation table that deletes every ASCII punctuation character.\n",
    "_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)\n",
    "\n",
    "\n",
    "def text_process(mess, stop_words=None):\n",
    "    \"\"\"\n",
    "    Strip punctuation from a string and split it into tokens.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    mess : str\n",
    "        Text to clean.\n",
    "    stop_words : collection of str, optional\n",
    "        Lower-case words to discard; each token is compared through\n",
    "        ``word.lower()``. With the default (None) every token is kept.\n",
    "        Pass e.g. ``set(stopwords.words('english'))`` to drop English\n",
    "        stopwords.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of str\n",
    "        Whitespace-separated tokens of ``mess`` after removing every\n",
    "        character found in ``string.punctuation``.\n",
    "    \"\"\"\n",
    "    words = mess.translate(_PUNCTUATION_TABLE).split()\n",
    "    if not stop_words:\n",
    "        return words\n",
    "    # Build the lookup once per call rather than once per word.\n",
    "    stop_words = frozenset(stop_words)\n",
    "    return [word for word in words if word.lower() not in stop_words]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4['new'] = df4.eligible.apply(text_process)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df5 = df4.groupby('condition')['eligible'].apply(' '.join).reset_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df5.condition.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Conditions kept as classification targets. Every entry needs its own\n",
    "# comma: adjacent string literals are silently concatenated into one.\n",
    "condition_select = ['breast cancer', 'prostate cancer', 'colorectal cancer', 'lung cancer',\n",
    "                    'multiple myeloma', 'lymphoma', 'non-small cell lung cancer',\n",
    "                    'pancreatic cancer', 'head and neck cancer', 'hepatocellular carcinoma',\n",
    "                    'melanoma', 'gastric cancer', 'ovarian cancer', 'solid tumors',\n",
    "                    'metastatic breast cancer']\n",
    "\n",
    "\n",
    "def add_condition(condition):\n",
    "    \"\"\"Return ``condition`` if it exactly equals an entry of ``condition_select``, else None.\"\"\"\n",
    "    return condition if condition in condition_select else None\n",
    "\n",
    "\n",
    "df5['cancer_type'] = df5.condition.apply(add_condition)\n",
    "df5.head(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the conditions that add_condition matched; unmatched rows have\n",
    "# a missing cancer_type. (df5 has no condition_select column.)\n",
    "df6 = df5[df5.cancer_type.notna()]\n",
    "df6.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df6.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df6.condition.value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4['eligible'][df4.condition == 'breast cancer']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.eligible[30]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#cleanText(df4.eligible)\n",
    "for index in range(len(df4.eligible)):\n",
    "    print(index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dp6 = df.sample(10)\n",
    "dp6.eligibility = dp6.eligibility.str.lower()\n",
    "dp6['eli'] = dp6.eligibility.str.split('(inclusion criteria:)').str[2]#apply(pd.Series)[[0]].rename(columns={0: 'eligible'})\n",
    "dp6.head(1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(dp6.eli.to_string(index=False))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for v in dp6.eli:\n",
    "    print(v)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#dp.eligibility.str.split('inclusion criteria:')\n",
    "\n",
    "cancer = [\"inclusion criteria\" , \"neoplasm\" , \"oma\", \"tumor\"]\n",
    "\n",
    "pattern = '|'.join(cancer)\n",
    "pattern\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.eligibility = df.eligibility.str.lower()\n",
    "df['eligible'] = df.eligibility.str.contains('inclusion criteria')\n",
    "df2 = df.loc[df['eligible'] ==True,:]\n",
    "df2.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.eligibility = df.eligibility.str.lower()\n",
    "df['eligible'] = df.eligibility.str.contains('inclusion criteria')\n",
    "df4 = df.loc[df['eligible'] ==False,:]\n",
    "print(df4.shape)\n",
    "df4.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df4.iloc[0,13]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.Series(['a1', 'b2', 'c3'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.Series(['a1', 'b2', 'c3']).str.extract(r'([ab])(\\d)', expand=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.eligibility = df.eligibility.str.lower()\n",
    "df['ineligible'] = df.eligibility.str.contains('exclusion criteria')\n",
    "df3 = df.loc[df['ineligible'] ==True,:]\n",
    "df3.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.eligibility = df.eligibility.str.lower()\n",
    "df['ineligible'] = df.eligibility.str.contains('exclusion criteria')\n",
    "df5 = df.loc[df['ineligible'] ==False,:]\n",
    "print(df5.shape)\n",
    "df5.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# dp.eligibility = dp.eligibility.str.lower()\n",
    "# dp2 = dp.eligibility.str.split('(*inclusion criteria*:|*exclusion criteria*:)').apply(pd.Series)[[2,4]].rename(columns={2: 'eligible', 4: 'ineligible'})\n",
    "# dp2.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dp = df.sample(10)\n",
    "dp.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#dp = df.sample(100)\n",
    "dp.eligibility = dp.eligibility.str.lower()\n",
    "dp3 = dp.eligibility.str.split('(\\**inclusion criteria\\**:|\\**exclusion criteria\\**:)').apply(pd.Series)[[2,4]].rename(columns={2: 'eligible', 4: 'ineligible'})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(dp.eligibility.values)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "file_path = \"./data/search_result/NCT00000479.xml\"\n",
    "\n",
    "parse_XML(file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for child in root:\n",
    "    print(child.tag, child.attrib)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from xmljson import badgerfish as bf\n",
    "from xml.etree.ElementTree import fromstring"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!python -m xmljson -d badgerfish ./data/search_result/NCT00000479.xml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%bash\n",
    "# Convert each XML file in the working directory to JSON (Yahoo convention).\n",
    "# A %%bash cell is used because `!` lets IPython substitute Python names,\n",
    "# so `$json` would be replaced by the repr of the imported json module.\n",
    "# NOTE(review): the json/ output directory is inferred from the original\n",
    "# `$json/f.json` target - confirm the intended location.\n",
    "mkdir -p json\n",
    "for f in *.xml; do\n",
    "    [ -e \"$f\" ] || continue\n",
    "    python -m xmljson -d yahoo \"$f\" > \"json/${f%.xml}.json\"\n",
    "done"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for child in root:\n",
    "    print(child.tag, child.attrib)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfcols = ['clinical_study', 'nct_id', 'brief_title']\n",
    "df = pd.DataFrame(columns=dfcols)\n",
    "\n",
    "for i in etree.iter(tag='data'):\n",
    "df = df.append(\n",
    "        pd.Series([i.get('id'), i.get('name')], index=dfcols),\n",
    "        ignore_index=True)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "trials.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Retrieve Tweets\n",
    "MaxTweets = 10000000\n",
    "tweetsPerQry = 100\n",
    "fName = 'tweets.txt' # Storage name\n",
    "\n",
    "tweetCount = 0\n",
    "print(\"Downloading max {0} tweets\".format(MaxTweets))\n",
    "with open(fName, 'w') as f:\n",
    "    for tweet in tweepy.Cursor(api.search,q=query, wait_on_rate_limit=True).items(MaxTweets) :         \n",
    "\n",
    "        # Verify the tweet has place info before writing\n",
    "        if tweet.place is not None:\n",
    "            \n",
    "            #Write to the text file, and add one to the number of tweets\n",
    "            f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\\n')\n",
    "            tweetCount += 1\n",
    "\n",
    "    #Display how many tweets we have collected\n",
    "    print(\"Downloaded {0} tweets\".format(tweetCount))\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Checking the data\n",
    "tweet._json"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_id = -1\n",
    "tweetCount = 0\n",
    "out_path = 'PoGo_USA_Tutorial.json'  # file this cell writes to\n",
    "with open(out_path, 'w') as f:\n",
    "    #While we still want to collect more tweets\n",
    "    while tweetCount < MaxTweets:\n",
    "        try:\n",
    "            #Look for more tweets, resuming where we left off\n",
    "            if max_id <= 0:\n",
    "                new_tweets = api.search(q=query, count=tweetsPerQry)\n",
    "            else:\n",
    "                new_tweets = api.search(q=query, count=tweetsPerQry, max_id=str(max_id - 1))\n",
    "            \n",
    "            #If we didn't find any exit the loop\n",
    "            if not new_tweets:\n",
    "                print(\"No more tweets found\")\n",
    "                break\n",
    "            \n",
    "            #Write the JSON output of any new tweets we found to the output file\n",
    "            for tweet in new_tweets:\n",
    "                \n",
    "                #Make sure the tweet has place info before writing\n",
    "                if (tweet.place is not None) and (tweetCount < MaxTweets):\n",
    "                    f.write(jsonpickle.encode(tweet._json, unpicklable=False) +\n",
    "                        '\\n')\n",
    "                    tweetCount += 1\n",
    "                    \n",
    "            #Display how many tweets we have collected\n",
    "            print(\"Downloaded {0} tweets\".format(tweetCount))\n",
    "            \n",
    "            #Record the id of the last tweet we looked at\n",
    "            max_id = new_tweets[-1].id\n",
    "            \n",
    "        except tweepy.TweepError as e:\n",
    "            \n",
    "            #Print the error and continue searching\n",
    "            # NOTE(review): max_id is unchanged on this path, so an error that\n",
    "            # keeps recurring repeats the same request indefinitely.\n",
    "            print(\"some error : \" + str(e))\n",
    "\n",
    "\n",
    "# Report the file written above rather than fName, which is set to\n",
    "# 'tweets.txt' by the Cursor-based download cell.\n",
    "print (\"Downloaded {0} tweets, Saved to {1}\".format(tweetCount, out_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#search for multiple phrases using OR (for Pharma companies)\n",
    "Allquery = \"'Johnson & Johnson' or 'J & J' or 'J&J' or 'JNJ' or '$JNJ' \\\n",
    "        or 'Pfizer' or 'pfizer' or 'PFE' or '$PFE' \\\n",
    "        or 'Novartis' or 'novartis' or 'NOVN' or '$NOVN' \\\n",
    "        or 'Roche' or 'roche' or 'ROG' or '$ROG' \\\n",
    "        or 'Merck & Co.' or 'Merck' or 'MRK' or '$MRK' \\\n",
    "        or 'AbbVie' or 'abbvie' or 'ABBV' or '$ABBV' \\\n",
    "        or 'Eli Lilly' or 'EliLilly' or 'LLY' or '$LLY' \\\n",
    "        or 'Sanofi' or 'sanofi' or 'SAN' or '$SAN' \\\n",
    "        or 'AstraZeneca' or 'astrazeneca' or 'AZN' or '$AZN' \\\n",
    "        or 'GlaxoSmithKline' or 'GSK' or '$GSK'\""
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}