4737 lines (4736 with data), 478.9 kB
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import re\n",
"import json\n",
"import sys\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"import xml.etree.ElementTree as et\n",
"import io\n",
"import glob\n",
"from tqdm._tqdm_notebook import tqdm_notebook as tqdm\n",
"\n",
"# pd.options.display.max_rows = 9999\n",
"# pd.options.display.max_columns = 9999\n",
"# pd.set_option('display.max_colwidth', -1)\n",
"\n",
"#tqdm_notebook.pandas()\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_csv('cancerTrials.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Remove the [, ] and ' characters from intervention_name (presumably the column\n",
"# holds stringified Python lists -- verify against the CSV export).\n",
"# astype(str) also turns missing values into the literal string 'nan'.\n",
"# regex=True is spelled out because pandas >= 2.0 defaults to a literal match,\n",
"# which would silently leave the punctuation in place.\n",
"df['intervention_name'] = df['intervention_name'].astype(str).str.replace(r\"[\\[\\]']\", '', regex=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"61777\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nct_id</th>\n",
" <th>brief_title</th>\n",
" <th>official_title</th>\n",
" <th>overall_status</th>\n",
" <th>start_date</th>\n",
" <th>completion_date</th>\n",
" <th>phase</th>\n",
" <th>study_type</th>\n",
" <th>brief_summary</th>\n",
" <th>detailed_description</th>\n",
" <th>enrollment</th>\n",
" <th>condition</th>\n",
" <th>intervention_name</th>\n",
" <th>eligibility</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NCT00000124</td>\n",
" <td>Collaborative Ocular Melanoma Study (COMS)</td>\n",
" <td>NaN</td>\n",
" <td>Unknown status</td>\n",
" <td>November 1986</td>\n",
" <td>NaN</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n To evaluate therapeutic interventions ...</td>\n",
" <td>\\n For more than 100 years, removal of th...</td>\n",
" <td>NaN</td>\n",
" <td>Uveitis</td>\n",
" <td>Brachytherapy, Eye Removal</td>\n",
" <td>\\n Men and women eligible for the study...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NCT00000136</td>\n",
" <td>Studies of the Ocular Complications of AIDS (S...</td>\n",
" <td>Foscarnet-Ganciclovir CMV Retinitis Trial</td>\n",
" <td>Completed</td>\n",
" <td>March 1990</td>\n",
" <td>October 1991</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n To evaluate the relative safety and ef...</td>\n",
" <td>\\n CMV retinitis is the most common intra...</td>\n",
" <td>234.0</td>\n",
" <td>Cytomegalovirus Retinitis</td>\n",
" <td>Ganciclovir, Foscarnet, Phosphonoacetic Acid, ...</td>\n",
" <td>\\n Inclusion criteria:\\n\\n - ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NCT00000395</td>\n",
" <td>Antifolate Effectiveness in Arthritis</td>\n",
" <td>Mechanisms of Antifolate Efficacy in Arthritis</td>\n",
" <td>Completed</td>\n",
" <td>September 1996</td>\n",
" <td>August 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study looks at how the arthritis ...</td>\n",
" <td>\\n Low-dose methotrexate therapy suppress...</td>\n",
" <td>40.0</td>\n",
" <td>Adjuvant Arthritis</td>\n",
" <td>Methotrexate, Folic Acid Antagonists, Folic Ac...</td>\n",
" <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nct_id brief_title \\\n",
"0 NCT00000124 Collaborative Ocular Melanoma Study (COMS) \n",
"1 NCT00000136 Studies of the Ocular Complications of AIDS (S... \n",
"2 NCT00000395 Antifolate Effectiveness in Arthritis \n",
"\n",
" official_title overall_status \\\n",
"0 NaN Unknown status \n",
"1 Foscarnet-Ganciclovir CMV Retinitis Trial Completed \n",
"2 Mechanisms of Antifolate Efficacy in Arthritis Completed \n",
"\n",
" start_date completion_date phase study_type \\\n",
"0 November 1986 NaN Phase 3 Interventional \n",
"1 March 1990 October 1991 Phase 3 Interventional \n",
"2 September 1996 August 2002 Phase 2 Interventional \n",
"\n",
" brief_summary \\\n",
"0 \\n To evaluate therapeutic interventions ... \n",
"1 \\n To evaluate the relative safety and ef... \n",
"2 \\n This study looks at how the arthritis ... \n",
"\n",
" detailed_description enrollment \\\n",
"0 \\n For more than 100 years, removal of th... NaN \n",
"1 \\n CMV retinitis is the most common intra... 234.0 \n",
"2 \\n Low-dose methotrexate therapy suppress... 40.0 \n",
"\n",
" condition \\\n",
"0 Uveitis \n",
"1 Cytomegalovirus Retinitis \n",
"2 Adjuvant Arthritis \n",
"\n",
" intervention_name \\\n",
"0 Brachytherapy, Eye Removal \n",
"1 Ganciclovir, Foscarnet, Phosphonoacetic Acid, ... \n",
"2 Methotrexate, Folic Acid Antagonists, Folic Ac... \n",
"\n",
" eligibility \n",
"0 \\n Men and women eligible for the study... \n",
"1 \\n Inclusion criteria:\\n\\n - ... \n",
"2 \\n Inclusion Criteria:\\n\\n - ... "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Number of rows loaded, followed by a three-row preview of the frame.\n",
"print(df.shape[0])\n",
"df.head(3)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 61777 entries, 0 to 61776\n",
"Data columns (total 14 columns):\n",
"nct_id 61777 non-null object\n",
"brief_title 61777 non-null object\n",
"official_title 60936 non-null object\n",
"overall_status 61777 non-null object\n",
"start_date 61777 non-null object\n",
"completion_date 56975 non-null object\n",
"phase 47919 non-null object\n",
"study_type 61777 non-null object\n",
"brief_summary 61776 non-null object\n",
"detailed_description 43084 non-null object\n",
"enrollment 60236 non-null float64\n",
"condition 61777 non-null object\n",
"intervention_name 61777 non-null object\n",
"eligibility 61777 non-null object\n",
"dtypes: float64(1), object(13)\n",
"memory usage: 6.6+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Phase 3', 'Phase 2', nan, 'Phase 1', 'Phase 1/Phase 2', 'Phase 4',\n",
" 'Phase 2/Phase 3', 'Early Phase 1'], dtype=object)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.phase.unique()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Phase 2 20198\n",
"Phase 1 10738\n",
"Phase 3 7428\n",
"Phase 1/Phase 2 5273\n",
"Phase 4 2350\n",
"Phase 2/Phase 3 1060\n",
"Early Phase 1 872\n",
"Name: phase, dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.phase.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>index</th>\n",
" <th>phase</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Phase 2</td>\n",
" <td>20198</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Phase 1</td>\n",
" <td>10738</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Phase 3</td>\n",
" <td>7428</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Phase 1/Phase 2</td>\n",
" <td>5273</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Phase 4</td>\n",
" <td>2350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Phase 2/Phase 3</td>\n",
" <td>1060</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Early Phase 1</td>\n",
" <td>872</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" index phase\n",
"0 Phase 2 20198\n",
"1 Phase 1 10738\n",
"2 Phase 3 7428\n",
"3 Phase 1/Phase 2 5273\n",
"4 Phase 4 2350\n",
"5 Phase 2/Phase 3 1060\n",
"6 Early Phase 1 872"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Trials per phase as a two-column table, most frequent phase first.\n",
"phase = df['phase'].value_counts(sort=True, ascending=False).reset_index()\n",
"phase"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x432 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Horizontal bar chart: number of trials recorded for each phase.\n",
"phase_counts = df['phase'].value_counts(sort=True, ascending=True)\n",
"plt.figure(figsize=(6, 6))\n",
"phase_counts.plot(\n",
"    kind='barh',\n",
"    width=0.9,\n",
"    color=sns.color_palette('bright', 7),\n",
"    alpha=0.8,\n",
")\n",
"plt.title('Cancer Clinical Trials Status')\n",
"plt.xlabel('Number of Occurrences', fontsize=12)\n",
"plt.savefig('./image/phase1.png', bbox_inches='tight')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Vertical bar chart of the number of trials per phase, most frequent first.\n",
"phase = df.phase.value_counts(ascending=False, sort=True)\n",
"plt.figure(figsize=(10,5))\n",
"# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.\n",
"sns.barplot(x=phase.index, y=phase.values, alpha=0.8)\n",
"plt.title('Cancer Clinical Trials Status')\n",
"plt.ylabel('Number of Occurrences', fontsize=12)\n",
"plt.savefig('./image/phase2.png')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nct_id</th>\n",
" <th>brief_title</th>\n",
" <th>official_title</th>\n",
" <th>overall_status</th>\n",
" <th>start_date</th>\n",
" <th>completion_date</th>\n",
" <th>phase</th>\n",
" <th>study_type</th>\n",
" <th>brief_summary</th>\n",
" <th>detailed_description</th>\n",
" <th>enrollment</th>\n",
" <th>condition</th>\n",
" <th>intervention_name</th>\n",
" <th>eligibility</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NCT00000124</td>\n",
" <td>Collaborative Ocular Melanoma Study (COMS)</td>\n",
" <td>NaN</td>\n",
" <td>Unknown status</td>\n",
" <td>November 1986</td>\n",
" <td>NaN</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n To evaluate therapeutic interventions ...</td>\n",
" <td>\\n For more than 100 years, removal of th...</td>\n",
" <td>NaN</td>\n",
" <td>Uveitis</td>\n",
" <td>Brachytherapy, Eye Removal</td>\n",
" <td>\\n Men and women eligible for the study...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nct_id brief_title official_title \\\n",
"0 NCT00000124 Collaborative Ocular Melanoma Study (COMS) NaN \n",
"\n",
" overall_status start_date completion_date phase study_type \\\n",
"0 Unknown status November 1986 NaN Phase 3 Interventional \n",
"\n",
" brief_summary \\\n",
"0 \\n To evaluate therapeutic interventions ... \n",
"\n",
" detailed_description enrollment condition \\\n",
"0 \\n For more than 100 years, removal of th... NaN Uveitis \n",
"\n",
" intervention_name \\\n",
"0 Brachytherapy, Eye Removal \n",
"\n",
" eligibility \n",
"0 \\n Men and women eligible for the study... "
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(1)"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Amrit\\Anaconda3\\envs\\ML\\lib\\site-packages\\ipykernel_launcher.py:2: FutureWarning: using a dict on a Series for aggregation\n",
"is deprecated and will be removed in a future version\n",
" \n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>phase</th>\n",
" <th>enrolment</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Phase 3</td>\n",
" <td>626</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Phase 4</td>\n",
" <td>570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Phase 2/Phase 3</td>\n",
" <td>350</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Phase 2</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Early Phase 1</td>\n",
" <td>87</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Phase 1/Phase 2</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Phase 1</td>\n",
" <td>44</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" phase enrolment\n",
"0 Phase 3 626\n",
"1 Phase 4 570\n",
"2 Phase 2/Phase 3 350\n",
"3 Phase 2 91\n",
"4 Early Phase 1 87\n",
"5 Phase 1/Phase 2 64\n",
"6 Phase 1 44"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/plain": [
"<Figure size 432x432 with 0 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Mean enrollment per phase, largest first, truncated to whole patients.\n",
"# The column is named via .rename() because renaming through a dict passed to\n",
"# Series.aggregate() is deprecated and raises SpecificationError in pandas >= 1.0.\n",
"patient = (\n",
"    df.groupby('phase')['enrollment']\n",
"    .mean()\n",
"    .rename('enrolment')\n",
"    .sort_values(ascending=False)\n",
"    .astype(int)\n",
"    .reset_index()\n",
")\n",
"patient"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 44\n",
"1 64\n",
"2 87\n",
"3 91\n",
"4 350\n",
"5 570\n",
"6 626\n",
"Name: enrolment, dtype: int32"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"patient.enrolment"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 648x432 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the average enrollment per phase, taken from the `patient` table.\n",
"plt.figure(figsize=(9,6))\n",
"# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.\n",
"sns.barplot(x=patient.phase, y=patient.enrolment, alpha=0.8)\n",
"plt.title('Average Patient Enrollment', fontsize=14)\n",
"plt.xlabel('', fontsize=12)\n",
"plt.ylabel('Patient number', fontsize=14)\n",
"plt.savefig('./image/patient_number.png')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Box plot of enrollment for each phase; hue repeats the x variable so every\n",
"# phase is drawn in its own colour from the 'bright' palette.\n",
"ax = sns.boxplot(\n",
"    data=df,\n",
"    x='phase',\n",
"    y='enrollment',\n",
"    hue='phase',\n",
"    palette='bright',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 648x432 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Enrollment distribution per phase with the 'ticks' seaborn style,\n",
"# written to ./image/patient_stat.png.\n",
"plt.figure(figsize=(9, 6))\n",
"sns.set_style('ticks')\n",
"b = sns.boxplot(data=df, x='phase', y='enrollment', hue='phase')\n",
"b.set_title('Patient distribution', fontsize=14)\n",
"b.set_xlabel('')\n",
"b.set_ylabel('Patient number', fontsize=14)\n",
"plt.savefig('./image/patient_stat.png');"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Completed 26678\n",
"Recruiting 12889\n",
"Active, not recruiting 5803\n",
"Terminated 5695\n",
"Unknown status 5593\n",
"Not yet recruiting 2683\n",
"Withdrawn 1749\n",
"Enrolling by invitation 363\n",
"Suspended 324\n",
"Name: overall_status, dtype: int64"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.overall_status.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x432 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Horizontal bar chart: number of trials for each overall status.\n",
"status_counts = df.overall_status.value_counts(ascending=True, sort=True)\n",
"plt.figure(figsize=(6,6))\n",
"# Size the palette from the data so each status gets its own colour\n",
"# (a fixed palette of 7 repeats colours once there are more than 7 statuses).\n",
"status_counts.plot(kind='barh', width=0.9, color=sns.color_palette(\"bright\", len(status_counts)), alpha=0.8)\n",
"plt.title('Overall Status of Cancer Trials')\n",
"plt.xlabel('Number of Occurrences', fontsize=12)\n",
"plt.savefig('./image/status1.png', bbox_inches = \"tight\")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x360 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Vertical bar chart of the number of trials per overall status, most frequent first.\n",
"status = df.overall_status.value_counts(ascending=False, sort=True)\n",
"plt.figure(figsize=(10,5))\n",
"# x and y are passed by keyword: seaborn >= 0.12 no longer accepts them positionally.\n",
"sns.barplot(x=status.index, y=status.values, alpha=0.8)\n",
"plt.title('Overall Status of Cancer Trials')\n",
"plt.ylabel('Number of Occurrences', fontsize=12)\n",
"plt.savefig('./image/status2.png')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Unknown status', 'Completed', 'Recruiting', 'Terminated',\n",
" 'Active, not recruiting', 'Withdrawn', 'Suspended',\n",
" 'Enrolling by invitation', 'Not yet recruiting'], dtype=object)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.overall_status.unique()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0 Uveitis\n",
"1 Cytomegalovirus Retinitis\n",
"2 Adjuvant Arthritis\n",
"3 Spondylitis, Ankylosing\n",
"4 Vascular Diseases\n",
"5 Postmenopause\n",
"6 Myocardial Ischemia\n",
"7 Myocardial Ischemia\n",
"8 Myocardial Ischemia\n",
"9 Blood Transfusion\n",
"10 Myelodysplastic Syndromes\n",
"11 Myelodysplastic Syndromes\n",
"12 HIV Infections\n",
"13 HIV Infections\n",
"14 Graft vs Host Disease\n",
"15 Immunoproliferative Disorder\n",
"16 Zollinger Ellison Syndrome\n",
"17 Sarcoma\n",
"18 Neoplasms\n",
"19 Zollinger Ellison Syndrome\n",
"20 Neoplasm Metastasis\n",
"21 Sarcoma, Ewing's\n",
"22 Osteosarcoma\n",
"23 Zollinger Ellison Syndrome\n",
"24 Gaucher's Disease\n",
"25 Lymphoma, Small Noncleaved-Cell\n",
"26 Breast Neoplasms\n",
"27 Lymphoma, T-Cell, Cutaneous\n",
"28 Neoplasm Metastasis\n",
"29 Meningeal Neoplasm\n",
" ... \n",
"61747 Secondary Malignant Neoplasm of Brain and Cere...\n",
"61748 Raw Corn Starch\n",
"61749 Health Behavior\n",
"61750 Recurrent Nasopharyngeal Carcinoma\n",
"61751 Healthy\n",
"61752 Recurrent Pituitary Adenomas\n",
"61753 Breast Cancer\n",
"61754 Lymphoma, Non-Hodgkin\n",
"61755 Secondary Osteoporosis\n",
"61756 Sentinel Lymph Node\n",
"61757 AML or MDS\n",
"61758 Multiple Myeloma\n",
"61759 Advanced Breast Cancer\n",
"61760 Richter Syndrome\n",
"61761 Breast Cancer\n",
"61762 PHA1A\n",
"61763 Advanced Solid Tumors\n",
"61764 Malignant Tumor\n",
"61765 DCIS\n",
"61766 Rectal Cancer\n",
"61767 Tumor Necrosis\n",
"61768 Aging\n",
"61769 Melanoma\n",
"61770 Nasopharyngeal Carcinoma\n",
"61771 Oral Mucositis Due to Radiation\n",
"61772 Relapsed Adult AML\n",
"61773 Phase II: Relapsed or Refractory Mantle Cell L...\n",
"61774 Endometrial Cancer\n",
"61775 Triple-negative Breast Cancer\n",
"61776 Nectin4-positive Advanced Malignant Solid Tumor\n",
"Name: condition, Length: 61777, dtype: object"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.condition"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Flag trials whose condition mentions a cancer-related term. The condition column\n",
"# is lower-cased in place first, so the match is effectively case-insensitive.\n",
"# NOTE(review): these are plain substring matches, so 'oma' also hits words such as\n",
"# 'stomach' or 'glaucoma' -- confirm this breadth is intended.\n",
"df['condition'] = df['condition'].str.lower()\n",
"\n",
"cancer = ['cancer', 'neoplasm', 'oma', 'tumor']\n",
"pattern = '|'.join(cancer)\n",
"\n",
"df['condition_cancer'] = df['condition'].str.contains(pattern)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nct_id</th>\n",
" <th>brief_title</th>\n",
" <th>official_title</th>\n",
" <th>overall_status</th>\n",
" <th>start_date</th>\n",
" <th>completion_date</th>\n",
" <th>phase</th>\n",
" <th>study_type</th>\n",
" <th>brief_summary</th>\n",
" <th>detailed_description</th>\n",
" <th>enrollment</th>\n",
" <th>condition</th>\n",
" <th>intervention_name</th>\n",
" <th>eligibility</th>\n",
" <th>condition_cancer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>NCT00000124</td>\n",
" <td>Collaborative Ocular Melanoma Study (COMS)</td>\n",
" <td>NaN</td>\n",
" <td>Unknown status</td>\n",
" <td>November 1986</td>\n",
" <td>NaN</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n To evaluate therapeutic interventions ...</td>\n",
" <td>\\n For more than 100 years, removal of th...</td>\n",
" <td>NaN</td>\n",
" <td>uveitis</td>\n",
" <td>Brachytherapy, Eye Removal</td>\n",
" <td>\\n Men and women eligible for the study...</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>NCT00000136</td>\n",
" <td>Studies of the Ocular Complications of AIDS (S...</td>\n",
" <td>Foscarnet-Ganciclovir CMV Retinitis Trial</td>\n",
" <td>Completed</td>\n",
" <td>March 1990</td>\n",
" <td>October 1991</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n To evaluate the relative safety and ef...</td>\n",
" <td>\\n CMV retinitis is the most common intra...</td>\n",
" <td>234.0</td>\n",
" <td>cytomegalovirus retinitis</td>\n",
" <td>Ganciclovir, Foscarnet, Phosphonoacetic Acid, ...</td>\n",
" <td>\\n Inclusion criteria:\\n\\n - ...</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NCT00000395</td>\n",
" <td>Antifolate Effectiveness in Arthritis</td>\n",
" <td>Mechanisms of Antifolate Efficacy in Arthritis</td>\n",
" <td>Completed</td>\n",
" <td>September 1996</td>\n",
" <td>August 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study looks at how the arthritis ...</td>\n",
" <td>\\n Low-dose methotrexate therapy suppress...</td>\n",
" <td>40.0</td>\n",
" <td>adjuvant arthritis</td>\n",
" <td>Methotrexate, Folic Acid Antagonists, Folic Ac...</td>\n",
" <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>NCT00000433</td>\n",
" <td>Blocking Tumor Necrosis Factor in Ankylosing S...</td>\n",
" <td>Anti-Tumor Necrosis Factor (TNFR:Fc) in Ankylo...</td>\n",
" <td>Completed</td>\n",
" <td>October 1999</td>\n",
" <td>March 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The Division of Rheumatology at Univer...</td>\n",
" <td>\\n In this Phase II clinical trial we wil...</td>\n",
" <td>42.0</td>\n",
" <td>spondylitis, ankylosing</td>\n",
" <td>Anti-Tumor Necrosis Factor</td>\n",
" <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>NCT00000479</td>\n",
" <td>Women's Health Study (WHS): A Randomized Trial...</td>\n",
" <td>Women's Health Study of Low-dose Aspirin and V...</td>\n",
" <td>Completed</td>\n",
" <td>September 1992</td>\n",
" <td>February 2005</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The purpose of this study is to evalua...</td>\n",
" <td>\\n BACKGROUND:\\n\\n Various doses of ...</td>\n",
" <td>39876.0</td>\n",
" <td>vascular diseases</td>\n",
" <td>Vitamins, Vitamin E, Tocopherols, Tocotrienols...</td>\n",
" <td>\\n Inclusion Criteria:\\n\\n - ...</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nct_id brief_title \\\n",
"0 NCT00000124 Collaborative Ocular Melanoma Study (COMS) \n",
"1 NCT00000136 Studies of the Ocular Complications of AIDS (S... \n",
"2 NCT00000395 Antifolate Effectiveness in Arthritis \n",
"3 NCT00000433 Blocking Tumor Necrosis Factor in Ankylosing S... \n",
"4 NCT00000479 Women's Health Study (WHS): A Randomized Trial... \n",
"\n",
" official_title overall_status \\\n",
"0 NaN Unknown status \n",
"1 Foscarnet-Ganciclovir CMV Retinitis Trial Completed \n",
"2 Mechanisms of Antifolate Efficacy in Arthritis Completed \n",
"3 Anti-Tumor Necrosis Factor (TNFR:Fc) in Ankylo... Completed \n",
"4 Women's Health Study of Low-dose Aspirin and V... Completed \n",
"\n",
" start_date completion_date phase study_type \\\n",
"0 November 1986 NaN Phase 3 Interventional \n",
"1 March 1990 October 1991 Phase 3 Interventional \n",
"2 September 1996 August 2002 Phase 2 Interventional \n",
"3 October 1999 March 2002 Phase 2 Interventional \n",
"4 September 1992 February 2005 Phase 3 Interventional \n",
"\n",
" brief_summary \\\n",
"0 \\n To evaluate therapeutic interventions ... \n",
"1 \\n To evaluate the relative safety and ef... \n",
"2 \\n This study looks at how the arthritis ... \n",
"3 \\n The Division of Rheumatology at Univer... \n",
"4 \\n The purpose of this study is to evalua... \n",
"\n",
" detailed_description enrollment \\\n",
"0 \\n For more than 100 years, removal of th... NaN \n",
"1 \\n CMV retinitis is the most common intra... 234.0 \n",
"2 \\n Low-dose methotrexate therapy suppress... 40.0 \n",
"3 \\n In this Phase II clinical trial we wil... 42.0 \n",
"4 \\n BACKGROUND:\\n\\n Various doses of ... 39876.0 \n",
"\n",
" condition \\\n",
"0 uveitis \n",
"1 cytomegalovirus retinitis \n",
"2 adjuvant arthritis \n",
"3 spondylitis, ankylosing \n",
"4 vascular diseases \n",
"\n",
" intervention_name \\\n",
"0 Brachytherapy, Eye Removal \n",
"1 Ganciclovir, Foscarnet, Phosphonoacetic Acid, ... \n",
"2 Methotrexate, Folic Acid Antagonists, Folic Ac... \n",
"3 Anti-Tumor Necrosis Factor \n",
"4 Vitamins, Vitamin E, Tocopherols, Tocotrienols... \n",
"\n",
" eligibility condition_cancer \n",
"0 \\n Men and women eligible for the study... False \n",
"1 \\n Inclusion criteria:\\n\\n - ... False \n",
"2 \\n Inclusion Criteria:\\n\\n - ... False \n",
"3 \\n Inclusion Criteria:\\n\\n - ... False \n",
"4 \\n Inclusion Criteria:\\n\\n - ... False "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the rows flagged as cancer-related. .copy() makes the result an\n",
"# independent frame, so later column assignments do not raise SettingWithCopyWarning.\n",
"df = df.loc[df['condition_cancer'].eq(True)].copy()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(42725, 15)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"breast cancer 2919\n",
"prostate cancer 1888\n",
"colorectal cancer 1012\n",
"lung cancer 1009\n",
"multiple myeloma 996\n",
"cancer 972\n",
"lymphoma 855\n",
"non-small cell lung cancer 731\n",
"pancreatic cancer 659\n",
"unspecified adult solid tumor, protocol specific 656\n",
"head and neck cancer 524\n",
"hepatocellular carcinoma 496\n",
"melanoma 481\n",
"gastric cancer 460\n",
"ovarian cancer 441\n",
"neoplasms 424\n",
"solid tumors 375\n",
"metastatic breast cancer 370\n",
"breast neoplasms 317\n",
"carcinoma, non-small-cell lung 315\n",
"cervical cancer 289\n",
"non small cell lung cancer 289\n",
"sarcoma 288\n",
"metastatic colorectal cancer 285\n",
"bladder cancer 285\n",
"rectal cancer 282\n",
"brain and central nervous system tumors 271\n",
"esophageal cancer 260\n",
"advanced solid tumors 249\n",
"glioblastoma 223\n",
" ... \n",
"intraabdominal cancers (various types) 1\n",
"carcinoma of renal pelvis 1\n",
"recurrent idhwt gliomas with fgfr1-tacc1 fusion 1\n",
"cancer of the larynx 1\n",
"respiratory cancer 1\n",
"urothelial bladder carcinoma 1\n",
"locally advanced and unresectable, but non-metastatic pancreatic adenocarcinoma or cholangiocarcinoma 1\n",
"tumor with alterations of the fgf-r 1\n",
"nasal cancer 1\n",
"macroprolactinoma 1\n",
"ovarian, fallopian tube, and primary peritoneal cancer 1\n",
"non-squamous cell lung cancer with wild-type kras 1\n",
"stage ib1 cervical cancer ajcc v6 and v7 1\n",
"refractory transformed non-hodgkin lymphoma 1\n",
"malt-lymphoma 1\n",
"progressive metastatic prostate cancer 1\n",
"stage i prostate cancer 1\n",
"mandibular neoplasms 1\n",
"glioblastoma, who grade iv 1\n",
"advanced or metastatic biliary tract cancer 1\n",
"non-small cell lung cancer - completely resectable 1\n",
"phase ii: relapsed or refractory mantle cell lymphoma 1\n",
"related distress among cancer caregivers 1\n",
"left colonic adenocarcinoma 1\n",
"crohn disease-associated colorectal adenocarcinoma 1\n",
"localized hepatocellular carcinoma 1\n",
"pancreatic intraductal papillary mucinous neoplasm 1\n",
"non-hodgkin's lymphomas 1\n",
"children cancer, solid tumor 1\n",
"cancer-related cognitive difficulties 1\n",
"Name: condition, Length: 5865, dtype: int64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.condition.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['breast cancer', 'prostate cancer', 'colorectal cancer', 'lung cancer',\n",
" 'multiple myeloma', 'cancer', 'lymphoma', 'non-small cell lung cancer',\n",
" 'pancreatic cancer',\n",
" 'unspecified adult solid tumor, protocol specific'],\n",
" dtype='object')"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Labels of the ten most frequent conditions.\n",
"# value_counts() already sorts by descending count by default.\n",
"condition = df['condition'].value_counts()\n",
"condition.index[:10]"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x360 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the 20 most frequent condition labels, saved to ./image/condition.png\n",
"condition = df.condition.value_counts(ascending=False, sort=True)\n",
"top_conditions = condition.head(20)\n",
"\n",
"# savefig does not create missing directories, so make sure the target folder exists\n",
"os.makedirs('./image', exist_ok=True)\n",
"\n",
"plt.figure(figsize=(15,5))\n",
"# Pass x/y by keyword: seaborn 0.12+ no longer accepts them positionally\n",
"sns.barplot(x=top_conditions.index, y=top_conditions.values, alpha=0.8)\n",
"plt.title('Patient Condition')\n",
"plt.xlabel('Condition', fontsize=12)\n",
"plt.ylabel('Number of Occurrences', fontsize=12)\n",
"plt.xticks(rotation=90)\n",
"plt.savefig('./image/condition.png', bbox_inches='tight')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Vaccines 492\n",
"Pembrolizumab 392\n",
"Paclitaxel, Albumin-Bound Paclitaxel 284\n",
"Gemcitabine 239\n",
"Docetaxel 229\n",
"Everolimus, Sirolimus 226\n",
"Sorafenib 219\n",
"Nivolumab 219\n",
"Temozolomide 217\n",
"Antibodies, Monoclonal 204\n",
"Cisplatin 198\n",
"Erlotinib Hydrochloride 194\n",
"Capecitabine 185\n",
"Sunitinib 177\n",
"Paclitaxel, Albumin-Bound Paclitaxel, Carboplatin 175\n",
"Nivolumab, Ipilimumab 163\n",
"Bevacizumab 159\n",
"Rituximab 158\n",
"Cetuximab 143\n",
"Cisplatin, Gemcitabine 132\n",
"Doxorubicin, Liposomal doxorubicin 127\n",
"Irinotecan 119\n",
"Gefitinib 116\n",
"Apatinib 114\n",
"Paclitaxel, Albumin-Bound Paclitaxel, Gemcitabine 109\n",
"Imatinib Mesylate 107\n",
"Oxaliplatin 104\n",
"Trastuzumab 100\n",
"Fluorodeoxyglucose F18 99\n",
"Capecitabine, Oxaliplatin 98\n",
" ... \n",
"Dermal Autograft, AlloDerm 1\n",
"RevM10 gene, RevM10/polAS gene, in vitro-treated peripheral blood stem cell transplantation, peripheral blood stem cell transplantation 1\n",
"SHR3680, SHR2554 1\n",
"Bevacizumab, Carboplatin, Liposomal doxorubicin, Doxorubicin 1\n",
"Benzocaine, Estrogens 1\n",
"Paclitaxel, Albumin-Bound Paclitaxel, Semaxinib, Angiogenesis Inhibitors 1\n",
"adjuvant therapy, 3-dimensional conformal radiation therapy, brachytherapy 1\n",
"Telerehabilitation group 1\n",
"FLOT regimen chemotherapy, D2 gastric and imaging metastases resection 1\n",
"Antigen-specific cytotoxic T lymphocytes induced by dendritic cells infected by recombinant adeno-associated virus with CEA gene 1\n",
"Cyclophosphamide, Methotrexate, Cytarabine, Rituximab, Ifosfamide, Isophosphamide mustard, Doxorubicin, Liposomal doxorubicin, Prednisone, Etoposide, Etoposide phosphate, Vincristine, Prednisolone, Methylprednisolone Hemisuccinate, Bleomycin, Vindesine, Lenograstim, Methylprednisolone Acetate, Methylprednisolone, Prednisolone acetate, Prednisolone hemisuccinate, Prednisolone phosphate 1\n",
"Doxorubicin, Lenalidomide, Vinblastine 1\n",
"Partial Breast Irradiation (PBI) 1\n",
"Axitinib, Selenium 1\n",
"TroVax 1\n",
"Camptothecin, Namitecan 1\n",
"Oxaliplatin, Fluorouracil, Eniluracil 1\n",
"Crotoxin 1\n",
"custom work endurance, dietary management adapted to the nutritional status 1\n",
"CT-scan, CEA, X-ray of lungs, CT-scan, CEA, X-ray of lungs 1\n",
"Paclitaxel, Albumin-Bound Paclitaxel, Bavituximab, Antibodies, Monoclonal 1\n",
"Interactive Voice Response Symptom Management, Tailored Newsletters 1\n",
"CMB305, Placebo 1\n",
"Doxorubicin, Liposomal doxorubicin, Ifosfamide, Semaxinib, Angiogenesis Inhibitors 1\n",
"injection of the cell therapy product 1\n",
"ETAF, Usual care 1\n",
"Gemcitabine, Capecitabine, Fluorouracil, Irinotecan, Oxaliplatin 1\n",
"Immunologic Factors, Fluorodeoxyglucose F18 1\n",
"Football 1\n",
"Paclitaxel, Albumin-Bound Paclitaxel, Carboplatin, Atezolizumab, Antibodies, Immunoglobulins, Antibodies, Monoclonal 1\n",
"Name: intervention_name, Length: 22902, dtype: int64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Tally how often each intervention_name string occurs (most frequent first)\n",
"df['intervention_name'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 1080x360 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Bar chart of the 20 most frequent intervention_name values, saved to ./image/intervention.png\n",
"intervention = df.intervention_name.value_counts(ascending=False, sort=True)\n",
"top_interventions = intervention.head(20)\n",
"\n",
"# savefig does not create missing directories, so make sure the target folder exists\n",
"os.makedirs('./image', exist_ok=True)\n",
"\n",
"plt.figure(figsize=(15,5))\n",
"# Pass x/y by keyword: seaborn 0.12+ no longer accepts them positionally\n",
"sns.barplot(x=top_interventions.index, y=top_interventions.values, alpha=0.8)\n",
"plt.title('Treatment procedure')\n",
"plt.xlabel('Intervention', fontsize=12)\n",
"plt.ylabel('Number of Occurrences', fontsize=12)\n",
"plt.xticks(rotation=90)\n",
"plt.savefig('./image/intervention.png', bbox_inches='tight')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nct_id</th>\n",
" <th>brief_title</th>\n",
" <th>official_title</th>\n",
" <th>overall_status</th>\n",
" <th>start_date</th>\n",
" <th>completion_date</th>\n",
" <th>phase</th>\n",
" <th>study_type</th>\n",
" <th>brief_summary</th>\n",
" <th>detailed_description</th>\n",
" <th>enrollment</th>\n",
" <th>condition</th>\n",
" <th>intervention_name</th>\n",
" <th>eligibility</th>\n",
" <th>condition_cancer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NCT00001188</td>\n",
" <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
" <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
" <td>Completed</td>\n",
" <td>December 1983</td>\n",
" <td>September 2000</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with Grade II and III soft ti...</td>\n",
" <td>\\n Patients with Grade II and III soft ti...</td>\n",
" <td>100.0</td>\n",
" <td>sarcoma</td>\n",
" <td>radiation therapy following surgery</td>\n",
" <td>\\n Patients must have biopsy-proven sof...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NCT00001189</td>\n",
" <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
" <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
" <td>Completed</td>\n",
" <td>December 1983</td>\n",
" <td>April 2001</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with Grade I soft tissue sarc...</td>\n",
" <td>\\n This is a randomized study. Patients u...</td>\n",
" <td>150.0</td>\n",
" <td>neoplasms</td>\n",
" <td>radiotherapy</td>\n",
" <td>\\n DISEASE CHARACTERISTICS:\\n\\n ...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>NCT00001193</td>\n",
" <td>A Multimodality Treatment Approach to Patients...</td>\n",
" <td>A Multimodality Treatment Approach to Patients...</td>\n",
" <td>Completed</td>\n",
" <td>November 1984</td>\n",
" <td>September 2000</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study is designed to evaluate the...</td>\n",
" <td>\\n This study is designed to evaluate the...</td>\n",
" <td>200.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Melphalan</td>\n",
" <td>\\n Patients must have a histologically ...</td>\n",
" <td>True</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" nct_id brief_title \\\n",
"17 NCT00001188 The Role of Multi-Modality Therapy for the Tre... \n",
"18 NCT00001189 The Treatment of Grade I Sarcomas and Benign, ... \n",
"20 NCT00001193 A Multimodality Treatment Approach to Patients... \n",
"\n",
" official_title overall_status \\\n",
"17 The Role of Multi-Modality Therapy for the Tre... Completed \n",
"18 The Treatment of Grade I Sarcomas and Benign, ... Completed \n",
"20 A Multimodality Treatment Approach to Patients... Completed \n",
"\n",
" start_date completion_date phase study_type \\\n",
"17 December 1983 September 2000 Phase 2 Interventional \n",
"18 December 1983 April 2001 Phase 2 Interventional \n",
"20 November 1984 September 2000 Phase 2 Interventional \n",
"\n",
" brief_summary \\\n",
"17 \\n Patients with Grade II and III soft ti... \n",
"18 \\n Patients with Grade I soft tissue sarc... \n",
"20 \\n This study is designed to evaluate the... \n",
"\n",
" detailed_description enrollment \\\n",
"17 \\n Patients with Grade II and III soft ti... 100.0 \n",
"18 \\n This is a randomized study. Patients u... 150.0 \n",
"20 \\n This study is designed to evaluate the... 200.0 \n",
"\n",
" condition intervention_name \\\n",
"17 sarcoma radiation therapy following surgery \n",
"18 neoplasms radiotherapy \n",
"20 neoplasm metastasis Melphalan \n",
"\n",
" eligibility condition_cancer \n",
"17 \\n Patients must have biopsy-proven sof... True \n",
"18 \\n DISEASE CHARACTERISTICS:\\n\\n ... True \n",
"20 \\n Patients must have a histologically ... True "
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Peek at the first three rows of the frame\n",
"df.head(n=3)"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Extract the inclusion and exclusion sections from the eligibility column.\n",
"# The text is lower-cased first so the section headers match regardless of case.\n",
"INCLUSION_HEADER = 'inclusion criteria:'\n",
"EXCLUSION_HEADER = 'exclusion criteria:'\n",
"\n",
"df['eligibility'] = df['eligibility'].str.lower()\n",
"\n",
"# For each section: take the text after the first occurrence of its header, then\n",
"# cut it at the other section's header. Without that cut, 'eligible' would also\n",
"# carry the entire exclusion section whenever it follows the inclusion one.\n",
"# Rows that lack the header yield NaN.\n",
"df['eligible'] = (\n",
"    df['eligibility'].str.split(INCLUSION_HEADER).str[1]\n",
"    .str.split(EXCLUSION_HEADER).str[0]\n",
")\n",
"df['ineligible'] = (\n",
"    df['eligibility'].str.split(EXCLUSION_HEADER).str[1]\n",
"    .str.split(INCLUSION_HEADER).str[0]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 42725 entries, 17 to 61776\n",
"Data columns (total 17 columns):\n",
"nct_id 42725 non-null object\n",
"brief_title 42725 non-null object\n",
"official_title 42169 non-null object\n",
"overall_status 42725 non-null object\n",
"start_date 42725 non-null object\n",
"completion_date 38927 non-null object\n",
"phase 34821 non-null object\n",
"study_type 42725 non-null object\n",
"brief_summary 42724 non-null object\n",
"detailed_description 29941 non-null object\n",
"enrollment 41413 non-null float64\n",
"condition 42725 non-null object\n",
"intervention_name 42725 non-null object\n",
"eligibility 42725 non-null object\n",
"condition_cancer 42725 non-null bool\n",
"eligible 36042 non-null object\n",
"ineligible 34798 non-null object\n",
"dtypes: bool(1), float64(1), object(15)\n",
"memory usage: 5.6+ MB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>nct_id</th>\n",
" <th>brief_title</th>\n",
" <th>official_title</th>\n",
" <th>overall_status</th>\n",
" <th>start_date</th>\n",
" <th>completion_date</th>\n",
" <th>phase</th>\n",
" <th>study_type</th>\n",
" <th>brief_summary</th>\n",
" <th>detailed_description</th>\n",
" <th>enrollment</th>\n",
" <th>condition</th>\n",
" <th>intervention_name</th>\n",
" <th>eligibility</th>\n",
" <th>condition_cancer</th>\n",
" <th>eligible</th>\n",
" <th>ineligible</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>NCT00001188</td>\n",
" <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
" <td>The Role of Multi-Modality Therapy for the Tre...</td>\n",
" <td>Completed</td>\n",
" <td>December 1983</td>\n",
" <td>September 2000</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with Grade II and III soft ti...</td>\n",
" <td>\\n Patients with Grade II and III soft ti...</td>\n",
" <td>100.0</td>\n",
" <td>sarcoma</td>\n",
" <td>radiation therapy following surgery</td>\n",
" <td>\\n patients must have biopsy-proven sof...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>NCT00001189</td>\n",
" <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
" <td>The Treatment of Grade I Sarcomas and Benign, ...</td>\n",
" <td>Completed</td>\n",
" <td>December 1983</td>\n",
" <td>April 2001</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with Grade I soft tissue sarc...</td>\n",
" <td>\\n This is a randomized study. Patients u...</td>\n",
" <td>150.0</td>\n",
" <td>neoplasms</td>\n",
" <td>radiotherapy</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20</th>\n",
" <td>NCT00001193</td>\n",
" <td>A Multimodality Treatment Approach to Patients...</td>\n",
" <td>A Multimodality Treatment Approach to Patients...</td>\n",
" <td>Completed</td>\n",
" <td>November 1984</td>\n",
" <td>September 2000</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study is designed to evaluate the...</td>\n",
" <td>\\n This study is designed to evaluate the...</td>\n",
" <td>200.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Melphalan</td>\n",
" <td>\\n patients must have a histologically ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21</th>\n",
" <td>NCT00001209</td>\n",
" <td>A Pilot Study for the Treatment of Patients Wi...</td>\n",
" <td>A Pilot Study for the Treatment of Patients Wi...</td>\n",
" <td>Completed</td>\n",
" <td>October 1986</td>\n",
" <td>August 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This protocol is designed to test the ...</td>\n",
" <td>\\n This protocol is designed to test the ...</td>\n",
" <td>120.0</td>\n",
" <td>sarcoma, ewing's</td>\n",
" <td>Vincristine, Doxorubicin, Ifosfamide, Cyclopho...</td>\n",
" <td>\\n patients with high grade soft tissue...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22</th>\n",
" <td>NCT00001217</td>\n",
" <td>Osteosarcoma Study #2: A Randomized Trial of P...</td>\n",
" <td>Osteosarcoma Study #2: A Randomized Trial of P...</td>\n",
" <td>Completed</td>\n",
" <td>May 1987</td>\n",
" <td>December 2000</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The study is designed to determine if ...</td>\n",
" <td>\\n The study is designed to determine if ...</td>\n",
" <td>260.0</td>\n",
" <td>osteosarcoma</td>\n",
" <td>pre-surgical chemotherapy</td>\n",
" <td>\\n must be less than or equal to 30 yea...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>NCT00001237</td>\n",
" <td>Pilot Protocol for the Treatment of Patients W...</td>\n",
" <td>Pilot Protocol for the Treatment of Patients W...</td>\n",
" <td>Completed</td>\n",
" <td>March 1989</td>\n",
" <td>April 2000</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Major improvements in the treatment of...</td>\n",
" <td>\\n Major improvements in the treatment of...</td>\n",
" <td>120.0</td>\n",
" <td>lymphoma, small noncleaved-cell</td>\n",
" <td>Sargramostim</td>\n",
" <td>\\n high risk protocol: patients with sm...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>NCT00001239</td>\n",
" <td>Combination Chemotherapy (FLAC) Combined With ...</td>\n",
" <td>Combination Chemotherapy (FLAC) Combined With ...</td>\n",
" <td>Completed</td>\n",
" <td>July 1989</td>\n",
" <td>January 2001</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n To evaluate a dose intensive chemother...</td>\n",
" <td>\\n To evaluate a dose intensive chemother...</td>\n",
" <td>100.0</td>\n",
" <td>breast neoplasms</td>\n",
" <td>Sargramostim</td>\n",
" <td>\\n all stage iii or clinical t3n0 or tx...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>NCT00001249</td>\n",
" <td>Treatment of Tac-Expressing Cutaneous T-Cell L...</td>\n",
" <td>Treatment of Tac-Expressing Cutaneous T-Cell L...</td>\n",
" <td>Completed</td>\n",
" <td>December 1989</td>\n",
" <td>October 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The study purpose is to evaluate the c...</td>\n",
" <td>\\n The study purpose is to evaluate the c...</td>\n",
" <td>30.0</td>\n",
" <td>lymphoma, t-cell, cutaneous</td>\n",
" <td>Antibodies, Daclizumab</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>NCT00001250</td>\n",
" <td>Effect of Preoperative Chemotherapy on Axillar...</td>\n",
" <td>Effect of Preoperative Chemotherapy on Axillar...</td>\n",
" <td>Completed</td>\n",
" <td>December 1989</td>\n",
" <td>October 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with untreated clinical stage...</td>\n",
" <td>\\n A prospective randomized trial evaluat...</td>\n",
" <td>130.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>preoperative dose intense chemotherapy (FLAC/G...</td>\n",
" <td>\\n inclusion criteria\\n\\n women ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>NCT00001251</td>\n",
" <td>Phase I Study of Intrathecal Mafosfamide</td>\n",
" <td>Phase I Study of Intrathecal Mafosfamide</td>\n",
" <td>Completed</td>\n",
" <td>November 1989</td>\n",
" <td>November 2003</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The purpose of this study is to determ...</td>\n",
" <td>\\n The purpose of this study is to determ...</td>\n",
" <td>65.0</td>\n",
" <td>meningeal neoplasm</td>\n",
" <td>Mafosfamide, Cyclophosphamide</td>\n",
" <td>\\n inclusion criteria:\\n\\n all p...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n all patients over 3 years of age w...</td>\n",
" <td>\\n\\n patients receiving other therapy (...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>30</th>\n",
" <td>NCT00001256</td>\n",
" <td>Steroids and Methotrexate to Treat Systemic Va...</td>\n",
" <td>An Open Trial of the Efficacy of Glucocorticoi...</td>\n",
" <td>Completed</td>\n",
" <td>March 1990</td>\n",
" <td>February 2004</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study will evaluate the safety an...</td>\n",
" <td>\\n Previous studies at the NIH have demon...</td>\n",
" <td>100.0</td>\n",
" <td>wegener's granulomatosis</td>\n",
" <td>Methotrexate, Prednisone</td>\n",
" <td>\\n inclusion criteria:\\n\\n diagn...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n diagnosis: wegener's granulomatosi...</td>\n",
" <td>\\n\\n evidence of infection by gram stai...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31</th>\n",
" <td>NCT00001266</td>\n",
" <td>A Phase II Trial of Leuprolide + Flutamide + S...</td>\n",
" <td>A Phase II Trial of Leuprolide + Flutamide + S...</td>\n",
" <td>Completed</td>\n",
" <td>October 1990</td>\n",
" <td>August 2003</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n One current hypothesis as to what limi...</td>\n",
" <td>\\n The purpose of this study is to assess...</td>\n",
" <td>70.0</td>\n",
" <td>prostatic neoplasm</td>\n",
" <td>Leuprolide, Flutamide, Suramin</td>\n",
" <td>\\n inclusion criteria:\\n\\n patie...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n patients must have a histologic di...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>NCT00001269</td>\n",
" <td>Phase I Trial of FLAC (5-Fluorouracil, Leucovo...</td>\n",
" <td>Phase I Trial of FLAC (5-Fluorouracil, Leucovo...</td>\n",
" <td>Completed</td>\n",
" <td>May 1991</td>\n",
" <td>February 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This is a phase I study to determine t...</td>\n",
" <td>\\n Phase I study to determine the maximal...</td>\n",
" <td>100.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Fluorouracil, Cyclophosphamide, Doxorubicin, L...</td>\n",
" <td>\\n patients with stage iv (metastatic) ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34</th>\n",
" <td>NCT00001270</td>\n",
" <td>Feasibility Study of Interleukin 1-Alpha With ...</td>\n",
" <td>Feasibility Study of Interleukin 1-Alpha With ...</td>\n",
" <td>Completed</td>\n",
" <td>June 1991</td>\n",
" <td>March 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This is a phase I/II study of interleu...</td>\n",
" <td>\\n This is a phase I/II study of interleu...</td>\n",
" <td>85.0</td>\n",
" <td>testicular neoplasms</td>\n",
" <td>Etoposide, Ifosfamide, Isophosphamide mustard</td>\n",
" <td>\\n a history of pathologically document...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>NCT00001271</td>\n",
" <td>A Phase I Study of Continuous Infusion Immunot...</td>\n",
" <td>A Phase I Study of Continuous Infusion Immunot...</td>\n",
" <td>Completed</td>\n",
" <td>July 1991</td>\n",
" <td>April 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with CD22(+) B-cell lymphomas...</td>\n",
" <td>\\n Patients with CD22(+) B-cell lymphomas...</td>\n",
" <td>24.0</td>\n",
" <td>b cell lymphoma</td>\n",
" <td>Immunotoxins</td>\n",
" <td>\\n patients with a histologic diagnosis...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>NCT00001272</td>\n",
" <td>A Phase I Study of Taxol, Cisplatin, Cyclophos...</td>\n",
" <td>A Phase I Study of Taxol, Cisplatin, Cyclophos...</td>\n",
" <td>Completed</td>\n",
" <td>September 1991</td>\n",
" <td>May 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This is a Phase I study which addresse...</td>\n",
" <td>\\n This is a Phase I study which addresse...</td>\n",
" <td>60.0</td>\n",
" <td>ovarian neoplasms</td>\n",
" <td>Cisplatin, Cyclophosphamide, Paclitaxel, Album...</td>\n",
" <td>\\n all patients must have biopsy proven...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>37</th>\n",
" <td>NCT00001296</td>\n",
" <td>A Randomized Phase III Trial of Hyperthermic I...</td>\n",
" <td>A Randomized Phase III Trial of Hyperthermic I...</td>\n",
" <td>Completed</td>\n",
" <td>February 1992</td>\n",
" <td>October 2000</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Randomized study. Initially, 3 patient...</td>\n",
" <td>\\n Patients with locally advanced melanom...</td>\n",
" <td>122.0</td>\n",
" <td>melanoma</td>\n",
" <td>Interferons, Melphalan, Interferon-gamma</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>NCT00001300</td>\n",
" <td>A Randomized Study of the Effect of Adjuvant C...</td>\n",
" <td>A Randomized Study of the Effect of Adjuvant C...</td>\n",
" <td>Completed</td>\n",
" <td>June 1992</td>\n",
" <td>March 2001</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Randomized study. All patients must be...</td>\n",
" <td>\\n Patients with primary, high-grade soft...</td>\n",
" <td>150.0</td>\n",
" <td>sarcoma</td>\n",
" <td>Doxorubicin, Liposomal doxorubicin, Ifosfamide...</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>NCT00001302</td>\n",
" <td>A Phase I Study of Infusional Chemotherapy Wit...</td>\n",
" <td>A Phase I Study of Infusional Chemotherapy Wit...</td>\n",
" <td>Completed</td>\n",
" <td>September 1992</td>\n",
" <td>June 2002</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The clinical study entitled \"A Phase I...</td>\n",
" <td>\\n The clinical study entitled \"A Phase I...</td>\n",
" <td>80.0</td>\n",
" <td>ovarian cancer</td>\n",
" <td>polysaccharide-K</td>\n",
" <td>\\n biopsy proven metastatic cancer, for...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>NCT00001328</td>\n",
" <td>Gene Therapy for the Treatment of Brain Tumors</td>\n",
" <td>Gene Therapy for the Treatment of Brain Tumors...</td>\n",
" <td>Completed</td>\n",
" <td>August 21, 1992</td>\n",
" <td>April 30, 2010</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Malignant brain tumors are responsible...</td>\n",
" <td>\\n Malignant brain tumors are responsible...</td>\n",
" <td>15.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Ganciclovir, Ganciclovir triphosphate</td>\n",
" <td>\\n - inclusion criteria:\\n\\n al...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n all adults, greater than 18 years ...</td>\n",
" <td>\\n\\n no pregnant women will be entered ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>NCT00001332</td>\n",
" <td>Phase I Study of Continuous Hyperthermic Perit...</td>\n",
" <td>Phase I Study of Continuous Hyperthermic Perit...</td>\n",
" <td>Completed</td>\n",
" <td>December 1992</td>\n",
" <td>October 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with gastric adenocarcinoma a...</td>\n",
" <td>\\n Patients with gastric adenocarcinoma a...</td>\n",
" <td>50.0</td>\n",
" <td>stomach neoplasms</td>\n",
" <td>CHPP with cisplatin</td>\n",
" <td>\\n patients age greater than or equal t...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>NCT00001333</td>\n",
" <td>Phase I Study of Intrathecal Topotecan</td>\n",
" <td>Phase I Study of Intrathecal Topotecan</td>\n",
" <td>Completed</td>\n",
" <td>February 1993</td>\n",
" <td>December 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The purpose of this study is to determ...</td>\n",
" <td>\\n The purpose of this study is to determ...</td>\n",
" <td>30.0</td>\n",
" <td>meningeal neoplasms</td>\n",
" <td>Topotecan</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>NCT00001335</td>\n",
" <td>New Therapeutic Strategies for Patients With E...</td>\n",
" <td>New Therapeutic Strategies for Patients With E...</td>\n",
" <td>Completed</td>\n",
" <td>April 1993</td>\n",
" <td>January 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The prognosis for patients with metast...</td>\n",
" <td>\\n The prognosis for patients with metast...</td>\n",
" <td>90.0</td>\n",
" <td>rhabdomyosarcoma</td>\n",
" <td>Topotecan, Dexrazoxane, Razoxane</td>\n",
" <td>\\n the patient must fall into one of th...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>NCT00001337</td>\n",
" <td>Dose-Adjusted EPOCH Chemotherapy and Rituximab...</td>\n",
" <td>Dose-Adjusted EPOCH Chemotherapy and Rituximab...</td>\n",
" <td>Recruiting</td>\n",
" <td>May 8, 1993</td>\n",
" <td>March 31, 2022</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n 5-Drug Combination Chemotherapy with H...</td>\n",
" <td>\\n Background:\\n\\n The treatment of ...</td>\n",
" <td>348.0</td>\n",
" <td>gray zone lymphoma</td>\n",
" <td>Rituximab</td>\n",
" <td>\\n - inclusion criteria:\\n\\n no...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n non-hodgkin's lymphomas in the fol...</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>47</th>\n",
" <td>NCT00001339</td>\n",
" <td>A Study of Combination Chemotherapy and Surgic...</td>\n",
" <td>A Study of Combination Chemotherapy and Surgic...</td>\n",
" <td>Completed</td>\n",
" <td>August 1993</td>\n",
" <td>August 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients who have no response to preop...</td>\n",
" <td>\\n This is a study of infusional doxorubi...</td>\n",
" <td>42.0</td>\n",
" <td>adrenal cortical carcinoma</td>\n",
" <td>Doxorubicin, Liposomal doxorubicin, Etoposide,...</td>\n",
" <td>\\n biopsy-proven primary or recurrent a...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>NCT00001341</td>\n",
" <td>A Phase I Trial of ZD1694 (TOMUDEX), an Inhibi...</td>\n",
" <td>A Phase I Trial of ZD1694 (TOMUDEX® (Registere...</td>\n",
" <td>Completed</td>\n",
" <td>September 1993</td>\n",
" <td>June 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Thymidylate synthase (TS), an enzyme w...</td>\n",
" <td>\\n Thymidylate synthase (TS), an enzyme w...</td>\n",
" <td>60.0</td>\n",
" <td>neoplasm</td>\n",
" <td>Raltitrexed</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>NCT00001378</td>\n",
" <td>A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr...</td>\n",
" <td>A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr...</td>\n",
" <td>Completed</td>\n",
" <td>January 1994</td>\n",
" <td>November 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This is a pilot, chemoprevention study...</td>\n",
" <td>\\n This is a pilot chemo-prevention study...</td>\n",
" <td>75.0</td>\n",
" <td>breast neoplasms</td>\n",
" <td>Tamoxifen, Retinamide</td>\n",
" <td>\\n population characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>NCT00001381</td>\n",
" <td>A Phase I Trial Using Suramin to Treat Superfi...</td>\n",
" <td>A Phase I Trial Using Suramin to Treat Superfi...</td>\n",
" <td>Completed</td>\n",
" <td>March 1994</td>\n",
" <td>December 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with superficial transitional...</td>\n",
" <td>\\n Patients with superficial transitional...</td>\n",
" <td>18.0</td>\n",
" <td>carcinoma, transitional cell</td>\n",
" <td>Suramin</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>NCT00001382</td>\n",
" <td>A Phase I Study of Recombinant Vaccinia Virus ...</td>\n",
" <td>A Phase I Study of Recombinant Vaccinia Virus ...</td>\n",
" <td>Completed</td>\n",
" <td>March 1994</td>\n",
" <td>March 2000</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This trial will evaluate, in patients ...</td>\n",
" <td>\\n This trial will evaluate, in patients ...</td>\n",
" <td>75.0</td>\n",
" <td>prostatic neoplasms</td>\n",
" <td>PROSTVAC</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>NCT00001383</td>\n",
" <td>A Phase I Study of Infusional Paclitaxel With ...</td>\n",
" <td>A Phase I Study of Infusional Paclitaxel With ...</td>\n",
" <td>Completed</td>\n",
" <td>March 1994</td>\n",
" <td>January 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This is a dosage escalation study to e...</td>\n",
" <td>\\n The clinical study entitled \"A Phase I...</td>\n",
" <td>52.0</td>\n",
" <td>ovarian cancer</td>\n",
" <td>Paclitaxel, Albumin-Bound Paclitaxel, polysacc...</td>\n",
" <td>\\n biopsy proven advanced cancer, for w...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>109</th>\n",
" <td>NCT00001587</td>\n",
" <td>A Phase I Study of Isolated Hepatic Portal and...</td>\n",
" <td>A Phase I Study of Isolated Hepatic Portal and...</td>\n",
" <td>Completed</td>\n",
" <td>September 1997</td>\n",
" <td>March 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with unresectable primary or ...</td>\n",
" <td>\\n Patients with unresectable primary or ...</td>\n",
" <td>30.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Melphalan</td>\n",
" <td>\\n histologically or cytologically prov...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>115</th>\n",
" <td>NCT00001683</td>\n",
" <td>A Phase I Study of Oral COL-3 (NSC-683551), a ...</td>\n",
" <td>A Phase I Study of Oral COL-3 (NSC-683551), a ...</td>\n",
" <td>Completed</td>\n",
" <td>October 1997</td>\n",
" <td>August 2003</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Matrix metalloproteinases (MMPs) are a...</td>\n",
" <td>\\n Matrix metalloproteinases (MMPs) are a...</td>\n",
" <td>35.0</td>\n",
" <td>renal cell carcinoma</td>\n",
" <td>Tissue Inhibitor of Metalloproteinases, Matrix...</td>\n",
" <td>\\n inclusion criteria:\\n\\n all p...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n all patients with refractory solid...</td>\n",
" <td>\\n\\n active infection, including positi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>116</th>\n",
" <td>NCT00001685</td>\n",
" <td>Immunization of HLA-A201 Patients With Metasta...</td>\n",
" <td>Immunization of HLA-A201 Patients With Metasta...</td>\n",
" <td>Completed</td>\n",
" <td>November 1997</td>\n",
" <td>September 2000</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This is a study of a melanoma tumor an...</td>\n",
" <td>\\n This is a study of a melanoma tumor an...</td>\n",
" <td>114.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Vaccines</td>\n",
" <td>\\n any patient 16 years of age or older...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>120</th>\n",
" <td>NCT00001696</td>\n",
" <td>A Pharmacokinetic Study of Genistein, a Tyrosi...</td>\n",
" <td>A Pharmacokinetic Study of Genistein, a Tyrosi...</td>\n",
" <td>Completed</td>\n",
" <td>April 1998</td>\n",
" <td>March 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Genistein is a natural product found i...</td>\n",
" <td>\\n Genistein is a natural product found i...</td>\n",
" <td>15.0</td>\n",
" <td>cancer</td>\n",
" <td>Genistein</td>\n",
" <td>\\n must be 18 years old or greater.\\n\\n...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>122</th>\n",
" <td>NCT00001703</td>\n",
" <td>Vaccine Therapy With Tumor Specific Mutated VH...</td>\n",
" <td>Vaccine Therapy With Tumor Specific Mutated VH...</td>\n",
" <td>Terminated</td>\n",
" <td>August 1998</td>\n",
" <td>November 2008</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n About 27,000 new cases of renal cell c...</td>\n",
" <td>\\n About 27,000 new cases of renal cell c...</td>\n",
" <td>6.0</td>\n",
" <td>renal cell carcinoma</td>\n",
" <td>Vaccines, \"Freunds Adjuvant\"</td>\n",
" <td>\\n inclusion criteria:\\n\\n - ...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n - patients must be 18 years of ...</td>\n",
" <td>\\n\\n - any condition that does not f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>123</th>\n",
" <td>NCT00001705</td>\n",
" <td>Immunization of Patients With Metastatic Melan...</td>\n",
" <td>Immunization of Patients With Metastatic Melan...</td>\n",
" <td>Completed</td>\n",
" <td>July 1998</td>\n",
" <td>June 2001</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Patients with metastatic melanoma who ...</td>\n",
" <td>\\n Patients with metastatic melanoma who ...</td>\n",
" <td>141.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Vaccines</td>\n",
" <td>\\n any patient age greater than or equa...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>125</th>\n",
" <td>NCT00001730</td>\n",
" <td>Study of Radioiodine (131-I) Uptake Following ...</td>\n",
" <td>A Dosimetry Study of Radioiodine (131-I) Uptak...</td>\n",
" <td>Completed</td>\n",
" <td>December 1997</td>\n",
" <td>April 2000</td>\n",
" <td>Phase 4</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Thyroid cancer is typically treated wi...</td>\n",
" <td>\\n This is a multi-centered, open-labeled...</td>\n",
" <td>20.0</td>\n",
" <td>thyroid neoplasms</td>\n",
" <td>Hormones</td>\n",
" <td>\\n patients greater than or equal to 18...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>127</th>\n",
" <td>NCT00001750</td>\n",
" <td>Comparing Treatments for Multiple Myeloma</td>\n",
" <td>Randomized Trial of Autologous Transplantation...</td>\n",
" <td>Completed</td>\n",
" <td>September 1998</td>\n",
" <td>August 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Some drugs have the ability to push st...</td>\n",
" <td>\\n Some drugs, such as hematopoietic cyto...</td>\n",
" <td>32.0</td>\n",
" <td>multiple myeloma</td>\n",
" <td>Stemgen</td>\n",
" <td>\\n inclusion criteria\\n\\n age 70...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>128</th>\n",
" <td>NCT00001765</td>\n",
" <td>Stem Cell Transplant Following Low-Intensity C...</td>\n",
" <td>Low Intensity Preparative Regimen Followed by ...</td>\n",
" <td>Completed</td>\n",
" <td>April 1998</td>\n",
" <td>February 2005</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study will investigate the safety...</td>\n",
" <td>\\n Chronic Granulomatous Disease (CGD) is...</td>\n",
" <td>60.0</td>\n",
" <td>chronic granulomatous disease</td>\n",
" <td>Nexell Isolex with T-cell Depletion, Baxter is...</td>\n",
" <td>\\n inclusion criteria:\\n\\n patie...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n patient criteria:\\n\\n ages ...</td>\n",
" <td>\\n\\n patient or donor pregnant.\\n\\n ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>131</th>\n",
" <td>NCT00001805</td>\n",
" <td>A Phase II Clinical Trial of Suppression of Hu...</td>\n",
" <td>A Phase II Clinical Trial of Suppression of Hu...</td>\n",
" <td>Completed</td>\n",
" <td>March 1999</td>\n",
" <td>June 2000</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This is a phase II clinical and pharma...</td>\n",
" <td>\\n This is a phase II clinical and pharma...</td>\n",
" <td>20.0</td>\n",
" <td>stomach neoplasms</td>\n",
" <td>Antibodies, Rituximab, Immunotoxins, Antitoxins</td>\n",
" <td>\\n patients must have advanced stage so...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>132</th>\n",
" <td>NCT00001806</td>\n",
" <td>Methods in Education for Breast Cancer Genetics</td>\n",
" <td>Methods in Education for Breast Cancer Genetics</td>\n",
" <td>Completed</td>\n",
" <td>April 6, 1999</td>\n",
" <td>December 6, 2017</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n In 1997, the Genetics Department of th...</td>\n",
" <td>\\n In October 1995 the National Naval Med...</td>\n",
" <td>170.0</td>\n",
" <td>ovarian cancer</td>\n",
" <td>Genetic Education and Counseling, Genetic Educ...</td>\n",
" <td>\\n - inclusion criteria:\\n\\n at...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n at least one of the following:\\n\\n...</td>\n",
" <td>\\n\\n patients will be considered inelig...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>134</th>\n",
" <td>NCT00001812</td>\n",
" <td>A Randomized, Double-Blind, Placebo Controlled...</td>\n",
" <td>A Randomized, Double-Blind, Placebo Controlled...</td>\n",
" <td>Completed</td>\n",
" <td>April 1999</td>\n",
" <td>August 2000</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n In patients who are receiving intraven...</td>\n",
" <td>\\n In patients who are receiving intraven...</td>\n",
" <td>84.0</td>\n",
" <td>stomatitis</td>\n",
" <td>Interleukin-2, Nystatin</td>\n",
" <td>\\n all patients enrolled on high dose i...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>136</th>\n",
" <td>NCT00001827</td>\n",
" <td>p53 Vaccine for Ovarian Cancer</td>\n",
" <td>Vaccine Therapy With Tumor Specific p53 Peptid...</td>\n",
" <td>Terminated</td>\n",
" <td>July 26, 1999</td>\n",
" <td>January 25, 2013</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study will examine whether vaccin...</td>\n",
" <td>\\n P53 is the most commonly mutated gene ...</td>\n",
" <td>21.0</td>\n",
" <td>ovarian neoplasm</td>\n",
" <td>Vaccines, Sargramostim, \"Freunds Adjuvant\", Al...</td>\n",
" <td>\\n - inclusion criteria:\\n\\n pa...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n patients must be 18 years of age o...</td>\n",
" <td>\\n\\n any condition that does not fit wi...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>137</th>\n",
" <td>NCT00001830</td>\n",
" <td>Donor Th2 Cells to Prevent Graft-Versus-Host D...</td>\n",
" <td>Pilot Study of Donor Th2 Cells for the Prevent...</td>\n",
" <td>Completed</td>\n",
" <td>July 20, 1999</td>\n",
" <td>May 19, 2015</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Allogeneic peripheral blood stem cell ...</td>\n",
" <td>\\n Allogeneic peripheral blood stem cell ...</td>\n",
" <td>110.0</td>\n",
" <td>non hodgkin's lymphoma</td>\n",
" <td>Th2 cells in allo HSCTT, Th2 Cells</td>\n",
" <td>\\n - inclusion criteria - patient:\\n\\n...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>138</th>\n",
" <td>NCT00001832</td>\n",
" <td>Lymphocyte Re-infusion During Immune Suppressi...</td>\n",
" <td>Treatment of Patients With Metastatic Melanoma...</td>\n",
" <td>Completed</td>\n",
" <td>August 1999</td>\n",
" <td>May 2010</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This experiment will test the safety a...</td>\n",
" <td>\\n Patients with metastatic melanoma who ...</td>\n",
" <td>170.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Cyclophosphamide, Fludarabine phosphate, Fluda...</td>\n",
" <td>\\n - inclusion criteria\\n\\n -...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>139</th>\n",
" <td>NCT00001835</td>\n",
" <td>Oxaliplatin in Cancer Patients With Impaired K...</td>\n",
" <td>A Phase I Study of Oxaliplatin in Adult Cancer...</td>\n",
" <td>Completed</td>\n",
" <td>September 1999</td>\n",
" <td>December 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Oxaliplatin is an experimental anti-ca...</td>\n",
" <td>\\n Oxaliplatin is a diaminocyclohexane pl...</td>\n",
" <td>60.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Oxaliplatin</td>\n",
" <td>\\n patients must have histologically co...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>NCT00001860</td>\n",
" <td>Sandostatin LAR Depot vs. Surgery for Treating...</td>\n",
" <td>Sandostatin LAR vs. Surgery in Acromegalics Wi...</td>\n",
" <td>Completed</td>\n",
" <td>August 1999</td>\n",
" <td>July 2002</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The purpose of this study is to compar...</td>\n",
" <td>\\n The purpose of this study is to compar...</td>\n",
" <td>5.0</td>\n",
" <td>pituitary neoplasm</td>\n",
" <td>Octreotide</td>\n",
" <td>\\n inclusion criteria:\\n\\n male ...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n male or female patients, 18 years ...</td>\n",
" <td>\\n\\n patients demonstrating intolerance...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>NCT00001880</td>\n",
" <td>Stem Cell Transplantation for Metastatic Solid...</td>\n",
" <td>Exploratory Study of Non-Myeloablative Allogen...</td>\n",
" <td>Completed</td>\n",
" <td>March 12, 1999</td>\n",
" <td>September 23, 2008</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n The goal of this research study is to ...</td>\n",
" <td>\\n The main objective of this study is to...</td>\n",
" <td>84.0</td>\n",
" <td>neoplasm metastasis</td>\n",
" <td>Methotrexate, Cyclosporine, Cyclosporins</td>\n",
" <td>\\n - inclusion criteria:\\n\\n pa...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n patients:\\n\\n patients with...</td>\n",
" <td>\\n\\n patient:\\n\\n pregnant or la...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>150</th>\n",
" <td>NCT00001901</td>\n",
" <td>Etanercept to Treat Wegener's Granulomatosis</td>\n",
" <td>Phase I/II Trial of TNFR:Fc (Etanercept) in Pa...</td>\n",
" <td>Completed</td>\n",
" <td>February 1999</td>\n",
" <td>March 2005</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This study will examine the use of eta...</td>\n",
" <td>\\n The purpose of the study is to assess ...</td>\n",
" <td>60.0</td>\n",
" <td>wegener's granulomatosis</td>\n",
" <td>Etanercept</td>\n",
" <td>\\n inclusion criteria:\\n\\n docum...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n documentation of wegener's granulo...</td>\n",
" <td>\\n\\n patients with evidence of bacteria...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>155</th>\n",
" <td>NCT00001944</td>\n",
" <td>Vinorelbine and XR9576 to Treat Cancer</td>\n",
" <td>A Clinical Trial of the P-Glycoprotein Antagon...</td>\n",
" <td>Completed</td>\n",
" <td>December 1999</td>\n",
" <td>June 2001</td>\n",
" <td>Phase 1</td>\n",
" <td>Interventional</td>\n",
" <td>\\n Tumor resistance to anti-cancer drugs ...</td>\n",
" <td>\\n Intrinsic and acquired drug resistance...</td>\n",
" <td>30.0</td>\n",
" <td>ovarian cancer</td>\n",
" <td>Vinorelbine</td>\n",
" <td>\\n age greater than or equal to 18 year...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>156</th>\n",
" <td>NCT00001955</td>\n",
" <td>Study of Etanercept and Celecoxib to Treat Tem...</td>\n",
" <td>The Role of Cytokines as Inflammatory Mediator...</td>\n",
" <td>Completed</td>\n",
" <td>December 1999</td>\n",
" <td>February 2004</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n This 2-part study will evaluate the ef...</td>\n",
" <td>\\n The proposed clinical trial will consi...</td>\n",
" <td>150.0</td>\n",
" <td>temporomandibular joint disorder</td>\n",
" <td>Celecoxib, Etanercept</td>\n",
" <td>\\n celecoxib study:\\n\\n inclusio...</td>\n",
" <td>True</td>\n",
" <td>\\n\\n recruitment will include patients ...</td>\n",
" <td>\\n\\n subjects who had undergone any tmj...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>158</th>\n",
" <td>NCT00002454</td>\n",
" <td>Papilloma Virus Vaccine Therapy in Treating Yo...</td>\n",
" <td>Phase II Study of Immunotherapy With Autogenou...</td>\n",
" <td>Unknown status</td>\n",
" <td>December 1971</td>\n",
" <td>NaN</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Vaccines made from papillom...</td>\n",
" <td>\\n OBJECTIVES: I. Determine the immune re...</td>\n",
" <td>NaN</td>\n",
" <td>precancerous condition</td>\n",
" <td>Vaccines</td>\n",
" <td>\\n disease characteristics: diagnosis o...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>159</th>\n",
" <td>NCT00002455</td>\n",
" <td>Immunotherapy After Surgery in Treating Patien...</td>\n",
" <td>Immunotherapy of Colon Cancer With Autologous ...</td>\n",
" <td>Unknown status</td>\n",
" <td>April 1971</td>\n",
" <td>NaN</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Immunotherapy uses differen...</td>\n",
" <td>\\n OBJECTIVES:\\n\\n - Determine th...</td>\n",
" <td>NaN</td>\n",
" <td>melanoma (skin)</td>\n",
" <td>Corynebacterium granulosum P40, adjuvant therapy</td>\n",
" <td>\\n disease characteristics:\\n\\n ...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>160</th>\n",
" <td>NCT00002456</td>\n",
" <td>Graft-Versus-Host Disease Prevention in Treati...</td>\n",
" <td>Postgrafting Methotrexate and Cyclosporine for...</td>\n",
" <td>Completed</td>\n",
" <td>May 1986</td>\n",
" <td>April 2002</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Bone marrow transplantation...</td>\n",
" <td>\\n OBJECTIVES: I. Determine the efficacy ...</td>\n",
" <td>NaN</td>\n",
" <td>lymphoma</td>\n",
" <td>Methotrexate, Cyclosporine, Cyclosporins</td>\n",
" <td>\\n disease characteristics: ongoing bon...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>161</th>\n",
" <td>NCT00002458</td>\n",
" <td>Monoclonal Antibody Therapy in Treating Childr...</td>\n",
" <td>Phase II Study of Adjuvant Therapy With Antiga...</td>\n",
" <td>Completed</td>\n",
" <td>November 1987</td>\n",
" <td>September 2001</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Monoclonal antibodies can l...</td>\n",
" <td>\\n OBJECTIVES: I. Evaluate the efficacy o...</td>\n",
" <td>NaN</td>\n",
" <td>neuroblastoma</td>\n",
" <td>Antibodies, Immunoglobulins, Antibodies, Monoc...</td>\n",
" <td>\\n disease characteristics: histologica...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>162</th>\n",
" <td>NCT00002459</td>\n",
" <td>Radiation Therapy or No Further Treatment Foll...</td>\n",
" <td>Phase III Randomized Study of Adjuvant Pelvic ...</td>\n",
" <td>Completed</td>\n",
" <td>April 1988</td>\n",
" <td>NaN</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Radiation therapy uses high...</td>\n",
" <td>\\n OBJECTIVES: I. Compare the rates of pe...</td>\n",
" <td>224.0</td>\n",
" <td>sarcoma</td>\n",
" <td>radiation therapy</td>\n",
" <td>\\n disease characteristics: histologica...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>163</th>\n",
" <td>NCT00002460</td>\n",
" <td>Adjuvant Hormone Therapy in Treating Women Wit...</td>\n",
" <td>Phase III Randomized Study of Adjuvant Therapy...</td>\n",
" <td>Unknown status</td>\n",
" <td>September 1987</td>\n",
" <td>NaN</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Estrogen can stimulate the ...</td>\n",
" <td>\\n OBJECTIVES: I. Determine, in a prospec...</td>\n",
" <td>NaN</td>\n",
" <td>breast cancer</td>\n",
" <td>Tamoxifen, Goserelin</td>\n",
" <td>\\n disease characteristics: operable, c...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>164</th>\n",
" <td>NCT00002461</td>\n",
" <td>Combination Chemotherapy Followed by Bone Marr...</td>\n",
" <td>Phase II Study of Intensive Carmustine and Eto...</td>\n",
" <td>Completed</td>\n",
" <td>April 1988</td>\n",
" <td>July 1991</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Drugs used in chemotherapy ...</td>\n",
" <td>\\n OBJECTIVES: I. Determine the antitumor...</td>\n",
" <td>35.0</td>\n",
" <td>lymphoma</td>\n",
" <td>Cisplatin, Cyclophosphamide, Etoposide, Etopos...</td>\n",
" <td>\\n disease characteristics: diagnosis o...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>165</th>\n",
" <td>NCT00002462</td>\n",
" <td>RT or No RT Following Chemotherapy in Treating...</td>\n",
" <td>Phase III Randomized Trial of Adjuvant Involve...</td>\n",
" <td>Active, not recruiting</td>\n",
" <td>September 1989</td>\n",
" <td>NaN</td>\n",
" <td>Phase 3</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Drugs used in chemotherapy ...</td>\n",
" <td>\\n OBJECTIVES: I. Compare relapse-free su...</td>\n",
" <td>615.0</td>\n",
" <td>lymphoma</td>\n",
" <td>Doxorubicin, Liposomal doxorubicin, Bleomycin,...</td>\n",
" <td>\\n disease characteristics: histologica...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>166</th>\n",
" <td>NCT00002463</td>\n",
" <td>Combination Chemotherapy in Treating Children ...</td>\n",
" <td>Phase II Study of Methotrexate, Mechlorethamin...</td>\n",
" <td>Completed</td>\n",
" <td>February 1989</td>\n",
" <td>January 2008</td>\n",
" <td>Phase 2</td>\n",
" <td>Interventional</td>\n",
" <td>\\n RATIONALE: Drugs used in chemotherapy ...</td>\n",
" <td>\\n OBJECTIVES: I. Determine the efficacy ...</td>\n",
" <td>4.0</td>\n",
" <td>brain and central nervous system tumors</td>\n",
" <td>Methotrexate, Prednisone, Vincristine, Procarb...</td>\n",
" <td>\\n disease characteristics: histologica...</td>\n",
" <td>True</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>100 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" nct_id brief_title \\\n",
"17 NCT00001188 The Role of Multi-Modality Therapy for the Tre... \n",
"18 NCT00001189 The Treatment of Grade I Sarcomas and Benign, ... \n",
"20 NCT00001193 A Multimodality Treatment Approach to Patients... \n",
"21 NCT00001209 A Pilot Study for the Treatment of Patients Wi... \n",
"22 NCT00001217 Osteosarcoma Study #2: A Randomized Trial of P... \n",
"25 NCT00001237 Pilot Protocol for the Treatment of Patients W... \n",
"26 NCT00001239 Combination Chemotherapy (FLAC) Combined With ... \n",
"27 NCT00001249 Treatment of Tac-Expressing Cutaneous T-Cell L... \n",
"28 NCT00001250 Effect of Preoperative Chemotherapy on Axillar... \n",
"29 NCT00001251 Phase I Study of Intrathecal Mafosfamide \n",
"30 NCT00001256 Steroids and Methotrexate to Treat Systemic Va... \n",
"31 NCT00001266 A Phase II Trial of Leuprolide + Flutamide + S... \n",
"33 NCT00001269 Phase I Trial of FLAC (5-Fluorouracil, Leucovo... \n",
"34 NCT00001270 Feasibility Study of Interleukin 1-Alpha With ... \n",
"35 NCT00001271 A Phase I Study of Continuous Infusion Immunot... \n",
"36 NCT00001272 A Phase I Study of Taxol, Cisplatin, Cyclophos... \n",
"37 NCT00001296 A Randomized Phase III Trial of Hyperthermic I... \n",
"39 NCT00001300 A Randomized Study of the Effect of Adjuvant C... \n",
"40 NCT00001302 A Phase I Study of Infusional Chemotherapy Wit... \n",
"41 NCT00001328 Gene Therapy for the Treatment of Brain Tumors \n",
"42 NCT00001332 Phase I Study of Continuous Hyperthermic Perit... \n",
"43 NCT00001333 Phase I Study of Intrathecal Topotecan \n",
"44 NCT00001335 New Therapeutic Strategies for Patients With E... \n",
"45 NCT00001337 Dose-Adjusted EPOCH Chemotherapy and Rituximab... \n",
"47 NCT00001339 A Study of Combination Chemotherapy and Surgic... \n",
"48 NCT00001341 A Phase I Trial of ZD1694 (TOMUDEX), an Inhibi... \n",
"49 NCT00001378 A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr... \n",
"51 NCT00001381 A Phase I Trial Using Suramin to Treat Superfi... \n",
"52 NCT00001382 A Phase I Study of Recombinant Vaccinia Virus ... \n",
"53 NCT00001383 A Phase I Study of Infusional Paclitaxel With ... \n",
".. ... ... \n",
"109 NCT00001587 A Phase I Study of Isolated Hepatic Portal and... \n",
"115 NCT00001683 A Phase I Study of Oral COL-3 (NSC-683551), a ... \n",
"116 NCT00001685 Immunization of HLA-A201 Patients With Metasta... \n",
"120 NCT00001696 A Pharmacokinetic Study of Genistein, a Tyrosi... \n",
"122 NCT00001703 Vaccine Therapy With Tumor Specific Mutated VH... \n",
"123 NCT00001705 Immunization of Patients With Metastatic Melan... \n",
"125 NCT00001730 Study of Radioiodine (131-I) Uptake Following ... \n",
"127 NCT00001750 Comparing Treatments for Multiple Myeloma \n",
"128 NCT00001765 Stem Cell Transplant Following Low-Intensity C... \n",
"131 NCT00001805 A Phase II Clinical Trial of Suppression of Hu... \n",
"132 NCT00001806 Methods in Education for Breast Cancer Genetics \n",
"134 NCT00001812 A Randomized, Double-Blind, Placebo Controlled... \n",
"136 NCT00001827 p53 Vaccine for Ovarian Cancer \n",
"137 NCT00001830 Donor Th2 Cells to Prevent Graft-Versus-Host D... \n",
"138 NCT00001832 Lymphocyte Re-infusion During Immune Suppressi... \n",
"139 NCT00001835 Oxaliplatin in Cancer Patients With Impaired K... \n",
"144 NCT00001860 Sandostatin LAR Depot vs. Surgery for Treating... \n",
"148 NCT00001880 Stem Cell Transplantation for Metastatic Solid... \n",
"150 NCT00001901 Etanercept to Treat Wegener's Granulomatosis \n",
"155 NCT00001944 Vinorelbine and XR9576 to Treat Cancer \n",
"156 NCT00001955 Study of Etanercept and Celecoxib to Treat Tem... \n",
"158 NCT00002454 Papilloma Virus Vaccine Therapy in Treating Yo... \n",
"159 NCT00002455 Immunotherapy After Surgery in Treating Patien... \n",
"160 NCT00002456 Graft-Versus-Host Disease Prevention in Treati... \n",
"161 NCT00002458 Monoclonal Antibody Therapy in Treating Childr... \n",
"162 NCT00002459 Radiation Therapy or No Further Treatment Foll... \n",
"163 NCT00002460 Adjuvant Hormone Therapy in Treating Women Wit... \n",
"164 NCT00002461 Combination Chemotherapy Followed by Bone Marr... \n",
"165 NCT00002462 RT or No RT Following Chemotherapy in Treating... \n",
"166 NCT00002463 Combination Chemotherapy in Treating Children ... \n",
"\n",
" official_title \\\n",
"17 The Role of Multi-Modality Therapy for the Tre... \n",
"18 The Treatment of Grade I Sarcomas and Benign, ... \n",
"20 A Multimodality Treatment Approach to Patients... \n",
"21 A Pilot Study for the Treatment of Patients Wi... \n",
"22 Osteosarcoma Study #2: A Randomized Trial of P... \n",
"25 Pilot Protocol for the Treatment of Patients W... \n",
"26 Combination Chemotherapy (FLAC) Combined With ... \n",
"27 Treatment of Tac-Expressing Cutaneous T-Cell L... \n",
"28 Effect of Preoperative Chemotherapy on Axillar... \n",
"29 Phase I Study of Intrathecal Mafosfamide \n",
"30 An Open Trial of the Efficacy of Glucocorticoi... \n",
"31 A Phase II Trial of Leuprolide + Flutamide + S... \n",
"33 Phase I Trial of FLAC (5-Fluorouracil, Leucovo... \n",
"34 Feasibility Study of Interleukin 1-Alpha With ... \n",
"35 A Phase I Study of Continuous Infusion Immunot... \n",
"36 A Phase I Study of Taxol, Cisplatin, Cyclophos... \n",
"37 A Randomized Phase III Trial of Hyperthermic I... \n",
"39 A Randomized Study of the Effect of Adjuvant C... \n",
"40 A Phase I Study of Infusional Chemotherapy Wit... \n",
"41 Gene Therapy for the Treatment of Brain Tumors... \n",
"42 Phase I Study of Continuous Hyperthermic Perit... \n",
"43 Phase I Study of Intrathecal Topotecan \n",
"44 New Therapeutic Strategies for Patients With E... \n",
"45 Dose-Adjusted EPOCH Chemotherapy and Rituximab... \n",
"47 A Study of Combination Chemotherapy and Surgic... \n",
"48 A Phase I Trial of ZD1694 (TOMUDEX® (Registere... \n",
"49 A Pilot Trial of Tamoxifen and 4-HPR (4-N-Hydr... \n",
"51 A Phase I Trial Using Suramin to Treat Superfi... \n",
"52 A Phase I Study of Recombinant Vaccinia Virus ... \n",
"53 A Phase I Study of Infusional Paclitaxel With ... \n",
".. ... \n",
"109 A Phase I Study of Isolated Hepatic Portal and... \n",
"115 A Phase I Study of Oral COL-3 (NSC-683551), a ... \n",
"116 Immunization of HLA-A201 Patients With Metasta... \n",
"120 A Pharmacokinetic Study of Genistein, a Tyrosi... \n",
"122 Vaccine Therapy With Tumor Specific Mutated VH... \n",
"123 Immunization of Patients With Metastatic Melan... \n",
"125 A Dosimetry Study of Radioiodine (131-I) Uptak... \n",
"127 Randomized Trial of Autologous Transplantation... \n",
"128 Low Intensity Preparative Regimen Followed by ... \n",
"131 A Phase II Clinical Trial of Suppression of Hu... \n",
"132 Methods in Education for Breast Cancer Genetics \n",
"134 A Randomized, Double-Blind, Placebo Controlled... \n",
"136 Vaccine Therapy With Tumor Specific p53 Peptid... \n",
"137 Pilot Study of Donor Th2 Cells for the Prevent... \n",
"138 Treatment of Patients With Metastatic Melanoma... \n",
"139 A Phase I Study of Oxaliplatin in Adult Cancer... \n",
"144 Sandostatin LAR vs. Surgery in Acromegalics Wi... \n",
"148 Exploratory Study of Non-Myeloablative Allogen... \n",
"150 Phase I/II Trial of TNFR:Fc (Etanercept) in Pa... \n",
"155 A Clinical Trial of the P-Glycoprotein Antagon... \n",
"156 The Role of Cytokines as Inflammatory Mediator... \n",
"158 Phase II Study of Immunotherapy With Autogenou... \n",
"159 Immunotherapy of Colon Cancer With Autologous ... \n",
"160 Postgrafting Methotrexate and Cyclosporine for... \n",
"161 Phase II Study of Adjuvant Therapy With Antiga... \n",
"162 Phase III Randomized Study of Adjuvant Pelvic ... \n",
"163 Phase III Randomized Study of Adjuvant Therapy... \n",
"164 Phase II Study of Intensive Carmustine and Eto... \n",
"165 Phase III Randomized Trial of Adjuvant Involve... \n",
"166 Phase II Study of Methotrexate, Mechlorethamin... \n",
"\n",
" overall_status start_date completion_date phase \\\n",
"17 Completed December 1983 September 2000 Phase 2 \n",
"18 Completed December 1983 April 2001 Phase 2 \n",
"20 Completed November 1984 September 2000 Phase 2 \n",
"21 Completed October 1986 August 2000 Phase 1 \n",
"22 Completed May 1987 December 2000 Phase 3 \n",
"25 Completed March 1989 April 2000 Phase 2 \n",
"26 Completed July 1989 January 2001 Phase 2 \n",
"27 Completed December 1989 October 2000 Phase 1 \n",
"28 Completed December 1989 October 2002 Phase 2 \n",
"29 Completed November 1989 November 2003 Phase 1 \n",
"30 Completed March 1990 February 2004 Phase 2 \n",
"31 Completed October 1990 August 2003 Phase 2 \n",
"33 Completed May 1991 February 2001 Phase 1 \n",
"34 Completed June 1991 March 2000 Phase 1 \n",
"35 Completed July 1991 April 2001 Phase 1 \n",
"36 Completed September 1991 May 2000 Phase 1 \n",
"37 Completed February 1992 October 2000 Phase 3 \n",
"39 Completed June 1992 March 2001 Phase 3 \n",
"40 Completed September 1992 June 2002 Phase 1 \n",
"41 Completed August 21, 1992 April 30, 2010 Phase 1 \n",
"42 Completed December 1992 October 2000 Phase 1 \n",
"43 Completed February 1993 December 2000 Phase 1 \n",
"44 Completed April 1993 January 2002 Phase 2 \n",
"45 Recruiting May 8, 1993 March 31, 2022 Phase 2 \n",
"47 Completed August 1993 August 2002 Phase 2 \n",
"48 Completed September 1993 June 2001 Phase 1 \n",
"49 Completed January 1994 November 2000 Phase 1 \n",
"51 Completed March 1994 December 2000 Phase 1 \n",
"52 Completed March 1994 March 2000 Phase 1 \n",
"53 Completed March 1994 January 2001 Phase 1 \n",
".. ... ... ... ... \n",
"109 Completed September 1997 March 2001 Phase 1 \n",
"115 Completed October 1997 August 2003 Phase 1 \n",
"116 Completed November 1997 September 2000 Phase 2 \n",
"120 Completed April 1998 March 2001 Phase 1 \n",
"122 Terminated August 1998 November 2008 Phase 2 \n",
"123 Completed July 1998 June 2001 Phase 2 \n",
"125 Completed December 1997 April 2000 Phase 4 \n",
"127 Completed September 1998 August 2002 Phase 2 \n",
"128 Completed April 1998 February 2005 Phase 1 \n",
"131 Completed March 1999 June 2000 Phase 2 \n",
"132 Completed April 6, 1999 December 6, 2017 Phase 3 \n",
"134 Completed April 1999 August 2000 Phase 3 \n",
"136 Terminated July 26, 1999 January 25, 2013 Phase 2 \n",
"137 Completed July 20, 1999 May 19, 2015 Phase 1 \n",
"138 Completed August 1999 May 2010 Phase 2 \n",
"139 Completed September 1999 December 2001 Phase 1 \n",
"144 Completed August 1999 July 2002 Phase 2 \n",
"148 Completed March 12, 1999 September 23, 2008 Phase 2 \n",
"150 Completed February 1999 March 2005 Phase 2 \n",
"155 Completed December 1999 June 2001 Phase 1 \n",
"156 Completed December 1999 February 2004 Phase 2 \n",
"158 Unknown status December 1971 NaN Phase 2 \n",
"159 Unknown status April 1971 NaN Phase 3 \n",
"160 Completed May 1986 April 2002 Phase 3 \n",
"161 Completed November 1987 September 2001 Phase 2 \n",
"162 Completed April 1988 NaN Phase 3 \n",
"163 Unknown status September 1987 NaN Phase 3 \n",
"164 Completed April 1988 July 1991 Phase 2 \n",
"165 Active, not recruiting September 1989 NaN Phase 3 \n",
"166 Completed February 1989 January 2008 Phase 2 \n",
"\n",
" study_type brief_summary \\\n",
"17 Interventional \\n Patients with Grade II and III soft ti... \n",
"18 Interventional \\n Patients with Grade I soft tissue sarc... \n",
"20 Interventional \\n This study is designed to evaluate the... \n",
"21 Interventional \\n This protocol is designed to test the ... \n",
"22 Interventional \\n The study is designed to determine if ... \n",
"25 Interventional \\n Major improvements in the treatment of... \n",
"26 Interventional \\n To evaluate a dose intensive chemother... \n",
"27 Interventional \\n The study purpose is to evaluate the c... \n",
"28 Interventional \\n Patients with untreated clinical stage... \n",
"29 Interventional \\n The purpose of this study is to determ... \n",
"30 Interventional \\n This study will evaluate the safety an... \n",
"31 Interventional \\n One current hypothesis as to what limi... \n",
"33 Interventional \\n This is a phase I study to determine t... \n",
"34 Interventional \\n This is a phase I/II study of interleu... \n",
"35 Interventional \\n Patients with CD22(+) B-cell lymphomas... \n",
"36 Interventional \\n This is a Phase I study which addresse... \n",
"37 Interventional \\n Randomized study. Initially, 3 patient... \n",
"39 Interventional \\n Randomized study. All patients must be... \n",
"40 Interventional \\n The clinical study entitled \"A Phase I... \n",
"41 Interventional \\n Malignant brain tumors are responsible... \n",
"42 Interventional \\n Patients with gastric adenocarcinoma a... \n",
"43 Interventional \\n The purpose of this study is to determ... \n",
"44 Interventional \\n The prognosis for patients with metast... \n",
"45 Interventional \\n 5-Drug Combination Chemotherapy with H... \n",
"47 Interventional \\n Patients who have no response to preop... \n",
"48 Interventional \\n Thymidylate synthase (TS), an enzyme w... \n",
"49 Interventional \\n This is a pilot, chemoprevention study... \n",
"51 Interventional \\n Patients with superficial transitional... \n",
"52 Interventional \\n This trial will evaluate, in patients ... \n",
"53 Interventional \\n This is a dosage escalation study to e... \n",
".. ... ... \n",
"109 Interventional \\n Patients with unresectable primary or ... \n",
"115 Interventional \\n Matrix metalloproteinases (MMPs) are a... \n",
"116 Interventional \\n This is a study of a melanoma tumor an... \n",
"120 Interventional \\n Genistein is a natural product found i... \n",
"122 Interventional \\n About 27,000 new cases of renal cell c... \n",
"123 Interventional \\n Patients with metastatic melanoma who ... \n",
"125 Interventional \\n Thyroid cancer is typically treated wi... \n",
"127 Interventional \\n Some drugs have the ability to push st... \n",
"128 Interventional \\n This study will investigate the safety... \n",
"131 Interventional \\n This is a phase II clinical and pharma... \n",
"132 Interventional \\n In 1997, the Genetics Department of th... \n",
"134 Interventional \\n In patients who are receiving intraven... \n",
"136 Interventional \\n This study will examine whether vaccin... \n",
"137 Interventional \\n Allogeneic peripheral blood stem cell ... \n",
"138 Interventional \\n This experiment will test the safety a... \n",
"139 Interventional \\n Oxaliplatin is an experimental anti-ca... \n",
"144 Interventional \\n The purpose of this study is to compar... \n",
"148 Interventional \\n The goal of this research study is to ... \n",
"150 Interventional \\n This study will examine the use of eta... \n",
"155 Interventional \\n Tumor resistance to anti-cancer drugs ... \n",
"156 Interventional \\n This 2-part study will evaluate the ef... \n",
"158 Interventional \\n RATIONALE: Vaccines made from papillom... \n",
"159 Interventional \\n RATIONALE: Immunotherapy uses differen... \n",
"160 Interventional \\n RATIONALE: Bone marrow transplantation... \n",
"161 Interventional \\n RATIONALE: Monoclonal antibodies can l... \n",
"162 Interventional \\n RATIONALE: Radiation therapy uses high... \n",
"163 Interventional \\n RATIONALE: Estrogen can stimulate the ... \n",
"164 Interventional \\n RATIONALE: Drugs used in chemotherapy ... \n",
"165 Interventional \\n RATIONALE: Drugs used in chemotherapy ... \n",
"166 Interventional \\n RATIONALE: Drugs used in chemotherapy ... \n",
"\n",
" detailed_description enrollment \\\n",
"17 \\n Patients with Grade II and III soft ti... 100.0 \n",
"18 \\n This is a randomized study. Patients u... 150.0 \n",
"20 \\n This study is designed to evaluate the... 200.0 \n",
"21 \\n This protocol is designed to test the ... 120.0 \n",
"22 \\n The study is designed to determine if ... 260.0 \n",
"25 \\n Major improvements in the treatment of... 120.0 \n",
"26 \\n To evaluate a dose intensive chemother... 100.0 \n",
"27 \\n The study purpose is to evaluate the c... 30.0 \n",
"28 \\n A prospective randomized trial evaluat... 130.0 \n",
"29 \\n The purpose of this study is to determ... 65.0 \n",
"30 \\n Previous studies at the NIH have demon... 100.0 \n",
"31 \\n The purpose of this study is to assess... 70.0 \n",
"33 \\n Phase I study to determine the maximal... 100.0 \n",
"34 \\n This is a phase I/II study of interleu... 85.0 \n",
"35 \\n Patients with CD22(+) B-cell lymphomas... 24.0 \n",
"36 \\n This is a Phase I study which addresse... 60.0 \n",
"37 \\n Patients with locally advanced melanom... 122.0 \n",
"39 \\n Patients with primary, high-grade soft... 150.0 \n",
"40 \\n The clinical study entitled \"A Phase I... 80.0 \n",
"41 \\n Malignant brain tumors are responsible... 15.0 \n",
"42 \\n Patients with gastric adenocarcinoma a... 50.0 \n",
"43 \\n The purpose of this study is to determ... 30.0 \n",
"44 \\n The prognosis for patients with metast... 90.0 \n",
"45 \\n Background:\\n\\n The treatment of ... 348.0 \n",
"47 \\n This is a study of infusional doxorubi... 42.0 \n",
"48 \\n Thymidylate synthase (TS), an enzyme w... 60.0 \n",
"49 \\n This is a pilot chemo-prevention study... 75.0 \n",
"51 \\n Patients with superficial transitional... 18.0 \n",
"52 \\n This trial will evaluate, in patients ... 75.0 \n",
"53 \\n The clinical study entitled \"A Phase I... 52.0 \n",
".. ... ... \n",
"109 \\n Patients with unresectable primary or ... 30.0 \n",
"115 \\n Matrix metalloproteinases (MMPs) are a... 35.0 \n",
"116 \\n This is a study of a melanoma tumor an... 114.0 \n",
"120 \\n Genistein is a natural product found i... 15.0 \n",
"122 \\n About 27,000 new cases of renal cell c... 6.0 \n",
"123 \\n Patients with metastatic melanoma who ... 141.0 \n",
"125 \\n This is a multi-centered, open-labeled... 20.0 \n",
"127 \\n Some drugs, such as hematopoietic cyto... 32.0 \n",
"128 \\n Chronic Granulomatous Disease (CGD) is... 60.0 \n",
"131 \\n This is a phase II clinical and pharma... 20.0 \n",
"132 \\n In October 1995 the National Naval Med... 170.0 \n",
"134 \\n In patients who are receiving intraven... 84.0 \n",
"136 \\n P53 is the most commonly mutated gene ... 21.0 \n",
"137 \\n Allogeneic peripheral blood stem cell ... 110.0 \n",
"138 \\n Patients with metastatic melanoma who ... 170.0 \n",
"139 \\n Oxaliplatin is a diaminocyclohexane pl... 60.0 \n",
"144 \\n The purpose of this study is to compar... 5.0 \n",
"148 \\n The main objective of this study is to... 84.0 \n",
"150 \\n The purpose of the study is to assess ... 60.0 \n",
"155 \\n Intrinsic and acquired drug resistance... 30.0 \n",
"156 \\n The proposed clinical trial will consi... 150.0 \n",
"158 \\n OBJECTIVES: I. Determine the immune re... NaN \n",
"159 \\n OBJECTIVES:\\n\\n - Determine th... NaN \n",
"160 \\n OBJECTIVES: I. Determine the efficacy ... NaN \n",
"161 \\n OBJECTIVES: I. Evaluate the efficacy o... NaN \n",
"162 \\n OBJECTIVES: I. Compare the rates of pe... 224.0 \n",
"163 \\n OBJECTIVES: I. Determine, in a prospec... NaN \n",
"164 \\n OBJECTIVES: I. Determine the antitumor... 35.0 \n",
"165 \\n OBJECTIVES: I. Compare relapse-free su... 615.0 \n",
"166 \\n OBJECTIVES: I. Determine the efficacy ... 4.0 \n",
"\n",
" condition \\\n",
"17 sarcoma \n",
"18 neoplasms \n",
"20 neoplasm metastasis \n",
"21 sarcoma, ewing's \n",
"22 osteosarcoma \n",
"25 lymphoma, small noncleaved-cell \n",
"26 breast neoplasms \n",
"27 lymphoma, t-cell, cutaneous \n",
"28 neoplasm metastasis \n",
"29 meningeal neoplasm \n",
"30 wegener's granulomatosis \n",
"31 prostatic neoplasm \n",
"33 neoplasm metastasis \n",
"34 testicular neoplasms \n",
"35 b cell lymphoma \n",
"36 ovarian neoplasms \n",
"37 melanoma \n",
"39 sarcoma \n",
"40 ovarian cancer \n",
"41 neoplasm metastasis \n",
"42 stomach neoplasms \n",
"43 meningeal neoplasms \n",
"44 rhabdomyosarcoma \n",
"45 gray zone lymphoma \n",
"47 adrenal cortical carcinoma \n",
"48 neoplasm \n",
"49 breast neoplasms \n",
"51 carcinoma, transitional cell \n",
"52 prostatic neoplasms \n",
"53 ovarian cancer \n",
".. ... \n",
"109 neoplasm metastasis \n",
"115 renal cell carcinoma \n",
"116 neoplasm metastasis \n",
"120 cancer \n",
"122 renal cell carcinoma \n",
"123 neoplasm metastasis \n",
"125 thyroid neoplasms \n",
"127 multiple myeloma \n",
"128 chronic granulomatous disease \n",
"131 stomach neoplasms \n",
"132 ovarian cancer \n",
"134 stomatitis \n",
"136 ovarian neoplasm \n",
"137 non hodgkin's lymphoma \n",
"138 neoplasm metastasis \n",
"139 neoplasm metastasis \n",
"144 pituitary neoplasm \n",
"148 neoplasm metastasis \n",
"150 wegener's granulomatosis \n",
"155 ovarian cancer \n",
"156 temporomandibular joint disorder \n",
"158 precancerous condition \n",
"159 melanoma (skin) \n",
"160 lymphoma \n",
"161 neuroblastoma \n",
"162 sarcoma \n",
"163 breast cancer \n",
"164 lymphoma \n",
"165 lymphoma \n",
"166 brain and central nervous system tumors \n",
"\n",
" intervention_name \\\n",
"17 radiation therapy following surgery \n",
"18 radiotherapy \n",
"20 Melphalan \n",
"21 Vincristine, Doxorubicin, Ifosfamide, Cyclopho... \n",
"22 pre-surgical chemotherapy \n",
"25 Sargramostim \n",
"26 Sargramostim \n",
"27 Antibodies, Daclizumab \n",
"28 preoperative dose intense chemotherapy (FLAC/G... \n",
"29 Mafosfamide, Cyclophosphamide \n",
"30 Methotrexate, Prednisone \n",
"31 Leuprolide, Flutamide, Suramin \n",
"33 Fluorouracil, Cyclophosphamide, Doxorubicin, L... \n",
"34 Etoposide, Ifosfamide, Isophosphamide mustard \n",
"35 Immunotoxins \n",
"36 Cisplatin, Cyclophosphamide, Paclitaxel, Album... \n",
"37 Interferons, Melphalan, Interferon-gamma \n",
"39 Doxorubicin, Liposomal doxorubicin, Ifosfamide... \n",
"40 polysaccharide-K \n",
"41 Ganciclovir, Ganciclovir triphosphate \n",
"42 CHPP with cisplatin \n",
"43 Topotecan \n",
"44 Topotecan, Dexrazoxane, Razoxane \n",
"45 Rituximab \n",
"47 Doxorubicin, Liposomal doxorubicin, Etoposide,... \n",
"48 Raltitrexed \n",
"49 Tamoxifen, Retinamide \n",
"51 Suramin \n",
"52 PROSTVAC \n",
"53 Paclitaxel, Albumin-Bound Paclitaxel, polysacc... \n",
".. ... \n",
"109 Melphalan \n",
"115 Tissue Inhibitor of Metalloproteinases, Matrix... \n",
"116 Vaccines \n",
"120 Genistein \n",
"122 Vaccines, \"Freunds Adjuvant\" \n",
"123 Vaccines \n",
"125 Hormones \n",
"127 Stemgen \n",
"128 Nexell Isolex with T-cell Depletion, Baxter is... \n",
"131 Antibodies, Rituximab, Immunotoxins, Antitoxins \n",
"132 Genetic Education and Counseling, Genetic Educ... \n",
"134 Interleukin-2, Nystatin \n",
"136 Vaccines, Sargramostim, \"Freunds Adjuvant\", Al... \n",
"137 Th2 cells in allo HSCTT, Th2 Cells \n",
"138 Cyclophosphamide, Fludarabine phosphate, Fluda... \n",
"139 Oxaliplatin \n",
"144 Octreotide \n",
"148 Methotrexate, Cyclosporine, Cyclosporins \n",
"150 Etanercept \n",
"155 Vinorelbine \n",
"156 Celecoxib, Etanercept \n",
"158 Vaccines \n",
"159 Corynebacterium granulosum P40, adjuvant therapy \n",
"160 Methotrexate, Cyclosporine, Cyclosporins \n",
"161 Antibodies, Immunoglobulins, Antibodies, Monoc... \n",
"162 radiation therapy \n",
"163 Tamoxifen, Goserelin \n",
"164 Cisplatin, Cyclophosphamide, Etoposide, Etopos... \n",
"165 Doxorubicin, Liposomal doxorubicin, Bleomycin,... \n",
"166 Methotrexate, Prednisone, Vincristine, Procarb... \n",
"\n",
" eligibility condition_cancer \\\n",
"17 \\n patients must have biopsy-proven sof... True \n",
"18 \\n disease characteristics:\\n\\n ... True \n",
"20 \\n patients must have a histologically ... True \n",
"21 \\n patients with high grade soft tissue... True \n",
"22 \\n must be less than or equal to 30 yea... True \n",
"25 \\n high risk protocol: patients with sm... True \n",
"26 \\n all stage iii or clinical t3n0 or tx... True \n",
"27 \\n disease characteristics:\\n\\n ... True \n",
"28 \\n inclusion criteria\\n\\n women ... True \n",
"29 \\n inclusion criteria:\\n\\n all p... True \n",
"30 \\n inclusion criteria:\\n\\n diagn... True \n",
"31 \\n inclusion criteria:\\n\\n patie... True \n",
"33 \\n patients with stage iv (metastatic) ... True \n",
"34 \\n a history of pathologically document... True \n",
"35 \\n patients with a histologic diagnosis... True \n",
"36 \\n all patients must have biopsy proven... True \n",
"37 \\n disease characteristics:\\n\\n ... True \n",
"39 \\n disease characteristics:\\n\\n ... True \n",
"40 \\n biopsy proven metastatic cancer, for... True \n",
"41 \\n - inclusion criteria:\\n\\n al... True \n",
"42 \\n patients age greater than or equal t... True \n",
"43 \\n disease characteristics:\\n\\n ... True \n",
"44 \\n the patient must fall into one of th... True \n",
"45 \\n - inclusion criteria:\\n\\n no... True \n",
"47 \\n biopsy-proven primary or recurrent a... True \n",
"48 \\n disease characteristics:\\n\\n ... True \n",
"49 \\n population characteristics:\\n\\n ... True \n",
"51 \\n disease characteristics:\\n\\n ... True \n",
"52 \\n disease characteristics:\\n\\n ... True \n",
"53 \\n biopsy proven advanced cancer, for w... True \n",
".. ... ... \n",
"109 \\n histologically or cytologically prov... True \n",
"115 \\n inclusion criteria:\\n\\n all p... True \n",
"116 \\n any patient 16 years of age or older... True \n",
"120 \\n must be 18 years old or greater.\\n\\n... True \n",
"122 \\n inclusion criteria:\\n\\n - ... True \n",
"123 \\n any patient age greater than or equa... True \n",
"125 \\n patients greater than or equal to 18... True \n",
"127 \\n inclusion criteria\\n\\n age 70... True \n",
"128 \\n inclusion criteria:\\n\\n patie... True \n",
"131 \\n patients must have advanced stage so... True \n",
"132 \\n - inclusion criteria:\\n\\n at... True \n",
"134 \\n all patients enrolled on high dose i... True \n",
"136 \\n - inclusion criteria:\\n\\n pa... True \n",
"137 \\n - inclusion criteria - patient:\\n\\n... True \n",
"138 \\n - inclusion criteria\\n\\n -... True \n",
"139 \\n patients must have histologically co... True \n",
"144 \\n inclusion criteria:\\n\\n male ... True \n",
"148 \\n - inclusion criteria:\\n\\n pa... True \n",
"150 \\n inclusion criteria:\\n\\n docum... True \n",
"155 \\n age greater than or equal to 18 year... True \n",
"156 \\n celecoxib study:\\n\\n inclusio... True \n",
"158 \\n disease characteristics: diagnosis o... True \n",
"159 \\n disease characteristics:\\n\\n ... True \n",
"160 \\n disease characteristics: ongoing bon... True \n",
"161 \\n disease characteristics: histologica... True \n",
"162 \\n disease characteristics: histologica... True \n",
"163 \\n disease characteristics: operable, c... True \n",
"164 \\n disease characteristics: diagnosis o... True \n",
"165 \\n disease characteristics: histologica... True \n",
"166 \\n disease characteristics: histologica... True \n",
"\n",
" eligible \\\n",
"17 NaN \n",
"18 NaN \n",
"20 NaN \n",
"21 NaN \n",
"22 NaN \n",
"25 NaN \n",
"26 NaN \n",
"27 NaN \n",
"28 NaN \n",
"29 \\n\\n all patients over 3 years of age w... \n",
"30 \\n\\n diagnosis: wegener's granulomatosi... \n",
"31 \\n\\n patients must have a histologic di... \n",
"33 NaN \n",
"34 NaN \n",
"35 NaN \n",
"36 NaN \n",
"37 NaN \n",
"39 NaN \n",
"40 NaN \n",
"41 \\n\\n all adults, greater than 18 years ... \n",
"42 NaN \n",
"43 NaN \n",
"44 NaN \n",
"45 \\n\\n non-hodgkin's lymphomas in the fol... \n",
"47 NaN \n",
"48 NaN \n",
"49 NaN \n",
"51 NaN \n",
"52 NaN \n",
"53 NaN \n",
".. ... \n",
"109 NaN \n",
"115 \\n\\n all patients with refractory solid... \n",
"116 NaN \n",
"120 NaN \n",
"122 \\n\\n - patients must be 18 years of ... \n",
"123 NaN \n",
"125 NaN \n",
"127 NaN \n",
"128 \\n\\n patient criteria:\\n\\n ages ... \n",
"131 NaN \n",
"132 \\n\\n at least one of the following:\\n\\n... \n",
"134 NaN \n",
"136 \\n\\n patients must be 18 years of age o... \n",
"137 NaN \n",
"138 NaN \n",
"139 NaN \n",
"144 \\n\\n male or female patients, 18 years ... \n",
"148 \\n\\n patients:\\n\\n patients with... \n",
"150 \\n\\n documentation of wegener's granulo... \n",
"155 NaN \n",
"156 \\n\\n recruitment will include patients ... \n",
"158 NaN \n",
"159 NaN \n",
"160 NaN \n",
"161 NaN \n",
"162 NaN \n",
"163 NaN \n",
"164 NaN \n",
"165 NaN \n",
"166 NaN \n",
"\n",
" ineligible \n",
"17 NaN \n",
"18 NaN \n",
"20 NaN \n",
"21 NaN \n",
"22 NaN \n",
"25 NaN \n",
"26 NaN \n",
"27 NaN \n",
"28 NaN \n",
"29 \\n\\n patients receiving other therapy (... \n",
"30 \\n\\n evidence of infection by gram stai... \n",
"31 NaN \n",
"33 NaN \n",
"34 NaN \n",
"35 NaN \n",
"36 NaN \n",
"37 NaN \n",
"39 NaN \n",
"40 NaN \n",
"41 \\n\\n no pregnant women will be entered ... \n",
"42 NaN \n",
"43 NaN \n",
"44 NaN \n",
"45 NaN \n",
"47 NaN \n",
"48 NaN \n",
"49 NaN \n",
"51 NaN \n",
"52 NaN \n",
"53 NaN \n",
".. ... \n",
"109 NaN \n",
"115 \\n\\n active infection, including positi... \n",
"116 NaN \n",
"120 NaN \n",
"122 \\n\\n - any condition that does not f... \n",
"123 NaN \n",
"125 NaN \n",
"127 NaN \n",
"128 \\n\\n patient or donor pregnant.\\n\\n ... \n",
"131 NaN \n",
"132 \\n\\n patients will be considered inelig... \n",
"134 NaN \n",
"136 \\n\\n any condition that does not fit wi... \n",
"137 NaN \n",
"138 NaN \n",
"139 NaN \n",
"144 \\n\\n patients demonstrating intolerance... \n",
"148 \\n\\n patient:\\n\\n pregnant or la... \n",
"150 \\n\\n patients with evidence of bacteria... \n",
"155 NaN \n",
"156 \\n\\n subjects who had undergone any tmj... \n",
"158 NaN \n",
"159 NaN \n",
"160 NaN \n",
"161 NaN \n",
"162 NaN \n",
"163 NaN \n",
"164 NaN \n",
"165 NaN \n",
"166 NaN \n",
"\n",
"[100 rows x 17 columns]"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head(100)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\Amrit\\Anaconda3\\envs\\ML\\lib\\site-packages\\ipykernel_launcher.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.\n",
" \n"
]
},
{
"data": {
"text/plain": [
"(34494, 17)"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2=df[df['eligible'].notnull()]\n",
"df3=df2[df['ineligible'].notnull()]\n",
"df3.shape"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 34494 entries, 29 to 61776\n",
"Data columns (total 17 columns):\n",
"nct_id 34494 non-null object\n",
"brief_title 34494 non-null object\n",
"official_title 33991 non-null object\n",
"overall_status 34494 non-null object\n",
"start_date 34494 non-null object\n",
"completion_date 32854 non-null object\n",
"phase 27400 non-null object\n",
"study_type 34494 non-null object\n",
"brief_summary 34493 non-null object\n",
"detailed_description 22562 non-null object\n",
"enrollment 34382 non-null float64\n",
"condition 34494 non-null object\n",
"intervention_name 34494 non-null object\n",
"eligibility 34494 non-null object\n",
"condition_cancer 34494 non-null bool\n",
"eligible 34494 non-null object\n",
"ineligible 34494 non-null object\n",
"dtypes: bool(1), float64(1), object(15)\n",
"memory usage: 4.5+ MB\n"
]
}
],
"source": [
"df3.info()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 648x432 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"eligible_length = df3['eligible'].map(len)\n",
"\n",
"plt.figure(figsize=(9,6))\n",
"plt.hist(eligible_length)\n",
"plt.xlabel('Length (characters)', fontsize=16)\n",
"plt.ylabel('Inclusion', fontsize=16)\n",
"plt.savefig('./image/inclusion.png', bbox_inches = \"tight\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "0",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-36-54d0bfb05f43>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m#df.description[0]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0mText\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdf3\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0meligible\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0mText\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Anaconda3\\envs\\ML\\lib\\site-packages\\pandas\\core\\series.py\u001b[0m in \u001b[0;36m__getitem__\u001b[1;34m(self, key)\u001b[0m\n\u001b[0;32m 866\u001b[0m \u001b[0mkey\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 867\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 868\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_value\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 869\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 870\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mis_scalar\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32m~\\Anaconda3\\envs\\ML\\lib\\site-packages\\pandas\\core\\indexes\\base.py\u001b[0m in \u001b[0;36mget_value\u001b[1;34m(self, series, key)\u001b[0m\n\u001b[0;32m 4373\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4374\u001b[0m return self._engine.get_value(s, k,\n\u001b[1;32m-> 4375\u001b[1;33m tz=getattr(series.dtype, 'tz', None))\n\u001b[0m\u001b[0;32m 4376\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mKeyError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me1\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4377\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>\u001b[0m \u001b[1;36m0\u001b[0m \u001b[1;32mand\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mholds_integer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mor\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mis_boolean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_value\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas/_libs/index.pyx\u001b[0m in \u001b[0;36mpandas._libs.index.IndexEngine.get_loc\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[1;34m()\u001b[0m\n",
"\u001b[1;32mpandas/_libs/hashtable_class_helper.pxi\u001b[0m in \u001b[0;36mpandas._libs.hashtable.Int64HashTable.get_item\u001b[1;34m()\u001b[0m\n",
"\u001b[1;31mKeyError\u001b[0m: 0"
]
}
],
"source": [
"#df.description[0]\n",
"Text=df3.eligible[0]\n",
"Text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import preprocessing\n",
"from wordcloud import WordCloud, STOPWORDS\n",
"Text=df3['eligible'][:0]\n",
"\n",
"wc=WordCloud().generate(Text)\n",
"plt.figure(figsize=(16,12))\n",
"plt.imshow(wc, interpolation='bilinear')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ineligible_length = df3['eligible'].map(len)\n",
"\n",
"plt.figure(figsize=(9,6))\n",
"plt.hist(ineligible_length)\n",
"plt.xlabel('Length (characters)', fontsize=16)\n",
"plt.ylabel('Exclusion', fontsize=16)\n",
"plt.savefig('./image/Exclusion.png', bbox_inches = \"tight\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"df4 = df3[['condition','intervention_name', 'eligible', 'ineligible']]\n",
"df4.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df4.condition = df4.condition.str.lower()\n",
"df4.condition = df4.condition.str.replace('/', ' ')\n",
"df4.condition = df4.condition.str.replace('-', ' ')\n",
"df4.condition = df4.condition.str.strip()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df4.condition.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"condition_select = ['breast cancer', 'prostate cancer' 'colorectal cancer', 'lung cancer',\n",
" 'multiple myeloma', 'lymphoma', 'non-small cell lung cancer'\n",
" 'pancreatic cancer', 'head and neck cancer', 'hepatocellular carcinoma',\n",
" 'melanoma', 'gastric cancer', 'ovarian cancer', 'solid tumors',\n",
" 'metastatic breast cancer']\n",
"\n",
"df10 = df4\n",
"def add_condition(condition):\n",
" for catagory in condition_select:\n",
" if catagory == condition:\n",
" return catagory\n",
" \n",
" return None\n",
"\n",
"df10['cancer_type'] = df10.condition.apply(add_condition)\n",
"df10.head(100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df11 = df10[['cancer_type', 'eligible', 'ineligible']]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df11 = df11.loc[df11.cancer_type.notnull()]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df11.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df11.to_csv('df11.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df12 = df11.groupby('cancer_type')['eligible'].apply(' '.join).reset_index()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df12.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df12.cancer_type.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df12.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df12.to_csv('df12.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df4.eligible[30]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wordcounts for eligible\n",
"df4['word_count_eligible'] = df4['eligible'].apply(lambda x: len(str(x).split(\" \")))\n",
"df4['word_count_ineligible'] = df4['ineligible'].apply(lambda x: len(str(x).split(\" \")))\n",
"df4[['word_count_eligible','word_count_ineligible']].head().sort_values('word_count_eligible', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df4.word_count_eligible.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df4.word_count_ineligible.describe()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Identify common words\n",
"freq_eli = pd.Series(' '.join(df4['eligible']).split()).value_counts()[:20]\n",
"freq_eli"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#Identify uncommon words\n",
"least_eli = pd.Series(' '.join(df4['eligible']).split()).value_counts()[-20:]\n",
"least_eli"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"freq_in = pd.Series(' '.join(df4['ineligible']).split()).value_counts()[:20]\n",
"freq_in"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"clean_text = []\n",
"#def cleanText(file):\n",
"for index in range(len(df4.eligible)):\n",
" #Remove punctuations\n",
" text = re.sub('[^a-zA-Z]', ' ', str(index))\n",
"\n",
" #Convert to lowercase\n",
" text = text.lower()\n",
"\n",
" # Additiona characters\n",
" #text_col_tags = re.sub(\"[!@#$+%*:()'-]\", ' ', text_col_lower)\n",
"\n",
" #remove tags\n",
" text=re.sub(\"</?.*?>\",\" <> \",text)\n",
"\n",
" # remove special characters and digits\n",
" #text=re.sub(\"(\\\\d|\\\\W)+\",\" \",text)\n",
"\n",
" ##Convert to list from string\n",
" text = text.split()\n",
"\n",
"# ##Stemming\n",
"# ps=PorterStemmer()\n",
"# text = [ps.stem(word) for word in text] \n",
"\n",
" #Lemmatisation\n",
"# word_lemma = WordNetLemmatizer()\n",
"# text = [word_lemma.lemmatize(word) for word in text] \n",
" text = \" \".join(text)\n",
" clean_text.append(text)\n",
" \n",
"df4['clean_text'] = clean_text"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df4.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"X = df4.eligible\n",
"tfidf= TfidfVectorizer(ngram_range = (1,3), min_df = 10,sublinear_tf=True)\n",
"X =tfidf.fit_transform(X.ravel())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"y = df4.condition"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(X.shape)\n",
"print(y.shape)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import string\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.naive_bayes import MultinomialNB\n",
"from sklearn.feature_extraction.text import TfidfTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.metrics import confusion_matrix,classification_report\n",
"from sklearn.feature_extraction.text import CountVectorizer\n",
"from sklearn.svm import SVC\n",
"from sklearn import metrics\n",
"from sklearn.feature_extraction.text import TfidfTransformer\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.svm import LinearSVC\n",
"import nltk\n",
"from nltk.corpus import stopwords\n",
"from nltk.tokenize import word_tokenize\n",
"from nltk.tokenize import sent_tokenize\n",
"from nltk.stem import WordNetLemmatizer \n",
"from nltk.corpus import stopwords\n",
"from os import path\n",
"from PIL import Image\n",
"#from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator\n",
"#from imblearn.over_sampling import SMOTE\n",
"#from imblearn.combine import SMOTEENN\n",
"from sklearn.utils import class_weight\n",
"#import os"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hold out 30% of the rows for evaluation; the fixed seed keeps the split reproducible.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    X, y, test_size=0.30, random_state=42\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Multinomial naive Bayes with a small smoothing term; fit() returns the\n",
"# estimator itself, so it can be chained and then displayed.\n",
"mnb = MultinomialNB(alpha=0.001, fit_prior=True).fit(X_train, y_train)\n",
"mnb"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mnb_predictions = mnb.predict(X_test)\n",
"print(confusion_matrix(y_test, mnb_predictions))\n",
"\n",
"\n",
"def _print_metric(heading, scorer, **scorer_kwargs):\n",
"    \"\"\"Print a heading line, then scorer(y_test, mnb_predictions, **scorer_kwargs).\"\"\"\n",
"    print('\\n', heading)\n",
"    print(scorer(y_test, mnb_predictions, **scorer_kwargs))\n",
"\n",
"\n",
"_print_metric('Classification report', classification_report)\n",
"# Overall accuracy\n",
"_print_metric('accuracy Score', metrics.accuracy_score)\n",
"# F1 averaged over the classes, weighted by class support\n",
"_print_metric('f1 Score', metrics.f1_score, average='weighted')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import string\n",
"\n",
"# Translation table that deletes every character listed in string.punctuation.\n",
"_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)\n",
"\n",
"\n",
"def text_process(mess, stop_words=None):\n",
"    \"\"\"\n",
"    Takes in a string of text, then performs the following:\n",
"    1. Remove all punctuation (every character in string.punctuation)\n",
"    2. Split the remaining text on whitespace\n",
"    3. Optionally drop stop words\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    mess : str\n",
"        Text to tokenize.\n",
"    stop_words : collection of str, optional\n",
"        Lower-case words to discard; each token is compared via word.lower().\n",
"        With the default (None) no token is filtered out.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of str\n",
"        The tokens in their original order and casing.\n",
"    \"\"\"\n",
"    # A single translate() pass replaces the per-character membership test.\n",
"    tokens = mess.translate(_PUNCTUATION_TABLE).split()\n",
"\n",
"    if stop_words is None:\n",
"        return tokens\n",
"\n",
"    # Build the lookup once instead of re-reading a stop-word list for every token.\n",
"    blocked = set(stop_words)\n",
"    return [word for word in tokens if word.lower() not in blocked]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tokenize every eligibility text; 'new' holds one list of tokens per row.\n",
"df4['new'] = df4['eligible'].map(text_process)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview df4 together with the tokenized 'new' column.\n",
"df4.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# One row per condition: all eligibility texts of that condition joined with single spaces.\n",
"df5 = (\n",
"    df4.groupby('condition')['eligible']\n",
"    .apply(' '.join)\n",
"    .reset_index()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rows per condition label in the grouped frame.\n",
"df5['condition'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Exact condition labels to keep. Every entry needs its own comma: Python\n",
"# concatenates adjacent string literals, which would silently merge two labels.\n",
"condition_select = [\n",
"    'breast cancer', 'prostate cancer', 'colorectal cancer', 'lung cancer',\n",
"    'multiple myeloma', 'lymphoma', 'non-small cell lung cancer',\n",
"    'pancreatic cancer', 'head and neck cancer', 'hepatocellular carcinoma',\n",
"    'melanoma', 'gastric cancer', 'ovarian cancer', 'solid tumors',\n",
"    'metastatic breast cancer',\n",
"]\n",
"\n",
"# df5['condition_select'] = df5.condition.str.contains('|'.join(condition_select))\n",
"# df5.head()\n",
"\n",
"def add_condition(condition):\n",
"    \"\"\"Return `condition` when it equals one of the labels in condition_select.\n",
"\n",
"    The comparison is an exact, case-sensitive match. Anything else,\n",
"    including missing values, yields None.\n",
"    \"\"\"\n",
"    return condition if condition in condition_select else None\n",
"\n",
"# Tag each condition with its label when it is one of the selected cancer types (None otherwise).\n",
"df5['cancer_type'] = df5['condition'].map(add_condition)\n",
"df5.head(n=100)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the conditions that were tagged with one of the selected cancer types.\n",
"# df5 has no 'condition_select' column (the line creating it is commented out);\n",
"# the tag written by add_condition lives in 'cancer_type'.\n",
"df6 = df5[df5['cancer_type'].notna()]\n",
"df6.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Column dtypes and non-null counts of the filtered frame.\n",
"df6.info()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Rows per condition label that survived the filter.\n",
"df6['condition'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Eligibility texts of the trials labelled exactly 'breast cancer' (row mask and column picked in one .loc call).\n",
"df4.loc[df4['condition'] == 'breast cancer', 'eligible']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Inspect a single eligibility text, looked up with key 30\n",
"# (presumably an index label — assumes df4 keeps an integer index; TODO confirm).\n",
"df4['eligible'][30]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#cleanText(df4.eligible)\n",
"# Print every row position of the eligibility column, one number per line.\n",
"for position in range(df4['eligible'].size):\n",
"    print(position)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Try the section split on a random sample of 10 trials (no seed: the sample differs on every run).\n",
"dp6 = df.sample(n=10)\n",
"dp6['eligibility'] = dp6['eligibility'].str.lower()\n",
"\n",
"# The pattern is a capturing group, so the split output keeps the matched marker:\n",
"# part 0 is the text before the first marker, part 1 the marker itself and\n",
"# part 2 the text that follows it (up to the next marker, if there is one).\n",
"dp6['eli'] = dp6['eligibility'].str.split('(inclusion criteria:)').str[2]\n",
"dp6.head(n=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Plain-text dump of the extracted sections, without the index column.\n",
"print(dp6['eli'].to_string(index=False))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Print each extracted section in full, one after the other.\n",
"for section_text in dp6['eli']:\n",
"    print(section_text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#dp.eligibility.str.split('inclusion criteria:')\n",
"\n",
"# Keywords combined into one regex alternation (keyword1|keyword2|...).\n",
"cancer = ['inclusion criteria', 'neoplasm', 'oma', 'tumor']\n",
"pattern = '|'.join(cancer)\n",
"pattern"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Trials whose lower-cased eligibility text mentions 'inclusion criteria'.\n",
"df['eligibility'] = df['eligibility'].str.lower()\n",
"df['eligible'] = df['eligibility'].str.contains('inclusion criteria')\n",
"# eq(True) rather than the bare mask: rows with missing text are NaN and are left out.\n",
"df2 = df[df['eligible'].eq(True)]\n",
"df2.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Trials whose lower-cased eligibility text does NOT mention 'inclusion criteria'.\n",
"df['eligibility'] = df['eligibility'].str.lower()\n",
"df['eligible'] = df['eligibility'].str.contains('inclusion criteria')\n",
"# eq(False) keeps only explicit non-matches; rows with missing text (NaN) are left out.\n",
"df4 = df[df['eligible'].eq(False)]\n",
"print(df4.shape)\n",
"df4.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# First row, column position 13. In the frame as loaded from cancerTrials.csv that position\n",
"# is 'eligibility' (see the column list in the first table output) — assumes df4 still has\n",
"# that column order; TODO confirm.\n",
"df4.iloc[0,13]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scratch input: a tiny Series of three strings for trying out the .str accessor.\n",
"pd.Series(['a1', 'b2', 'c3'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Scratch check of str.extract: the pattern has two capture groups, so the result has\n",
"# one column per group; 'c3' does not match [ab], so its row is NaN.\n",
"pd.Series(['a1', 'b2', 'c3']).str.extract(r'([ab])(\\d)', expand=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Trials whose lower-cased eligibility text mentions 'exclusion criteria'.\n",
"df['eligibility'] = df['eligibility'].str.lower()\n",
"df['ineligible'] = df['eligibility'].str.contains('exclusion criteria')\n",
"# eq(True) rather than the bare mask: rows with missing text are NaN and are left out.\n",
"df3 = df[df['ineligible'].eq(True)]\n",
"df3.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Trials whose lower-cased eligibility text does NOT mention 'exclusion criteria'.\n",
"df['eligibility'] = df['eligibility'].str.lower()\n",
"df['ineligible'] = df['eligibility'].str.contains('exclusion criteria')\n",
"# eq(False) keeps only explicit non-matches; rows with missing text (NaN) are left out.\n",
"df5 = df[df['ineligible'].eq(False)]\n",
"print(df5.shape)\n",
"df5.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# dp.eligibility = dp.eligibility.str.lower()\n",
"# dp2 = dp.eligibility.str.split('(*inclusion criteria*:|*exclusion criteria*:)').apply(pd.Series)[[2,4]].rename(columns={2: 'eligible', 4: 'ineligible'})\n",
"# dp2.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Draw 10 random trials to experiment on (no seed: the sample differs on every run).\n",
"dp = df.sample(n=10)\n",
"dp.head(n=5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#dp = df.sample(100)\n",
"dp['eligibility'] = dp['eligibility'].str.lower()\n",
"\n",
"# Raw string: '\\*' is not a valid escape sequence in a normal string literal and\n",
"# triggers a warning; the regex itself is unchanged.\n",
"# The capturing group keeps each matched header in the split output, so for a text with an\n",
"# 'inclusion criteria:' header followed by an 'exclusion criteria:' header the parts are\n",
"# [prefix, header, inclusion text, header, exclusion text]; positions 2 and 4 hold the texts.\n",
"section_pattern = r'(\\**inclusion criteria\\**:|\\**exclusion criteria\\**:)'\n",
"dp3 = (\n",
"    dp['eligibility'].str.split(section_pattern)\n",
"    .apply(pd.Series)[[2, 4]]\n",
"    .rename(columns={2: 'eligible', 4: 'ineligible'})\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Dump the sampled eligibility texts as a NumPy array.\n",
"print(dp['eligibility'].values)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Path of a single trial record used to try out the parser.\n",
"file_path = \"./data/search_result/NCT00000479.xml\"\n",
"\n",
"# parse_XML is not defined in this part of the notebook; presumably an earlier cell provides it.\n",
"parse_XML(file_path)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show the tag and attributes of every element yielded by iterating `root`.\n",
"for element in root:\n",
"    print(element.tag, element.attrib)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from xmljson import badgerfish as bf\n",
"from xml.etree.ElementTree import fromstring"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Run the xmljson module from the shell on one trial XML file, using the badgerfish dialect (-d).\n",
"!python -m xmljson -d badgerfish ./data/search_result/NCT00000479.xml"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"%%bash\n",
"# Convert every .xml file in the current directory with xmljson (yahoo dialect),\n",
"# writing one <name>.json per input file.\n",
"# Run as a bash cell so that $f stays a shell variable: in a `!` line IPython expands\n",
"# $name from the Python namespace first, and `json` is an imported module here.\n",
"# NOTE(review): output goes to ./json/<name>.json — confirm the intended target directory.\n",
"mkdir -p json\n",
"for f in *.xml; do\n",
"    [ -e \"$f\" ] || continue  # the glob stays literal when nothing matches\n",
"    python -m xmljson -d yahoo \"$f\" > \"json/${f%.xml}.json\"\n",
"done"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview the first rows of df.\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tag and attribute dictionary of each element obtained by iterating `root`.\n",
"for node in root:\n",
"    print(node.tag, node.attrib)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dfcols = ['clinical_study', 'nct_id', 'brief_title']\n",
"\n",
"# Collect one row per element tagged 'data' and build the frame once. The loop body\n",
"# was not indented (a SyntaxError), and growing a frame with DataFrame.append inside\n",
"# a loop is quadratic; the method was removed in pandas 2.0.\n",
"# FIXME(review): each row carries two values (the 'id' and 'name' attributes) while\n",
"# dfcols names three columns, so pandas raises as soon as one element matches —\n",
"# decide which attributes belong to which columns.\n",
"# NOTE(review): `etree` is not defined in the visible cells (the notebook imports\n",
"# xml.etree.ElementTree as `et`) — confirm what it should refer to.\n",
"# NOTE(review): this rebinds `df`, replacing the trial frame loaded from the CSV.\n",
"rows = [[i.get('id'), i.get('name')] for i in etree.iter(tag='data')]\n",
"df = pd.DataFrame(rows, columns=dfcols)\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Preview the first rows of `trials` (not defined in this part of the notebook; presumably set in an earlier cell).\n",
"trials.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Retrieve Tweets\n",
"MaxTweets = 10000000  # upper bound handed to Cursor.items()\n",
"tweetsPerQry = 100\n",
"fName = 'tweets.txt' # Storage name\n",
"tweetCount = 0\n",
"\n",
"print(\"Downloading max {0} tweets\".format(MaxTweets))\n",
"with open(fName, 'w') as f:\n",
"    tweet_cursor = tweepy.Cursor(api.search, q=query, wait_on_rate_limit=True)\n",
"    for tweet in tweet_cursor.items(MaxTweets):\n",
"        # Skip tweets that carry no place info\n",
"        if tweet.place is None:\n",
"            continue\n",
"\n",
"        # One JSON document per line; only written tweets are counted\n",
"        f.write(jsonpickle.encode(tweet._json, unpicklable=False) + '\\n')\n",
"        tweetCount += 1\n",
"\n",
"    # Display how many tweets we have collected\n",
"    print(\"Downloaded {0} tweets\".format(tweetCount))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Checking the data\n",
"tweet._json"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Manual paging through the search API: each request asks for tweets older than the\n",
"# oldest one seen so far (max_id), and tweets with place info are written to the\n",
"# output file as one JSON document per line.\n",
"out_path = 'PoGo_USA_Tutorial.json'\n",
"max_consecutive_errors = 5  # stop retrying once this many requests fail in a row\n",
"consecutive_errors = 0\n",
"\n",
"max_id = -1\n",
"tweetCount = 0\n",
"with open(out_path, 'w') as f:\n",
"    #While we still want to collect more tweets\n",
"    while tweetCount < MaxTweets:\n",
"        try:\n",
"            #Look for more tweets, resuming where we left off\n",
"            if max_id <= 0:\n",
"                new_tweets = api.search(q=query, count=tweetsPerQry)\n",
"            else:\n",
"                new_tweets = api.search(q=query, count=tweetsPerQry, max_id=str(max_id - 1))\n",
"        except tweepy.TweepError as e:\n",
"            #Print the error and retry, but give up when the API keeps failing:\n",
"            #an unbounded retry never terminates on a persistent error\n",
"            print(\"some error : \" + str(e))\n",
"            consecutive_errors += 1\n",
"            if consecutive_errors >= max_consecutive_errors:\n",
"                print(\"Giving up after {0} consecutive errors\".format(consecutive_errors))\n",
"                break\n",
"            continue\n",
"\n",
"        consecutive_errors = 0\n",
"\n",
"        #If we didn't find any exit the loop\n",
"        if not new_tweets:\n",
"            print(\"No more tweets found\")\n",
"            break\n",
"\n",
"        #Write the JSON output of any new tweets we found to the output file\n",
"        for tweet in new_tweets:\n",
"\n",
"            #Make sure the tweet has place info before writing\n",
"            if (tweet.place is not None) and (tweetCount < MaxTweets):\n",
"                f.write(jsonpickle.encode(tweet._json, unpicklable=False) +\n",
"                        '\\n')\n",
"                tweetCount += 1\n",
"\n",
"        #Display how many tweets we have collected\n",
"        print(\"Downloaded {0} tweets\".format(tweetCount))\n",
"\n",
"        #Record the id of the last tweet we looked at\n",
"        max_id = new_tweets[-1].id\n",
"\n",
"\n",
"# Report the file that was actually written (fName names a different file).\n",
"print (\"Downloaded {0} tweets, Saved to {1}\".format(tweetCount, out_path))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Search for multiple phrases joined with \"or\" (for pharma companies).\n",
"# The backslash line continuations sit inside the string literal, so the leading\n",
"# whitespace of every continued line is part of the query text.\n",
"# NOTE(review): Twitter's search syntax presumably expects an upper-case OR operator and\n",
"# double quotes around exact phrases — confirm that this query matches as intended.\n",
"Allquery = \"'Johnson & Johnson' or 'J & J' or 'J&J' or 'JNJ' or '$JNJ' \\\n",
" or 'Pfizer' or 'pfizer' or 'PFE' or '$PFE' \\\n",
" or 'Novartis' or 'novartis' or 'NOVN' or '$NOVN' \\\n",
" or 'Roche' or 'roche' or 'ROG' or '$ROG' \\\n",
" or 'Merck & Co.' or 'Merck' or 'MRK' or '$MRK' \\\n",
" or 'AbbVie' or 'abbvie' or 'ABBV' or '$ABBV' \\\n",
" or 'Eli Lilly' or 'EliLilly' or 'LLY' or '$LLY' \\\n",
" or 'Sanofi' or 'sanofi' or 'SAN' or '$SAN' \\\n",
" or 'AstraZeneca' or 'astrazeneca' or 'AZN' or '$AZN' \\\n",
" or 'GlaxoSmithKline' or 'GSK' or '$GSK'\""
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}