181 lines (180 with data), 5.3 kB
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h1><center> Clean and Organize data for the I-SPY1 Clinical Trial</center></h1>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>White</th>\n",
" <th>ER+</th>\n",
" <th>PR+</th>\n",
" <th>HR+</th>\n",
" <th>Bilateral</th>\n",
" <th>Right_Breast</th>\n",
" <th>MRI_LD_Baseline</th>\n",
" <th>MRI_LD_1_3dAC</th>\n",
" <th>MRI_LD_Int_Reg</th>\n",
" <th>MRI_LD_PreSurg</th>\n",
" <th>Alive</th>\n",
" <th>Survival_length</th>\n",
" <th>RFS</th>\n",
" <th>RFS_code</th>\n",
" <th>PCR</th>\n",
" <th>RCB</th>\n",
" </tr>\n",
" <tr>\n",
" <th>SUBJECTID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1001</th>\n",
" <td>38.73</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>88.0</td>\n",
" <td>78.0</td>\n",
" <td>30.0</td>\n",
" <td>14.0</td>\n",
" <td>No</td>\n",
" <td>1264</td>\n",
" <td>751</td>\n",
" <td>1</td>\n",
" <td>No</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1002</th>\n",
" <td>37.79</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>29.0</td>\n",
" <td>26.0</td>\n",
" <td>66.0</td>\n",
" <td>16.0</td>\n",
" <td>No</td>\n",
" <td>1155</td>\n",
" <td>1043</td>\n",
" <td>1</td>\n",
" <td>No</td>\n",
" <td>3.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age White ER+ PR+ HR+ Bilateral Right_Breast MRI_LD_Baseline \\\n",
"SUBJECTID \n",
"1001 38.73 Yes Yes No Yes No No 88.0 \n",
"1002 37.79 Yes Yes Yes Yes No Yes 29.0 \n",
"\n",
" MRI_LD_1_3dAC MRI_LD_Int_Reg MRI_LD_PreSurg Alive \\\n",
"SUBJECTID \n",
"1001 78.0 30.0 14.0 No \n",
"1002 26.0 66.0 16.0 No \n",
"\n",
" Survival_length RFS RFS_code PCR RCB \n",
"SUBJECTID \n",
"1001 1264 751 1 No 2.0 \n",
"1002 1155 1043 1 No 3.0 "
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# load module by Julio and pandas\n",
"from ispy1 import clean_data\n",
"import pandas as pd\n",
"\n",
"file = './data/I-SPY_1_All_Patient_Clinical_and_Outcome_Data.xlsx'\n",
"df = clean_data.clean_my_data(file)\n",
"df.head(2)\n",
"\n",
"# save clean data in new csv file\n",
"df.to_csv('./data/I-SPY_1_clean_data.csv')\n",
"\n",
"df.head(2)"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}