Diff of /1-Clean_Data.ipynb [000000] .. [1654c6]

Switch to side-by-side view

--- a
+++ b/1-Clean_Data.ipynb
@@ -0,0 +1,180 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "<h1><center>Clean and Organize Data for the I-SPY1 Clinical Trial</center></h1>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style>\n",
+       "    .dataframe thead tr:only-child th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: left;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "      <th>White</th>\n",
+       "      <th>ER+</th>\n",
+       "      <th>PR+</th>\n",
+       "      <th>HR+</th>\n",
+       "      <th>Bilateral</th>\n",
+       "      <th>Right_Breast</th>\n",
+       "      <th>MRI_LD_Baseline</th>\n",
+       "      <th>MRI_LD_1_3dAC</th>\n",
+       "      <th>MRI_LD_Int_Reg</th>\n",
+       "      <th>MRI_LD_PreSurg</th>\n",
+       "      <th>Alive</th>\n",
+       "      <th>Survival_length</th>\n",
+       "      <th>RFS</th>\n",
+       "      <th>RFS_code</th>\n",
+       "      <th>PCR</th>\n",
+       "      <th>RCB</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>SUBJECTID</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1001</th>\n",
+       "      <td>38.73</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>No</td>\n",
+       "      <td>88.0</td>\n",
+       "      <td>78.0</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>No</td>\n",
+       "      <td>1264</td>\n",
+       "      <td>751</td>\n",
+       "      <td>1</td>\n",
+       "      <td>No</td>\n",
+       "      <td>2.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1002</th>\n",
+       "      <td>37.79</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>No</td>\n",
+       "      <td>Yes</td>\n",
+       "      <td>29.0</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>66.0</td>\n",
+       "      <td>16.0</td>\n",
+       "      <td>No</td>\n",
+       "      <td>1155</td>\n",
+       "      <td>1043</td>\n",
+       "      <td>1</td>\n",
+       "      <td>No</td>\n",
+       "      <td>3.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             age White  ER+  PR+  HR+ Bilateral Right_Breast  MRI_LD_Baseline  \\\n",
+       "SUBJECTID                                                                       \n",
+       "1001       38.73   Yes  Yes   No  Yes        No           No             88.0   \n",
+       "1002       37.79   Yes  Yes  Yes  Yes        No          Yes             29.0   \n",
+       "\n",
+       "           MRI_LD_1_3dAC  MRI_LD_Int_Reg  MRI_LD_PreSurg Alive  \\\n",
+       "SUBJECTID                                                        \n",
+       "1001                78.0            30.0            14.0    No   \n",
+       "1002                26.0            66.0            16.0    No   \n",
+       "\n",
+       "           Survival_length   RFS  RFS_code PCR  RCB  \n",
+       "SUBJECTID                                            \n",
+       "1001                  1264   751         1  No  2.0  \n",
+       "1002                  1155  1043         1  No  3.0  "
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Project-local cleaning helpers (ispy1 package) plus pandas\n",
+    "from ispy1 import clean_data\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Read the raw clinical/outcome workbook and run the project's cleaning routine\n",
+    "file = './data/I-SPY_1_All_Patient_Clinical_and_Outcome_Data.xlsx'\n",
+    "df = clean_data.clean_my_data(file)\n",
+    "\n",
+    "# Save the cleaned table as CSV (the SUBJECTID index is written as the first column)\n",
+    "df.to_csv('./data/I-SPY_1_clean_data.csv')\n",
+    "# Preview the first two rows; only the last expression of a cell is displayed\n",
+    "df.head(2)"
+   ]
+  }
+ ],
+ "metadata": {
+  "anaconda-cloud": {},
+  "kernelspec": {
+   "display_name": "Python [conda root]",
+   "language": "python",
+   "name": "conda-root-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.5.2"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}