--- a +++ b/hemolysisnotebook.ipynb @@ -0,0 +1 @@ +{"cells":[{"source":"<a href=\"https://www.kaggle.com/code/alexgu7104/hemolysisnotebook?scriptVersionId=123062613\" target=\"_blank\"><img align=\"left\" alt=\"Kaggle\" title=\"Open in Kaggle\" src=\"https://kaggle.com/static/images/open-in-kaggle.svg\"></a>","metadata":{},"cell_type":"markdown","outputs":[],"execution_count":0},{"cell_type":"code","execution_count":1,"id":"2780ba25","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:27.676692Z","iopub.status.busy":"2023-03-22T21:53:27.676298Z","iopub.status.idle":"2023-03-22T21:53:27.691786Z","shell.execute_reply":"2023-03-22T21:53:27.690498Z"},"papermill":{"duration":0.030596,"end_time":"2023-03-22T21:53:27.696225","exception":false,"start_time":"2023-03-22T21:53:27.665629","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["/kaggle/input/aacc-2023-helpwithhemolysis/hemolysis_index_results.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/redraw_costs.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/train.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/test.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/sample_solution.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/redraw_thresholds.csv\n"]}],"source":["import pandas as pd\n","import os\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n"," for filename in filenames:\n"," print(os.path.join(dirname, filename))"]},{"cell_type":"code","execution_count":2,"id":"67182adc","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:27.713204Z","iopub.status.busy":"2023-03-22T21:53:27.712402Z","iopub.status.idle":"2023-03-22T21:53:28.189968Z","shell.execute_reply":"2023-03-22T21:53:28.189085Z"},"papermill":{"duration":0.487765,"end_time":"2023-03-22T21:53:28.192921","exception":false,"start_time":"2023-03-22T21:53:27.705156","status":"completed"},"tags":[]},"outputs":[],"source":["df=pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/hemolysis_index_results.csv')\n","cost=pd.read_csv(\"/kaggle/input/aacc-2023-helpwithhemolysis/redraw_costs.csv\")"]},{"cell_type":"code","execution_count":3,"id":"2664c5b4","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.206918Z","iopub.status.busy":"2023-03-22T21:53:28.206288Z","iopub.status.idle":"2023-03-22T21:53:28.260062Z","shell.execute_reply":"2023-03-22T21:53:28.259125Z"},"papermill":{"duration":0.063372,"end_time":"2023-03-22T21:53:28.2624","exception":false,"start_time":"2023-03-22T21:53:28.199028","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>COLLECTOR_ID</th>\n"," <th>REDRAW_COUNT</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>COLLECTOR_101</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>COLLECTOR_1013</td>\n"," <td>2</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>COLLECTOR_1028</td>\n"," <td>3</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>COLLECTOR_1029</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>COLLECTOR_1031</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1234</th>\n"," <td>COLLECTOR_981</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1235</th>\n"," <td>COLLECTOR_984</td>\n"," <td>3</td>\n"," </tr>\n"," <tr>\n"," <th>1236</th>\n"," <td>COLLECTOR_989</td>\n"," <td>2</td>\n"," </tr>\n"," <tr>\n"," <th>1237</th>\n"," <td>COLLECTOR_991</td>\n"," <td>1</td>\n"," </tr>\n"," <tr>\n"," <th>1238</th>\n"," <td>COLLECTOR_992</td>\n"," <td>2</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1239 rows × 2 columns</p>\n","</div>"],"text/plain":[" COLLECTOR_ID REDRAW_COUNT\n","0 COLLECTOR_101 1\n","1 COLLECTOR_1013 2\n","2 COLLECTOR_1028 3\n","3 COLLECTOR_1029 1\n","4 COLLECTOR_1031 1\n","... ... ...\n","1234 COLLECTOR_981 1\n","1235 COLLECTOR_984 3\n","1236 COLLECTOR_989 2\n","1237 COLLECTOR_991 1\n","1238 COLLECTOR_992 2\n","\n","[1239 rows x 2 columns]"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["redraw = df.query(\"HEMOLYSIS_INDEX > 75\")\n","df3 = redraw[['COLLECTOR_ID', 'SPECIMEN_ID', 'HEMOLYSIS_INDEX']].copy()\n","df3 = df3.groupby('COLLECTOR_ID')['SPECIMEN_ID'].nunique().reset_index()\n","df3 = df3.rename(columns={'SPECIMEN_ID' : 'REDRAW_COUNT'})\n","df3"]},{"cell_type":"code","execution_count":4,"id":"64140e14","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.277425Z","iopub.status.busy":"2023-03-22T21:53:28.276269Z","iopub.status.idle":"2023-03-22T21:53:28.401313Z","shell.execute_reply":"2023-03-22T21:53:28.40045Z"},"papermill":{"duration":0.13552,"end_time":"2023-03-22T21:53:28.404264","exception":false,"start_time":"2023-03-22T21:53:28.268744","status":"completed"},"scrolled":true,"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>ORDER_ID</th>\n"," <th>ORDERABLE</th>\n"," <th>PATIENT_ID</th>\n"," <th>PATIENT_AGE</th>\n"," <th>PATIENT_SEX</th>\n"," <th>PATIENT_RACE</th>\n"," <th>NURSING_UNIT</th>\n"," <th>ENCOUNTER_TYPE</th>\n"," <th>MEDICAL_SERVICE</th>\n"," <th>SPECIMEN_ID</th>\n"," <th>SPECIMEN_TYPE</th>\n"," <th>COLLECTOR_ID</th>\n"," <th>DRAW_SITE</th>\n"," <th>COLLECTION_DAY</th>\n"," <th>COLLECTION_TIME</th>\n"," <th>HEMOLYSIS_INDEX</th>\n"," <th>REDRAW_COST</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>ORDER_1179508</td>\n"," <td>Vancomycin Tr</td>\n"," <td>PATIENT_131440</td>\n"," <td>44</td>\n"," <td>Female</td>\n"," <td>White</td>\n"," <td>UNIT_10</td>\n"," <td>Intensive Care</td>\n"," <td>Medical</td>\n"," <td>SPECIMEN_543979</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_2732</td>\n"," <td>NaN</td>\n"," <td>6</td>\n"," <td>23:21:00</td>\n"," <td>6.0</td>\n"," <td>162.18</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>ORDER_1181190</td>\n"," <td>Comp Met Plas</td>\n"," <td>PATIENT_58856</td>\n"," <td>75</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_49</td>\n"," <td>Inpatient</td>\n"," <td>Bone Marrow Transplant</td>\n"," <td>SPECIMEN_544650</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_522</td>\n"," <td>NaN</td>\n"," <td>6</td>\n"," <td>23:16:00</td>\n"," <td>8.0</td>\n"," <td>357.15</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>ORDER_1181164</td>\n"," <td>Phos Plas</td>\n"," <td>PATIENT_58856</td>\n"," <td>75</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_49</td>\n"," <td>Inpatient</td>\n"," <td>Bone Marrow Transplant</td>\n"," <td>SPECIMEN_544650</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_522</td>\n"," <td>NaN</td>\n"," <td>6</td>\n"," <td>23:16:00</td>\n"," <td>8.0</td>\n"," <td>357.15</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>ORDER_1181191</td>\n"," <td>Magnesium</td>\n"," <td>PATIENT_58856</td>\n"," <td>75</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_49</td>\n"," <td>Inpatient</td>\n"," <td>Bone Marrow Transplant</td>\n"," <td>SPECIMEN_544650</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_522</td>\n"," <td>NaN</td>\n"," <td>6</td>\n"," <td>23:16:00</td>\n"," <td>8.0</td>\n"," <td>357.15</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>ORDER_1180687</td>\n"," <td>Basic Met Plas</td>\n"," <td>PATIENT_99436</td>\n"," <td>40</td>\n"," <td>Female</td>\n"," <td>White</td>\n"," <td>UNIT_161</td>\n"," <td>Intensive Care</td>\n"," <td>Cardiology</td>\n"," <td>SPECIMEN_544632</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_201</td>\n"," <td>NaN</td>\n"," <td>6</td>\n"," <td>23:18:00</td>\n"," <td>7.0</td>\n"," <td>162.18</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>101169</th>\n"," <td>ORDER_1253155</td>\n"," <td>Phos Plas</td>\n"," <td>PATIENT_74490</td>\n"," <td>80</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_45</td>\n"," <td>Inpatient</td>\n"," <td>Bone Marrow Transplant</td>\n"," <td>SPECIMEN_577685</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_5636</td>\n"," <td>NaN</td>\n"," <td>27</td>\n"," <td>23:10:00</td>\n"," <td>27.0</td>\n"," <td>357.15</td>\n"," </tr>\n"," <tr>\n"," <th>101170</th>\n"," <td>ORDER_1253154</td>\n"," <td>Magnesium</td>\n"," <td>PATIENT_74490</td>\n"," <td>80</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_45</td>\n"," <td>Inpatient</td>\n"," <td>Bone Marrow Transplant</td>\n"," <td>SPECIMEN_577685</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_5636</td>\n"," <td>NaN</td>\n"," <td>27</td>\n"," <td>23:10:00</td>\n"," <td>27.0</td>\n"," <td>357.15</td>\n"," </tr>\n"," <tr>\n"," <th>101171</th>\n"," <td>ORDER_1253157</td>\n"," <td>Uric Acid</td>\n"," <td>PATIENT_74490</td>\n"," <td>80</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_45</td>\n"," <td>Inpatient</td>\n"," <td>Bone Marrow Transplant</td>\n"," <td>SPECIMEN_577685</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_5636</td>\n"," <td>NaN</td>\n"," <td>27</td>\n"," <td>23:10:00</td>\n"," <td>27.0</td>\n"," <td>357.15</td>\n"," </tr>\n"," <tr>\n"," <th>101172</th>\n"," <td>ORDER_1252360</td>\n"," <td>Comp Met Plas</td>\n"," <td>PATIENT_132340</td>\n"," <td>58</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_145</td>\n"," <td>Emergency</td>\n"," <td>Neuro Medicine</td>\n"," <td>SPECIMEN_577345</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_2490</td>\n"," <td>NaN</td>\n"," <td>27</td>\n"," <td>22:55:00</td>\n"," <td>16.0</td>\n"," <td>600.00</td>\n"," </tr>\n"," <tr>\n"," <th>101173</th>\n"," <td>ORDER_1252361</td>\n"," <td>Thyroid Cascade</td>\n"," <td>PATIENT_132340</td>\n"," <td>58</td>\n"," <td>Male</td>\n"," <td>White</td>\n"," <td>UNIT_145</td>\n"," <td>Emergency</td>\n"," <td>Neuro Medicine</td>\n"," <td>SPECIMEN_577345</td>\n"," <td>Blood</td>\n"," <td>COLLECTOR_2490</td>\n"," <td>NaN</td>\n"," <td>27</td>\n"," <td>22:55:00</td>\n"," <td>16.0</td>\n"," <td>600.00</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>101174 rows × 17 columns</p>\n","</div>"],"text/plain":[" ORDER_ID ORDERABLE PATIENT_ID PATIENT_AGE \\\n","0 ORDER_1179508 Vancomycin Tr PATIENT_131440 44 \n","1 ORDER_1181190 Comp Met Plas PATIENT_58856 75 \n","2 ORDER_1181164 Phos Plas PATIENT_58856 75 \n","3 ORDER_1181191 Magnesium PATIENT_58856 75 \n","4 ORDER_1180687 Basic Met Plas PATIENT_99436 40 \n","... ... ... ... ... \n","101169 ORDER_1253155 Phos Plas PATIENT_74490 80 \n","101170 ORDER_1253154 Magnesium PATIENT_74490 80 \n","101171 ORDER_1253157 Uric Acid PATIENT_74490 80 \n","101172 ORDER_1252360 Comp Met Plas PATIENT_132340 58 \n","101173 ORDER_1252361 Thyroid Cascade PATIENT_132340 58 \n","\n"," PATIENT_SEX PATIENT_RACE NURSING_UNIT ENCOUNTER_TYPE \\\n","0 Female White UNIT_10 Intensive Care \n","1 Male White UNIT_49 Inpatient \n","2 Male White UNIT_49 Inpatient \n","3 Male White UNIT_49 Inpatient \n","4 Female White UNIT_161 Intensive Care \n","... ... ... ... ... \n","101169 Male White UNIT_45 Inpatient \n","101170 Male White UNIT_45 Inpatient \n","101171 Male White UNIT_45 Inpatient \n","101172 Male White UNIT_145 Emergency \n","101173 Male White UNIT_145 Emergency \n","\n"," MEDICAL_SERVICE SPECIMEN_ID SPECIMEN_TYPE COLLECTOR_ID \\\n","0 Medical SPECIMEN_543979 Blood COLLECTOR_2732 \n","1 Bone Marrow Transplant SPECIMEN_544650 Blood COLLECTOR_522 \n","2 Bone Marrow Transplant SPECIMEN_544650 Blood COLLECTOR_522 \n","3 Bone Marrow Transplant SPECIMEN_544650 Blood COLLECTOR_522 \n","4 Cardiology SPECIMEN_544632 Blood COLLECTOR_201 \n","... ... ... ... ... \n","101169 Bone Marrow Transplant SPECIMEN_577685 Blood COLLECTOR_5636 \n","101170 Bone Marrow Transplant SPECIMEN_577685 Blood COLLECTOR_5636 \n","101171 Bone Marrow Transplant SPECIMEN_577685 Blood COLLECTOR_5636 \n","101172 Neuro Medicine SPECIMEN_577345 Blood COLLECTOR_2490 \n","101173 Neuro Medicine SPECIMEN_577345 Blood COLLECTOR_2490 \n","\n"," DRAW_SITE COLLECTION_DAY COLLECTION_TIME HEMOLYSIS_INDEX REDRAW_COST \n","0 NaN 6 23:21:00 6.0 162.18 \n","1 NaN 6 23:16:00 8.0 357.15 \n","2 NaN 6 23:16:00 8.0 357.15 \n","3 NaN 6 23:16:00 8.0 357.15 \n","4 NaN 6 23:18:00 7.0 162.18 \n","... ... ... ... ... ... \n","101169 NaN 27 23:10:00 27.0 357.15 \n","101170 NaN 27 23:10:00 27.0 357.15 \n","101171 NaN 27 23:10:00 27.0 357.15 \n","101172 NaN 27 22:55:00 16.0 600.00 \n","101173 NaN 27 22:55:00 16.0 600.00 \n","\n","[101174 rows x 17 columns]"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["df=df.merge(cost,how=\"left\",on=\"ENCOUNTER_TYPE\")\n","df"]},{"cell_type":"code","execution_count":5,"id":"abd61363","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.419528Z","iopub.status.busy":"2023-03-22T21:53:28.419126Z","iopub.status.idle":"2023-03-22T21:53:28.50873Z","shell.execute_reply":"2023-03-22T21:53:28.507649Z"},"papermill":{"duration":0.10034,"end_time":"2023-03-22T21:53:28.511329","exception":false,"start_time":"2023-03-22T21:53:28.410989","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>COLLECTOR_ID</th>\n"," <th>SPECIMEN_COUNT</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>COLLECTOR_101</td>\n"," <td>7</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>COLLECTOR_1013</td>\n"," <td>35</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>COLLECTOR_1028</td>\n"," <td>23</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>COLLECTOR_1029</td>\n"," <td>17</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>COLLECTOR_1031</td>\n"," <td>30</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>2223</th>\n"," <td>COLLECTOR_991</td>\n"," <td>6</td>\n"," </tr>\n"," <tr>\n"," <th>2224</th>\n"," <td>COLLECTOR_992</td>\n"," <td>43</td>\n"," </tr>\n"," <tr>\n"," <th>2225</th>\n"," <td>COLLECTOR_993</td>\n"," <td>23</td>\n"," </tr>\n"," <tr>\n"," <th>2226</th>\n"," <td>COLLECTOR_995</td>\n"," <td>2</td>\n"," </tr>\n"," <tr>\n"," <th>2227</th>\n"," <td>COLLECTOR_999</td>\n"," <td>41</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>2228 rows × 2 columns</p>\n","</div>"],"text/plain":[" COLLECTOR_ID SPECIMEN_COUNT\n","0 COLLECTOR_101 7\n","1 COLLECTOR_1013 35\n","2 COLLECTOR_1028 23\n","3 COLLECTOR_1029 17\n","4 COLLECTOR_1031 30\n","... ... ...\n","2223 COLLECTOR_991 6\n","2224 COLLECTOR_992 43\n","2225 COLLECTOR_993 23\n","2226 COLLECTOR_995 2\n","2227 COLLECTOR_999 41\n","\n","[2228 rows x 2 columns]"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["#calculate number of specimen per collector\n","df2 = df[['COLLECTOR_ID', 'SPECIMEN_ID', 'HEMOLYSIS_INDEX']].copy()\n","df2 = df2.groupby('COLLECTOR_ID')['SPECIMEN_ID'].nunique().reset_index()\n","df2 = df2.rename(columns={'SPECIMEN_ID' : 'SPECIMEN_COUNT'})\n","df2"]},{"cell_type":"code","execution_count":6,"id":"d3c29a12","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.52692Z","iopub.status.busy":"2023-03-22T21:53:28.526523Z","iopub.status.idle":"2023-03-22T21:53:32.456168Z","shell.execute_reply":"2023-03-22T21:53:32.455054Z"},"papermill":{"duration":3.940606,"end_time":"2023-03-22T21:53:32.458766","exception":false,"start_time":"2023-03-22T21:53:28.51816","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>COLLECTOR_ID</th>\n"," <th>median_hi</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>COLLECTOR_101</td>\n"," <td>9.0</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>COLLECTOR_1013</td>\n"," <td>12.0</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>COLLECTOR_1028</td>\n"," <td>8.0</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>COLLECTOR_1029</td>\n"," <td>5.0</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>COLLECTOR_1031</td>\n"," <td>15.5</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>2223</th>\n"," <td>COLLECTOR_991</td>\n"," <td>16.5</td>\n"," </tr>\n"," <tr>\n"," <th>2224</th>\n"," <td>COLLECTOR_992</td>\n"," <td>16.0</td>\n"," </tr>\n"," <tr>\n"," <th>2225</th>\n"," <td>COLLECTOR_993</td>\n"," <td>11.0</td>\n"," </tr>\n"," <tr>\n"," <th>2226</th>\n"," <td>COLLECTOR_995</td>\n"," <td>7.5</td>\n"," </tr>\n"," <tr>\n"," <th>2227</th>\n"," <td>COLLECTOR_999</td>\n"," <td>12.0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>2228 rows × 2 columns</p>\n","</div>"],"text/plain":[" COLLECTOR_ID median_hi\n","0 COLLECTOR_101 9.0\n","1 COLLECTOR_1013 12.0\n","2 COLLECTOR_1028 8.0\n","3 COLLECTOR_1029 5.0\n","4 COLLECTOR_1031 15.5\n","... ... ...\n","2223 COLLECTOR_991 16.5\n","2224 COLLECTOR_992 16.0\n","2225 COLLECTOR_993 11.0\n","2226 COLLECTOR_995 7.5\n","2227 COLLECTOR_999 12.0\n","\n","[2228 rows x 2 columns]"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["#%% compute median hemolysis index per collector\n","def compute_median(X:pd.DataFrame)->float:\n"," '''Return median hemolsysis index across a collectors samples'''\n"," median=X.drop_duplicates(subset='SPECIMEN_ID')['HEMOLYSIS_INDEX'].median()\n"," return(median)\n","\n","median_hi=df.groupby('COLLECTOR_ID').apply(lambda X: compute_median(X))\n","median_hi=median_hi.reset_index().rename(columns={0:'median_hi'})\n","median_hi"]},{"cell_type":"code","execution_count":7,"id":"72738fa1","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.474639Z","iopub.status.busy":"2023-03-22T21:53:32.474213Z","iopub.status.idle":"2023-03-22T21:53:32.501019Z","shell.execute_reply":"2023-03-22T21:53:32.499873Z"},"papermill":{"duration":0.037662,"end_time":"2023-03-22T21:53:32.503448","exception":false,"start_time":"2023-03-22T21:53:32.465786","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>COLLECTOR_ID</th>\n"," <th>ENCOUNTER_TYPE</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>COLLECTOR_2732</td>\n"," <td>Intensive Care</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>COLLECTOR_522</td>\n"," <td>Inpatient</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>COLLECTOR_201</td>\n"," <td>Intensive Care</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>COLLECTOR_3469</td>\n"," <td>Emergency</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>COLLECTOR_2720</td>\n"," <td>Intensive Care</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>2224</th>\n"," <td>COLLECTOR_4046</td>\n"," <td>Inpatient</td>\n"," </tr>\n"," <tr>\n"," <th>2225</th>\n"," <td>COLLECTOR_3746</td>\n"," <td>Outpatient</td>\n"," </tr>\n"," <tr>\n"," <th>2226</th>\n"," <td>COLLECTOR_1858</td>\n"," <td>Emergency</td>\n"," </tr>\n"," <tr>\n"," <th>2227</th>\n"," <td>COLLECTOR_4625</td>\n"," <td>Inpatient</td>\n"," </tr>\n"," <tr>\n"," <th>2228</th>\n"," <td>COLLECTOR_693</td>\n"," <td>Inpatient</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>2229 rows × 2 columns</p>\n","</div>"],"text/plain":[" COLLECTOR_ID ENCOUNTER_TYPE\n","0 COLLECTOR_2732 Intensive Care\n","1 COLLECTOR_522 Inpatient\n","2 COLLECTOR_201 Intensive Care\n","3 COLLECTOR_3469 Emergency\n","4 COLLECTOR_2720 Intensive Care\n","... ... ...\n","2224 COLLECTOR_4046 Inpatient\n","2225 COLLECTOR_3746 Outpatient\n","2226 COLLECTOR_1858 Emergency\n","2227 COLLECTOR_4625 Inpatient\n","2228 COLLECTOR_693 Inpatient\n","\n","[2229 rows x 2 columns]"]},"execution_count":7,"metadata":{},"output_type":"execute_result"}],"source":["#determine specialized encoutner type for each collector\n","df4 = df[['COLLECTOR_ID', 'ENCOUNTER_TYPE']].drop_duplicates(subset='COLLECTOR_ID').reset_index(drop=True).copy()\n","df4"]},{"cell_type":"code","execution_count":8,"id":"d37fa622","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.519691Z","iopub.status.busy":"2023-03-22T21:53:32.519268Z","iopub.status.idle":"2023-03-22T21:53:32.569025Z","shell.execute_reply":"2023-03-22T21:53:32.568114Z"},"papermill":{"duration":0.060923,"end_time":"2023-03-22T21:53:32.571661","exception":false,"start_time":"2023-03-22T21:53:32.510738","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>SPLIT</th>\n"," <th>COLLECTOR_ID</th>\n"," <th>RELEVANCE</th>\n"," <th>median_hi</th>\n"," <th>SPECIMEN_COUNT</th>\n"," <th>REDRAW_COUNT</th>\n"," <th>ENCOUNTER_TYPE</th>\n"," <th>REDRAW_COST</th>\n"," <th>REDRAW_PERCENT</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_13</td>\n"," <td>0.00000</td>\n"," <td>29.0</td>\n"," <td>2.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_2184</td>\n"," <td>0.00000</td>\n"," <td>18.0</td>\n"," <td>13.0</td>\n"," <td>2.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.153846</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_798</td>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>39.0</td>\n"," <td>2.0</td>\n"," <td>Intensive Care</td>\n"," <td>162.18</td>\n"," <td>0.051282</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_492</td>\n"," <td>0.32436</td>\n"," <td>21.0</td>\n"," <td>20.0</td>\n"," <td>2.0</td>\n"," <td>Intensive Care</td>\n"," <td>162.18</td>\n"," <td>0.100000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_2658</td>\n"," <td>0.00000</td>\n"," <td>10.5</td>\n"," <td>26.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1338</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_3798</td>\n"," <td>0.00000</td>\n"," <td>33.0</td>\n"," <td>9.0</td>\n"," <td>1.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.111111</td>\n"," </tr>\n"," <tr>\n"," <th>1339</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_3397</td>\n"," <td>0.00000</td>\n"," <td>12.5</td>\n"," <td>46.0</td>\n"," <td>3.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.065217</td>\n"," </tr>\n"," <tr>\n"," <th>1340</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_33</td>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>43.0</td>\n"," <td>2.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.046512</td>\n"," </tr>\n"," <tr>\n"," <th>1341</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_3001</td>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>3.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1342</th>\n"," <td>TRAIN</td>\n"," <td>COLLECTOR_4667</td>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>6.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1343 rows × 9 columns</p>\n","</div>"],"text/plain":[" SPLIT COLLECTOR_ID RELEVANCE median_hi SPECIMEN_COUNT \\\n","0 TRAIN COLLECTOR_13 0.00000 29.0 2.0 \n","1 TRAIN COLLECTOR_2184 0.00000 18.0 13.0 \n","2 TRAIN COLLECTOR_798 0.00000 13.0 39.0 \n","3 TRAIN COLLECTOR_492 0.32436 21.0 20.0 \n","4 TRAIN COLLECTOR_2658 0.00000 10.5 26.0 \n","... ... ... ... ... ... \n","1338 TRAIN COLLECTOR_3798 0.00000 33.0 9.0 \n","1339 TRAIN COLLECTOR_3397 0.00000 12.5 46.0 \n","1340 TRAIN COLLECTOR_33 0.00000 13.0 43.0 \n","1341 TRAIN COLLECTOR_3001 0.00000 13.0 3.0 \n","1342 TRAIN COLLECTOR_4667 0.00000 13.0 6.0 \n","\n"," REDRAW_COUNT ENCOUNTER_TYPE REDRAW_COST REDRAW_PERCENT \n","0 0.0 Inpatient 357.15 0.000000 \n","1 2.0 Inpatient 357.15 0.153846 \n","2 2.0 Intensive Care 162.18 0.051282 \n","3 2.0 Intensive Care 162.18 0.100000 \n","4 0.0 Inpatient 357.15 0.000000 \n","... ... ... ... ... \n","1338 1.0 Inpatient 357.15 0.111111 \n","1339 3.0 Inpatient 357.15 0.065217 \n","1340 2.0 Inpatient 357.15 0.046512 \n","1341 0.0 Inpatient 357.15 0.000000 \n","1342 0.0 Inpatient 357.15 0.000000 \n","\n","[1343 rows x 9 columns]"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["#join features with train collector ids in sample solution \n","train=pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/train.csv')\n","train=train.merge(median_hi,how='left',on='COLLECTOR_ID')\n","train=train.merge(df2,how='left',on='COLLECTOR_ID')\n","train=train.merge(df3,how='left',on='COLLECTOR_ID')\n","train=train.merge(df4,how='left',on='COLLECTOR_ID')\n","train=train.merge(cost,how='left',on='ENCOUNTER_TYPE')\n","train['REDRAW_COUNT'] = train['REDRAW_COUNT'].fillna(0)\n","train['REDRAW_PERCENT'] = train['REDRAW_COUNT'] / train['SPECIMEN_COUNT']\n","train"]},{"cell_type":"code","execution_count":9,"id":"31377372","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.588663Z","iopub.status.busy":"2023-03-22T21:53:32.588272Z","iopub.status.idle":"2023-03-22T21:53:32.608605Z","shell.execute_reply":"2023-03-22T21:53:32.607614Z"},"papermill":{"duration":0.031705,"end_time":"2023-03-22T21:53:32.611021","exception":false,"start_time":"2023-03-22T21:53:32.579316","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>RELEVANCE</th>\n"," <th>median_hi</th>\n"," <th>REDRAW_COUNT</th>\n"," <th>ENCOUNTER_TYPE</th>\n"," <th>REDRAW_COST</th>\n"," <th>REDRAW_PERCENT</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0.00000</td>\n"," <td>29.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>0.00000</td>\n"," <td>18.0</td>\n"," <td>2.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.153846</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>2.0</td>\n"," <td>Intensive Care</td>\n"," <td>162.18</td>\n"," <td>0.051282</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>0.32436</td>\n"," <td>21.0</td>\n"," <td>2.0</td>\n"," <td>Intensive Care</td>\n"," <td>162.18</td>\n"," <td>0.100000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>0.00000</td>\n"," <td>10.5</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1338</th>\n"," <td>0.00000</td>\n"," <td>33.0</td>\n"," <td>1.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.111111</td>\n"," </tr>\n"," <tr>\n"," <th>1339</th>\n"," <td>0.00000</td>\n"," <td>12.5</td>\n"," <td>3.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.065217</td>\n"," </tr>\n"," <tr>\n"," <th>1340</th>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>2.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.046512</td>\n"," </tr>\n"," <tr>\n"," <th>1341</th>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1342</th>\n"," <td>0.00000</td>\n"," <td>13.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>1343 rows × 6 columns</p>\n","</div>"],"text/plain":[" RELEVANCE median_hi REDRAW_COUNT ENCOUNTER_TYPE REDRAW_COST \\\n","0 0.00000 29.0 0.0 Inpatient 357.15 \n","1 0.00000 18.0 2.0 Inpatient 357.15 \n","2 0.00000 13.0 2.0 Intensive Care 162.18 \n","3 0.32436 21.0 2.0 Intensive Care 162.18 \n","4 0.00000 10.5 0.0 Inpatient 357.15 \n","... ... ... ... ... ... \n","1338 0.00000 33.0 1.0 Inpatient 357.15 \n","1339 0.00000 12.5 3.0 Inpatient 357.15 \n","1340 0.00000 13.0 2.0 Inpatient 357.15 \n","1341 0.00000 13.0 0.0 Inpatient 357.15 \n","1342 0.00000 13.0 0.0 Inpatient 357.15 \n","\n"," REDRAW_PERCENT \n","0 0.000000 \n","1 0.153846 \n","2 0.051282 \n","3 0.100000 \n","4 0.000000 \n","... ... \n","1338 0.111111 \n","1339 0.065217 \n","1340 0.046512 \n","1341 0.000000 \n","1342 0.000000 \n","\n","[1343 rows x 6 columns]"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["#remove redundant columns\n","train = train.drop(['SPLIT', 'SPECIMEN_COUNT', 'COLLECTOR_ID'], axis = 1)\n","train"]},{"cell_type":"code","execution_count":10,"id":"7ca70ba3","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.628728Z","iopub.status.busy":"2023-03-22T21:53:32.627679Z","iopub.status.idle":"2023-03-22T21:53:32.646024Z","shell.execute_reply":"2023-03-22T21:53:32.645228Z"},"papermill":{"duration":0.029364,"end_time":"2023-03-22T21:53:32.648152","exception":false,"start_time":"2023-03-22T21:53:32.618788","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>RELEVANCE</th>\n"," <th>median_hi</th>\n"," <th>REDRAW_COUNT</th>\n"," <th>REDRAW_COST</th>\n"," <th>REDRAW_PERCENT</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0.000000</td>\n"," <td>29.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>0.000000</td>\n"," <td>18.0</td>\n"," <td>2.0</td>\n"," <td>357.15</td>\n"," <td>0.153846</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>0.000000</td>\n"," <td>10.5</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>7</th>\n"," <td>0.912717</td>\n"," <td>10.0</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.045455</td>\n"," </tr>\n"," <tr>\n"," <th>8</th>\n"," <td>0.000000</td>\n"," <td>10.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>1338</th>\n"," <td>0.000000</td>\n"," <td>33.0</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.111111</td>\n"," </tr>\n"," <tr>\n"," <th>1339</th>\n"," <td>0.000000</td>\n"," <td>12.5</td>\n"," <td>3.0</td>\n"," <td>357.15</td>\n"," <td>0.065217</td>\n"," </tr>\n"," <tr>\n"," <th>1340</th>\n"," <td>0.000000</td>\n"," <td>13.0</td>\n"," <td>2.0</td>\n"," <td>357.15</td>\n"," <td>0.046512</td>\n"," </tr>\n"," <tr>\n"," <th>1341</th>\n"," <td>0.000000</td>\n"," <td>13.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>1342</th>\n"," <td>0.000000</td>\n"," <td>13.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>937 rows × 5 columns</p>\n","</div>"],"text/plain":[" RELEVANCE median_hi REDRAW_COUNT REDRAW_COST REDRAW_PERCENT\n","0 0.000000 29.0 0.0 357.15 0.000000\n","1 0.000000 18.0 2.0 357.15 0.153846\n","4 0.000000 10.5 0.0 357.15 0.000000\n","7 0.912717 10.0 1.0 357.15 0.045455\n","8 0.000000 10.0 0.0 357.15 0.000000\n","... ... ... ... ... ...\n","1338 0.000000 33.0 1.0 357.15 0.111111\n","1339 0.000000 12.5 3.0 357.15 0.065217\n","1340 0.000000 13.0 2.0 357.15 0.046512\n","1341 0.000000 13.0 0.0 357.15 0.000000\n","1342 0.000000 13.0 0.0 357.15 0.000000\n","\n","[937 rows x 5 columns]"]},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":["#filter for only Emergency and Inpatient collectors\n","emergency = train.loc[(train['ENCOUNTER_TYPE'] == \"Emergency\") | (train['ENCOUNTER_TYPE'] == \"Inpatient\")]\n","emergency = emergency.drop('ENCOUNTER_TYPE', axis=1)\n","emergency"]},{"cell_type":"code","execution_count":11,"id":"cd38e836","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.66636Z","iopub.status.busy":"2023-03-22T21:53:32.665633Z","iopub.status.idle":"2023-03-22T21:53:33.712071Z","shell.execute_reply":"2023-03-22T21:53:33.710985Z"},"papermill":{"duration":1.058739,"end_time":"2023-03-22T21:53:33.714904","exception":false,"start_time":"2023-03-22T21:53:32.656165","status":"completed"},"tags":[]},"outputs":[],"source":["import pandas as pd\n","import numpy as np\n","import xgboost as xgb\n","from sklearn.model_selection import train_test_split\n","from sklearn.metrics import mean_squared_error"]},{"cell_type":"code","execution_count":12,"id":"de273660","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:33.734773Z","iopub.status.busy":"2023-03-22T21:53:33.734347Z","iopub.status.idle":"2023-03-22T21:53:33.745517Z","shell.execute_reply":"2023-03-22T21:53:33.744214Z"},"papermill":{"duration":0.023539,"end_time":"2023-03-22T21:53:33.747771","exception":false,"start_time":"2023-03-22T21:53:33.724232","status":"completed"},"tags":[]},"outputs":[],"source":["X = emergency.drop('RELEVANCE', axis=1)\n","y = emergency['RELEVANCE']\n","\n","#Select only nonzero relevance scores to train on\n","X=X[y>0]\n","y=y[y>0]\n","\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"]},{"cell_type":"code","execution_count":13,"id":"77edb102","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:33.766276Z","iopub.status.busy":"2023-03-22T21:53:33.765204Z","iopub.status.idle":"2023-03-22T21:53:33.796333Z","shell.execute_reply":"2023-03-22T21:53:33.795338Z"},"papermill":{"duration":0.043286,"end_time":"2023-03-22T21:53:33.799093","exception":false,"start_time":"2023-03-22T21:53:33.755807","status":"completed"},"tags":[]},"outputs":[],"source":["dtrain = xgb.DMatrix(X_train, label=y_train)\n","dtest = xgb.DMatrix(X_test, label=y_test)\n","params = {\n"," \"max_depth\": 15,\n"," \"eta\": 0.3,\n"," \"subsample\": 1.0,\n"," \"colsample_bytree\": 1.0,\n"," \"learning_rate\": 0.35,\n"," \"objective\": \"reg:squarederror\",\n"," \"eval_metric\": \"rmse\"\n","}"]},{"cell_type":"code","execution_count":14,"id":"b4c7d477","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:33.818412Z","iopub.status.busy":"2023-03-22T21:53:33.817959Z","iopub.status.idle":"2023-03-22T21:53:34.129574Z","shell.execute_reply":"2023-03-22T21:53:34.128571Z"},"papermill":{"duration":0.32389,"end_time":"2023-03-22T21:53:34.132568","exception":false,"start_time":"2023-03-22T21:53:33.808678","status":"completed"},"tags":[]},"outputs":[],"source":["num_round = 100\n","bst = xgb.train(params, dtrain, num_round)"]},{"cell_type":"code","execution_count":15,"id":"d188c30e","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.15203Z","iopub.status.busy":"2023-03-22T21:53:34.151511Z","iopub.status.idle":"2023-03-22T21:53:34.162863Z","shell.execute_reply":"2023-03-22T21:53:34.16174Z"},"papermill":{"duration":0.02369,"end_time":"2023-03-22T21:53:34.165843","exception":false,"start_time":"2023-03-22T21:53:34.142153","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["RMSE: 3.329161021152597\n"]}],"source":["preds = bst.predict(dtest)\n","rmse = mean_squared_error(y_test, preds, squared=False)\n","print(\"RMSE:\", rmse)"]},{"cell_type":"code","execution_count":16,"id":"80eca2a2","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.185372Z","iopub.status.busy":"2023-03-22T21:53:34.184923Z","iopub.status.idle":"2023-03-22T21:53:34.231438Z","shell.execute_reply":"2023-03-22T21:53:34.230034Z"},"papermill":{"duration":0.059367,"end_time":"2023-03-22T21:53:34.234876","exception":false,"start_time":"2023-03-22T21:53:34.175509","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>SPLIT</th>\n"," <th>COLLECTOR_ID</th>\n"," <th>median_hi</th>\n"," <th>SPECIMEN_COUNT</th>\n"," <th>REDRAW_COUNT</th>\n"," <th>ENCOUNTER_TYPE</th>\n"," <th>REDRAW_COST</th>\n"," <th>REDRAW_PERCENT</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_2011</td>\n"," <td>32.0</td>\n"," <td>17.0</td>\n"," <td>4.0</td>\n"," <td>Emergency</td>\n"," <td>600.00</td>\n"," <td>0.235294</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_1550</td>\n"," <td>50.0</td>\n"," <td>9.0</td>\n"," <td>3.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.333333</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_4570</td>\n"," <td>8.0</td>\n"," <td>32.0</td>\n"," <td>2.0</td>\n"," <td>Intensive Care</td>\n"," <td>162.18</td>\n"," <td>0.062500</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_3531</td>\n"," <td>9.0</td>\n"," <td>7.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_5020</td>\n"," <td>31.0</td>\n"," <td>18.0</td>\n"," <td>2.0</td>\n"," <td>Emergency</td>\n"," <td>600.00</td>\n"," <td>0.111111</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>890</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_2848</td>\n"," <td>14.0</td>\n"," <td>31.0</td>\n"," <td>1.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.032258</td>\n"," </tr>\n"," <tr>\n"," <th>891</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_1501</td>\n"," <td>1.0</td>\n"," <td>1.0</td>\n"," <td>0.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>892</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_860</td>\n"," <td>72.0</td>\n"," <td>48.0</td>\n"," <td>23.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.479167</td>\n"," </tr>\n"," <tr>\n"," <th>893</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_3200</td>\n"," <td>30.0</td>\n"," <td>3.0</td>\n"," <td>1.0</td>\n"," <td>Inpatient</td>\n"," <td>357.15</td>\n"," <td>0.333333</td>\n"," </tr>\n"," <tr>\n"," <th>894</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_4788</td>\n"," <td>12.0</td>\n"," <td>4.0</td>\n"," <td>1.0</td>\n"," <td>Intensive Care</td>\n"," <td>162.18</td>\n"," <td>0.250000</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>895 rows × 8 columns</p>\n","</div>"],"text/plain":[" SPLIT COLLECTOR_ID median_hi SPECIMEN_COUNT REDRAW_COUNT \\\n","0 TEST COLLECTOR_2011 32.0 17.0 4.0 \n","1 TEST COLLECTOR_1550 50.0 9.0 3.0 \n","2 TEST COLLECTOR_4570 8.0 32.0 2.0 \n","3 TEST COLLECTOR_3531 9.0 7.0 0.0 \n","4 TEST COLLECTOR_5020 31.0 18.0 2.0 \n",".. ... ... ... ... ... \n","890 TEST COLLECTOR_2848 14.0 31.0 1.0 \n","891 TEST COLLECTOR_1501 1.0 1.0 0.0 \n","892 TEST COLLECTOR_860 72.0 48.0 23.0 \n","893 TEST COLLECTOR_3200 30.0 3.0 1.0 \n","894 TEST COLLECTOR_4788 12.0 4.0 1.0 \n","\n"," ENCOUNTER_TYPE REDRAW_COST REDRAW_PERCENT \n","0 Emergency 600.00 0.235294 \n","1 Inpatient 357.15 0.333333 \n","2 Intensive Care 162.18 0.062500 \n","3 Inpatient 357.15 0.000000 \n","4 Emergency 600.00 0.111111 \n",".. ... ... ... \n","890 Inpatient 357.15 0.032258 \n","891 Inpatient 357.15 0.000000 \n","892 Inpatient 357.15 0.479167 \n","893 Inpatient 357.15 0.333333 \n","894 Intensive Care 162.18 0.250000 \n","\n","[895 rows x 8 columns]"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["#calculate features for test set\n","test=pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/test.csv')\n","test=test.merge(median_hi,how='left',on='COLLECTOR_ID')\n","test=test.merge(df2,how='left',on='COLLECTOR_ID')\n","test=test.merge(df3,how='left',on='COLLECTOR_ID')\n","test=test.merge(df4,how='left',on='COLLECTOR_ID')\n","test=test.merge(cost,how='left',on='ENCOUNTER_TYPE')\n","test['REDRAW_COUNT'] = test['REDRAW_COUNT'].fillna(0)\n","test['REDRAW_PERCENT'] = test['REDRAW_COUNT'] / test['SPECIMEN_COUNT']\n","test"]},{"cell_type":"code","execution_count":17,"id":"a2a3c73a","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.253209Z","iopub.status.busy":"2023-03-22T21:53:34.252752Z","iopub.status.idle":"2023-03-22T21:53:34.275157Z","shell.execute_reply":"2023-03-22T21:53:34.273859Z"},"papermill":{"duration":0.034488,"end_time":"2023-03-22T21:53:34.277682","exception":false,"start_time":"2023-03-22T21:53:34.243194","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>median_hi</th>\n"," <th>REDRAW_COUNT</th>\n"," <th>REDRAW_COST</th>\n"," <th>REDRAW_PERCENT</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>32.0</td>\n"," <td>4.0</td>\n"," <td>600.00</td>\n"," <td>0.235294</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>50.0</td>\n"," <td>3.0</td>\n"," <td>357.15</td>\n"," <td>0.333333</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>9.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>31.0</td>\n"," <td>2.0</td>\n"," <td>600.00</td>\n"," <td>0.111111</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>16.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>888</th>\n"," <td>12.5</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.019231</td>\n"," </tr>\n"," <tr>\n"," <th>890</th>\n"," <td>14.0</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.032258</td>\n"," </tr>\n"," <tr>\n"," <th>891</th>\n"," <td>1.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," </tr>\n"," <tr>\n"," <th>892</th>\n"," <td>72.0</td>\n"," <td>23.0</td>\n"," <td>357.15</td>\n"," <td>0.479167</td>\n"," </tr>\n"," <tr>\n"," <th>893</th>\n"," <td>30.0</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.333333</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>635 rows × 4 columns</p>\n","</div>"],"text/plain":[" median_hi REDRAW_COUNT REDRAW_COST REDRAW_PERCENT\n","0 32.0 4.0 600.00 0.235294\n","1 50.0 3.0 357.15 0.333333\n","3 9.0 0.0 357.15 0.000000\n","4 31.0 2.0 600.00 0.111111\n","6 16.0 0.0 357.15 0.000000\n",".. ... ... ... ...\n","888 12.5 1.0 357.15 0.019231\n","890 14.0 1.0 357.15 0.032258\n","891 1.0 0.0 357.15 0.000000\n","892 72.0 23.0 357.15 0.479167\n","893 30.0 1.0 357.15 0.333333\n","\n","[635 rows x 4 columns]"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["test = test.drop(['SPLIT', 'SPECIMEN_COUNT', 'COLLECTOR_ID'], axis = 1)\n","test = test.loc[(test['ENCOUNTER_TYPE'] == \"Emergency\") | (test['ENCOUNTER_TYPE'] == \"Inpatient\")]\n","test = test.drop('ENCOUNTER_TYPE', axis=1)\n","test"]},{"cell_type":"code","execution_count":18,"id":"90502e56","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.297647Z","iopub.status.busy":"2023-03-22T21:53:34.296284Z","iopub.status.idle":"2023-03-22T21:53:34.322929Z","shell.execute_reply":"2023-03-22T21:53:34.321712Z"},"papermill":{"duration":0.039237,"end_time":"2023-03-22T21:53:34.325539","exception":false,"start_time":"2023-03-22T21:53:34.286302","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>median_hi</th>\n"," <th>REDRAW_COUNT</th>\n"," <th>REDRAW_COST</th>\n"," <th>REDRAW_PERCENT</th>\n"," <th>pred</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>32.0</td>\n"," <td>4.0</td>\n"," <td>600.00</td>\n"," <td>0.235294</td>\n"," <td>1.953370</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>50.0</td>\n"," <td>3.0</td>\n"," <td>357.15</td>\n"," <td>0.333333</td>\n"," <td>2.510889</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>9.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," <td>0.304316</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>31.0</td>\n"," <td>2.0</td>\n"," <td>600.00</td>\n"," <td>0.111111</td>\n"," <td>1.311873</td>\n"," </tr>\n"," <tr>\n"," <th>6</th>\n"," <td>16.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," <td>0.332751</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>888</th>\n"," <td>12.5</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.019231</td>\n"," <td>1.501695</td>\n"," </tr>\n"," <tr>\n"," <th>890</th>\n"," <td>14.0</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.032258</td>\n"," <td>1.442110</td>\n"," </tr>\n"," <tr>\n"," <th>891</th>\n"," <td>1.0</td>\n"," <td>0.0</td>\n"," <td>357.15</td>\n"," <td>0.000000</td>\n"," <td>0.356631</td>\n"," </tr>\n"," <tr>\n"," <th>892</th>\n"," <td>72.0</td>\n"," <td>23.0</td>\n"," <td>357.15</td>\n"," <td>0.479167</td>\n"," <td>0.846387</td>\n"," </tr>\n"," <tr>\n"," <th>893</th>\n"," <td>30.0</td>\n"," <td>1.0</td>\n"," <td>357.15</td>\n"," <td>0.333333</td>\n"," <td>1.144415</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>635 rows × 5 columns</p>\n","</div>"],"text/plain":[" median_hi REDRAW_COUNT REDRAW_COST REDRAW_PERCENT pred\n","0 32.0 4.0 600.00 0.235294 1.953370\n","1 50.0 3.0 357.15 0.333333 2.510889\n","3 9.0 0.0 357.15 0.000000 0.304316\n","4 31.0 2.0 600.00 0.111111 1.311873\n","6 16.0 0.0 357.15 0.000000 0.332751\n",".. ... ... ... ... ...\n","888 12.5 1.0 357.15 0.019231 1.501695\n","890 14.0 1.0 357.15 0.032258 1.442110\n","891 1.0 0.0 357.15 0.000000 0.356631\n","892 72.0 23.0 357.15 0.479167 0.846387\n","893 30.0 1.0 357.15 0.333333 1.144415\n","\n","[635 rows x 5 columns]"]},"execution_count":18,"metadata":{},"output_type":"execute_result"}],"source":["dtestset = xgb.DMatrix(test)\n","predictions = bst.predict(dtestset)\n","test[\"pred\"]=predictions\n","test"]},{"cell_type":"code","execution_count":19,"id":"1bce50ce","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.346751Z","iopub.status.busy":"2023-03-22T21:53:34.345566Z","iopub.status.idle":"2023-03-22T21:53:34.373609Z","shell.execute_reply":"2023-03-22T21:53:34.372354Z"},"papermill":{"duration":0.041222,"end_time":"2023-03-22T21:53:34.376143","exception":false,"start_time":"2023-03-22T21:53:34.334921","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>SPLIT</th>\n"," <th>COLLECTOR_ID</th>\n"," <th>pred</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_2011</td>\n"," <td>1.953370</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_1550</td>\n"," <td>2.510889</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_4570</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_3531</td>\n"," <td>0.304316</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_5020</td>\n"," <td>1.311873</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>890</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_2848</td>\n"," <td>1.442110</td>\n"," </tr>\n"," <tr>\n"," <th>891</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_1501</td>\n"," <td>0.356631</td>\n"," </tr>\n"," <tr>\n"," <th>892</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_860</td>\n"," <td>0.846387</td>\n"," </tr>\n"," <tr>\n"," <th>893</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_3200</td>\n"," <td>1.144415</td>\n"," </tr>\n"," <tr>\n"," <th>894</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_4788</td>\n"," <td>NaN</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>895 rows × 3 columns</p>\n","</div>"],"text/plain":[" SPLIT COLLECTOR_ID pred\n","0 TEST COLLECTOR_2011 1.953370\n","1 TEST COLLECTOR_1550 2.510889\n","2 TEST COLLECTOR_4570 NaN\n","3 TEST COLLECTOR_3531 0.304316\n","4 TEST COLLECTOR_5020 1.311873\n",".. ... ... ...\n","890 TEST COLLECTOR_2848 1.442110\n","891 TEST COLLECTOR_1501 0.356631\n","892 TEST COLLECTOR_860 0.846387\n","893 TEST COLLECTOR_3200 1.144415\n","894 TEST COLLECTOR_4788 NaN\n","\n","[895 rows x 3 columns]"]},"execution_count":19,"metadata":{},"output_type":"execute_result"}],"source":["sub=pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/sample_solution.csv')\n","sub = sub.merge(test['pred'], how='left', left_index=True, right_index=True)\n","sub"]},{"cell_type":"code","execution_count":20,"id":"1e81243d","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.396239Z","iopub.status.busy":"2023-03-22T21:53:34.395262Z","iopub.status.idle":"2023-03-22T21:53:34.409089Z","shell.execute_reply":"2023-03-22T21:53:34.408201Z"},"papermill":{"duration":0.026094,"end_time":"2023-03-22T21:53:34.411248","exception":false,"start_time":"2023-03-22T21:53:34.385154","status":"completed"},"scrolled":true,"tags":[]},"outputs":[{"data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>SPLIT</th>\n"," <th>COLLECTOR_ID</th>\n"," <th>pred</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>183</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_5162</td>\n"," <td>36.953739</td>\n"," </tr>\n"," <tr>\n"," <th>163</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_2973</td>\n"," <td>36.892879</td>\n"," </tr>\n"," <tr>\n"," <th>850</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_2028</td>\n"," <td>13.891359</td>\n"," </tr>\n"," <tr>\n"," <th>76</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_1491</td>\n"," <td>11.083419</td>\n"," </tr>\n"," <tr>\n"," <th>786</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_1460</td>\n"," <td>6.416948</td>\n"," </tr>\n"," <tr>\n"," <th>...</th>\n"," <td>...</td>\n"," <td>...</td>\n"," <td>...</td>\n"," </tr>\n"," <tr>\n"," <th>882</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_3097</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>883</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_310</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>884</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_5283</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>889</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_418</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>894</th>\n"," <td>TEST</td>\n"," <td>COLLECTOR_4788</td>\n"," <td>NaN</td>\n"," </tr>\n"," </tbody>\n","</table>\n","<p>895 rows × 3 columns</p>\n","</div>"],"text/plain":[" SPLIT COLLECTOR_ID pred\n","183 TEST COLLECTOR_5162 36.953739\n","163 TEST COLLECTOR_2973 36.892879\n","850 TEST COLLECTOR_2028 13.891359\n","76 TEST COLLECTOR_1491 11.083419\n","786 TEST COLLECTOR_1460 6.416948\n",".. ... ... ...\n","882 TEST COLLECTOR_3097 NaN\n","883 TEST COLLECTOR_310 NaN\n","884 TEST COLLECTOR_5283 NaN\n","889 TEST COLLECTOR_418 NaN\n","894 TEST COLLECTOR_4788 NaN\n","\n","[895 rows x 3 columns]"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["sub=sub.sort_values(by=\"pred\",ascending=False)\n","sub"]},{"cell_type":"code","execution_count":21,"id":"eeb70d41","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.432143Z","iopub.status.busy":"2023-03-22T21:53:34.431499Z","iopub.status.idle":"2023-03-22T21:53:34.443486Z","shell.execute_reply":"2023-03-22T21:53:34.442508Z"},"papermill":{"duration":0.025522,"end_time":"2023-03-22T21:53:34.446147","exception":false,"start_time":"2023-03-22T21:53:34.420625","status":"completed"},"tags":[]},"outputs":[],"source":["sub[['SPLIT','COLLECTOR_ID']].to_csv('mlsub5.csv',index=False)"]},{"cell_type":"code","execution_count":null,"id":"28954832","metadata":{"papermill":{"duration":0.009111,"end_time":"2023-03-22T21:53:34.464601","exception":false,"start_time":"2023-03-22T21:53:34.45549","status":"completed"},"tags":[]},"outputs":[],"source":[]}],"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.12"},"papermill":{"default_parameters":{},"duration":17.610669,"end_time":"2023-03-22T21:53:35.29769","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-03-22T21:53:17.687021","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5} \ No newline at end of file