{"cells":[{"source":"\"Kaggle\"","metadata":{},"cell_type":"markdown","outputs":[],"execution_count":0},{"cell_type":"code","execution_count":1,"id":"2780ba25","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:27.676692Z","iopub.status.busy":"2023-03-22T21:53:27.676298Z","iopub.status.idle":"2023-03-22T21:53:27.691786Z","shell.execute_reply":"2023-03-22T21:53:27.690498Z"},"papermill":{"duration":0.030596,"end_time":"2023-03-22T21:53:27.696225","exception":false,"start_time":"2023-03-22T21:53:27.665629","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["/kaggle/input/aacc-2023-helpwithhemolysis/hemolysis_index_results.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/redraw_costs.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/train.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/test.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/sample_solution.csv\n","/kaggle/input/aacc-2023-helpwithhemolysis/redraw_thresholds.csv\n"]}],"source":["import os\n","\n","import pandas as pd\n","\n","# Print the full path of every file found under the Kaggle input directory.\n","for dirname, _, filenames in os.walk('/kaggle/input'):\n","    for filename in filenames:\n","        print(os.path.join(dirname, filename))"]},
{"cell_type":"code","execution_count":2,"id":"67182adc","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:27.713204Z","iopub.status.busy":"2023-03-22T21:53:27.712402Z","iopub.status.idle":"2023-03-22T21:53:28.189968Z","shell.execute_reply":"2023-03-22T21:53:28.189085Z"},"papermill":{"duration":0.487765,"end_time":"2023-03-22T21:53:28.192921","exception":false,"start_time":"2023-03-22T21:53:27.705156","status":"completed"},"tags":[]},"outputs":[],"source":["# Load the hemolysis index results and the redraw cost lookup table.\n","df = pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/hemolysis_index_results.csv')\n","cost = pd.read_csv(\"/kaggle/input/aacc-2023-helpwithhemolysis/redraw_costs.csv\")"]},{"cell_type":"code","execution_count":3,"id":"2664c5b4","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.206918Z","iopub.status.busy":"2023-03-22T21:53:28.206288Z","iopub.status.idle":"2023-03-22T21:53:28.260062Z","shell.execute_reply":"2023-03-22T21:53:28.259125Z"},"papermill":{"duration":0.063372,"end_time":"2023-03-22T21:53:28.2624","exception":false,"start_time":"2023-03-22T21:53:28.199028","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
COLLECTOR_IDREDRAW_COUNT
0COLLECTOR_1011
1COLLECTOR_10132
2COLLECTOR_10283
3COLLECTOR_10291
4COLLECTOR_10311
.........
1234COLLECTOR_9811
1235COLLECTOR_9843
1236COLLECTOR_9892
1237COLLECTOR_9911
1238COLLECTOR_9922
\n","

1239 rows × 2 columns

\n","
"],"text/plain":[" COLLECTOR_ID REDRAW_COUNT\n","0 COLLECTOR_101 1\n","1 COLLECTOR_1013 2\n","2 COLLECTOR_1028 3\n","3 COLLECTOR_1029 1\n","4 COLLECTOR_1031 1\n","... ... ...\n","1234 COLLECTOR_981 1\n","1235 COLLECTOR_984 3\n","1236 COLLECTOR_989 2\n","1237 COLLECTOR_991 1\n","1238 COLLECTOR_992 2\n","\n","[1239 rows x 2 columns]"]},"execution_count":3,"metadata":{},"output_type":"execute_result"}],"source":["# Rows whose hemolysis index exceeds 75 are the ones counted as redraws.\n","redraw = df.query(\"HEMOLYSIS_INDEX > 75\")\n","\n","# Number of distinct redrawn specimens per collector.\n","df3 = (\n","    redraw.groupby('COLLECTOR_ID')['SPECIMEN_ID']\n","    .nunique()\n","    .reset_index(name='REDRAW_COUNT')\n",")\n","df3"]},{"cell_type":"code","execution_count":4,"id":"64140e14","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.277425Z","iopub.status.busy":"2023-03-22T21:53:28.276269Z","iopub.status.idle":"2023-03-22T21:53:28.401313Z","shell.execute_reply":"2023-03-22T21:53:28.40045Z"},"papermill":{"duration":0.13552,"end_time":"2023-03-22T21:53:28.404264","exception":false,"start_time":"2023-03-22T21:53:28.268744","status":"completed"},"scrolled":true,"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
ORDER_IDORDERABLEPATIENT_IDPATIENT_AGEPATIENT_SEXPATIENT_RACENURSING_UNITENCOUNTER_TYPEMEDICAL_SERVICESPECIMEN_IDSPECIMEN_TYPECOLLECTOR_IDDRAW_SITECOLLECTION_DAYCOLLECTION_TIMEHEMOLYSIS_INDEXREDRAW_COST
0ORDER_1179508Vancomycin TrPATIENT_13144044FemaleWhiteUNIT_10Intensive CareMedicalSPECIMEN_543979BloodCOLLECTOR_2732NaN623:21:006.0162.18
1ORDER_1181190Comp Met PlasPATIENT_5885675MaleWhiteUNIT_49InpatientBone Marrow TransplantSPECIMEN_544650BloodCOLLECTOR_522NaN623:16:008.0357.15
2ORDER_1181164Phos PlasPATIENT_5885675MaleWhiteUNIT_49InpatientBone Marrow TransplantSPECIMEN_544650BloodCOLLECTOR_522NaN623:16:008.0357.15
3ORDER_1181191MagnesiumPATIENT_5885675MaleWhiteUNIT_49InpatientBone Marrow TransplantSPECIMEN_544650BloodCOLLECTOR_522NaN623:16:008.0357.15
4ORDER_1180687Basic Met PlasPATIENT_9943640FemaleWhiteUNIT_161Intensive CareCardiologySPECIMEN_544632BloodCOLLECTOR_201NaN623:18:007.0162.18
......................................................
101169ORDER_1253155Phos PlasPATIENT_7449080MaleWhiteUNIT_45InpatientBone Marrow TransplantSPECIMEN_577685BloodCOLLECTOR_5636NaN2723:10:0027.0357.15
101170ORDER_1253154MagnesiumPATIENT_7449080MaleWhiteUNIT_45InpatientBone Marrow TransplantSPECIMEN_577685BloodCOLLECTOR_5636NaN2723:10:0027.0357.15
101171ORDER_1253157Uric AcidPATIENT_7449080MaleWhiteUNIT_45InpatientBone Marrow TransplantSPECIMEN_577685BloodCOLLECTOR_5636NaN2723:10:0027.0357.15
101172ORDER_1252360Comp Met PlasPATIENT_13234058MaleWhiteUNIT_145EmergencyNeuro MedicineSPECIMEN_577345BloodCOLLECTOR_2490NaN2722:55:0016.0600.00
101173ORDER_1252361Thyroid CascadePATIENT_13234058MaleWhiteUNIT_145EmergencyNeuro MedicineSPECIMEN_577345BloodCOLLECTOR_2490NaN2722:55:0016.0600.00
\n","

101174 rows × 17 columns

\n","
"],"text/plain":[" ORDER_ID ORDERABLE PATIENT_ID PATIENT_AGE \\\n","0 ORDER_1179508 Vancomycin Tr PATIENT_131440 44 \n","1 ORDER_1181190 Comp Met Plas PATIENT_58856 75 \n","2 ORDER_1181164 Phos Plas PATIENT_58856 75 \n","3 ORDER_1181191 Magnesium PATIENT_58856 75 \n","4 ORDER_1180687 Basic Met Plas PATIENT_99436 40 \n","... ... ... ... ... \n","101169 ORDER_1253155 Phos Plas PATIENT_74490 80 \n","101170 ORDER_1253154 Magnesium PATIENT_74490 80 \n","101171 ORDER_1253157 Uric Acid PATIENT_74490 80 \n","101172 ORDER_1252360 Comp Met Plas PATIENT_132340 58 \n","101173 ORDER_1252361 Thyroid Cascade PATIENT_132340 58 \n","\n"," PATIENT_SEX PATIENT_RACE NURSING_UNIT ENCOUNTER_TYPE \\\n","0 Female White UNIT_10 Intensive Care \n","1 Male White UNIT_49 Inpatient \n","2 Male White UNIT_49 Inpatient \n","3 Male White UNIT_49 Inpatient \n","4 Female White UNIT_161 Intensive Care \n","... ... ... ... ... \n","101169 Male White UNIT_45 Inpatient \n","101170 Male White UNIT_45 Inpatient \n","101171 Male White UNIT_45 Inpatient \n","101172 Male White UNIT_145 Emergency \n","101173 Male White UNIT_145 Emergency \n","\n"," MEDICAL_SERVICE SPECIMEN_ID SPECIMEN_TYPE COLLECTOR_ID \\\n","0 Medical SPECIMEN_543979 Blood COLLECTOR_2732 \n","1 Bone Marrow Transplant SPECIMEN_544650 Blood COLLECTOR_522 \n","2 Bone Marrow Transplant SPECIMEN_544650 Blood COLLECTOR_522 \n","3 Bone Marrow Transplant SPECIMEN_544650 Blood COLLECTOR_522 \n","4 Cardiology SPECIMEN_544632 Blood COLLECTOR_201 \n","... ... ... ... ... 
\n","101169 Bone Marrow Transplant SPECIMEN_577685 Blood COLLECTOR_5636 \n","101170 Bone Marrow Transplant SPECIMEN_577685 Blood COLLECTOR_5636 \n","101171 Bone Marrow Transplant SPECIMEN_577685 Blood COLLECTOR_5636 \n","101172 Neuro Medicine SPECIMEN_577345 Blood COLLECTOR_2490 \n","101173 Neuro Medicine SPECIMEN_577345 Blood COLLECTOR_2490 \n","\n"," DRAW_SITE COLLECTION_DAY COLLECTION_TIME HEMOLYSIS_INDEX REDRAW_COST \n","0 NaN 6 23:21:00 6.0 162.18 \n","1 NaN 6 23:16:00 8.0 357.15 \n","2 NaN 6 23:16:00 8.0 357.15 \n","3 NaN 6 23:16:00 8.0 357.15 \n","4 NaN 6 23:18:00 7.0 162.18 \n","... ... ... ... ... ... \n","101169 NaN 27 23:10:00 27.0 357.15 \n","101170 NaN 27 23:10:00 27.0 357.15 \n","101171 NaN 27 23:10:00 27.0 357.15 \n","101172 NaN 27 22:55:00 16.0 600.00 \n","101173 NaN 27 22:55:00 16.0 600.00 \n","\n","[101174 rows x 17 columns]"]},"execution_count":4,"metadata":{},"output_type":"execute_result"}],"source":["# Attach the redraw cost for each row's ENCOUNTER_TYPE.\n","# The merge is guarded so that re-running this cell does not merge the cost\n","# table a second time (which would yield REDRAW_COST_x / REDRAW_COST_y columns).\n","if 'REDRAW_COST' not in df.columns:\n","    df = df.merge(cost, how=\"left\", on=\"ENCOUNTER_TYPE\")\n","df"]},{"cell_type":"code","execution_count":5,"id":"abd61363","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.419528Z","iopub.status.busy":"2023-03-22T21:53:28.419126Z","iopub.status.idle":"2023-03-22T21:53:28.50873Z","shell.execute_reply":"2023-03-22T21:53:28.507649Z"},"papermill":{"duration":0.10034,"end_time":"2023-03-22T21:53:28.511329","exception":false,"start_time":"2023-03-22T21:53:28.410989","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
COLLECTOR_IDSPECIMEN_COUNT
0COLLECTOR_1017
1COLLECTOR_101335
2COLLECTOR_102823
3COLLECTOR_102917
4COLLECTOR_103130
.........
2223COLLECTOR_9916
2224COLLECTOR_99243
2225COLLECTOR_99323
2226COLLECTOR_9952
2227COLLECTOR_99941
\n","

2228 rows × 2 columns

\n","
"],"text/plain":[" COLLECTOR_ID SPECIMEN_COUNT\n","0 COLLECTOR_101 7\n","1 COLLECTOR_1013 35\n","2 COLLECTOR_1028 23\n","3 COLLECTOR_1029 17\n","4 COLLECTOR_1031 30\n","... ... ...\n","2223 COLLECTOR_991 6\n","2224 COLLECTOR_992 43\n","2225 COLLECTOR_993 23\n","2226 COLLECTOR_995 2\n","2227 COLLECTOR_999 41\n","\n","[2228 rows x 2 columns]"]},"execution_count":5,"metadata":{},"output_type":"execute_result"}],"source":["# Number of distinct specimens per collector.\n","df2 = (\n","    df.groupby('COLLECTOR_ID')['SPECIMEN_ID']\n","    .nunique()\n","    .reset_index(name='SPECIMEN_COUNT')\n",")\n","df2"]},{"cell_type":"code","execution_count":6,"id":"d3c29a12","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:28.52692Z","iopub.status.busy":"2023-03-22T21:53:28.526523Z","iopub.status.idle":"2023-03-22T21:53:32.456168Z","shell.execute_reply":"2023-03-22T21:53:32.455054Z"},"papermill":{"duration":3.940606,"end_time":"2023-03-22T21:53:32.458766","exception":false,"start_time":"2023-03-22T21:53:28.51816","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
COLLECTOR_IDmedian_hi
0COLLECTOR_1019.0
1COLLECTOR_101312.0
2COLLECTOR_10288.0
3COLLECTOR_10295.0
4COLLECTOR_103115.5
.........
2223COLLECTOR_99116.5
2224COLLECTOR_99216.0
2225COLLECTOR_99311.0
2226COLLECTOR_9957.5
2227COLLECTOR_99912.0
\n","

2228 rows × 2 columns

\n","
"],"text/plain":[" COLLECTOR_ID median_hi\n","0 COLLECTOR_101 9.0\n","1 COLLECTOR_1013 12.0\n","2 COLLECTOR_1028 8.0\n","3 COLLECTOR_1029 5.0\n","4 COLLECTOR_1031 15.5\n","... ... ...\n","2223 COLLECTOR_991 16.5\n","2224 COLLECTOR_992 16.0\n","2225 COLLECTOR_993 11.0\n","2226 COLLECTOR_995 7.5\n","2227 COLLECTOR_999 12.0\n","\n","[2228 rows x 2 columns]"]},"execution_count":6,"metadata":{},"output_type":"execute_result"}],"source":["#%% compute median hemolysis index per collector\n","def compute_median(X: pd.DataFrame) -> float:\n","    '''Return the median hemolysis index across one collector's specimens.\n","\n","    X holds the rows of a single collector. Several rows can share a\n","    SPECIMEN_ID, so duplicates are dropped first and each specimen\n","    contributes a single HEMOLYSIS_INDEX value to the median.\n","    '''\n","    return X.drop_duplicates(subset='SPECIMEN_ID')['HEMOLYSIS_INDEX'].median()\n","\n","# Only the two columns compute_median reads are handed to apply(), so the\n","# grouping column is not part of each group (newer pandas warns about that).\n","# Naming the resulting Series avoids relying on the implicit column label 0.\n","median_hi = (\n","    df.groupby('COLLECTOR_ID')[['SPECIMEN_ID', 'HEMOLYSIS_INDEX']]\n","    .apply(compute_median)\n","    .rename('median_hi')\n","    .reset_index()\n",")\n","median_hi"]},{"cell_type":"code","execution_count":7,"id":"72738fa1","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.474639Z","iopub.status.busy":"2023-03-22T21:53:32.474213Z","iopub.status.idle":"2023-03-22T21:53:32.501019Z","shell.execute_reply":"2023-03-22T21:53:32.499873Z"},"papermill":{"duration":0.037662,"end_time":"2023-03-22T21:53:32.503448","exception":false,"start_time":"2023-03-22T21:53:32.465786","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
COLLECTOR_IDENCOUNTER_TYPE
0COLLECTOR_2732Intensive Care
1COLLECTOR_522Inpatient
2COLLECTOR_201Intensive Care
3COLLECTOR_3469Emergency
4COLLECTOR_2720Intensive Care
.........
2224COLLECTOR_4046Inpatient
2225COLLECTOR_3746Outpatient
2226COLLECTOR_1858Emergency
2227COLLECTOR_4625Inpatient
2228COLLECTOR_693Inpatient
\n","

2229 rows × 2 columns

\n","
"],"text/plain":[" COLLECTOR_ID ENCOUNTER_TYPE\n","0 COLLECTOR_2732 Intensive Care\n","1 COLLECTOR_522 Inpatient\n","2 COLLECTOR_201 Intensive Care\n","3 COLLECTOR_3469 Emergency\n","4 COLLECTOR_2720 Intensive Care\n","... ... ...\n","2224 COLLECTOR_4046 Inpatient\n","2225 COLLECTOR_3746 Outpatient\n","2226 COLLECTOR_1858 Emergency\n","2227 COLLECTOR_4625 Inpatient\n","2228 COLLECTOR_693 Inpatient\n","\n","[2229 rows x 2 columns]"]},"execution_count":7,"metadata":{},"output_type":"execute_result"}],"source":["# Assign one encounter type to each collector: the type on the first row\n","# in which that collector appears in df.\n","# NOTE(review): this is the first-seen type, not the most frequent one -- confirm intent.\n","df4 = (\n","    df.drop_duplicates(subset='COLLECTOR_ID')\n","    .loc[:, ['COLLECTOR_ID', 'ENCOUNTER_TYPE']]\n","    .reset_index(drop=True)\n",")\n","df4"]},{"cell_type":"code","execution_count":8,"id":"d37fa622","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.519691Z","iopub.status.busy":"2023-03-22T21:53:32.519268Z","iopub.status.idle":"2023-03-22T21:53:32.569025Z","shell.execute_reply":"2023-03-22T21:53:32.568114Z"},"papermill":{"duration":0.060923,"end_time":"2023-03-22T21:53:32.571661","exception":false,"start_time":"2023-03-22T21:53:32.510738","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
SPLITCOLLECTOR_IDRELEVANCEmedian_hiSPECIMEN_COUNTREDRAW_COUNTENCOUNTER_TYPEREDRAW_COSTREDRAW_PERCENT
0TRAINCOLLECTOR_130.0000029.02.00.0Inpatient357.150.000000
1TRAINCOLLECTOR_21840.0000018.013.02.0Inpatient357.150.153846
2TRAINCOLLECTOR_7980.0000013.039.02.0Intensive Care162.180.051282
3TRAINCOLLECTOR_4920.3243621.020.02.0Intensive Care162.180.100000
4TRAINCOLLECTOR_26580.0000010.526.00.0Inpatient357.150.000000
..............................
1338TRAINCOLLECTOR_37980.0000033.09.01.0Inpatient357.150.111111
1339TRAINCOLLECTOR_33970.0000012.546.03.0Inpatient357.150.065217
1340TRAINCOLLECTOR_330.0000013.043.02.0Inpatient357.150.046512
1341TRAINCOLLECTOR_30010.0000013.03.00.0Inpatient357.150.000000
1342TRAINCOLLECTOR_46670.0000013.06.00.0Inpatient357.150.000000
\n","

1343 rows × 9 columns

\n","
"],"text/plain":[" SPLIT COLLECTOR_ID RELEVANCE median_hi SPECIMEN_COUNT \\\n","0 TRAIN COLLECTOR_13 0.00000 29.0 2.0 \n","1 TRAIN COLLECTOR_2184 0.00000 18.0 13.0 \n","2 TRAIN COLLECTOR_798 0.00000 13.0 39.0 \n","3 TRAIN COLLECTOR_492 0.32436 21.0 20.0 \n","4 TRAIN COLLECTOR_2658 0.00000 10.5 26.0 \n","... ... ... ... ... ... \n","1338 TRAIN COLLECTOR_3798 0.00000 33.0 9.0 \n","1339 TRAIN COLLECTOR_3397 0.00000 12.5 46.0 \n","1340 TRAIN COLLECTOR_33 0.00000 13.0 43.0 \n","1341 TRAIN COLLECTOR_3001 0.00000 13.0 3.0 \n","1342 TRAIN COLLECTOR_4667 0.00000 13.0 6.0 \n","\n"," REDRAW_COUNT ENCOUNTER_TYPE REDRAW_COST REDRAW_PERCENT \n","0 0.0 Inpatient 357.15 0.000000 \n","1 2.0 Inpatient 357.15 0.153846 \n","2 2.0 Intensive Care 162.18 0.051282 \n","3 2.0 Intensive Care 162.18 0.100000 \n","4 0.0 Inpatient 357.15 0.000000 \n","... ... ... ... ... \n","1338 1.0 Inpatient 357.15 0.111111 \n","1339 3.0 Inpatient 357.15 0.065217 \n","1340 2.0 Inpatient 357.15 0.046512 \n","1341 0.0 Inpatient 357.15 0.000000 \n","1342 0.0 Inpatient 357.15 0.000000 \n","\n","[1343 rows x 9 columns]"]},"execution_count":8,"metadata":{},"output_type":"execute_result"}],"source":["# Join the per-collector features onto the collector ids listed in train.csv.\n","train = pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/train.csv')\n","for collector_features in (median_hi, df2, df3, df4):\n","    train = train.merge(collector_features, how='left', on='COLLECTOR_ID')\n","train = train.merge(cost, how='left', on='ENCOUNTER_TYPE')\n","\n","# Collectors without a row in df3 get a redraw count of 0.\n","train['REDRAW_COUNT'] = train['REDRAW_COUNT'].fillna(0)\n","# Ratio of redrawn specimens to all specimens for the collector.\n","train['REDRAW_PERCENT'] = train['REDRAW_COUNT'] / train['SPECIMEN_COUNT']\n","train"]},
{"cell_type":"code","execution_count":9,"id":"31377372","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.588663Z","iopub.status.busy":"2023-03-22T21:53:32.588272Z","iopub.status.idle":"2023-03-22T21:53:32.608605Z","shell.execute_reply":"2023-03-22T21:53:32.607614Z"},"papermill":{"duration":0.031705,"end_time":"2023-03-22T21:53:32.611021","exception":false,"start_time":"2023-03-22T21:53:32.579316","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
RELEVANCEmedian_hiREDRAW_COUNTENCOUNTER_TYPEREDRAW_COSTREDRAW_PERCENT
00.0000029.00.0Inpatient357.150.000000
10.0000018.02.0Inpatient357.150.153846
20.0000013.02.0Intensive Care162.180.051282
30.3243621.02.0Intensive Care162.180.100000
40.0000010.50.0Inpatient357.150.000000
.....................
13380.0000033.01.0Inpatient357.150.111111
13390.0000012.53.0Inpatient357.150.065217
13400.0000013.02.0Inpatient357.150.046512
13410.0000013.00.0Inpatient357.150.000000
13420.0000013.00.0Inpatient357.150.000000
\n","

1343 rows × 6 columns

\n","
"],"text/plain":[" RELEVANCE median_hi REDRAW_COUNT ENCOUNTER_TYPE REDRAW_COST \\\n","0 0.00000 29.0 0.0 Inpatient 357.15 \n","1 0.00000 18.0 2.0 Inpatient 357.15 \n","2 0.00000 13.0 2.0 Intensive Care 162.18 \n","3 0.32436 21.0 2.0 Intensive Care 162.18 \n","4 0.00000 10.5 0.0 Inpatient 357.15 \n","... ... ... ... ... ... \n","1338 0.00000 33.0 1.0 Inpatient 357.15 \n","1339 0.00000 12.5 3.0 Inpatient 357.15 \n","1340 0.00000 13.0 2.0 Inpatient 357.15 \n","1341 0.00000 13.0 0.0 Inpatient 357.15 \n","1342 0.00000 13.0 0.0 Inpatient 357.15 \n","\n"," REDRAW_PERCENT \n","0 0.000000 \n","1 0.153846 \n","2 0.051282 \n","3 0.100000 \n","4 0.000000 \n","... ... \n","1338 0.111111 \n","1339 0.065217 \n","1340 0.046512 \n","1341 0.000000 \n","1342 0.000000 \n","\n","[1343 rows x 6 columns]"]},"execution_count":9,"metadata":{},"output_type":"execute_result"}],"source":["# Drop the SPLIT, SPECIMEN_COUNT and COLLECTOR_ID columns from the training table.\n","train = train.drop(columns=['SPLIT', 'SPECIMEN_COUNT', 'COLLECTOR_ID'])\n","train"]},{"cell_type":"code","execution_count":10,"id":"7ca70ba3","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.628728Z","iopub.status.busy":"2023-03-22T21:53:32.627679Z","iopub.status.idle":"2023-03-22T21:53:32.646024Z","shell.execute_reply":"2023-03-22T21:53:32.645228Z"},"papermill":{"duration":0.029364,"end_time":"2023-03-22T21:53:32.648152","exception":false,"start_time":"2023-03-22T21:53:32.618788","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
RELEVANCEmedian_hiREDRAW_COUNTREDRAW_COSTREDRAW_PERCENT
00.00000029.00.0357.150.000000
10.00000018.02.0357.150.153846
40.00000010.50.0357.150.000000
70.91271710.01.0357.150.045455
80.00000010.00.0357.150.000000
..................
13380.00000033.01.0357.150.111111
13390.00000012.53.0357.150.065217
13400.00000013.02.0357.150.046512
13410.00000013.00.0357.150.000000
13420.00000013.00.0357.150.000000
\n","

937 rows × 5 columns

\n","
"],"text/plain":[" RELEVANCE median_hi REDRAW_COUNT REDRAW_COST REDRAW_PERCENT\n","0 0.000000 29.0 0.0 357.15 0.000000\n","1 0.000000 18.0 2.0 357.15 0.153846\n","4 0.000000 10.5 0.0 357.15 0.000000\n","7 0.912717 10.0 1.0 357.15 0.045455\n","8 0.000000 10.0 0.0 357.15 0.000000\n","... ... ... ... ... ...\n","1338 0.000000 33.0 1.0 357.15 0.111111\n","1339 0.000000 12.5 3.0 357.15 0.065217\n","1340 0.000000 13.0 2.0 357.15 0.046512\n","1341 0.000000 13.0 0.0 357.15 0.000000\n","1342 0.000000 13.0 0.0 357.15 0.000000\n","\n","[937 rows x 5 columns]"]},"execution_count":10,"metadata":{},"output_type":"execute_result"}],"source":["# Keep only the rows whose encounter type is Emergency or Inpatient,\n","# then drop the ENCOUNTER_TYPE column itself.\n","emergency = train.loc[train['ENCOUNTER_TYPE'].isin([\"Emergency\", \"Inpatient\"])]\n","emergency = emergency.drop(columns='ENCOUNTER_TYPE')\n","emergency"]},
{"cell_type":"code","execution_count":11,"id":"cd38e836","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:32.66636Z","iopub.status.busy":"2023-03-22T21:53:32.665633Z","iopub.status.idle":"2023-03-22T21:53:33.712071Z","shell.execute_reply":"2023-03-22T21:53:33.710985Z"},"papermill":{"duration":1.058739,"end_time":"2023-03-22T21:53:33.714904","exception":false,"start_time":"2023-03-22T21:53:32.656165","status":"completed"},"tags":[]},"outputs":[],"source":["import pandas as pd\n","import numpy as np\n","import xgboost as xgb\n","from sklearn.model_selection import train_test_split\n","from sklearn.metrics import mean_squared_error"]},
{"cell_type":"code","execution_count":12,"id":"de273660","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:33.734773Z","iopub.status.busy":"2023-03-22T21:53:33.734347Z","iopub.status.idle":"2023-03-22T21:53:33.745517Z","shell.execute_reply":"2023-03-22T21:53:33.744214Z"},"papermill":{"duration":0.023539,"end_time":"2023-03-22T21:53:33.747771","exception":false,"start_time":"2023-03-22T21:53:33.724232","status":"completed"},"tags":[]},"outputs":[],"source":["# Separate the regression target (RELEVANCE) from the feature columns.\n","X = emergency.drop(columns='RELEVANCE')\n","y = emergency['RELEVANCE']\n","\n","# Select only nonzero relevance scores to train on.\n","nonzero = y > 0\n","X = X[nonzero]\n","y = y[nonzero]\n","\n","# Hold out 20% of the rows for evaluation; the seed keeps the split reproducible.\n","X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"]},
{"cell_type":"code","execution_count":13,"id":"77edb102","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:33.766276Z","iopub.status.busy":"2023-03-22T21:53:33.765204Z","iopub.status.idle":"2023-03-22T21:53:33.796333Z","shell.execute_reply":"2023-03-22T21:53:33.795338Z"},"papermill":{"duration":0.043286,"end_time":"2023-03-22T21:53:33.799093","exception":false,"start_time":"2023-03-22T21:53:33.755807","status":"completed"},"tags":[]},"outputs":[],"source":["dtrain = xgb.DMatrix(X_train, label=y_train)\n","dtest = xgb.DMatrix(X_test, label=y_test)\n","\n","# XGBoost documents \"eta\" and \"learning_rate\" as aliases of one parameter, so\n","# only one of them is set; supplying both with different values leaves the\n","# effective step size ambiguous.\n","# NOTE(review): 0.35 is kept as the step size -- confirm it is the intended value.\n","params = {\n","    \"max_depth\": 15,\n","    \"subsample\": 1.0,\n","    \"colsample_bytree\": 1.0,\n","    \"learning_rate\": 0.35,\n","    \"objective\": \"reg:squarederror\",\n","    \"eval_metric\": \"rmse\"\n","}"]},
{"cell_type":"code","execution_count":14,"id":"b4c7d477","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:33.818412Z","iopub.status.busy":"2023-03-22T21:53:33.817959Z","iopub.status.idle":"2023-03-22T21:53:34.129574Z","shell.execute_reply":"2023-03-22T21:53:34.128571Z"},"papermill":{"duration":0.32389,"end_time":"2023-03-22T21:53:34.132568","exception":false,"start_time":"2023-03-22T21:53:33.808678","status":"completed"},"tags":[]},"outputs":[],"source":["# Train the booster for a fixed number of boosting rounds.\n","num_round = 100\n","bst = xgb.train(params, dtrain, num_round)"]},
{"cell_type":"code","execution_count":15,"id":"d188c30e","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.15203Z","iopub.status.busy":"2023-03-22T21:53:34.151511Z","iopub.status.idle":"2023-03-22T21:53:34.162863Z","shell.execute_reply":"2023-03-22T21:53:34.16174Z"},"papermill":{"duration":0.02369,"end_time":"2023-03-22T21:53:34.165843","exception":false,"start_time":"2023-03-22T21:53:34.142153","status":"completed"},"tags":[]},"outputs":[{"name":"stdout","output_type":"stream","text":["RMSE: 3.329161021152597\n"]}],"source":["preds = bst.predict(dtest)\n","# The squared=False argument of mean_squared_error is deprecated in newer\n","# scikit-learn releases, so the root is taken explicitly from the MSE.\n","rmse = float(np.sqrt(mean_squared_error(y_test, preds)))\n","print(\"RMSE:\", rmse)"]},
{"cell_type":"code","execution_count":16,"id":"80eca2a2","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.185372Z","iopub.status.busy":"2023-03-22T21:53:34.184923Z","iopub.status.idle":"2023-03-22T21:53:34.231438Z","shell.execute_reply":"2023-03-22T21:53:34.230034Z"},"papermill":{"duration":0.059367,"end_time":"2023-03-22T21:53:34.234876","exception":false,"start_time":"2023-03-22T21:53:34.175509","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
SPLITCOLLECTOR_IDmedian_hiSPECIMEN_COUNTREDRAW_COUNTENCOUNTER_TYPEREDRAW_COSTREDRAW_PERCENT
0TESTCOLLECTOR_201132.017.04.0Emergency600.000.235294
1TESTCOLLECTOR_155050.09.03.0Inpatient357.150.333333
2TESTCOLLECTOR_45708.032.02.0Intensive Care162.180.062500
3TESTCOLLECTOR_35319.07.00.0Inpatient357.150.000000
4TESTCOLLECTOR_502031.018.02.0Emergency600.000.111111
...........................
890TESTCOLLECTOR_284814.031.01.0Inpatient357.150.032258
891TESTCOLLECTOR_15011.01.00.0Inpatient357.150.000000
892TESTCOLLECTOR_86072.048.023.0Inpatient357.150.479167
893TESTCOLLECTOR_320030.03.01.0Inpatient357.150.333333
894TESTCOLLECTOR_478812.04.01.0Intensive Care162.180.250000
\n","

895 rows × 8 columns

\n","
"],"text/plain":[" SPLIT COLLECTOR_ID median_hi SPECIMEN_COUNT REDRAW_COUNT \\\n","0 TEST COLLECTOR_2011 32.0 17.0 4.0 \n","1 TEST COLLECTOR_1550 50.0 9.0 3.0 \n","2 TEST COLLECTOR_4570 8.0 32.0 2.0 \n","3 TEST COLLECTOR_3531 9.0 7.0 0.0 \n","4 TEST COLLECTOR_5020 31.0 18.0 2.0 \n",".. ... ... ... ... ... \n","890 TEST COLLECTOR_2848 14.0 31.0 1.0 \n","891 TEST COLLECTOR_1501 1.0 1.0 0.0 \n","892 TEST COLLECTOR_860 72.0 48.0 23.0 \n","893 TEST COLLECTOR_3200 30.0 3.0 1.0 \n","894 TEST COLLECTOR_4788 12.0 4.0 1.0 \n","\n"," ENCOUNTER_TYPE REDRAW_COST REDRAW_PERCENT \n","0 Emergency 600.00 0.235294 \n","1 Inpatient 357.15 0.333333 \n","2 Intensive Care 162.18 0.062500 \n","3 Inpatient 357.15 0.000000 \n","4 Emergency 600.00 0.111111 \n",".. ... ... ... \n","890 Inpatient 357.15 0.032258 \n","891 Inpatient 357.15 0.000000 \n","892 Inpatient 357.15 0.479167 \n","893 Inpatient 357.15 0.333333 \n","894 Intensive Care 162.18 0.250000 \n","\n","[895 rows x 8 columns]"]},"execution_count":16,"metadata":{},"output_type":"execute_result"}],"source":["# Calculate the same per-collector features for the collector ids in test.csv.\n","test = pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/test.csv')\n","for collector_features in (median_hi, df2, df3, df4):\n","    test = test.merge(collector_features, how='left', on='COLLECTOR_ID')\n","test = test.merge(cost, how='left', on='ENCOUNTER_TYPE')\n","\n","# Collectors without a row in df3 get a redraw count of 0.\n","test['REDRAW_COUNT'] = test['REDRAW_COUNT'].fillna(0)\n","# Ratio of redrawn specimens to all specimens for the collector.\n","test['REDRAW_PERCENT'] = test['REDRAW_COUNT'] / test['SPECIMEN_COUNT']\n","test"]},
{"cell_type":"code","execution_count":17,"id":"a2a3c73a","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.253209Z","iopub.status.busy":"2023-03-22T21:53:34.252752Z","iopub.status.idle":"2023-03-22T21:53:34.275157Z","shell.execute_reply":"2023-03-22T21:53:34.273859Z"},"papermill":{"duration":0.034488,"end_time":"2023-03-22T21:53:34.277682","exception":false,"start_time":"2023-03-22T21:53:34.243194","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
median_hiREDRAW_COUNTREDRAW_COSTREDRAW_PERCENT
032.04.0600.000.235294
150.03.0357.150.333333
39.00.0357.150.000000
431.02.0600.000.111111
616.00.0357.150.000000
...............
88812.51.0357.150.019231
89014.01.0357.150.032258
8911.00.0357.150.000000
89272.023.0357.150.479167
89330.01.0357.150.333333
\n","

635 rows × 4 columns

\n","
"],"text/plain":[" median_hi REDRAW_COUNT REDRAW_COST REDRAW_PERCENT\n","0 32.0 4.0 600.00 0.235294\n","1 50.0 3.0 357.15 0.333333\n","3 9.0 0.0 357.15 0.000000\n","4 31.0 2.0 600.00 0.111111\n","6 16.0 0.0 357.15 0.000000\n",".. ... ... ... ...\n","888 12.5 1.0 357.15 0.019231\n","890 14.0 1.0 357.15 0.032258\n","891 1.0 0.0 357.15 0.000000\n","892 72.0 23.0 357.15 0.479167\n","893 30.0 1.0 357.15 0.333333\n","\n","[635 rows x 4 columns]"]},"execution_count":17,"metadata":{},"output_type":"execute_result"}],"source":["test = test.drop(['SPLIT', 'SPECIMEN_COUNT', 'COLLECTOR_ID'], axis = 1)\n","test = test.loc[(test['ENCOUNTER_TYPE'] == \"Emergency\") | (test['ENCOUNTER_TYPE'] == \"Inpatient\")]\n","test = test.drop('ENCOUNTER_TYPE', axis=1)\n","test"]},{"cell_type":"code","execution_count":18,"id":"90502e56","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.297647Z","iopub.status.busy":"2023-03-22T21:53:34.296284Z","iopub.status.idle":"2023-03-22T21:53:34.322929Z","shell.execute_reply":"2023-03-22T21:53:34.321712Z"},"papermill":{"duration":0.039237,"end_time":"2023-03-22T21:53:34.325539","exception":false,"start_time":"2023-03-22T21:53:34.286302","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
median_hiREDRAW_COUNTREDRAW_COSTREDRAW_PERCENTpred
032.04.0600.000.2352941.953370
150.03.0357.150.3333332.510889
39.00.0357.150.0000000.304316
431.02.0600.000.1111111.311873
616.00.0357.150.0000000.332751
..................
88812.51.0357.150.0192311.501695
89014.01.0357.150.0322581.442110
8911.00.0357.150.0000000.356631
89272.023.0357.150.4791670.846387
89330.01.0357.150.3333331.144415
\n","

635 rows × 5 columns

\n","
"],"text/plain":[" median_hi REDRAW_COUNT REDRAW_COST REDRAW_PERCENT pred\n","0 32.0 4.0 600.00 0.235294 1.953370\n","1 50.0 3.0 357.15 0.333333 2.510889\n","3 9.0 0.0 357.15 0.000000 0.304316\n","4 31.0 2.0 600.00 0.111111 1.311873\n","6 16.0 0.0 357.15 0.000000 0.332751\n",".. ... ... ... ... ...\n","888 12.5 1.0 357.15 0.019231 1.501695\n","890 14.0 1.0 357.15 0.032258 1.442110\n","891 1.0 0.0 357.15 0.000000 0.356631\n","892 72.0 23.0 357.15 0.479167 0.846387\n","893 30.0 1.0 357.15 0.333333 1.144415\n","\n","[635 rows x 5 columns]"]},"execution_count":18,"metadata":{},"output_type":"execute_result"}],"source":["dtestset = xgb.DMatrix(test)\n","predictions = bst.predict(dtestset)\n","test[\"pred\"]=predictions\n","test"]},{"cell_type":"code","execution_count":19,"id":"1bce50ce","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.346751Z","iopub.status.busy":"2023-03-22T21:53:34.345566Z","iopub.status.idle":"2023-03-22T21:53:34.373609Z","shell.execute_reply":"2023-03-22T21:53:34.372354Z"},"papermill":{"duration":0.041222,"end_time":"2023-03-22T21:53:34.376143","exception":false,"start_time":"2023-03-22T21:53:34.334921","status":"completed"},"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
SPLITCOLLECTOR_IDpred
0TESTCOLLECTOR_20111.953370
1TESTCOLLECTOR_15502.510889
2TESTCOLLECTOR_4570NaN
3TESTCOLLECTOR_35310.304316
4TESTCOLLECTOR_50201.311873
............
890TESTCOLLECTOR_28481.442110
891TESTCOLLECTOR_15010.356631
892TESTCOLLECTOR_8600.846387
893TESTCOLLECTOR_32001.144415
894TESTCOLLECTOR_4788NaN
\n","

895 rows × 3 columns

\n","
"],"text/plain":[" SPLIT COLLECTOR_ID pred\n","0 TEST COLLECTOR_2011 1.953370\n","1 TEST COLLECTOR_1550 2.510889\n","2 TEST COLLECTOR_4570 NaN\n","3 TEST COLLECTOR_3531 0.304316\n","4 TEST COLLECTOR_5020 1.311873\n",".. ... ... ...\n","890 TEST COLLECTOR_2848 1.442110\n","891 TEST COLLECTOR_1501 0.356631\n","892 TEST COLLECTOR_860 0.846387\n","893 TEST COLLECTOR_3200 1.144415\n","894 TEST COLLECTOR_4788 NaN\n","\n","[895 rows x 3 columns]"]},"execution_count":19,"metadata":{},"output_type":"execute_result"}],"source":["sub=pd.read_csv('/kaggle/input/aacc-2023-helpwithhemolysis/sample_solution.csv')\n","sub = sub.merge(test['pred'], how='left', left_index=True, right_index=True)\n","sub"]},{"cell_type":"code","execution_count":20,"id":"1e81243d","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.396239Z","iopub.status.busy":"2023-03-22T21:53:34.395262Z","iopub.status.idle":"2023-03-22T21:53:34.409089Z","shell.execute_reply":"2023-03-22T21:53:34.408201Z"},"papermill":{"duration":0.026094,"end_time":"2023-03-22T21:53:34.411248","exception":false,"start_time":"2023-03-22T21:53:34.385154","status":"completed"},"scrolled":true,"tags":[]},"outputs":[{"data":{"text/html":["
\n","\n","\n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n"," \n","
SPLITCOLLECTOR_IDpred
183TESTCOLLECTOR_516236.953739
163TESTCOLLECTOR_297336.892879
850TESTCOLLECTOR_202813.891359
76TESTCOLLECTOR_149111.083419
786TESTCOLLECTOR_14606.416948
............
882TESTCOLLECTOR_3097NaN
883TESTCOLLECTOR_310NaN
884TESTCOLLECTOR_5283NaN
889TESTCOLLECTOR_418NaN
894TESTCOLLECTOR_4788NaN
\n","

895 rows × 3 columns

\n","
"],"text/plain":[" SPLIT COLLECTOR_ID pred\n","183 TEST COLLECTOR_5162 36.953739\n","163 TEST COLLECTOR_2973 36.892879\n","850 TEST COLLECTOR_2028 13.891359\n","76 TEST COLLECTOR_1491 11.083419\n","786 TEST COLLECTOR_1460 6.416948\n",".. ... ... ...\n","882 TEST COLLECTOR_3097 NaN\n","883 TEST COLLECTOR_310 NaN\n","884 TEST COLLECTOR_5283 NaN\n","889 TEST COLLECTOR_418 NaN\n","894 TEST COLLECTOR_4788 NaN\n","\n","[895 rows x 3 columns]"]},"execution_count":20,"metadata":{},"output_type":"execute_result"}],"source":["sub=sub.sort_values(by=\"pred\",ascending=False)\n","sub"]},{"cell_type":"code","execution_count":21,"id":"eeb70d41","metadata":{"execution":{"iopub.execute_input":"2023-03-22T21:53:34.432143Z","iopub.status.busy":"2023-03-22T21:53:34.431499Z","iopub.status.idle":"2023-03-22T21:53:34.443486Z","shell.execute_reply":"2023-03-22T21:53:34.442508Z"},"papermill":{"duration":0.025522,"end_time":"2023-03-22T21:53:34.446147","exception":false,"start_time":"2023-03-22T21:53:34.420625","status":"completed"},"tags":[]},"outputs":[],"source":["sub[['SPLIT','COLLECTOR_ID']].to_csv('mlsub5.csv',index=False)"]},{"cell_type":"code","execution_count":null,"id":"28954832","metadata":{"papermill":{"duration":0.009111,"end_time":"2023-03-22T21:53:34.464601","exception":false,"start_time":"2023-03-22T21:53:34.45549","status":"completed"},"tags":[]},"outputs":[],"source":[]}],"metadata":{"kernelspec":{"display_name":"Python 
3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.7.12"},"papermill":{"default_parameters":{},"duration":17.610669,"end_time":"2023-03-22T21:53:35.29769","environment_variables":{},"exception":null,"input_path":"__notebook__.ipynb","output_path":"__notebook__.ipynb","parameters":{},"start_time":"2023-03-22T21:53:17.687021","version":"2.4.0"}},"nbformat":4,"nbformat_minor":5}