Diff of /liver_prediction.ipynb [000000] .. [b018ba]

Switch to unified view

a b/liver_prediction.ipynb
1
{
2
 "cells": [
3
  {
4
   "cell_type": "markdown",
5
   "id": "073c1361-bcae-45d9-ad98-a07133b4f412",
6
   "metadata": {},
7
   "source": [
8
    "# Liver Disease Prediction"
9
   ]
10
  },
11
  {
12
   "cell_type": "code",
13
   "execution_count": 1,
14
   "id": "ce0cfb84-91cc-46d0-9971-3478aa0045a4",
15
   "metadata": {},
16
   "outputs": [
17
    {
18
     "ename": "ModuleNotFoundError",
19
     "evalue": "No module named 'seaborn'",
20
     "output_type": "error",
21
     "traceback": [
22
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
23
      "\u001b[1;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
24
      "Cell \u001b[1;32mIn[1], line 3\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[1;32m----> 3\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mseaborn\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01msns\u001b[39;00m\n\u001b[0;32m      4\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m pyplot \u001b[38;5;28;01mas\u001b[39;00m plt\n\u001b[0;32m      6\u001b[0m \u001b[38;5;66;03m# Ignore Warnings\u001b[39;00m\n",
25
      "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'seaborn'"
26
     ]
27
    }
28
   ],
29
   "source": [
30
    "\n",
31
    "import numpy as np\n",
32
    "import pandas as pd\n",
33
    "import seaborn as sns\n",
34
    "from matplotlib import pyplot as plt\n",
35
    "\n",
36
    "# Ignore Warnings\n",
37
    "import warnings\n",
38
    "warnings.filterwarnings(\"ignore\")\n",
39
    "\n",
40
    "from math import sqrt\n",
41
    "from sklearn.model_selection import train_test_split\n",
42
    "from sklearn.metrics import accuracy_score, confusion_matrix, classification_report\n",
43
    "\n",
44
    "from sklearn.model_selection import GridSearchCV\n",
45
    "\n",
46
    "\n",
47
    "\n",
48
    "from sklearn.linear_model import LogisticRegression\n",
49
    "from sklearn.tree import DecisionTreeClassifier\n",
50
    "from sklearn.svm import SVC\n",
51
    "from sklearn.preprocessing import StandardScaler\n",
52
    "from sklearn.model_selection import train_test_split\n",
53
    "from sklearn.neighbors import KNeighborsClassifier\n",
54
    "\n",
55
    "plt.style.use('ggplot')\n"
56
   ]
57
  },
58
  {
59
   "cell_type": "code",
60
   "execution_count": 133,
61
   "id": "5d94117e-65b4-4806-b4d4-36eab6594700",
62
   "metadata": {},
63
   "outputs": [
64
    {
65
     "data": {
66
      "text/html": [
67
       "<div>\n",
68
       "<style scoped>\n",
69
       "    .dataframe tbody tr th:only-of-type {\n",
70
       "        vertical-align: middle;\n",
71
       "    }\n",
72
       "\n",
73
       "    .dataframe tbody tr th {\n",
74
       "        vertical-align: top;\n",
75
       "    }\n",
76
       "\n",
77
       "    .dataframe thead th {\n",
78
       "        text-align: right;\n",
79
       "    }\n",
80
       "</style>\n",
81
       "<table border=\"1\" class=\"dataframe\">\n",
82
       "  <thead>\n",
83
       "    <tr style=\"text-align: right;\">\n",
84
       "      <th></th>\n",
85
       "      <th>Age of the patient</th>\n",
86
       "      <th>Gender of the patient</th>\n",
87
       "      <th>Total Bilirubin</th>\n",
88
       "      <th>Direct Bilirubin</th>\n",
89
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
90
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
91
       "      <th>Sgot Aspartate Aminotransferase</th>\n",
92
       "      <th>Total Protiens</th>\n",
93
       "      <th>ALB Albumin</th>\n",
94
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
95
       "      <th>Result</th>\n",
96
       "    </tr>\n",
97
       "  </thead>\n",
98
       "  <tbody>\n",
99
       "    <tr>\n",
100
       "      <th>0</th>\n",
101
       "      <td>65.0</td>\n",
102
       "      <td>Female</td>\n",
103
       "      <td>0.7</td>\n",
104
       "      <td>0.1</td>\n",
105
       "      <td>187.0</td>\n",
106
       "      <td>16.0</td>\n",
107
       "      <td>18.0</td>\n",
108
       "      <td>6.8</td>\n",
109
       "      <td>3.3</td>\n",
110
       "      <td>0.90</td>\n",
111
       "      <td>1</td>\n",
112
       "    </tr>\n",
113
       "    <tr>\n",
114
       "      <th>1</th>\n",
115
       "      <td>62.0</td>\n",
116
       "      <td>Male</td>\n",
117
       "      <td>10.9</td>\n",
118
       "      <td>5.5</td>\n",
119
       "      <td>699.0</td>\n",
120
       "      <td>64.0</td>\n",
121
       "      <td>100.0</td>\n",
122
       "      <td>7.5</td>\n",
123
       "      <td>3.2</td>\n",
124
       "      <td>0.74</td>\n",
125
       "      <td>1</td>\n",
126
       "    </tr>\n",
127
       "    <tr>\n",
128
       "      <th>2</th>\n",
129
       "      <td>62.0</td>\n",
130
       "      <td>Male</td>\n",
131
       "      <td>7.3</td>\n",
132
       "      <td>4.1</td>\n",
133
       "      <td>490.0</td>\n",
134
       "      <td>60.0</td>\n",
135
       "      <td>68.0</td>\n",
136
       "      <td>7.0</td>\n",
137
       "      <td>3.3</td>\n",
138
       "      <td>0.89</td>\n",
139
       "      <td>1</td>\n",
140
       "    </tr>\n",
141
       "    <tr>\n",
142
       "      <th>3</th>\n",
143
       "      <td>58.0</td>\n",
144
       "      <td>Male</td>\n",
145
       "      <td>1.0</td>\n",
146
       "      <td>0.4</td>\n",
147
       "      <td>182.0</td>\n",
148
       "      <td>14.0</td>\n",
149
       "      <td>20.0</td>\n",
150
       "      <td>6.8</td>\n",
151
       "      <td>3.4</td>\n",
152
       "      <td>1.00</td>\n",
153
       "      <td>1</td>\n",
154
       "    </tr>\n",
155
       "    <tr>\n",
156
       "      <th>4</th>\n",
157
       "      <td>72.0</td>\n",
158
       "      <td>Male</td>\n",
159
       "      <td>3.9</td>\n",
160
       "      <td>2.0</td>\n",
161
       "      <td>195.0</td>\n",
162
       "      <td>27.0</td>\n",
163
       "      <td>59.0</td>\n",
164
       "      <td>7.3</td>\n",
165
       "      <td>2.4</td>\n",
166
       "      <td>0.40</td>\n",
167
       "      <td>1</td>\n",
168
       "    </tr>\n",
169
       "  </tbody>\n",
170
       "</table>\n",
171
       "</div>"
172
      ],
173
      "text/plain": [
174
       "   Age of the patient Gender of the patient  Total Bilirubin  \\\n",
175
       "0                65.0                Female              0.7   \n",
176
       "1                62.0                  Male             10.9   \n",
177
       "2                62.0                  Male              7.3   \n",
178
       "3                58.0                  Male              1.0   \n",
179
       "4                72.0                  Male              3.9   \n",
180
       "\n",
181
       "   Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
182
       "0               0.1                          187.0   \n",
183
       "1               5.5                          699.0   \n",
184
       "2               4.1                          490.0   \n",
185
       "3               0.4                          182.0   \n",
186
       "4               2.0                          195.0   \n",
187
       "\n",
188
       "    Sgpt Alamine Aminotransferase  Sgot Aspartate Aminotransferase  \\\n",
189
       "0                            16.0                             18.0   \n",
190
       "1                            64.0                            100.0   \n",
191
       "2                            60.0                             68.0   \n",
192
       "3                            14.0                             20.0   \n",
193
       "4                            27.0                             59.0   \n",
194
       "\n",
195
       "   Total Protiens   ALB Albumin  A/G Ratio Albumin and Globulin Ratio  Result  \n",
196
       "0             6.8           3.3                                  0.90       1  \n",
197
       "1             7.5           3.2                                  0.74       1  \n",
198
       "2             7.0           3.3                                  0.89       1  \n",
199
       "3             6.8           3.4                                  1.00       1  \n",
200
       "4             7.3           2.4                                  0.40       1  "
201
      ]
202
     },
203
     "execution_count": 133,
204
     "metadata": {},
205
     "output_type": "execute_result"
206
    }
207
   ],
208
   "source": [
209
    "df=pd.read_csv(r'datasets\\Liver.csv',encoding='ISO-8859-1')\n",
210
    "df.head()"
211
   ]
212
  },
213
  {
214
   "cell_type": "code",
215
   "execution_count": 134,
216
   "id": "2a8f3ab1-5de2-4b9d-9aa9-8cc2d1e8e6f7",
217
   "metadata": {},
218
   "outputs": [
219
    {
220
     "data": {
221
      "text/html": [
222
       "<div>\n",
223
       "<style scoped>\n",
224
       "    .dataframe tbody tr th:only-of-type {\n",
225
       "        vertical-align: middle;\n",
226
       "    }\n",
227
       "\n",
228
       "    .dataframe tbody tr th {\n",
229
       "        vertical-align: top;\n",
230
       "    }\n",
231
       "\n",
232
       "    .dataframe thead th {\n",
233
       "        text-align: right;\n",
234
       "    }\n",
235
       "</style>\n",
236
       "<table border=\"1\" class=\"dataframe\">\n",
237
       "  <thead>\n",
238
       "    <tr style=\"text-align: right;\">\n",
239
       "      <th></th>\n",
240
       "      <th>Age of the patient</th>\n",
241
       "      <th>Gender of the patient</th>\n",
242
       "      <th>Total Bilirubin</th>\n",
243
       "      <th>Direct Bilirubin</th>\n",
244
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
245
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
246
       "      <th>Aspartate_Aminotransferase</th>\n",
247
       "      <th>Total Protiens</th>\n",
248
       "      <th>ALB Albumin</th>\n",
249
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
250
       "      <th>Result</th>\n",
251
       "    </tr>\n",
252
       "  </thead>\n",
253
       "  <tbody>\n",
254
       "    <tr>\n",
255
       "      <th>0</th>\n",
256
       "      <td>65.0</td>\n",
257
       "      <td>Female</td>\n",
258
       "      <td>0.7</td>\n",
259
       "      <td>0.1</td>\n",
260
       "      <td>187.0</td>\n",
261
       "      <td>16.0</td>\n",
262
       "      <td>18.0</td>\n",
263
       "      <td>6.8</td>\n",
264
       "      <td>3.3</td>\n",
265
       "      <td>0.90</td>\n",
266
       "      <td>1</td>\n",
267
       "    </tr>\n",
268
       "    <tr>\n",
269
       "      <th>1</th>\n",
270
       "      <td>62.0</td>\n",
271
       "      <td>Male</td>\n",
272
       "      <td>10.9</td>\n",
273
       "      <td>5.5</td>\n",
274
       "      <td>699.0</td>\n",
275
       "      <td>64.0</td>\n",
276
       "      <td>100.0</td>\n",
277
       "      <td>7.5</td>\n",
278
       "      <td>3.2</td>\n",
279
       "      <td>0.74</td>\n",
280
       "      <td>1</td>\n",
281
       "    </tr>\n",
282
       "    <tr>\n",
283
       "      <th>2</th>\n",
284
       "      <td>62.0</td>\n",
285
       "      <td>Male</td>\n",
286
       "      <td>7.3</td>\n",
287
       "      <td>4.1</td>\n",
288
       "      <td>490.0</td>\n",
289
       "      <td>60.0</td>\n",
290
       "      <td>68.0</td>\n",
291
       "      <td>7.0</td>\n",
292
       "      <td>3.3</td>\n",
293
       "      <td>0.89</td>\n",
294
       "      <td>1</td>\n",
295
       "    </tr>\n",
296
       "    <tr>\n",
297
       "      <th>3</th>\n",
298
       "      <td>58.0</td>\n",
299
       "      <td>Male</td>\n",
300
       "      <td>1.0</td>\n",
301
       "      <td>0.4</td>\n",
302
       "      <td>182.0</td>\n",
303
       "      <td>14.0</td>\n",
304
       "      <td>20.0</td>\n",
305
       "      <td>6.8</td>\n",
306
       "      <td>3.4</td>\n",
307
       "      <td>1.00</td>\n",
308
       "      <td>1</td>\n",
309
       "    </tr>\n",
310
       "    <tr>\n",
311
       "      <th>4</th>\n",
312
       "      <td>72.0</td>\n",
313
       "      <td>Male</td>\n",
314
       "      <td>3.9</td>\n",
315
       "      <td>2.0</td>\n",
316
       "      <td>195.0</td>\n",
317
       "      <td>27.0</td>\n",
318
       "      <td>59.0</td>\n",
319
       "      <td>7.3</td>\n",
320
       "      <td>2.4</td>\n",
321
       "      <td>0.40</td>\n",
322
       "      <td>1</td>\n",
323
       "    </tr>\n",
324
       "  </tbody>\n",
325
       "</table>\n",
326
       "</div>"
327
      ],
328
      "text/plain": [
329
       "   Age of the patient Gender of the patient  Total Bilirubin  \\\n",
330
       "0                65.0                Female              0.7   \n",
331
       "1                62.0                  Male             10.9   \n",
332
       "2                62.0                  Male              7.3   \n",
333
       "3                58.0                  Male              1.0   \n",
334
       "4                72.0                  Male              3.9   \n",
335
       "\n",
336
       "   Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
337
       "0               0.1                          187.0   \n",
338
       "1               5.5                          699.0   \n",
339
       "2               4.1                          490.0   \n",
340
       "3               0.4                          182.0   \n",
341
       "4               2.0                          195.0   \n",
342
       "\n",
343
       "    Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  Total Protiens  \\\n",
344
       "0                            16.0                        18.0             6.8   \n",
345
       "1                            64.0                       100.0             7.5   \n",
346
       "2                            60.0                        68.0             7.0   \n",
347
       "3                            14.0                        20.0             6.8   \n",
348
       "4                            27.0                        59.0             7.3   \n",
349
       "\n",
350
       "    ALB Albumin  A/G Ratio Albumin and Globulin Ratio  Result  \n",
351
       "0           3.3                                  0.90       1  \n",
352
       "1           3.2                                  0.74       1  \n",
353
       "2           3.3                                  0.89       1  \n",
354
       "3           3.4                                  1.00       1  \n",
355
       "4           2.4                                  0.40       1  "
356
      ]
357
     },
358
     "execution_count": 134,
359
     "metadata": {},
360
     "output_type": "execute_result"
361
    }
362
   ],
363
   "source": [
364
    "column_rename_dict = {\n",
365
    "    'Sgot Aspartate Aminotransferase': 'Aspartate_Aminotransferase'\n",
366
    "}\n",
367
    "\n",
368
    "df.rename(columns=column_rename_dict, inplace=True)\n",
369
    "df.head()"
370
   ]
371
  },
372
  {
373
   "cell_type": "code",
374
   "execution_count": 135,
375
   "id": "beafdf96",
376
   "metadata": {},
377
   "outputs": [
378
    {
379
     "name": "stdout",
380
     "output_type": "stream",
381
     "text": [
382
      "[1]\n",
383
      "M\n"
384
     ]
385
    }
386
   ],
387
   "source": [
388
    "from sklearn.linear_model import LogisticRegression\n",
389
    "input_data =(26.0,0,0.7,0.2,185.0,16.0,22.0,7.3,3.7,1.00\n",
390
    ")\n",
391
    "#100,12,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,0,0,1) \n",
392
    "\n",
393
    "'''(14.36,0.09779,0.08129,0.04781,0.1885,0.05766,0.7886,23.56,0.008462,\n",
394
    "0.0146,0.02387,0.01315,0.0198,0.0023,15.11,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259\n",
395
    ")'''\n",
396
    "\n",
397
    "input_data_as_numpy_array = np.asarray(input_data)\n",
398
    "\n",
399
    "# reshape the array as we are predicting for one instance\n",
400
    "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
401
    "predictions = svc.predict(input_data_reshaped)\n",
402
    "print(predictions)\n",
403
    "if (predictions == 0):\n",
404
    "  print('B')\n",
405
    "else:\n",
406
    "  print('M')"
407
   ]
408
  },
409
  {
410
   "cell_type": "code",
411
   "execution_count": 136,
412
   "id": "da3fe93c-cf85-44bd-b380-6b76b8047fba",
413
   "metadata": {},
414
   "outputs": [
415
    {
416
     "data": {
417
      "text/plain": [
418
       "(30691, 11)"
419
      ]
420
     },
421
     "execution_count": 136,
422
     "metadata": {},
423
     "output_type": "execute_result"
424
    }
425
   ],
426
   "source": [
427
    "df.shape"
428
   ]
429
  },
430
  {
431
   "cell_type": "markdown",
432
   "id": "5ffcf663-aa04-47bc-9006-68315a8e1cf6",
433
   "metadata": {},
434
   "source": [
435
    "\n",
436
    "## Exploratory Data Analysis"
437
   ]
438
  },
439
  {
440
   "cell_type": "code",
441
   "execution_count": 137,
442
   "id": "40315d82-475f-439f-aa15-cb287393b7a7",
443
   "metadata": {},
444
   "outputs": [
445
    {
446
     "data": {
447
      "text/plain": [
448
       "5425"
449
      ]
450
     },
451
     "execution_count": 137,
452
     "metadata": {},
453
     "output_type": "execute_result"
454
    }
455
   ],
456
   "source": [
457
    "df.isna().sum().sum()"
458
   ]
459
  },
460
  {
461
   "cell_type": "code",
462
   "execution_count": 138,
463
   "id": "2ce172d1-e6ec-46a3-941c-bb414dab5192",
464
   "metadata": {},
465
   "outputs": [
466
    {
467
     "data": {
468
      "text/plain": [
469
       "Age of the patient                      0\n",
470
       "Gender of the patient                   0\n",
471
       "Total Bilirubin                         0\n",
472
       "Direct Bilirubin                        0\n",
473
       " Alkphos Alkaline Phosphotase           0\n",
474
       " Sgpt Alamine Aminotransferase          0\n",
475
       "Aspartate_Aminotransferase              0\n",
476
       "Total Protiens                          0\n",
477
       " ALB Albumin                            0\n",
478
       "A/G Ratio Albumin and Globulin Ratio    0\n",
479
       "Result                                  0\n",
480
       "dtype: int64"
481
      ]
482
     },
483
     "execution_count": 138,
484
     "metadata": {},
485
     "output_type": "execute_result"
486
    }
487
   ],
488
   "source": [
489
    "df = df.dropna()\n",
490
    "df.isna().sum()"
491
   ]
492
  },
493
  {
494
   "cell_type": "markdown",
495
   "id": "b58e5846-2b1f-45a7-8689-57a17d124404",
496
   "metadata": {},
497
   "source": [
498
    "## Distribution of Numerical Features"
499
   ]
500
  },
501
  {
502
   "cell_type": "code",
503
   "execution_count": 139,
504
   "id": "4716666d-5d2e-4d7c-a366-5907ce220b57",
505
   "metadata": {},
506
   "outputs": [
507
    {
508
     "data": {
509
      "text/html": [
510
       "<div>\n",
511
       "<style scoped>\n",
512
       "    .dataframe tbody tr th:only-of-type {\n",
513
       "        vertical-align: middle;\n",
514
       "    }\n",
515
       "\n",
516
       "    .dataframe tbody tr th {\n",
517
       "        vertical-align: top;\n",
518
       "    }\n",
519
       "\n",
520
       "    .dataframe thead th {\n",
521
       "        text-align: right;\n",
522
       "    }\n",
523
       "</style>\n",
524
       "<table border=\"1\" class=\"dataframe\">\n",
525
       "  <thead>\n",
526
       "    <tr style=\"text-align: right;\">\n",
527
       "      <th></th>\n",
528
       "      <th>Age of the patient</th>\n",
529
       "      <th>Gender of the patient</th>\n",
530
       "      <th>Total Bilirubin</th>\n",
531
       "      <th>Direct Bilirubin</th>\n",
532
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
533
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
534
       "      <th>Aspartate_Aminotransferase</th>\n",
535
       "      <th>Total Protiens</th>\n",
536
       "      <th>ALB Albumin</th>\n",
537
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
538
       "      <th>Result</th>\n",
539
       "    </tr>\n",
540
       "  </thead>\n",
541
       "  <tbody>\n",
542
       "    <tr>\n",
543
       "      <th>5090</th>\n",
544
       "      <td>29.0</td>\n",
545
       "      <td>Female</td>\n",
546
       "      <td>1.3</td>\n",
547
       "      <td>0.6</td>\n",
548
       "      <td>166.0</td>\n",
549
       "      <td>49.0</td>\n",
550
       "      <td>42.0</td>\n",
551
       "      <td>5.6</td>\n",
552
       "      <td>2.5</td>\n",
553
       "      <td>0.80</td>\n",
554
       "      <td>2</td>\n",
555
       "    </tr>\n",
556
       "    <tr>\n",
557
       "      <th>3055</th>\n",
558
       "      <td>16.0</td>\n",
559
       "      <td>Male</td>\n",
560
       "      <td>0.6</td>\n",
561
       "      <td>0.1</td>\n",
562
       "      <td>186.0</td>\n",
563
       "      <td>20.0</td>\n",
564
       "      <td>21.0</td>\n",
565
       "      <td>6.2</td>\n",
566
       "      <td>3.3</td>\n",
567
       "      <td>1.10</td>\n",
568
       "      <td>2</td>\n",
569
       "    </tr>\n",
570
       "    <tr>\n",
571
       "      <th>24182</th>\n",
572
       "      <td>38.0</td>\n",
573
       "      <td>Male</td>\n",
574
       "      <td>1.1</td>\n",
575
       "      <td>0.5</td>\n",
576
       "      <td>128.0</td>\n",
577
       "      <td>20.0</td>\n",
578
       "      <td>30.0</td>\n",
579
       "      <td>3.9</td>\n",
580
       "      <td>1.9</td>\n",
581
       "      <td>0.95</td>\n",
582
       "      <td>2</td>\n",
583
       "    </tr>\n",
584
       "    <tr>\n",
585
       "      <th>30230</th>\n",
586
       "      <td>60.0</td>\n",
587
       "      <td>Female</td>\n",
588
       "      <td>1.4</td>\n",
589
       "      <td>0.7</td>\n",
590
       "      <td>195.0</td>\n",
591
       "      <td>36.0</td>\n",
592
       "      <td>16.0</td>\n",
593
       "      <td>7.9</td>\n",
594
       "      <td>3.7</td>\n",
595
       "      <td>0.90</td>\n",
596
       "      <td>2</td>\n",
597
       "    </tr>\n",
598
       "    <tr>\n",
599
       "      <th>20921</th>\n",
600
       "      <td>56.0</td>\n",
601
       "      <td>Male</td>\n",
602
       "      <td>5.0</td>\n",
603
       "      <td>2.1</td>\n",
604
       "      <td>103.0</td>\n",
605
       "      <td>18.0</td>\n",
606
       "      <td>40.0</td>\n",
607
       "      <td>5.0</td>\n",
608
       "      <td>2.1</td>\n",
609
       "      <td>1.72</td>\n",
610
       "      <td>1</td>\n",
611
       "    </tr>\n",
612
       "  </tbody>\n",
613
       "</table>\n",
614
       "</div>"
615
      ],
616
      "text/plain": [
617
       "       Age of the patient Gender of the patient  Total Bilirubin  \\\n",
618
       "5090                 29.0                Female              1.3   \n",
619
       "3055                 16.0                  Male              0.6   \n",
620
       "24182                38.0                  Male              1.1   \n",
621
       "30230                60.0                Female              1.4   \n",
622
       "20921                56.0                  Male              5.0   \n",
623
       "\n",
624
       "       Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
625
       "5090                0.6                          166.0   \n",
626
       "3055                0.1                          186.0   \n",
627
       "24182               0.5                          128.0   \n",
628
       "30230               0.7                          195.0   \n",
629
       "20921               2.1                          103.0   \n",
630
       "\n",
631
       "        Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  \\\n",
632
       "5090                             49.0                        42.0   \n",
633
       "3055                             20.0                        21.0   \n",
634
       "24182                            20.0                        30.0   \n",
635
       "30230                            36.0                        16.0   \n",
636
       "20921                            18.0                        40.0   \n",
637
       "\n",
638
       "       Total Protiens   ALB Albumin  A/G Ratio Albumin and Globulin Ratio  \\\n",
639
       "5090              5.6           2.5                                  0.80   \n",
640
       "3055              6.2           3.3                                  1.10   \n",
641
       "24182             3.9           1.9                                  0.95   \n",
642
       "30230             7.9           3.7                                  0.90   \n",
643
       "20921             5.0           2.1                                  1.72   \n",
644
       "\n",
645
       "       Result  \n",
646
       "5090        2  \n",
647
       "3055        2  \n",
648
       "24182       2  \n",
649
       "30230       2  \n",
650
       "20921       1  "
651
      ]
652
     },
653
     "execution_count": 139,
654
     "metadata": {},
655
     "output_type": "execute_result"
656
    }
657
   ],
658
   "source": [
659
    "df.sample(5)\n"
660
   ]
661
  },
662
  {
663
   "cell_type": "code",
664
   "execution_count": 140,
665
   "id": "fd8f4fb9-7d74-49c2-ad94-284b09263880",
666
   "metadata": {},
667
   "outputs": [
668
    {
669
     "data": {
670
      "text/html": [
671
       "<div>\n",
672
       "<style scoped>\n",
673
       "    .dataframe tbody tr th:only-of-type {\n",
674
       "        vertical-align: middle;\n",
675
       "    }\n",
676
       "\n",
677
       "    .dataframe tbody tr th {\n",
678
       "        vertical-align: top;\n",
679
       "    }\n",
680
       "\n",
681
       "    .dataframe thead th {\n",
682
       "        text-align: right;\n",
683
       "    }\n",
684
       "</style>\n",
685
       "<table border=\"1\" class=\"dataframe\">\n",
686
       "  <thead>\n",
687
       "    <tr style=\"text-align: right;\">\n",
688
       "      <th></th>\n",
689
       "      <th>Age of the patient</th>\n",
690
       "      <th>Total Bilirubin</th>\n",
691
       "      <th>Direct Bilirubin</th>\n",
692
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
693
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
694
       "      <th>Aspartate_Aminotransferase</th>\n",
695
       "      <th>Total Protiens</th>\n",
696
       "      <th>ALB Albumin</th>\n",
697
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
698
       "      <th>Result</th>\n",
699
       "    </tr>\n",
700
       "  </thead>\n",
701
       "  <tbody>\n",
702
       "    <tr>\n",
703
       "      <th>count</th>\n",
704
       "      <td>27158.000000</td>\n",
705
       "      <td>27158.000000</td>\n",
706
       "      <td>27158.000000</td>\n",
707
       "      <td>27158.000000</td>\n",
708
       "      <td>27158.000000</td>\n",
709
       "      <td>27158.000000</td>\n",
710
       "      <td>27158.000000</td>\n",
711
       "      <td>27158.000000</td>\n",
712
       "      <td>27158.000000</td>\n",
713
       "      <td>27158.000000</td>\n",
714
       "    </tr>\n",
715
       "    <tr>\n",
716
       "      <th>mean</th>\n",
717
       "      <td>44.125046</td>\n",
718
       "      <td>3.407909</td>\n",
719
       "      <td>1.541630</td>\n",
720
       "      <td>290.142021</td>\n",
721
       "      <td>81.279292</td>\n",
722
       "      <td>112.102879</td>\n",
723
       "      <td>6.472605</td>\n",
724
       "      <td>3.124044</td>\n",
725
       "      <td>0.943567</td>\n",
726
       "      <td>1.282790</td>\n",
727
       "    </tr>\n",
728
       "    <tr>\n",
729
       "      <th>std</th>\n",
730
       "      <td>15.971563</td>\n",
731
       "      <td>6.332486</td>\n",
732
       "      <td>2.895084</td>\n",
733
       "      <td>239.595473</td>\n",
734
       "      <td>181.571537</td>\n",
735
       "      <td>283.616005</td>\n",
736
       "      <td>1.081477</td>\n",
737
       "      <td>0.792329</td>\n",
738
       "      <td>0.324205</td>\n",
739
       "      <td>0.450363</td>\n",
740
       "    </tr>\n",
741
       "    <tr>\n",
742
       "      <th>min</th>\n",
743
       "      <td>4.000000</td>\n",
744
       "      <td>0.400000</td>\n",
745
       "      <td>0.100000</td>\n",
746
       "      <td>63.000000</td>\n",
747
       "      <td>10.000000</td>\n",
748
       "      <td>10.000000</td>\n",
749
       "      <td>2.700000</td>\n",
750
       "      <td>0.900000</td>\n",
751
       "      <td>0.300000</td>\n",
752
       "      <td>1.000000</td>\n",
753
       "    </tr>\n",
754
       "    <tr>\n",
755
       "      <th>25%</th>\n",
756
       "      <td>33.000000</td>\n",
757
       "      <td>0.800000</td>\n",
758
       "      <td>0.200000</td>\n",
759
       "      <td>175.000000</td>\n",
760
       "      <td>23.000000</td>\n",
761
       "      <td>26.000000</td>\n",
762
       "      <td>5.800000</td>\n",
763
       "      <td>2.600000</td>\n",
764
       "      <td>0.700000</td>\n",
765
       "      <td>1.000000</td>\n",
766
       "    </tr>\n",
767
       "    <tr>\n",
768
       "      <th>50%</th>\n",
769
       "      <td>45.000000</td>\n",
770
       "      <td>1.000000</td>\n",
771
       "      <td>0.300000</td>\n",
772
       "      <td>209.000000</td>\n",
773
       "      <td>36.000000</td>\n",
774
       "      <td>42.000000</td>\n",
775
       "      <td>6.600000</td>\n",
776
       "      <td>3.100000</td>\n",
777
       "      <td>0.900000</td>\n",
778
       "      <td>1.000000</td>\n",
779
       "    </tr>\n",
780
       "    <tr>\n",
781
       "      <th>75%</th>\n",
782
       "      <td>55.000000</td>\n",
783
       "      <td>2.700000</td>\n",
784
       "      <td>1.300000</td>\n",
785
       "      <td>298.000000</td>\n",
786
       "      <td>62.000000</td>\n",
787
       "      <td>88.000000</td>\n",
788
       "      <td>7.200000</td>\n",
789
       "      <td>3.700000</td>\n",
790
       "      <td>1.100000</td>\n",
791
       "      <td>2.000000</td>\n",
792
       "    </tr>\n",
793
       "    <tr>\n",
794
       "      <th>max</th>\n",
795
       "      <td>90.000000</td>\n",
796
       "      <td>75.000000</td>\n",
797
       "      <td>19.700000</td>\n",
798
       "      <td>2110.000000</td>\n",
799
       "      <td>2000.000000</td>\n",
800
       "      <td>4929.000000</td>\n",
801
       "      <td>9.600000</td>\n",
802
       "      <td>5.500000</td>\n",
803
       "      <td>2.800000</td>\n",
804
       "      <td>2.000000</td>\n",
805
       "    </tr>\n",
806
       "  </tbody>\n",
807
       "</table>\n",
808
       "</div>"
809
      ],
810
      "text/plain": [
811
       "       Age of the patient  Total Bilirubin  Direct Bilirubin  \\\n",
812
       "count        27158.000000     27158.000000      27158.000000   \n",
813
       "mean            44.125046         3.407909          1.541630   \n",
814
       "std             15.971563         6.332486          2.895084   \n",
815
       "min              4.000000         0.400000          0.100000   \n",
816
       "25%             33.000000         0.800000          0.200000   \n",
817
       "50%             45.000000         1.000000          0.300000   \n",
818
       "75%             55.000000         2.700000          1.300000   \n",
819
       "max             90.000000        75.000000         19.700000   \n",
820
       "\n",
821
       "        Alkphos Alkaline Phosphotase   Sgpt Alamine Aminotransferase  \\\n",
822
       "count                   27158.000000                    27158.000000   \n",
823
       "mean                      290.142021                       81.279292   \n",
824
       "std                       239.595473                      181.571537   \n",
825
       "min                        63.000000                       10.000000   \n",
826
       "25%                       175.000000                       23.000000   \n",
827
       "50%                       209.000000                       36.000000   \n",
828
       "75%                       298.000000                       62.000000   \n",
829
       "max                      2110.000000                     2000.000000   \n",
830
       "\n",
831
       "       Aspartate_Aminotransferase  Total Protiens   ALB Albumin  \\\n",
832
       "count                27158.000000    27158.000000  27158.000000   \n",
833
       "mean                   112.102879        6.472605      3.124044   \n",
834
       "std                    283.616005        1.081477      0.792329   \n",
835
       "min                     10.000000        2.700000      0.900000   \n",
836
       "25%                     26.000000        5.800000      2.600000   \n",
837
       "50%                     42.000000        6.600000      3.100000   \n",
838
       "75%                     88.000000        7.200000      3.700000   \n",
839
       "max                   4929.000000        9.600000      5.500000   \n",
840
       "\n",
841
       "       A/G Ratio Albumin and Globulin Ratio        Result  \n",
842
       "count                          27158.000000  27158.000000  \n",
843
       "mean                               0.943567      1.282790  \n",
844
       "std                                0.324205      0.450363  \n",
845
       "min                                0.300000      1.000000  \n",
846
       "25%                                0.700000      1.000000  \n",
847
       "50%                                0.900000      1.000000  \n",
848
       "75%                                1.100000      2.000000  \n",
849
       "max                                2.800000      2.000000  "
850
      ]
851
     },
852
     "execution_count": 140,
853
     "metadata": {},
854
     "output_type": "execute_result"
855
    }
856
   ],
857
   "source": [
858
    "df.describe()"
859
   ]
860
  },
861
  {
862
   "cell_type": "code",
863
   "execution_count": 141,
864
   "id": "a5454137-8361-4e5e-90b5-88a55e8cd37e",
865
   "metadata": {},
866
   "outputs": [],
867
   "source": [
868
    "## Encode the target column: Result == 2 -> 0, every other value -> 1.\n",
869
    "def partition(x):\n",
870
    "    if x == 2 or x == 0:  # 0 passes through unchanged so re-running this cell is a no-op\n",
871
    "        return 0\n",
872
    "    return 1\n",
873
    "\n",
874
    "# NOTE(review): presumably 1 = liver patient and 2 = no disease in the raw data; confirm against the dataset description.\n",
875
    "df['Result'] = df['Result'].map(partition)"
876
   ]
877
  },
878
  {
879
   "cell_type": "code",
880
   "execution_count": 142,
881
   "id": "047d3234-04c3-47f3-8027-4f6a959c9a33",
882
   "metadata": {},
883
   "outputs": [
884
    {
885
     "name": "stdout",
886
     "output_type": "stream",
887
     "text": [
888
      "Number of patients that are male:  19478\n",
889
      "Number of patients that are female:  7680\n"
890
     ]
891
    },
892
    {
893
     "data": {
894
      "image/png": "",
895
      "text/plain": [
896
       "<Figure size 640x480 with 1 Axes>"
897
      ]
898
     },
899
     "metadata": {},
900
     "output_type": "display_data"
901
    }
902
   ],
903
   "source": [
904
    "# Class balance of the target column 'Result' (these are outcome classes, not gender).\n",
905
    "sns.countplot(data=df, x = 'Result', label='Count')\n",
906
    "\n",
907
    "result_counts = df['Result'].value_counts().sort_index()\n",
908
    "for result_value, n_patients in result_counts.items():\n",
909
    "    print(f'Number of patients with Result == {result_value}:', n_patients)"
910
   ]
911
  },
912
  {
913
   "cell_type": "code",
914
   "execution_count": 160,
915
   "id": "bad0e537-76f6-4224-b489-bd47eab5e849",
916
   "metadata": {},
917
   "outputs": [],
918
   "source": [
919
    "## Encode gender as an integer: 'Male' -> 0, every other value -> 1.\n",
920
    "def partition(x):\n",
921
    "    if x == 'Male' or x == 0:  # 0 passes through unchanged so re-running this cell is a no-op\n",
922
    "        return 0\n",
923
    "    return 1\n",
924
    "\n",
925
    "df['Gender of the patient'] = df['Gender of the patient'].map(partition)"
926
   ]
927
  },
928
  {
929
   "cell_type": "code",
930
   "execution_count": 161,
931
   "id": "5943a716",
932
   "metadata": {},
933
   "outputs": [
934
    {
935
     "data": {
936
      "text/html": [
937
       "<div>\n",
938
       "<style scoped>\n",
939
       "    .dataframe tbody tr th:only-of-type {\n",
940
       "        vertical-align: middle;\n",
941
       "    }\n",
942
       "\n",
943
       "    .dataframe tbody tr th {\n",
944
       "        vertical-align: top;\n",
945
       "    }\n",
946
       "\n",
947
       "    .dataframe thead th {\n",
948
       "        text-align: right;\n",
949
       "    }\n",
950
       "</style>\n",
951
       "<table border=\"1\" class=\"dataframe\">\n",
952
       "  <thead>\n",
953
       "    <tr style=\"text-align: right;\">\n",
954
       "      <th></th>\n",
955
       "      <th>Age of the patient</th>\n",
956
       "      <th>Gender of the patient</th>\n",
957
       "      <th>Total Bilirubin</th>\n",
958
       "      <th>Direct Bilirubin</th>\n",
959
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
960
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
961
       "      <th>Aspartate_Aminotransferase</th>\n",
962
       "      <th>Total Protiens</th>\n",
963
       "      <th>ALB Albumin</th>\n",
964
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
965
       "      <th>Result</th>\n",
966
       "    </tr>\n",
967
       "  </thead>\n",
968
       "  <tbody>\n",
969
       "    <tr>\n",
970
       "      <th>0</th>\n",
971
       "      <td>52.0</td>\n",
972
       "      <td>1</td>\n",
973
       "      <td>0.9</td>\n",
974
       "      <td>0.2</td>\n",
975
       "      <td>116.0</td>\n",
976
       "      <td>36.0</td>\n",
977
       "      <td>16.0</td>\n",
978
       "      <td>6.2</td>\n",
979
       "      <td>3.2</td>\n",
980
       "      <td>1.00</td>\n",
981
       "      <td>0</td>\n",
982
       "    </tr>\n",
983
       "    <tr>\n",
984
       "      <th>1</th>\n",
985
       "      <td>36.0</td>\n",
986
       "      <td>1</td>\n",
987
       "      <td>0.7</td>\n",
988
       "      <td>0.2</td>\n",
989
       "      <td>188.0</td>\n",
990
       "      <td>11.0</td>\n",
991
       "      <td>10.0</td>\n",
992
       "      <td>5.5</td>\n",
993
       "      <td>2.3</td>\n",
994
       "      <td>0.71</td>\n",
995
       "      <td>0</td>\n",
996
       "    </tr>\n",
997
       "    <tr>\n",
998
       "      <th>2</th>\n",
999
       "      <td>28.0</td>\n",
1000
       "      <td>0</td>\n",
1001
       "      <td>0.5</td>\n",
1002
       "      <td>0.1</td>\n",
1003
       "      <td>162.0</td>\n",
1004
       "      <td>155.0</td>\n",
1005
       "      <td>108.0</td>\n",
1006
       "      <td>8.1</td>\n",
1007
       "      <td>4.0</td>\n",
1008
       "      <td>0.90</td>\n",
1009
       "      <td>1</td>\n",
1010
       "    </tr>\n",
1011
       "    <tr>\n",
1012
       "      <th>3</th>\n",
1013
       "      <td>49.0</td>\n",
1014
       "      <td>0</td>\n",
1015
       "      <td>0.7</td>\n",
1016
       "      <td>0.2</td>\n",
1017
       "      <td>188.0</td>\n",
1018
       "      <td>13.0</td>\n",
1019
       "      <td>21.0</td>\n",
1020
       "      <td>6.0</td>\n",
1021
       "      <td>3.2</td>\n",
1022
       "      <td>1.10</td>\n",
1023
       "      <td>0</td>\n",
1024
       "    </tr>\n",
1025
       "    <tr>\n",
1026
       "      <th>4</th>\n",
1027
       "      <td>51.0</td>\n",
1028
       "      <td>0</td>\n",
1029
       "      <td>1.0</td>\n",
1030
       "      <td>0.3</td>\n",
1031
       "      <td>75.0</td>\n",
1032
       "      <td>25.0</td>\n",
1033
       "      <td>26.0</td>\n",
1034
       "      <td>5.1</td>\n",
1035
       "      <td>2.9</td>\n",
1036
       "      <td>1.30</td>\n",
1037
       "      <td>1</td>\n",
1038
       "    </tr>\n",
1039
       "  </tbody>\n",
1040
       "</table>\n",
1041
       "</div>"
1042
      ],
1043
      "text/plain": [
1044
       "   Age of the patient  Gender of the patient  Total Bilirubin  \\\n",
1045
       "0                52.0                      1              0.9   \n",
1046
       "1                36.0                      1              0.7   \n",
1047
       "2                28.0                      0              0.5   \n",
1048
       "3                49.0                      0              0.7   \n",
1049
       "4                51.0                      0              1.0   \n",
1050
       "\n",
1051
       "   Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
1052
       "0               0.2                          116.0   \n",
1053
       "1               0.2                          188.0   \n",
1054
       "2               0.1                          162.0   \n",
1055
       "3               0.2                          188.0   \n",
1056
       "4               0.3                           75.0   \n",
1057
       "\n",
1058
       "    Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  Total Protiens  \\\n",
1059
       "0                            36.0                        16.0             6.2   \n",
1060
       "1                            11.0                        10.0             5.5   \n",
1061
       "2                           155.0                       108.0             8.1   \n",
1062
       "3                            13.0                        21.0             6.0   \n",
1063
       "4                            25.0                        26.0             5.1   \n",
1064
       "\n",
1065
       "    ALB Albumin  A/G Ratio Albumin and Globulin Ratio  Result  \n",
1066
       "0           3.2                                  1.00       0  \n",
1067
       "1           2.3                                  0.71       0  \n",
1068
       "2           4.0                                  0.90       1  \n",
1069
       "3           3.2                                  1.10       0  \n",
1070
       "4           2.9                                  1.30       1  "
1071
      ]
1072
     },
1073
     "execution_count": 161,
1074
     "metadata": {},
1075
     "output_type": "execute_result"
1076
    }
1077
   ],
1078
   "source": [
1079
    "df.head()"
1080
   ]
1081
  },
1082
  {
1083
   "cell_type": "code",
1084
   "execution_count": 145,
1085
   "id": "39cc88eb",
1086
   "metadata": {},
1087
   "outputs": [],
1088
   "source": [
1089
    "# Split the rows by target class\n",
1090
    "class_0 = df[df['Result'] == 0]\n",
1091
    "class_1 = df[df['Result'] == 1]\n",
1092
    "\n",
1093
    "# Downsample class 1 to at most 10,000 rows (capped so a re-run on already-reduced data cannot raise)\n",
1094
    "class_1_downsampled = class_1.sample(n=min(10000, len(class_1)), random_state=42)\n",
1095
    "\n",
1096
    "# Combine the two classes; balanced_df must be assigned here because the shuffle below reads it\n",
1097
    "balanced_df = pd.concat([class_0, class_1_downsampled])\n",
1098
    "\n",
1099
    "# Shuffle so the classes are interleaved, then renumber the index from 0\n",
1100
    "df = balanced_df.sample(frac=1, random_state=42).reset_index(drop=True)\n",
1101
    "\n"
1102
   ]
1103
  },
1104
  {
1105
   "cell_type": "code",
1106
   "execution_count": 146,
1107
   "id": "24000c18",
1108
   "metadata": {},
1109
   "outputs": [
1110
    {
1111
     "name": "stdout",
1112
     "output_type": "stream",
1113
     "text": [
1114
      "Number of patients that are male:  10000\n",
1115
      "Number of patients that are female:  7680\n"
1116
     ]
1117
    },
1118
    {
1119
     "data": {
1120
      "image/png": "",
1121
      "text/plain": [
1122
       "<Figure size 640x480 with 1 Axes>"
1123
      ]
1124
     },
1125
     "metadata": {},
1126
     "output_type": "display_data"
1127
    }
1128
   ],
1129
   "source": [
1130
    "# Class balance of the target column 'Result' after downsampling (outcome classes, not gender).\n",
1131
    "sns.countplot(data=df, x = 'Result', label='Count')\n",
1132
    "\n",
1133
    "result_counts = df['Result'].value_counts().sort_index()\n",
1134
    "for result_value, n_patients in result_counts.items():\n",
1135
    "    print(f'Number of patients with Result == {result_value}:', n_patients)"
1136
   ]
1137
  },
1138
  {
1139
   "cell_type": "code",
1140
   "execution_count": 147,
1141
   "id": "ca0ada46-6d91-41c6-b6dc-a11dddf657d2",
1142
   "metadata": {},
1143
   "outputs": [
1144
    {
1145
     "data": {
1146
      "text/html": [
1147
       "<div>\n",
1148
       "<style scoped>\n",
1149
       "    .dataframe tbody tr th:only-of-type {\n",
1150
       "        vertical-align: middle;\n",
1151
       "    }\n",
1152
       "\n",
1153
       "    .dataframe tbody tr th {\n",
1154
       "        vertical-align: top;\n",
1155
       "    }\n",
1156
       "\n",
1157
       "    .dataframe thead th {\n",
1158
       "        text-align: right;\n",
1159
       "    }\n",
1160
       "</style>\n",
1161
       "<table border=\"1\" class=\"dataframe\">\n",
1162
       "  <thead>\n",
1163
       "    <tr style=\"text-align: right;\">\n",
1164
       "      <th></th>\n",
1165
       "      <th>Age of the patient</th>\n",
1166
       "      <th>Gender of the patient</th>\n",
1167
       "      <th>Total Bilirubin</th>\n",
1168
       "      <th>Direct Bilirubin</th>\n",
1169
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
1170
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
1171
       "      <th>Aspartate_Aminotransferase</th>\n",
1172
       "      <th>Total Protiens</th>\n",
1173
       "      <th>ALB Albumin</th>\n",
1174
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
1175
       "      <th>Result</th>\n",
1176
       "    </tr>\n",
1177
       "  </thead>\n",
1178
       "  <tbody>\n",
1179
       "    <tr>\n",
1180
       "      <th>0</th>\n",
1181
       "      <td>52.0</td>\n",
1182
       "      <td>Female</td>\n",
1183
       "      <td>0.9</td>\n",
1184
       "      <td>0.2</td>\n",
1185
       "      <td>116.0</td>\n",
1186
       "      <td>36.0</td>\n",
1187
       "      <td>16.0</td>\n",
1188
       "      <td>6.2</td>\n",
1189
       "      <td>3.2</td>\n",
1190
       "      <td>1.00</td>\n",
1191
       "      <td>0</td>\n",
1192
       "    </tr>\n",
1193
       "    <tr>\n",
1194
       "      <th>1</th>\n",
1195
       "      <td>36.0</td>\n",
1196
       "      <td>Female</td>\n",
1197
       "      <td>0.7</td>\n",
1198
       "      <td>0.2</td>\n",
1199
       "      <td>188.0</td>\n",
1200
       "      <td>11.0</td>\n",
1201
       "      <td>10.0</td>\n",
1202
       "      <td>5.5</td>\n",
1203
       "      <td>2.3</td>\n",
1204
       "      <td>0.71</td>\n",
1205
       "      <td>0</td>\n",
1206
       "    </tr>\n",
1207
       "    <tr>\n",
1208
       "      <th>2</th>\n",
1209
       "      <td>28.0</td>\n",
1210
       "      <td>Male</td>\n",
1211
       "      <td>0.5</td>\n",
1212
       "      <td>0.1</td>\n",
1213
       "      <td>162.0</td>\n",
1214
       "      <td>155.0</td>\n",
1215
       "      <td>108.0</td>\n",
1216
       "      <td>8.1</td>\n",
1217
       "      <td>4.0</td>\n",
1218
       "      <td>0.90</td>\n",
1219
       "      <td>1</td>\n",
1220
       "    </tr>\n",
1221
       "    <tr>\n",
1222
       "      <th>3</th>\n",
1223
       "      <td>49.0</td>\n",
1224
       "      <td>Male</td>\n",
1225
       "      <td>0.7</td>\n",
1226
       "      <td>0.2</td>\n",
1227
       "      <td>188.0</td>\n",
1228
       "      <td>13.0</td>\n",
1229
       "      <td>21.0</td>\n",
1230
       "      <td>6.0</td>\n",
1231
       "      <td>3.2</td>\n",
1232
       "      <td>1.10</td>\n",
1233
       "      <td>0</td>\n",
1234
       "    </tr>\n",
1235
       "    <tr>\n",
1236
       "      <th>4</th>\n",
1237
       "      <td>51.0</td>\n",
1238
       "      <td>Male</td>\n",
1239
       "      <td>1.0</td>\n",
1240
       "      <td>0.3</td>\n",
1241
       "      <td>75.0</td>\n",
1242
       "      <td>25.0</td>\n",
1243
       "      <td>26.0</td>\n",
1244
       "      <td>5.1</td>\n",
1245
       "      <td>2.9</td>\n",
1246
       "      <td>1.30</td>\n",
1247
       "      <td>1</td>\n",
1248
       "    </tr>\n",
1249
       "  </tbody>\n",
1250
       "</table>\n",
1251
       "</div>"
1252
      ],
1253
      "text/plain": [
1254
       "   Age of the patient Gender of the patient  Total Bilirubin  \\\n",
1255
       "0                52.0                Female              0.9   \n",
1256
       "1                36.0                Female              0.7   \n",
1257
       "2                28.0                  Male              0.5   \n",
1258
       "3                49.0                  Male              0.7   \n",
1259
       "4                51.0                  Male              1.0   \n",
1260
       "\n",
1261
       "   Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
1262
       "0               0.2                          116.0   \n",
1263
       "1               0.2                          188.0   \n",
1264
       "2               0.1                          162.0   \n",
1265
       "3               0.2                          188.0   \n",
1266
       "4               0.3                           75.0   \n",
1267
       "\n",
1268
       "    Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  Total Protiens  \\\n",
1269
       "0                            36.0                        16.0             6.2   \n",
1270
       "1                            11.0                        10.0             5.5   \n",
1271
       "2                           155.0                       108.0             8.1   \n",
1272
       "3                            13.0                        21.0             6.0   \n",
1273
       "4                            25.0                        26.0             5.1   \n",
1274
       "\n",
1275
       "    ALB Albumin  A/G Ratio Albumin and Globulin Ratio  Result  \n",
1276
       "0           3.2                                  1.00       0  \n",
1277
       "1           2.3                                  0.71       0  \n",
1278
       "2           4.0                                  0.90       1  \n",
1279
       "3           3.2                                  1.10       0  \n",
1280
       "4           2.9                                  1.30       1  "
1281
      ]
1282
     },
1283
     "execution_count": 147,
1284
     "metadata": {},
1285
     "output_type": "execute_result"
1286
    }
1287
   ],
1288
   "source": [
1289
    "df.head()"
1290
   ]
1291
  },
1292
  {
1293
   "cell_type": "code",
1294
   "execution_count": null,
1295
   "id": "6b6b87ad",
1296
   "metadata": {},
1297
   "outputs": [],
1298
   "source": []
1299
  },
1300
  {
1301
   "cell_type": "markdown",
1302
   "id": "2c629af8-771d-46a6-9689-d8167458d448",
1303
   "metadata": {},
1304
   "source": [
1305
    "## Data Cleaning"
1306
   ]
1307
  },
1308
  {
1309
   "cell_type": "code",
1310
   "execution_count": 148,
1311
   "id": "9a11c005-9add-4b4d-b1a2-b21bc2496d87",
1312
   "metadata": {},
1313
   "outputs": [
1314
    {
1315
     "name": "stdout",
1316
     "output_type": "stream",
1317
     "text": [
1318
      "(12181, 11)\n"
1319
     ]
1320
    }
1321
   ],
1322
   "source": [
1323
    "df = df.drop_duplicates(keep='first')  # drop fully identical rows, keeping the first occurrence\n",
1324
    "print(df.shape)"
1325
   ]
1326
  },
1327
  {
1328
   "cell_type": "markdown",
1329
   "id": "bcfbdec2-af4a-4159-9eba-31573a5d2fba",
1330
   "metadata": {},
1331
   "source": [
1332
    "## Removing Outliers"
1333
   ]
1334
  },
1335
  {
1336
   "cell_type": "code",
1337
   "execution_count": 149,
1338
   "id": "a0bfed13-3cff-43cd-893b-5729743fab71",
1339
   "metadata": {},
1340
   "outputs": [
1341
    {
1342
     "data": {
1343
      "text/plain": [
1344
       "<Axes: ylabel='Aspartate_Aminotransferase'>"
1345
      ]
1346
     },
1347
     "execution_count": 149,
1348
     "metadata": {},
1349
     "output_type": "execute_result"
1350
    },
1351
    {
1352
     "data": {
1353
      "image/png": "",
1354
      "text/plain": [
1355
       "<Figure size 640x480 with 1 Axes>"
1356
      ]
1357
     },
1358
     "metadata": {},
1359
     "output_type": "display_data"
1360
    }
1361
   ],
1362
   "source": [
1363
    "sns.boxplot(df['Aspartate_Aminotransferase'])"
1364
   ]
1365
  },
1366
  {
1367
   "cell_type": "code",
1368
   "execution_count": 150,
1369
   "id": "b4d93eea-ac9d-49f4-a853-146942bdea52",
1370
   "metadata": {},
1371
   "outputs": [
1372
    {
1373
     "data": {
1374
      "text/plain": [
1375
       "2987     4929.0\n",
1376
       "6010     4929.0\n",
1377
       "6619     4929.0\n",
1378
       "11615    4929.0\n",
1379
       "12126    4929.0\n",
1380
       "Name: Aspartate_Aminotransferase, dtype: float64"
1381
      ]
1382
     },
1383
     "execution_count": 150,
1384
     "metadata": {},
1385
     "output_type": "execute_result"
1386
    }
1387
   ],
1388
   "source": [
1389
    "df['Aspartate_Aminotransferase'].sort_values(ascending=False).head(5)"
1390
   ]
1391
  },
1392
  {
1393
   "cell_type": "code",
1394
   "execution_count": 151,
1395
   "id": "b2d8955c-67d3-4030-b2df-c8314c479ca8",
1396
   "metadata": {},
1397
   "outputs": [
1398
    {
1399
     "data": {
1400
      "text/plain": [
1401
       "(12165, 11)"
1402
      ]
1403
     },
1404
     "execution_count": 151,
1405
     "metadata": {},
1406
     "output_type": "execute_result"
1407
    }
1408
   ],
1409
   "source": [
1410
    "df = df.loc[df['Aspartate_Aminotransferase'] <= 3000]  # keep readings up to 3000; larger values are dropped as outliers\n",
1411
    "df.shape"
1412
   ]
1413
  },
1414
  {
1415
   "cell_type": "code",
1416
   "execution_count": 152,
1417
   "id": "52c13b7b-7acc-48b3-a056-41d8245aef16",
1418
   "metadata": {},
1419
   "outputs": [
1420
    {
1421
     "data": {
1422
      "text/plain": [
1423
       "<Axes: ylabel='Aspartate_Aminotransferase'>"
1424
      ]
1425
     },
1426
     "execution_count": 152,
1427
     "metadata": {},
1428
     "output_type": "execute_result"
1429
    },
1430
    {
1431
     "data": {
1432
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkgAAAGKCAYAAAD31r6NAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8fJSN1AAAACXBIWXMAAA9hAAAPYQGoP6dpAAA9iklEQVR4nO3de1jUZf7/8dfADHJSxxMqoiIpmScwLUvtq5umtllmJ1ctLcvd0qytrLW1g5ZlpbmtmVt5+JkdTTe3g7pathoeysxMgQpRUQwRTAYXEGVgfn948VlmQMMPA8PA83FdezX357758B6vq/blfd+f+2NxuVwuAQAAwBDg6wIAAABqGwISAACABwISAACABwISAACABwISAACABwISAACABwISAACABwISAACABwISAACABwISAACAB6uvC/B3OTk5cjqdvi4DAABUgtVqVZMmTX57XA3UUqc5nU4VFRX5ugwAAOBFLLEBAAB4ICABAAB4ICABAAB4qFV7kDZs2KANGzYoOztbkhQVFaVbbrlFPXv2lCSdOXNGy5cv17Zt21RUVKS4uDjdc889stvtxj2OHz+uRYsWKSkpScHBwRowYIDGjBmjwMBAY0xSUpKWL1+u9PR0NWvWTDfffLMGDhxYk18VAADUYhaXy+XydRGldu7cqYCAALVu3Voul0ubN2/WJ598opdeeklt27bVokWLtGvXLk2ePFmhoaFasmSJAgIC9Oyzz0qSSkpK9Oijj8put+uOO+5QTk6OFixYoEGDBmnMmDGSpKysLD3yyCO65pprdPXVVysxMVHLli3TtGnTFB8ff8E1Z2dns0kbAAA/YbPZ1KJFi98cV6uW2Hr37q1LL71UrVu3VmRkpEaPHq3g4GDt27dPBQUF+vLLLzV+/Hh169ZNMTExmjRpkn7++WelpKRIkn744QcdOXJEU6ZMUXR0tHr27KlRo0Zp/fr1xqP4GzZsUEREhMaNG6eoqCgNGzZMV1xxhdasWePLrw4AAGqRWrXEVlZJSYm2b9+u06dPKzY2VgcOHFBxcbG6d+9ujGnTpo2aN2+ulJQUxcbGKiUlRe3atXNbcouPj9fixYuVnp6uDh06aN++fW73kKS4uDgtW7bsvPUUFRW5zRRZLBaFhIQYnwEAQN1R6wLS4cOHNX36dBUVFSk4OFhTp05VVFSU0tLSZLVaFRYW5ja+cePGcjgckiSHw+EWjkr7S/tK/1l6reyYU6dO6cyZMwoKCqqwrtWrV2vVqlVGu0OHDnrxxRcrNU0HAAD8S60LSJGRkZozZ44KCgr09ddf67XXXtPMmTN9XZZGjhyp4cOHG+3SWaPs7GxO0gbqkLS0NL388stG+5FHHlF0dLTvCgLgVVartVKTG7UuIFmtVrVq1UqSFBMTo/3792vt2rXq27evnE6n8vPz3WaRcnNzjVkju92u1NRUt/vl5uYafaX/LL1WdkxISMg5Z4+ks5u6bDZbhX21aJ87gCp44IEHyl0rDUvz58+v6XIA+FCt2qRdkZKSEhUVFSkmJkaBgYHau3ev0ZeRkaHjx48rNjZWkhQbG6vDhw+7BaA9e/YoJCREUVFRkqROnTq53aN0TOk9ANRPnuHoqquuOm8/gLqtVs0gvffee4qPj1fz5s1VWFioLVu2KDk5WdOnT1doaKiuvvpqLV++XOHh4QoNDdXSpUsVGxtrhJu4uDhFRUVpwYIFGjt2rBwOhz744AMNHTrUmP0ZMmSI1q9fr3feeUe/+93vlJiYqO3bt2vatGm+/OoAfCgtLc34PG3aNEVGRkqSbr31VmVkZOiFF14wxrHcBtQPteocpH/84x9KTExUTk6OQkND1b59e40YMUI9evSQ9L+DIrdu3Sqn01nhQZHZ2dlavHixkpKS1KBBAw0YMEBjx44td1DkW2+9pSNHjlT5oEjOQQL8X9nZoYqW0n6rH4D/qOw5SLUqIPkjAhLg/0oD0FVXXaVbb721
XP8HH3ygbdu2SSIgAf7OLw+KBABfSkhIqPB6aTgCUH8QkADUew8//LDxOSMjw62vbLvsOAB1W63apA0AvlB243Xphuy+ffuWmzligzZQf7AHqYrYgwTUHed7lJ+9R0DdwCbtGkJAAuqWtLQ0zZs3z2g//PDDzBwBdQgBqYYQkAAA8B88xQYAAGASAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMCD1dcFlLV69Wrt2LFDv/zyi4KCghQbG6vbb79dkZGRxpgZM2YoOTnZ7ecGDx6sP/7xj0b7+PHjWrRokZKSkhQcHKwBAwZozJgxCgwMNMYkJSVp+fLlSk9PV7NmzXTzzTdr4MCB1f4dAQBA7VerAlJycrKGDh2qiy66SMXFxXr//fc1a9YszZs3T8HBwca4QYMGadSoUUY7KCjI+FxSUqLZs2fLbrdr1qxZysnJ0YIFCxQYGKgxY8ZIkrKysvTCCy/ommuu0ZQpU5SYmKjXX39ddrtd8fHxNfZ9AQBA7VSrltimT5+ugQMHqm3btoqOjtbkyZN1/PhxHThwwG1cgwYNZLfbjf+FhoYafT/88IOOHDmiKVOmKDo6Wj179tSoUaO0fv16OZ1OSdKGDRsUERGhcePGKSoqSsOGDdMVV1yhNWvW1Oj3BQAAtVOtmkHyVFBQIEkKDw93u56QkKCEhATZ7Xb16tVLN998sxo0aCBJSklJUbt27WS3243x8fHxWrx4sdLT09WhQwft27dP3bt3d7tnXFycli1bds5aioqKVFRUZLQtFotCQkKMzwAAoO6otQGppKREy5Yt08UXX6x27doZ1/v376/mzZuradOmOnTokN59911lZGRo6tSpkiSHw+EWjiSpcePGRl/pP0uvlR1z6tQpnTlzxm3JrtTq1au1atUqo92hQwe9+OKLatGihTe+LgAAqEWqHJBycnKUm5urVq1aue0TqqolS5YoPT1dzzzzjNv1wYMHG5/btWunJk2a6JlnnlFmZqZatWrltd/vaeTIkRo+fLjRLp01ys7ONpbuAABA7Wa1Wis1uWE6IH377bd69913dfToUUnSk08+qW7duunkyZOaNWuWbrnlFl1++eWm7r1kyRLt2rVLM2fOVLNmzc47tmPHjpJkBCS73a7U1FS3Mbm5uZJkzCzZ7XbjWtkxISEhFc4eSZLNZpPNZquwz+Vy/eZ3AgAA/sPUJu2dO3dq7ty5atiwoW699Va3vkaNGqlp06batGnTBd/X5XJpyZIl2rFjh5566ilFRET85s+kpaVJkpo0aSJJio2N1eHDh90C0J49exQSEqKoqChJUqdOnbR37163++zZs0exsbEXXDMAAKh7TAWkf/7zn+rSpYueffZZDR06tFx/bGysDh48eMH3XbJkiRISEvTggw8qJCREDodDDodDZ86ckXR2lmjVqlU6cOCAsrKytHPnTr322mu65JJL1L59e0lnN1tHRUVpwYIFSktL0+7du/XBBx9o6NChxgzQkCFDlJWVpXfeeUe//PKL1q9fr+3bt+u6664z88cBAADqGFNLbIcPH9b48ePP2d+4cWOdPHnygu+7YcMGSWcPgyxr0qRJGjhwoKxWq/bu3au1a9fq9OnTatasmfr06aObbrrJGBsQEKBp06Zp8eLFeuKJJ9SgQQMNGDDA7dykiIgITZs2TW+99ZbWrl2rZs2a6d577+UMJAAAIMlkQGrQoIEKCwvP2X/s
2LFyj+ZXxocffnje/ubNm2vmzJm/eZ8WLVro8ccfP++Yrl276qWXXrqg+gAAQP1gaomta9eu2rx5s4qLi8v1ORwObdy4UXFxcVUuDgAAwBdMBaTRo0frxIkTevzxx/X5559LkrHX55FHHpEk3XLLLd6rEgAAoAZZXCafUU9PT9eyZcuUmJjodr1Lly66++67jSfG6rrs7Gy3E7YBAEDtZbPZKnUOkumAVCovL0+ZmZlyuVxq2bKlGjVqVJXb+R0CEgAA/qOyAanKJ2mHh4cbhzUCAADUBaYC0t69e3Xw4EHdcMMNxrUvv/xSK1eulNPpVL9+/TRu3DgFBJja4gQAAOBTphLMypUrjROspbPnIi1atEiNGjVSly5dtG7dOn3yySfeqhEAAKBGmQpIv/zyiy666CKj/dVXXykkJETPPPOMHnroIQ0aNEhfffWV14oEAACoSaYCUmFhoUJCQoz27t27FR8frwYNGkg6+wLZ7Oxs71QIAABQw0wFpObNm2v//v2Szr4fLT09XT169DD68/LyjPeeAQAA+BtTm7T79++vVatW6cSJEzpy5IjCwsJ02WWXGf0HDhxQ69atvVYkAABATTIVkG666SY5nU59//33at68uSZNmqSwsDBJZ2ePkpKS9Pvf/96rhQIAANSUKh8UWd9xUCQAAP6jsgdFclARAACAB9MnaZ85c0bffPONDh48qIKCApWUlLj1WywW3XfffVUuEAAAoKaZCkjZ2dmaOXOmsrOzFRoaqoKCAoWHhxtBqWHDhgoODvZ2rQAAADXC1BLb22+/rYKCAj333HP6+9//Lkl66KGHtHz5co0dO1ZBQUGaPn26VwsFAACoKaYCUlJSkoYMGaKOHTsa71tzuVyy2Wy64YYb1K1bNy1btsybdQIAANQYUwHp9OnTioiIkCTjRO2CggKjPzY2Vj/99JMXygMAAKh5pk/S/vXXXyVJgYGBatq0qfbt22f0HzlyREFBQd6pEAAAoIaZ2qTdrVs37dy5U7feeqskaeDAgfrXv/6lvLw8uVwuffXVVxowYIBXCwUAAKgppgLSjTfeqNTUVBUVFclms2nkyJHKycnRN998o4CAAPXv31/jxo3zdq0AAAA1gpO0q4iTtAEA8B/VdpL26dOnNWHCBH3yySemCgMAAKjtLjggNWjQQIGBgWrQoEF11AMAAOBzpp5i69Onj77++muxOgcAAOoiU3uQkpOTtWTJEjVs2FCDBg1SixYtKnysPyYmxitF1mbsQQIAwH9Udg+SqYA0atSoSo1bsWLFhd7a7xCQAADwH5UNSKYe87/vvvvM/BgAAIBf4DH/KmIGCQAA/1Ftj/l7ysnJUVpamgoLC6t6KwAAgFrBdED69ttv9ec//1n33nuv/vKXvyg1NVWSdPLkST322GPasWOH14oEAACoSaYC0s6dOzV37lw1bNjQeB9bqUaNGqlp06batGmTN+oDAACocaYC0j//+U916dJFzz77rIYOHVquPzY2VgcPHqxycQAAAL5gKiAdPnxYV1555Tn7GzdurJMnT5ouCgAAwJdMBaQGDRqcd1P2sWPHFB4ebrooAAAAXzIVkLp27arNmzeruLi4XJ/D4dDGjRsVFxdX5eIAAAB8wVRAGj16tE6cOKHHH39cn3/+uSRp9+7d+uCDD/TII49Ikm655RbvVQkAAFCDTB8UmZ6ermXLlikxMdHtepcuXXT33XcrKirKKwXWdhwUCQCA//Dqu9gOHTqkFi1aKDQ0tFxfXl6eMjMz5XK51LJlSzVq1MhcxX6KgAQAgP/w6knajz32mHbt2mW0Z86cqb1790qSwsPD1bFjR3Xq1KnehSMAAFA3VSogBQUF6fTp00Y7OTlZubm51VYUAACAL1krMyg6OlqfffaZAgICjGW21NRU2Wy28/5cnz59ql4hAABADavUHqT9+/dr3rx5On78+AXdfMWKFaYL8xfsQQIAwH94dZO2JBUXF+vYsWNyOByaOXOmRo4c
qR49epz3Z7p06VK5av0YAQkAAP9R2YBUqSU2SQoMDFRkZKQiIyM1YMAA9erVS506dapSkQAAALWR6XOQKuJ0OuV0OhUcHOytW9Z6zCABAOA/vD6DVNbWrVu1b98+3Xnnnca1lStX6qOPPpIkXXrppZoyZcoFB6XVq1drx44d+uWXXxQUFKTY2FjdfvvtioyMNMacOXNGy5cv17Zt21RUVKS4uDjdc889stvtxpjjx49r0aJFSkpKUnBwsAYMGKAxY8YoMDDQGJOUlKTly5crPT1dzZo1080336yBAwea+eMAAAB1jKlXjXz22Wduj/3//PPPWrVqleLi4nTddddp9+7dRli6EMnJyRo6dKiee+45PfHEEyouLtasWbPcXoz71ltv6bvvvtPDDz+smTNnKicnRy+//LLRX1JSotmzZ8vpdGrWrFmaPHmyNm3a5LZhPCsrSy+88IK6du2ql156Sdddd51ef/117d6928wfBwAAqGNMBaTMzEy1b9/eaG/ZskV2u12PPvqobr/9dg0dOlTffPPNBd93+vTpGjhwoNq2bavo6GhNnjxZx48f14EDByRJBQUF+vLLLzV+/Hh169ZNMTExmjRpkn7++WelpKRIkn744QcdOXJEU6ZMUXR0tHr27KlRo0Zp/fr1cjqdkqQNGzYoIiJC48aNU1RUlIYNG6YrrrhCa9asMfPHAQAA6hhTS2xOp9PtDKQ9e/YoPj7eWMKKiorShg0bqlxcQUGBpLOndUvSgQMHVFxcrO7duxtj2rRpo+bNmyslJUWxsbFKSUlRu3bt3Jbc4uPjtXjxYqWnp6tDhw7at2+f2z0kKS4uTsuWLTtnLUVFRW57jSwWi0JCQozPAACg7jAVkCIiIrR3714NGjRI+/fvV2Zmpv7whz8Y/bm5uVXeqF1SUqJly5bp4osvVrt27SRJDodDVqtVYWFhbmMbN24sh8NhjCkbjkr7S/tK/1l6reyYU6dO6cyZMwoKCipXz+rVq7Vq1Sqj3aFDB7344ouV2ugFAAD8i6mANHjwYC1btkxHjhzRr7/+qqZNm6pXr15G/88//6y2bdtWqbAlS5YoPT1dzzzzTJXu4y0jR47U8OHDjXbprFF2draxdAcAAGo3q9VafU+xXXvttbLZbPr+++8VExOjESNGGLMueXl5cjgcuuaaa8zcWtLZcLRr1y7NnDlTzZo1M67b7XY5nU7l5+e7zSLl5uYas0Z2u12pqalu9yt9b1zZMZ7vksvNzVVISEiFs0fS2ccCz/VqFS+elAAAAGoBUwFJOjuLNHjw4HLXw8PD9cILL5i6p8vl0tKlS7Vjxw7NmDFDERERbv0xMTEKDAzU3r17dcUVV0iSMjIydPz4ccXGxkqSYmNj9dFHHyk3N9dYRtuzZ49CQkIUFRUlSerUqZO+//57t3vv2bPHuAcAAKjfTD3FVl2WLFmihIQEPfjggwoJCZHD4ZDD4dCZM2ckSaGhobr66qu1fPlyJSYm6sCBA1q4cKFiY2ONcBMXF6eoqCgtWLBAaWlp2r17tz744AMNHTrUmAEaMmSIsrKy9M477+iXX37R+vXrtX37dl133XU+++4AAKD2MH2S9u7du/Xll18qKytL+fn55ZaZLBaLXn311Qu652233Vbh9UmTJhmHOJYeFLl161Y5nc4KD4rMzs7W4sWLlZSUpAYNGmjAgAEaO3ZsuYMi33rrLR05cqRKB0VykjYAAP7D6y+rLeuTTz7Ru+++K7vdrosuush4DN/TpEmTLvTWfoeABACA/6jWV42sXbtW3bp10+OPPy6r1fQ2JgAAgFrJ1B6k/Px8XXHFFYQjAABQJ5kKSB07dlRGRoa3awEAAKgVTAWku+++Wzt27NCWLVu8XQ8AAIDPmdqkPXXqVOXl5SknJ0fBwcFq1qyZAgLcs5bFYtGcOXO8VmhtxSZtAAD8R7Vu0g4PD1fDhg3VunVrMz8OAABQq5k+BwlnMYMEAID/qOwMUq06SRsAAKA2
qNJz+k6nUxkZGSooKFBJSUm5/i5dulTl9gAAAD5hKiCVlJTovffe04YNG3T69OlzjluxYoXpwgAAAHzFVEBavXq1Pv30Uw0ePFidO3fWggULNHbsWIWGhmrDhg2yWCwaO3ast2sFAACoEab2IG3atElXXnmlJk6cqPj4eElSTEyMBg8erOeff16SlJiY6LUiAQAAapKpgHTixAl169ZN0tnd4JJ05swZSZLVatVVV12lhIQEL5UIAABQs0wFpPDwcBUWFkqSgoODFRISoqysLLcxeXl5Va8OAADAB0ztQerQoYNSU1ONdteuXbVmzRpFR0fL5XJp3bp1io6O9laNAAAANcrUDNKgQYPkdDqNAxJHjx6tgoICPf3005oxY4ZOnTqlO+64w6uFAgAA1BSvnaRdUFCgpKQkBQQE6OKLL1Z4eLg3blvrcZI2AAD+o7InaV9wQDpz5ozef/99de3aVb179zZdYF1BQAIAwH9U26tGgoKC9MUXXyg3N9dUYQAAALWdqT1IMTExSk9P93YtAAAAtYKpgDR+/Hht3bpVGzduVHFxsbdrAgAA8ClTm7SnTp2q//73v3I4HLLZbGratKmCgoLcb2yxaM6cOV4rtLZiDxIAAP6jsnuQTJ2DFB4eroYNGyoyMtLMjwMAANRqXnvMv75iBgkAAP9RbU+xSdLmzZvLvVqkrOzsbG3evNnMrQEAAHzOVEBauHChUlJSztm/b98+LVy40HRRAAAAvmQqIP2WwsJCBQYGVsetAQAAql2lN2kfOnRIaWlpRvvHH3+s8BH//Px8ff7552rdurVXCgQAAKhpld6kvXLlSq1atapSNw0NDdX999+vXr16Vak4f8AmbQAA/IfX38WWk5OjnJwcuVwu/fWvf9Vtt92mnj17lhsXHBysli1b1pslNgISAAD+o9peVitJycnJatOmjRo3bmyquLqEgAQAgP+o1oMiu3TpYnw+cuSIsrOzJUktWrRQVFSUmVsCAADUGqYCkiR9++23Wr58ebnzkCIiIjR+/Hj17t27ysUBAAD4gqkltl27dumll15SixYtNGjQIGPW6MiRI9q4caOys7M1bdo0xcfHe7veWoclNqBuSUtL07x584z2ww8/rOjoaN8VBMCrqnUP0vTp0+V0OjVz5kwFBwe79RUWFuqpp56SzWbTc889d6G39jsEJKDueOCBB87ZN3/+/BqsBEB1qdZXjRw+fFgDBgwoF46ks0+xDRw4UIcPHzZzawDwCc9wdNVVV523H0DdZmoPks1mU15e3jn78/LyZLPZTBcFADWp7CG406ZNU2RkpCTp1ltvVUZGhl544QVjHMttQP1gagapW7duWrt2bYXvY9u3b5/WrVun7t27V7k4AKgJZfcclYajitplxwGo20zNIN1+++2aPn26nnzySXXs2NH4D0hGRoZSU1PVuHFjjR071quFAkB181xWK9W3b19t27athqsB4EumZpAiIiI0d+5cXXvttcrPz9e2bdu0bds25efn6/e//73mzJmjiIgIb9cKANUqISGhwuuEI6D+MX0OUuPGjXXnnXd6sRQA8I2HH37YWD7LyMhwW1bLyMhwGwegfjAdkACgrii78bp0Q3ZFy2ps0AbqD1PnIElnD4XctGmTjh07pvz8fHnexmKx6KmnnvJKkbUZ5yABdQfnIAF1X7W+i+2rr77SwoULFRgYqMjISIWHh5cbYzJ3AYDPzJ8/n5O0AUgyOYM0ZcoUhYeH6/HHH1ejRo2qoy6/wQwSAAD+o1pnkE6cOKHrr7++3ocjAHXP4cOHNXfuXKM9depUtWvXzocVAfAFUwGpffv2OnHihLdrUXJysj755BMdPHhQOTk5mjp1qi6//HKj/7XXXtPmzZvdfiYuLk7Tp0832nl5eVq6dKm+++47WSwW9enTR3fddZfba1EOHTqkJUuWaP/+/WrUqJGGDRumESNGeP37APAvFe1BKg1L7EEC6hdT5yCNGzdO//nPf/Tzzz97
tZjTp08rOjpad9999znHxMfH68033zT+9+CDD7r1z58/X+np6XriiSc0bdo0/fjjj3rjjTeM/oKCAs2aNUvNmzfXCy+8oNtvv10rV67UF1984dXvAsC/lA1HFotFv/vd72SxWCrsB1D3mZpB+vjjjxUaGqqnnnpKUVFRat68uQIC3LOWxWLRY489dkH37dmzp3r27HneMVarVXa7vcK+I0eOaPfu3Zo9e7YuuugiSdKECRM0e/Zs3XHHHWratKm2bNkip9OpSZMmyWq1qm3btkpLS9Nnn32mwYMHX1C9AOqGsi/Xfvzxx9W6dWtJ0siRI3X06FHNnj3bGMdyG1A/mApIpf8xad68uQoLC3XkyJFyY8r+zcubkpOTdc899ygsLEzdunXTH/7wBzVs2FCSlJKSorCwMCMcSVL37t1lsViUmpqqyy+/XCkpKbrkkktktf7vq8fFxenjjz9WXl5ehU/kSVJRUZHbZmyLxaKQkBDjMwD/VbqMZrFYKnwXm8Vikcvl0ty5c/Xqq6/6okQANcxUQHrttde8XUelxMfHq0+fPoqIiFBmZqbef/99Pf/883ruuecUEBAgh8NRbuN4YGCgwsPD5XA4JEkOh6Pca1BKZ6QcDsc5A9Lq1au1atUqo92hQwe9+OKLldoJD8A/XH/99cbsUVnXXnut1q5dK0kV9gOoe/zqJO1+/foZn9u1a6f27dtrypQpSkpKUvfu3av1d48cOVLDhw832qWzRtnZ2XI6ndX6uwHUjE8//VTXXHNNuevr1q0zPh89erQmSwLgZVar1XuP+R8/flzS2SW1su3fUjq+urRs2VINGzZUZmamunfvLrvdrpMnT7qNKS4uVl5enjFLZLfbjdmkUqXtc+1tks6em2Cz2Srs41BMwL9NnTpVc+fOlcvlUkZGhtss0dGjR41/x6dOncq/70A9UamANHnyZEnSu+++K6vVarR/y4oVK8xXVgm//vqr8vLy1KRJE0lSbGys8vPzdeDAAcXExEiSEhMT5XK51LFjR2PM+++/L6fTaexD2rNnzzlPBAdQ95XdeD179mxZLBZdddVVSkhIcAtEbNAG6o9KBaT77rtP0tn9PGXb3lZYWKjMzEyjnZWVpbS0NIWHhys8PFwrV65Unz59ZLfbdezYMb3zzjtq1aqV4uLiJElRUVGKj4/XG2+8oYkTJ8rpdGrp0qXq27evmjZtKknq37+/Vq5cqddff10jRoxQenq61q1bp/Hjx1fLdwLgH+bPn288yu9yufTVV1+V6wdQf5h+WW11SEpK0syZM8tdHzBggCZOnKg5c+bo4MGDys/PV9OmTdWjRw+NGjXKbWksLy9PS5YscTsocsKECec8KLJhw4YaNmyYbrzxRlM186oRoG7hJG2gbqvsq0ZqVUDyRwQkAAD8R7W+i006uxz2zTff6NixY8rPzy+3cdFiseiuu+4ye3sAAACfMRWQ9u7dq3nz5qmgoOC84whIAADAH5kKSEuWLFFwcLAeeughdezYUaGhod6uCwAAwGdMvaz2+PHjuuGGG9SjRw/CEQAAqHNMBaT27dv/5vIaAACAvzIVkMaOHasNGzZo//793q4HAADA50w/5r9t2za9+uqratOmjZo1a6aAAPesZbFY9Nhjj3mlyNqMx/wBAPAf1fqY/9dff61XX31VJSUl+vXXX3Xq1KlyY0pf5goAAOBvTAWk9957T5GRkXrkkUcUGRnp7ZoAAAB8ytQepJycHA0ZMoRwBAAA6iRTAemiiy7S8ePHvV0LAABArWAqIE2YMEHbtm3Ttm3bvF0PAACAz5l6im3q1KnKy8tTTk6OgoODz/kU25w5c7xWaG3FU2wAAPiPan2KLTw8XA0bNlTr1q3N/DgAAECtZvocJJzFDBIAAP6jsjNIpvYg/ZaTJ0/q3//+d3XcGgAAoNqZWmKryOnTp/Xtt98qISFBe/fuVXFxsYYNG+at2wMAANSYKgWkkpIS/fDDD0pISNDOnTt1+vRp
tWrVStdee6169erlrRoBAABqlKmAlJKSoi1btmj79u06efKkWrRoodOnT+tPf/qTrr76am/XCAAAUKMqHZAyMjKUkJCgLVu2KCsrSy1bttSgQYPUr18/2Ww2PfjggwoLC6vOWgEAAGpEpQPSQw89JLvdrn79+qlv377q2LGj0ZeZmVktxQEAAPhCpZ9is1qtys/P1/Hjx/Xrr7/yaDsAAKizKj2DtGjRIm3fvl0JCQmaN2+egoODddlll6l///6VOk8AAADAX1Q6IIWGhmrQoEEaNGiQjh8/ri1btmjr1q1KSEhQcHCwJOmXX36R0+mU1eq10wMAAABqXJVP0j506JASEhK0detWnThxQsHBwerevbt69+6tgQMHeqnM2ouTtAEA8B+VPUnbq68aSUpKUkJCgr755hsVFBRoxYoV3rp1rUVAAgDAf/gkIJVyOp3atWuXLr/8cklSYWGhPvvsM/3f//2fIiIivP3rfIqABACA//Dpu9isVqsRjqSzAWnlypXKysqqjl8HAADgVdUSkAAAAPwZAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMCDVw6KLCgoUHBwsAICKs5bJSUl+vXXX2W322Wz2ar662oVDooEAMB/VPtBkfv379dzzz2n22+/XRMmTFBycrIk6eTJk3rppZeUlJT0v18SEKAWLVrUuXAEAADqJlMB6eeff9ZTTz2lzMxMXXXVVSo7CdWoUSMVFBTo888/91qRAAAANclUQHr//ffVpk0bzZs3T6NHjy7X37VrV6Wmpla5OAAAAF+wmvmh/fv3a/To0bLZbCosLCzX37RpUzkcjqrWBgA1buPGjfr444+N9ogRIzRo0CAfVgTAF0zNIAUGBup8e7tPnDih4OBg00UBgC888MADbuFIkj7++GM98MADPqoIgK+YCkidOnXS119/XWFfYWGhNm3apC5dulSpMACoSZ4hqEmTJuftB1C3mVpiu+222zRjxgzNnj1b/fr1kySlpaXp2LFj+vTTT3Xy5EndfPPNXi0UAKrLxo0bjc/33HOPevToYbT37NmjxYsXG+NYbgPqB9PnICUmJmrRokXKzMx0u96yZUvde++99WYGiXOQAP9XdnZo/vz5F9wPwH9U9hwkUzNIktStWzf9/e9/V1pamo4ePSqXy6WWLVsqJiZGFovF7G0BwGc8l9VKNW7cWLm5uTVcDQBfMhWQNm/erEsuuUQRERGKjo5WdHS0W39WVpZ+/PFHDRgwwBs1AkCNyMnJqfA64Qiof0wFpIULF2rKlCmKiIiosD81NVULFy684ICUnJysTz75RAcPHlROTo6mTp2qyy+/3Oh3uVz68MMPtXHjRuXn56tz586655571Lp1a2NMXl6eli5dqu+++04Wi0V9+vTRXXfd5fZU3aFDh7RkyRLt379fjRo10rBhwzRixIgL/FMAUFeMGDHCeHptz5495fYglR0HoH4wvcR2PoWFhQoMDLzgnzt9+rSio6N19dVXa+7cueX6P/74Y61bt06TJ09WRESEVqxYoeeee07z5s1TUFCQpLP7A3JycvTEE0+ouLhYCxcu1BtvvKEHH3xQ0tn3xs2aNUvdu3fXxIkTdfjwYf3jH/9QWFiYBg8eXLUvDsAvDRo0yAhIpRuyK1pWY4M2UH9UOiAdOnRIaWlpRvvHH39UcXFxuXH5+fn6/PPP3WZ1Kqtnz57q2bNnhX0ul0tr167VTTfdpMsuu0ySdP/992vixIn69ttv1a9fPx05ckS7d+/W7NmzddFFF0mSJkyYoNmzZ+uOO+5Q06ZNtWXLFjmdTk2aNElWq1Vt27ZVWlqaPvvsMwISUI/Nnz/fbTO2ZzhiczZQv1Q6IO3YsUOrVq0y2l988YW++OKLCseGhobq/vvvr3p1ZWRlZcnhcLhNfYeGhqpjx45KSUlRv379lJKSorCwMCMcSVL37t1lsViUmpqqyy+/XCkpKbrkkktktf7vq8fFxenj
jz9WXl6ewsPDK/z9RUVFbk+rWSwWhYSEGJ8B+L9XX31VGzdu1L/+9S/j2o033sjMEVAPVTogDR48WL169ZLL5dJf//pX3XbbbRXO9gQHB6tly5amltjOp/TVJY0bN3a73rhxY6PP4XCoUaNGbv2BgYEKDw93G+O5d8putxt95wpIq1evdguIHTp00IsvvlipRwUB+I+4uDi3gBQXF2dqRhyAf6t0QGrSpInxCOzTTz+tNm3alAsrddnIkSM1fPhwo106a5SdnS2n0+mrsgB40ZQpU8pdmzlzpqSzs0sA/J/Vaq2+c5B8cQhk6SxPbm6u21klubm5xjEDdrtdJ0+edPu54uJi5eXlGT9vt9vLvUi3tF06piI2m002m63CPpNnbQKoRX7rVSJTpkxhHxJQj5h+is3hcOjLL7/UgQMHdOrUKZWUlLj1WywWPfXUU1UusFRERITsdrv27t1rBKKCggKlpqZqyJAhkqTY2Fjl5+frwIEDiomJkXT2xG+Xy6WOHTsaY95//305nU5jH9KePXsUGRl5zuU1AHVbYmJiuWs9evRwe8S/dFy3bt1qqiwAPmQqIB06dEgzZszQmTNnFBkZqcOHDysqKkoFBQU6ceKEWrZsqWbNml3wfQsLC91eXZKVlaW0tDSFh4erefPm+v3vf6+PPvpIrVu3VkREhD744AM1adLEeKotKipK8fHxeuONNzRx4kQ5nU4tXbpUffv2VdOmTSVJ/fv318qVK/X6669rxIgRSk9P17p16zR+/HgzfxQA6oA333zT+PzII4+offv2RvvQoUN6+eWXjXHMIgH1g6mA9N577yk4OFhz5sxRUFCQJk6cqLvuukvdunXT9u3btXjxYlNvvt6/f7+x3i9Jy5cvlyQNGDBAkydP1ogRI3T69Gm98cYbKigoUOfOnfXXv/7VOANJOjtNvmTJEj3zzDPGQZETJkww+kNDQ/XEE09oyZIlmjZtmho2bKibb76ZR/wBSJJbOKqoDaB+MBWQfvrpJ40YMULNmzdXXl6eJBlLbFdeeaV++uknvf32225hpzK6du2qDz/88Jz9FotFo0aN0qhRo845Jjw83DgU8lzat2+vZ5555oJqAwAA9UeAmR9yuVzGE2yhoaEKCAgwgpIktWvXTgcOHPBOhQBQg8oeiFtRG0D9YGoGKSIiQllZWZKkgIAARUREaO/everbt68k6eeff1ZYWJj3qgSAanTvvffq9ddflyTNmzdPktS5c2f99NNP5cYBqB9MBaQePXro66+/1ujRoyVJ11xzjd5++21lZWXJ5XIpKSlJ119/vVcLBYDqUtHRJZ7h6FzjANRNFpeJQ3zy8vKUlZWldu3ayWq1yuVy6aOPPtI333yjgIAAXXrppbrpppvcXudRV2VnZ7u9ggSA/zrfwyU8vQbUDTabrVIHRZoKSPgfAhJQtyQnJxvLbdLZZTVmjoC6o7IBydQm7ZkzZ2rv3r3n7E9MTLzgJ9gAoDZYv379edsA6gdTASk5OVm5ubnn7D958qSSk5NNFwUAvvDAAw/o4MGDbtcOHjxo6lw3AP7NVED6LZmZmQoJCamOWwNAtfitEERIAuqXSu+i3rRpkzZv3my0P/roI23cuLHcuIKCAh06dEg9e/b0ToUAUM3+9re/lbtW0bvY/va3v+mhhx6qqbIA+FClA9KZM2d08uRJo33q1ClZLBa3MRaLRQ0aNNA111yjW265xXtVAkA1Krusdr53sXkuvwGouyodkIYMGaIhQ4ZIkiZPnqy77rpLvXv3rrbCAMAXeBcbAMnEHqQzZ87osssuq45aAAAAaoULPskxKChIGzduVNu2baujHgDwqeeff16ZmZlGu1WrVj6sBoCvmHqKLSYmRunp6d6uBQB8ouw71sqGI88272ID6g9TAWn8+PHaunWrNm7cqOLiYm/XBAA1qrInZXOiNlB/mHpZ2sKFCxUQEKA333xT/+///T81bdpUQUFBbmMsFovmzJnjlSIBoDrt
3r270uPi4+OrtRYAtYOpGaTw8HBFRkaqS5cu6tSpk5o1a6aGDRu6/S88PNzbtQJAtVi6dKnx2XMZrWy77DgAdZupGaQZM2Z4uQwA8L3Y2Fh16dJF8+fPd7t+0UUXaf/+/T6qCoAvVMurRgDAH6WkpFR4nXAE1D+mZpBKOZ1OZWRkqKCgQCUlJeX62dAIwB9MmDDBWD5bs2aN1q9fb/QNHTrUbRyA+sFUQCopKdF7772nDRs26PTp0+cct2LFCtOFAUBNKbvxumw48myzQRuoP0wFpNWrV+vTTz/V4MGD1blzZy1YsEBjx45VaGioNmzYIIvForFjx3q7VgAAgBphag/Spk2bdOWVV2rixInG36hiYmI0ePBgPf/885KkxMRErxUJANVp165dxufrrrvOra9su+w4AHWbqRmkEydOaMSIEZIkm80m6ew72iTJarXqqquu0po1azRmzBgvlQkA1WfZsmXG56FDh7rtO5LO7ksqHXfppZfWZGkAfMT0OUiFhYWSpODgYIWEhCgrK8ttTF5eXtWrA4Aa1LFjxwqvd+jQoYYrAeBrpmaQOnTooNTUVKPdtWtXrVmzRtHR0XK5XFq3bp2io6O9VSMA1Iiy/10r6+DBgzVcCQBfMzWDNHjwYDmdThUVFUmSRo8erYKCAj399NOaMWOGTp06pTvuuMOrhQJAdbnzzjuNz55nHpVtlx0HoG6zuFwulzduVFBQoKSkJAUEBOjiiy+uN68ayc7ONoIiAP/1wAMPuLU7dOhQbubI84RtAP7HZrOpRYsWvznOawGpviIgAXWHZ0gqi3AE1A01EpC+++47ff/998rOzpYktWjRQj179lSvXr3M3tLvEJCAumXXrl1uT7XdeeedPLkG1CGVDUimNmnn5+dr7ty5Sk5OVkBAgJo0aSJJ2rNnjz7//HNdcsklevTRRxUWFmbm9gDgMydPnjxvG0D9YGoGacGCBdqyZYvGjBmjIUOGKDg4WJJUWFioDRs26L333lP//v11//33e73g2oYZJKDuYIkNqPsqO4Nk6im2b7/9VkOGDNENN9xghCPp7JlIN9xwg4YMGaJvv/3WzK0BwCfOF44q0w+gbjEVkKxWqyIjI8/ZHxkZKavV1OodANS4TZs2lbvWt2/fSo0DUDeZCkh9+vTR119/rZKSknJ9xcXF2r59u6644ooqFwcANeGjjz4yPo8cOVKStG3bNre25zgAdZupPUg//vijli5dKpvNpsGDB6tVq1aSpKNHj+qLL76Q0+nU3XffraCgILefi4mJ8U7VtQh7kAD/dyHLZ+xFAvxbtT7FNmPGDOOz56mzpZ5++uly11asWGHm1wFAjevcubN++uknX5cBwEdMBaT77rvP23UAQK3Qp08fffPNN0Y4Km0DqF9MBaSBAwd6uQwAqB08wxDhCKifTG3SPpdjx47pyJEj3rwlAABAjTM1g7R27VqlpKToz3/+s3Ft4cKF2rx5s6SzL3l8/PHH1bhxY68UCQAAUJNMzSB9+eWXbuFn9+7d2rx5swYPHqwJEybo2LFjWrlypdeKBICa0rVr1/O2AdQPpmaQsrOz1aZNG6O9fft2RUREaOLEiZIkh8Ohr776yjsVAkANSkpKOm8bQP3glT1Ie/bsUXx8vNFu0aKFHA6HN24NAABQ40wFpNatWxvvWtu9e7dOnDihnj17Gv0nTpxQWFiYdyoEgBrUo0eP87YB1A+mltiuv/56zZ8/X3fddZcKCwsVFRWluLg4oz8xMVHR0dHeqhEAasyePXvO2wZQP5gKSP369VPDhg21a9cuhYWFaejQoQoMDJQk5eXlKTw8XP/3f//n1UJLffjhh1q1apXbtcjISL3yyiuSpDNnzmj58uXatm2bioqKFBcXp3vuuUd2u90Yf/z4cS1atEhJSUkKDg7WgAEDNGbMGOM7AACA+s1UQJLOTjt7Tj0XFRVp7969Kikp0d///ne9++67VS6wIm3bttWTTz5ptAMC/rdS+NZb
b2nXrl16+OGHFRoaqiVLlujll1/Ws88+K0kqKSnR7NmzZbfbNWvWLOXk5GjBggUKDAzUmDFjqqVeAADgX0wHpFIul0t79+7Vli1btGPHDp06dUqNGjVSv379vFFfhQICAtxmhEoVFBToyy+/1IMPPqhu3bpJkiZNmqSHHnpIKSkpio2N1Q8//KAjR47oySeflN1uV3R0tEaNGqV3331Xt912m6zWKv+RAAAAP2c6DRw4cEAJCQnatm2b8cRav379NGzYMHXq1EkWi8VbNZaTmZmpP/3pT7LZbIqNjdWYMWPUvHlzHThwQMXFxerevbsxtk2bNmrevLkRkFJSUtSuXTu3gBUfH6/FixcrPT1dHTp0qPB3FhUVqaioyGhbLBaFhIQYnwHULb1799bOnTvLXeffd6B+uKCAdOzYMSUkJGjLli06evSomjZtqv79+6tjx4565ZVX1KdPH8XGxlZXrZKkTp06adKkSYqMjFROTo5WrVqlp556Si+//LIcDoesVmu5J+gaN25shDiHw1Fu9qn00MvzHU2wevVqt71PHTp00IsvvqgWLVp45XsBqF0qCkfS2ad4AdR9lQ5I06dPV2pqqho1aqQ+ffro3nvvVefOnSWdndGpKWWPE2jfvr0RmLZv366goKBq+70jR47U8OHDjXbp3yKzs7PldDqr7fcCqFmjRo3SihUrztk+evSoL8oC4CVWq7VSkxuVDkipqamKiIjQuHHjdOmll9aaJ77CwsIUGRmpzMxM9ejRQ06nU/n5+W6zSLm5ucaskd1uV2pqqts9cnNzjb5zsdlsstlsFfa5XK6qfQkAtUbZMFRRm3/fgfqh0gdFTpgwQXa7XXPnztUf//hHvfnmm0pMTPT5fywKCwuVmZkpu92umJgYBQYGau/evUZ/RkaGjh8/biz9xcbG6vDhw0Yoks6ecxISEqKoqKgarx+A791///1eHQfA/1V6Bmno0KEaOnSosrKyjH1IGzdulN1uN17mWBObF5cvX67evXurefPmysnJ0YcffqiAgAD1799foaGhuvrqq7V8+XKFh4crNDRUS5cuVWxsrBGQ4uLiFBUVpQULFmjs2LFyOBz64IMPNHTo0HPOEAGo2yq7d7K691gCqD0sripMAXk+yda4cWP16tVLvXv3Vvfu3atlT9Arr7yiH3/8Uf/973/VqFEjde7cWX/4wx/UqlUrSf87KHLr1q1yOp0VHhSZnZ2txYsXKykpSQ0aNNCAAQM0duxYU8uG2dnZbk+3AfA/P/30kxYuXPib4yZNmmTsvQTgn2w2W6X2IFUpIJUqKSlRYmKiEhIStGPHDhUWFiooKEhvv/12VW9d6xGQAP/3wAMPGJ8HDhyoTZs2nbM9f/78GqwMgLdVNiB55VTEgIAA42TtiRMnaufOndqyZYs3bg0ANapsGKqoDaB+8Pqx0UFBQerbt6/69u3r7VsDAADUiEo/xQYAddWAAQO8Og6A/yMgAaj3Nm/e7NVxAPwfAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMADAQkAAMCD1dcFAJDOnDmjY8eO+boMVEJ6erqvS6iXWrZsqaCgIF+XgXrE4nK5XL4uwp9lZ2erqKjI12XAz6Wnp2vOnDm+LgOotR599FG1bdvW12WgDrDZbGrRosVvjiMgVREBCd7ADJJv7du3T//6179+c9yNN96oTp06VX9BKIcZJHhLZQMSS2xALRAUFMTfjn2obdu2lQpIV199dfUXA6BWYJM2AEiaP39+lfoB1C0ssVURS2xA3ZKcnKzXX3/daN97773q0qWLDysC4E2VXWJjBgkAyujSpYse
ffRRSWc3BhOOgPqJgAQAAOCBgAQAAOCBp9jquRMnTig/P9/XZQC1SumRCxy9ALgLCwtT06ZNfV1GjWCTdhX58ybtEydO6LlZs1TkdPq6FACAH7BZrZr+xBN+HZI4Bwm/KT8/X0VOp663nlZzS4mvywEA1GLHXQH61Hn2/zv8OSBVFgEJam4pUasAJhIBAOdRUr/+Is0mbQAAAA/MIEHHXRapfv3FAABwgY67LL4uoUbV64D073//W59++qkcDofat2+vCRMmqGPHjr4uq8Z96gz2dQkAANQq9TYgbdu2TcuXL9fEiRPVqVMnrVmzRs8995xeeeUVNW7c2Nfl1airAs/IziZtAMB5OFwBSigO8nUZNabeBqTPPvtMgwYN0u9+9ztJ0sSJE7Vr1y795z//0Y033ujb4mpIWFiYbFarEnjKHwBQCTarVWFhYb4uo0bUy4DkdDp14MABtyAUEBCg7t27KyUlpcKfKSoqcjvvyGKxKCQkxPjsj5o1a6YnnnySgyJrgczMTC1fvtzXZQC11rhx49SqVStfl1Hv1aeDIutlQDp58qRKSkpkt9vdrtvtdmVkZFT4M6tXr9aqVauMdocOHfTiiy9W6rCp2qx169a+LgGSTp8+rW7duvm6DKDWatOmjRo0aODrMlCP1MuAZMbIkSM1fPhwo106a5SdnS0nJ1HDC0pnJAGUd+LECV+XgDrCarVykva5NGrUSAEBAXI4HG7XHQ5HuVmlUjabTTabrcI+3tYCAEDdUi8PirRarYqJiVFiYqJxraSkRImJiYqNjfVhZQAAoDaolzNIkjR8+HC99tpriomJUceOHbV27VqdPn1aAwcO9HVpAADAx+ptQOrbt69OnjypDz/8UA6HQ9HR0frrX/96ziU2AABQf1hcbKCpkuzsbLfH/wEAQO1ls9kqtUm7Xu5BAgAAOB8CEgAAgAcCEgAAgAcCEgAAgAcCEgAAgAcCEgAAgAcCEgAAgAcCEgAAgId6e5K2t1it/BECAOAvKvv/25ykDQAA4IElNgDwcOrUKf3lL3/RqVOnfF0KAB8hIAGAB5fLpYMHD4oJdqD+IiABAAB4ICABAAB4ICABgAebzaZbbrlFNpvN16UA8BGeYgMAAPDADBIAAIAHAhIAAIAHAhIAAIAHAhIAAIAHAhIAAIAHAhIAAIAHAhIAAIAHAhIAAICH/w+mMHkWYan/ngAAAABJRU5ErkJggg==",
1433
      "text/plain": [
1434
       "<Figure size 640x480 with 1 Axes>"
1435
      ]
1436
     },
1437
     "metadata": {},
1438
     "output_type": "display_data"
1439
    }
1440
   ],
1441
   "source": [
1442
    "sns.boxplot(df.Aspartate_Aminotransferase)"
1443
   ]
1444
  },
1445
  {
1446
   "cell_type": "code",
1447
   "execution_count": 153,
1448
   "id": "041012fb-531e-458a-8560-7ed33f16ac3b",
1449
   "metadata": {},
1450
   "outputs": [
1451
    {
1452
     "data": {
1453
      "text/plain": [
1454
       "10996    2946.0\n",
1455
       "8375     2946.0\n",
1456
       "5602     2946.0\n",
1457
       "6813     2946.0\n",
1458
       "5133     2946.0\n",
1459
       "Name: Aspartate_Aminotransferase, dtype: float64"
1460
      ]
1461
     },
1462
     "execution_count": 153,
1463
     "metadata": {},
1464
     "output_type": "execute_result"
1465
    }
1466
   ],
1467
   "source": [
1468
    "df.Aspartate_Aminotransferase.sort_values(ascending=False).head()"
1469
   ]
1470
  },
1471
  {
1472
   "cell_type": "code",
1473
   "execution_count": 154,
1474
   "id": "b0c9c216-bdee-4ed3-a100-5e621276c8fa",
1475
   "metadata": {},
1476
   "outputs": [
1477
    {
1478
     "data": {
1479
      "text/plain": [
1480
       "(12149, 11)"
1481
      ]
1482
     },
1483
     "execution_count": 154,
1484
     "metadata": {},
1485
     "output_type": "execute_result"
1486
    }
1487
   ],
1488
   "source": [
1489
    "df = df[df.Aspartate_Aminotransferase <=2500 ]\n",
1490
    "df.shape"
1491
   ]
1492
  },
1493
  {
1494
   "cell_type": "code",
1495
   "execution_count": 155,
1496
   "id": "1a1b9373-19dc-4416-95a6-c1a9d3900f49",
1497
   "metadata": {},
1498
   "outputs": [
1499
    {
1500
     "data": {
1501
      "text/plain": [
1502
       "False"
1503
      ]
1504
     },
1505
     "execution_count": 155,
1506
     "metadata": {},
1507
     "output_type": "execute_result"
1508
    }
1509
   ],
1510
   "source": [
1511
    "df.isnull().values.any()"
1512
   ]
1513
  },
1514
  {
1515
   "cell_type": "code",
1516
   "execution_count": 156,
1517
   "id": "3194fba5-d50d-4f53-9deb-f3ca78b51a7e",
1518
   "metadata": {},
1519
   "outputs": [
1520
    {
1521
     "data": {
1522
      "text/plain": [
1523
       "(12149, 11)"
1524
      ]
1525
     },
1526
     "execution_count": 156,
1527
     "metadata": {},
1528
     "output_type": "execute_result"
1529
    }
1530
   ],
1531
   "source": [
1532
    "df=df.dropna(how='any')  \n",
1533
    "df.shape"
1534
   ]
1535
  },
1536
  {
1537
   "cell_type": "code",
1538
   "execution_count": 157,
1539
   "id": "0d2b6ba0-8197-4a18-9faf-2f3d582a419a",
1540
   "metadata": {},
1541
   "outputs": [
1542
    {
1543
     "data": {
1544
      "text/html": [
1545
       "<div>\n",
1546
       "<style scoped>\n",
1547
       "    .dataframe tbody tr th:only-of-type {\n",
1548
       "        vertical-align: middle;\n",
1549
       "    }\n",
1550
       "\n",
1551
       "    .dataframe tbody tr th {\n",
1552
       "        vertical-align: top;\n",
1553
       "    }\n",
1554
       "\n",
1555
       "    .dataframe thead th {\n",
1556
       "        text-align: right;\n",
1557
       "    }\n",
1558
       "</style>\n",
1559
       "<table border=\"1\" class=\"dataframe\">\n",
1560
       "  <thead>\n",
1561
       "    <tr style=\"text-align: right;\">\n",
1562
       "      <th></th>\n",
1563
       "      <th>Age of the patient</th>\n",
1564
       "      <th>Gender of the patient</th>\n",
1565
       "      <th>Total Bilirubin</th>\n",
1566
       "      <th>Direct Bilirubin</th>\n",
1567
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
1568
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
1569
       "      <th>Aspartate_Aminotransferase</th>\n",
1570
       "      <th>Total Protiens</th>\n",
1571
       "      <th>ALB Albumin</th>\n",
1572
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
1573
       "      <th>Result</th>\n",
1574
       "    </tr>\n",
1575
       "  </thead>\n",
1576
       "  <tbody>\n",
1577
       "    <tr>\n",
1578
       "      <th>0</th>\n",
1579
       "      <td>52.0</td>\n",
1580
       "      <td>Female</td>\n",
1581
       "      <td>0.9</td>\n",
1582
       "      <td>0.2</td>\n",
1583
       "      <td>116.0</td>\n",
1584
       "      <td>36.0</td>\n",
1585
       "      <td>16.0</td>\n",
1586
       "      <td>6.2</td>\n",
1587
       "      <td>3.2</td>\n",
1588
       "      <td>1.00</td>\n",
1589
       "      <td>0</td>\n",
1590
       "    </tr>\n",
1591
       "    <tr>\n",
1592
       "      <th>1</th>\n",
1593
       "      <td>36.0</td>\n",
1594
       "      <td>Female</td>\n",
1595
       "      <td>0.7</td>\n",
1596
       "      <td>0.2</td>\n",
1597
       "      <td>188.0</td>\n",
1598
       "      <td>11.0</td>\n",
1599
       "      <td>10.0</td>\n",
1600
       "      <td>5.5</td>\n",
1601
       "      <td>2.3</td>\n",
1602
       "      <td>0.71</td>\n",
1603
       "      <td>0</td>\n",
1604
       "    </tr>\n",
1605
       "    <tr>\n",
1606
       "      <th>2</th>\n",
1607
       "      <td>28.0</td>\n",
1608
       "      <td>Male</td>\n",
1609
       "      <td>0.5</td>\n",
1610
       "      <td>0.1</td>\n",
1611
       "      <td>162.0</td>\n",
1612
       "      <td>155.0</td>\n",
1613
       "      <td>108.0</td>\n",
1614
       "      <td>8.1</td>\n",
1615
       "      <td>4.0</td>\n",
1616
       "      <td>0.90</td>\n",
1617
       "      <td>1</td>\n",
1618
       "    </tr>\n",
1619
       "    <tr>\n",
1620
       "      <th>3</th>\n",
1621
       "      <td>49.0</td>\n",
1622
       "      <td>Male</td>\n",
1623
       "      <td>0.7</td>\n",
1624
       "      <td>0.2</td>\n",
1625
       "      <td>188.0</td>\n",
1626
       "      <td>13.0</td>\n",
1627
       "      <td>21.0</td>\n",
1628
       "      <td>6.0</td>\n",
1629
       "      <td>3.2</td>\n",
1630
       "      <td>1.10</td>\n",
1631
       "      <td>0</td>\n",
1632
       "    </tr>\n",
1633
       "    <tr>\n",
1634
       "      <th>4</th>\n",
1635
       "      <td>51.0</td>\n",
1636
       "      <td>Male</td>\n",
1637
       "      <td>1.0</td>\n",
1638
       "      <td>0.3</td>\n",
1639
       "      <td>75.0</td>\n",
1640
       "      <td>25.0</td>\n",
1641
       "      <td>26.0</td>\n",
1642
       "      <td>5.1</td>\n",
1643
       "      <td>2.9</td>\n",
1644
       "      <td>1.30</td>\n",
1645
       "      <td>1</td>\n",
1646
       "    </tr>\n",
1647
       "  </tbody>\n",
1648
       "</table>\n",
1649
       "</div>"
1650
      ],
1651
      "text/plain": [
1652
       "   Age of the patient Gender of the patient  Total Bilirubin  \\\n",
1653
       "0                52.0                Female              0.9   \n",
1654
       "1                36.0                Female              0.7   \n",
1655
       "2                28.0                  Male              0.5   \n",
1656
       "3                49.0                  Male              0.7   \n",
1657
       "4                51.0                  Male              1.0   \n",
1658
       "\n",
1659
       "   Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
1660
       "0               0.2                          116.0   \n",
1661
       "1               0.2                          188.0   \n",
1662
       "2               0.1                          162.0   \n",
1663
       "3               0.2                          188.0   \n",
1664
       "4               0.3                           75.0   \n",
1665
       "\n",
1666
       "    Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  Total Protiens  \\\n",
1667
       "0                            36.0                        16.0             6.2   \n",
1668
       "1                            11.0                        10.0             5.5   \n",
1669
       "2                           155.0                       108.0             8.1   \n",
1670
       "3                            13.0                        21.0             6.0   \n",
1671
       "4                            25.0                        26.0             5.1   \n",
1672
       "\n",
1673
       "    ALB Albumin  A/G Ratio Albumin and Globulin Ratio  Result  \n",
1674
       "0           3.2                                  1.00       0  \n",
1675
       "1           2.3                                  0.71       0  \n",
1676
       "2           4.0                                  0.90       1  \n",
1677
       "3           3.2                                  1.10       0  \n",
1678
       "4           2.9                                  1.30       1  "
1679
      ]
1680
     },
1681
     "execution_count": 157,
1682
     "metadata": {},
1683
     "output_type": "execute_result"
1684
    }
1685
   ],
1686
   "source": [
1687
    "df.head()"
1688
   ]
1689
  },
1690
  {
1691
   "cell_type": "markdown",
1692
   "id": "574de37d-d4fb-4b68-95f6-330e61a2320b",
1693
   "metadata": {},
1694
   "source": [
1695
    "## Machine Learning Models"
1696
   ]
1697
  },
1698
  {
1699
   "cell_type": "markdown",
1700
   "id": "abce595d-98c0-4e66-9a05-8e84facfdd4a",
1701
   "metadata": {},
1702
   "source": [
1703
    "### Data Preparation"
1704
   ]
1705
  },
1706
  {
1707
   "cell_type": "code",
1708
   "execution_count": 163,
1709
   "id": "274c73aa-a739-45a3-b4ac-fec2f5e8f0b2",
1710
   "metadata": {},
1711
   "outputs": [
1712
    {
1713
     "name": "stdout",
1714
     "output_type": "stream",
1715
     "text": [
1716
      "(8504, 10) (3645, 10) (8504,) (3645,)\n"
1717
     ]
1718
    }
1719
   ],
1720
   "source": [
1721
    "# Create separate object for target variable\n",
1722
    "y = df.Result\n",
1723
    "\n",
1724
    "# Create separate object for input features\n",
1725
    "X = df.drop('Result', axis=1)\n",
1726
    "# Split X and y into train and test sets\n",
1727
    "X_train, X_test, y_train, y_test = train_test_split(X, y, \n",
1728
    "                                                    test_size=0.3, \n",
1729
    "                                                    random_state=0,\n",
1730
    "                                                    stratify=df.Result)\n",
1731
    "# Print number of observations in X_train, X_test, y_train, and y_test\n",
1732
    "print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)"
1733
   ]
1734
  },
1735
  {
1736
   "cell_type": "markdown",
1737
   "id": "467f4e74-e6fa-4ae4-8cf6-6d5d92e67e11",
1738
   "metadata": {},
1739
   "source": [
1740
    "### Data Standardization"
1741
   ]
1742
  },
1743
  {
1744
   "cell_type": "code",
1745
   "execution_count": 164,
1746
   "id": "c1f9a786-f74d-4a33-af65-11aa3126b3fb",
1747
   "metadata": {},
1748
   "outputs": [
1749
    {
1750
     "data": {
1751
      "text/html": [
1752
       "<div>\n",
1753
       "<style scoped>\n",
1754
       "    .dataframe tbody tr th:only-of-type {\n",
1755
       "        vertical-align: middle;\n",
1756
       "    }\n",
1757
       "\n",
1758
       "    .dataframe tbody tr th {\n",
1759
       "        vertical-align: top;\n",
1760
       "    }\n",
1761
       "\n",
1762
       "    .dataframe thead th {\n",
1763
       "        text-align: right;\n",
1764
       "    }\n",
1765
       "</style>\n",
1766
       "<table border=\"1\" class=\"dataframe\">\n",
1767
       "  <thead>\n",
1768
       "    <tr style=\"text-align: right;\">\n",
1769
       "      <th></th>\n",
1770
       "      <th>Age of the patient</th>\n",
1771
       "      <th>Gender of the patient</th>\n",
1772
       "      <th>Total Bilirubin</th>\n",
1773
       "      <th>Direct Bilirubin</th>\n",
1774
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
1775
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
1776
       "      <th>Aspartate_Aminotransferase</th>\n",
1777
       "      <th>Total Protiens</th>\n",
1778
       "      <th>ALB Albumin</th>\n",
1779
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
1780
       "    </tr>\n",
1781
       "  </thead>\n",
1782
       "  <tbody>\n",
1783
       "    <tr>\n",
1784
       "      <th>count</th>\n",
1785
       "      <td>8.504000e+03</td>\n",
1786
       "      <td>8.504000e+03</td>\n",
1787
       "      <td>8.504000e+03</td>\n",
1788
       "      <td>8.504000e+03</td>\n",
1789
       "      <td>8.504000e+03</td>\n",
1790
       "      <td>8.504000e+03</td>\n",
1791
       "      <td>8.504000e+03</td>\n",
1792
       "      <td>8.504000e+03</td>\n",
1793
       "      <td>8.504000e+03</td>\n",
1794
       "      <td>8.504000e+03</td>\n",
1795
       "    </tr>\n",
1796
       "    <tr>\n",
1797
       "      <th>mean</th>\n",
1798
       "      <td>-1.215710e-16</td>\n",
1799
       "      <td>1.215710e-16</td>\n",
1800
       "      <td>1.169755e-17</td>\n",
1801
       "      <td>-6.182992e-17</td>\n",
1802
       "      <td>1.932185e-18</td>\n",
1803
       "      <td>-8.982049e-18</td>\n",
1804
       "      <td>-2.548395e-17</td>\n",
1805
       "      <td>-2.197469e-16</td>\n",
1806
       "      <td>-3.843481e-17</td>\n",
1807
       "      <td>-1.478905e-16</td>\n",
1808
       "    </tr>\n",
1809
       "    <tr>\n",
1810
       "      <th>std</th>\n",
1811
       "      <td>1.000000e+00</td>\n",
1812
       "      <td>1.000000e+00</td>\n",
1813
       "      <td>1.000000e+00</td>\n",
1814
       "      <td>1.000000e+00</td>\n",
1815
       "      <td>1.000000e+00</td>\n",
1816
       "      <td>1.000000e+00</td>\n",
1817
       "      <td>1.000000e+00</td>\n",
1818
       "      <td>1.000000e+00</td>\n",
1819
       "      <td>1.000000e+00</td>\n",
1820
       "      <td>1.000000e+00</td>\n",
1821
       "    </tr>\n",
1822
       "    <tr>\n",
1823
       "      <th>min</th>\n",
1824
       "      <td>-2.422689e+00</td>\n",
1825
       "      <td>-6.412252e-01</td>\n",
1826
       "      <td>-4.594191e-01</td>\n",
1827
       "      <td>-4.656438e-01</td>\n",
1828
       "      <td>-9.303106e-01</td>\n",
1829
       "      <td>-4.163797e-01</td>\n",
1830
       "      <td>-4.956979e-01</td>\n",
1831
       "      <td>-3.476604e+00</td>\n",
1832
       "      <td>-2.820396e+00</td>\n",
1833
       "      <td>-2.052873e+00</td>\n",
1834
       "    </tr>\n",
1835
       "    <tr>\n",
1836
       "      <th>25%</th>\n",
1837
       "      <td>-7.125996e-01</td>\n",
1838
       "      <td>-6.412252e-01</td>\n",
1839
       "      <td>-3.878698e-01</td>\n",
1840
       "      <td>-4.279415e-01</td>\n",
1841
       "      <td>-4.664464e-01</td>\n",
1842
       "      <td>-3.305702e-01</td>\n",
1843
       "      <td>-4.089980e-01</td>\n",
1844
       "      <td>-6.316850e-01</td>\n",
1845
       "      <td>-6.980597e-01</td>\n",
1846
       "      <td>-7.707068e-01</td>\n",
1847
       "    </tr>\n",
1848
       "    <tr>\n",
1849
       "      <th>50%</th>\n",
1850
       "      <td>8.137060e-02</td>\n",
1851
       "      <td>-6.412252e-01</td>\n",
1852
       "      <td>-3.699824e-01</td>\n",
1853
       "      <td>-3.902392e-01</td>\n",
1854
       "      <td>-3.387774e-01</td>\n",
1855
       "      <td>-2.590623e-01</td>\n",
1856
       "      <td>-3.099125e-01</td>\n",
1857
       "      <td>1.024876e-01</td>\n",
1858
       "      <td>-7.384312e-02</td>\n",
1859
       "      <td>1.361914e-01</td>\n",
1860
       "    </tr>\n",
1861
       "    <tr>\n",
1862
       "      <th>75%</th>\n",
1863
       "      <td>6.921169e-01</td>\n",
1864
       "      <td>1.559331e+00</td>\n",
1865
       "      <td>-1.374469e-01</td>\n",
1866
       "      <td>-8.862079e-02</td>\n",
1867
       "      <td>3.571851e-02</td>\n",
1868
       "      <td>-8.029253e-02</td>\n",
1869
       "      <td>-6.839148e-02</td>\n",
1870
       "      <td>6.531171e-01</td>\n",
1871
       "      <td>8.000601e-01</td>\n",
1872
       "      <td>4.489149e-01</td>\n",
1873
       "    </tr>\n",
1874
       "    <tr>\n",
1875
       "      <th>max</th>\n",
1876
       "      <td>2.829729e+00</td>\n",
1877
       "      <td>1.559331e+00</td>\n",
1878
       "      <td>1.288454e+01</td>\n",
1879
       "      <td>6.924007e+00</td>\n",
1880
       "      <td>7.780974e+00</td>\n",
1881
       "      <td>1.152544e+01</td>\n",
1882
       "      <td>9.350928e+00</td>\n",
1883
       "      <td>2.855635e+00</td>\n",
1884
       "      <td>2.922396e+00</td>\n",
1885
       "      <td>5.765214e+00</td>\n",
1886
       "    </tr>\n",
1887
       "  </tbody>\n",
1888
       "</table>\n",
1889
       "</div>"
1890
      ],
1891
      "text/plain": [
1892
       "       Age of the patient  Gender of the patient  Total Bilirubin  \\\n",
1893
       "count        8.504000e+03           8.504000e+03     8.504000e+03   \n",
1894
       "mean        -1.215710e-16           1.215710e-16     1.169755e-17   \n",
1895
       "std          1.000000e+00           1.000000e+00     1.000000e+00   \n",
1896
       "min         -2.422689e+00          -6.412252e-01    -4.594191e-01   \n",
1897
       "25%         -7.125996e-01          -6.412252e-01    -3.878698e-01   \n",
1898
       "50%          8.137060e-02          -6.412252e-01    -3.699824e-01   \n",
1899
       "75%          6.921169e-01           1.559331e+00    -1.374469e-01   \n",
1900
       "max          2.829729e+00           1.559331e+00     1.288454e+01   \n",
1901
       "\n",
1902
       "       Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
1903
       "count      8.504000e+03                   8.504000e+03   \n",
1904
       "mean      -6.182992e-17                   1.932185e-18   \n",
1905
       "std        1.000000e+00                   1.000000e+00   \n",
1906
       "min       -4.656438e-01                  -9.303106e-01   \n",
1907
       "25%       -4.279415e-01                  -4.664464e-01   \n",
1908
       "50%       -3.902392e-01                  -3.387774e-01   \n",
1909
       "75%       -8.862079e-02                   3.571851e-02   \n",
1910
       "max        6.924007e+00                   7.780974e+00   \n",
1911
       "\n",
1912
       "        Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  \\\n",
1913
       "count                    8.504000e+03                8.504000e+03   \n",
1914
       "mean                    -8.982049e-18               -2.548395e-17   \n",
1915
       "std                      1.000000e+00                1.000000e+00   \n",
1916
       "min                     -4.163797e-01               -4.956979e-01   \n",
1917
       "25%                     -3.305702e-01               -4.089980e-01   \n",
1918
       "50%                     -2.590623e-01               -3.099125e-01   \n",
1919
       "75%                     -8.029253e-02               -6.839148e-02   \n",
1920
       "max                      1.152544e+01                9.350928e+00   \n",
1921
       "\n",
1922
       "       Total Protiens   ALB Albumin  A/G Ratio Albumin and Globulin Ratio  \n",
1923
       "count    8.504000e+03  8.504000e+03                          8.504000e+03  \n",
1924
       "mean    -2.197469e-16 -3.843481e-17                         -1.478905e-16  \n",
1925
       "std      1.000000e+00  1.000000e+00                          1.000000e+00  \n",
1926
       "min     -3.476604e+00 -2.820396e+00                         -2.052873e+00  \n",
1927
       "25%     -6.316850e-01 -6.980597e-01                         -7.707068e-01  \n",
1928
       "50%      1.024876e-01 -7.384312e-02                          1.361914e-01  \n",
1929
       "75%      6.531171e-01  8.000601e-01                          4.489149e-01  \n",
1930
       "max      2.855635e+00  2.922396e+00                          5.765214e+00  "
1931
      ]
1932
     },
1933
     "execution_count": 164,
1934
     "metadata": {},
1935
     "output_type": "execute_result"
1936
    }
1937
   ],
1938
   "source": [
1939
    "train_mean = X_train.mean()\n",
1940
    "train_std = X_train.std()\n",
1941
    "## Standardize the train data set\n",
1942
    "X_train = (X_train - train_mean) / train_std\n",
1943
    "## Check for mean and std dev.\n",
1944
    "X_train.describe()"
1945
   ]
1946
  },
1947
  {
1948
   "cell_type": "code",
1949
   "execution_count": 165,
1950
   "id": "6ffea4b9-4fec-41e8-adce-3105c9d76226",
1951
   "metadata": {},
1952
   "outputs": [
1953
    {
1954
     "data": {
1955
      "text/html": [
1956
       "<div>\n",
1957
       "<style scoped>\n",
1958
       "    .dataframe tbody tr th:only-of-type {\n",
1959
       "        vertical-align: middle;\n",
1960
       "    }\n",
1961
       "\n",
1962
       "    .dataframe tbody tr th {\n",
1963
       "        vertical-align: top;\n",
1964
       "    }\n",
1965
       "\n",
1966
       "    .dataframe thead th {\n",
1967
       "        text-align: right;\n",
1968
       "    }\n",
1969
       "</style>\n",
1970
       "<table border=\"1\" class=\"dataframe\">\n",
1971
       "  <thead>\n",
1972
       "    <tr style=\"text-align: right;\">\n",
1973
       "      <th></th>\n",
1974
       "      <th>Age of the patient</th>\n",
1975
       "      <th>Gender of the patient</th>\n",
1976
       "      <th>Total Bilirubin</th>\n",
1977
       "      <th>Direct Bilirubin</th>\n",
1978
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
1979
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
1980
       "      <th>Aspartate_Aminotransferase</th>\n",
1981
       "      <th>Total Protiens</th>\n",
1982
       "      <th>ALB Albumin</th>\n",
1983
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
1984
       "    </tr>\n",
1985
       "  </thead>\n",
1986
       "  <tbody>\n",
1987
       "    <tr>\n",
1988
       "      <th>count</th>\n",
1989
       "      <td>3645.000000</td>\n",
1990
       "      <td>3645.000000</td>\n",
1991
       "      <td>3645.000000</td>\n",
1992
       "      <td>3645.000000</td>\n",
1993
       "      <td>3645.000000</td>\n",
1994
       "      <td>3645.000000</td>\n",
1995
       "      <td>3645.000000</td>\n",
1996
       "      <td>3645.000000</td>\n",
1997
       "      <td>3645.000000</td>\n",
1998
       "      <td>3645.000000</td>\n",
1999
       "    </tr>\n",
2000
       "    <tr>\n",
2001
       "      <th>mean</th>\n",
2002
       "      <td>0.026914</td>\n",
2003
       "      <td>0.014414</td>\n",
2004
       "      <td>0.047909</td>\n",
2005
       "      <td>0.038988</td>\n",
2006
       "      <td>-0.012918</td>\n",
2007
       "      <td>0.030375</td>\n",
2008
       "      <td>0.010400</td>\n",
2009
       "      <td>0.020988</td>\n",
2010
       "      <td>0.030279</td>\n",
2011
       "      <td>0.034189</td>\n",
2012
       "    </tr>\n",
2013
       "    <tr>\n",
2014
       "      <th>std</th>\n",
2015
       "      <td>1.018290</td>\n",
2016
       "      <td>1.006571</td>\n",
2017
       "      <td>1.147725</td>\n",
2018
       "      <td>1.085461</td>\n",
2019
       "      <td>0.951881</td>\n",
2020
       "      <td>1.148324</td>\n",
2021
       "      <td>1.022681</td>\n",
2022
       "      <td>0.991725</td>\n",
2023
       "      <td>0.980889</td>\n",
2024
       "      <td>1.011914</td>\n",
2025
       "    </tr>\n",
2026
       "    <tr>\n",
2027
       "      <th>min</th>\n",
2028
       "      <td>-2.422689</td>\n",
2029
       "      <td>-0.641225</td>\n",
2030
       "      <td>-0.459419</td>\n",
2031
       "      <td>-0.465644</td>\n",
2032
       "      <td>-0.930311</td>\n",
2033
       "      <td>-0.416380</td>\n",
2034
       "      <td>-0.495698</td>\n",
2035
       "      <td>-3.476604</td>\n",
2036
       "      <td>-2.820396</td>\n",
2037
       "      <td>-2.052873</td>\n",
2038
       "    </tr>\n",
2039
       "    <tr>\n",
2040
       "      <th>25%</th>\n",
2041
       "      <td>-0.712600</td>\n",
2042
       "      <td>-0.641225</td>\n",
2043
       "      <td>-0.387870</td>\n",
2044
       "      <td>-0.427941</td>\n",
2045
       "      <td>-0.457935</td>\n",
2046
       "      <td>-0.323419</td>\n",
2047
       "      <td>-0.408998</td>\n",
2048
       "      <td>-0.631685</td>\n",
2049
       "      <td>-0.698060</td>\n",
2050
       "      <td>-0.551800</td>\n",
2051
       "    </tr>\n",
2052
       "    <tr>\n",
2053
       "      <th>50%</th>\n",
2054
       "      <td>0.081371</td>\n",
2055
       "      <td>-0.641225</td>\n",
2056
       "      <td>-0.352095</td>\n",
2057
       "      <td>-0.390239</td>\n",
2058
       "      <td>-0.326010</td>\n",
2059
       "      <td>-0.251912</td>\n",
2060
       "      <td>-0.309912</td>\n",
2061
       "      <td>0.102488</td>\n",
2062
       "      <td>0.051000</td>\n",
2063
       "      <td>0.136191</td>\n",
2064
       "    </tr>\n",
2065
       "    <tr>\n",
2066
       "      <th>75%</th>\n",
2067
       "      <td>0.753192</td>\n",
2068
       "      <td>1.559331</td>\n",
2069
       "      <td>-0.119560</td>\n",
2070
       "      <td>-0.088621</td>\n",
2071
       "      <td>0.035719</td>\n",
2072
       "      <td>-0.065991</td>\n",
2073
       "      <td>-0.049813</td>\n",
2074
       "      <td>0.653117</td>\n",
2075
       "      <td>0.800060</td>\n",
2076
       "      <td>0.448915</td>\n",
2077
       "    </tr>\n",
2078
       "    <tr>\n",
2079
       "      <th>max</th>\n",
2080
       "      <td>2.829729</td>\n",
2081
       "      <td>1.559331</td>\n",
2082
       "      <td>12.884541</td>\n",
2083
       "      <td>6.924007</td>\n",
2084
       "      <td>7.780974</td>\n",
2085
       "      <td>11.525442</td>\n",
2086
       "      <td>9.350928</td>\n",
2087
       "      <td>2.855635</td>\n",
2088
       "      <td>2.922396</td>\n",
2089
       "      <td>5.765214</td>\n",
2090
       "    </tr>\n",
2091
       "  </tbody>\n",
2092
       "</table>\n",
2093
       "</div>"
2094
      ],
2095
      "text/plain": [
2096
       "       Age of the patient  Gender of the patient  Total Bilirubin  \\\n",
2097
       "count         3645.000000            3645.000000      3645.000000   \n",
2098
       "mean             0.026914               0.014414         0.047909   \n",
2099
       "std              1.018290               1.006571         1.147725   \n",
2100
       "min             -2.422689              -0.641225        -0.459419   \n",
2101
       "25%             -0.712600              -0.641225        -0.387870   \n",
2102
       "50%              0.081371              -0.641225        -0.352095   \n",
2103
       "75%              0.753192               1.559331        -0.119560   \n",
2104
       "max              2.829729               1.559331        12.884541   \n",
2105
       "\n",
2106
       "       Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
2107
       "count       3645.000000                    3645.000000   \n",
2108
       "mean           0.038988                      -0.012918   \n",
2109
       "std            1.085461                       0.951881   \n",
2110
       "min           -0.465644                      -0.930311   \n",
2111
       "25%           -0.427941                      -0.457935   \n",
2112
       "50%           -0.390239                      -0.326010   \n",
2113
       "75%           -0.088621                       0.035719   \n",
2114
       "max            6.924007                       7.780974   \n",
2115
       "\n",
2116
       "        Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  \\\n",
2117
       "count                     3645.000000                 3645.000000   \n",
2118
       "mean                         0.030375                    0.010400   \n",
2119
       "std                          1.148324                    1.022681   \n",
2120
       "min                         -0.416380                   -0.495698   \n",
2121
       "25%                         -0.323419                   -0.408998   \n",
2122
       "50%                         -0.251912                   -0.309912   \n",
2123
       "75%                         -0.065991                   -0.049813   \n",
2124
       "max                         11.525442                    9.350928   \n",
2125
       "\n",
2126
       "       Total Protiens   ALB Albumin  A/G Ratio Albumin and Globulin Ratio  \n",
2127
       "count     3645.000000   3645.000000                           3645.000000  \n",
2128
       "mean         0.020988      0.030279                              0.034189  \n",
2129
       "std          0.991725      0.980889                              1.011914  \n",
2130
       "min         -3.476604     -2.820396                             -2.052873  \n",
2131
       "25%         -0.631685     -0.698060                             -0.551800  \n",
2132
       "50%          0.102488      0.051000                              0.136191  \n",
2133
       "75%          0.653117      0.800060                              0.448915  \n",
2134
       "max          2.855635      2.922396                              5.765214  "
2135
      ]
2136
     },
2137
     "execution_count": 165,
2138
     "metadata": {},
2139
     "output_type": "execute_result"
2140
    }
2141
   ],
2142
   "source": [
2143
    "## Note: the test set is standardized with the training-set statistics (train_mean, train_std), so no test-set information is used in preprocessing\n",
2144
    "X_test = (X_test - train_mean) / train_std\n",
2145
    "## Check the mean and std dev: they are close to, but not exactly, 0 and 1 because the training-set statistics were used\n",
2146
    "X_test.describe()"
2147
   ]
2148
  },
2149
  {
2150
   "cell_type": "code",
2151
   "execution_count": null,
2152
   "id": "20b45207-0560-4252-b809-51117de3d9d5",
2153
   "metadata": {},
2154
   "outputs": [],
2155
   "source": []
2156
  },
2157
  {
2158
   "cell_type": "markdown",
2159
   "id": "81b8c6ab-58f2-40e6-a2b8-6b64575c11ad",
2160
   "metadata": {},
2161
   "source": [
2162
    "Logistic Regression"
2163
   ]
2164
  },
2165
  {
2166
   "cell_type": "code",
2167
   "execution_count": 166,
2168
   "id": "c2678bdc-b816-488c-b664-ea79c010bc1a",
2169
   "metadata": {},
2170
   "outputs": [
2171
    {
2172
     "data": {
2173
      "text/html": [
2174
       "<style>#sk-container-id-5 {\n",
2175
       "  /* Definition of color scheme common for light and dark mode */\n",
2176
       "  --sklearn-color-text: black;\n",
2177
       "  --sklearn-color-line: gray;\n",
2178
       "  /* Definition of color scheme for unfitted estimators */\n",
2179
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
2180
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
2181
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
2182
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
2183
       "  /* Definition of color scheme for fitted estimators */\n",
2184
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
2185
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
2186
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
2187
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
2188
       "\n",
2189
       "  /* Specific color for light theme */\n",
2190
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
2191
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
2192
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
2193
       "  --sklearn-color-icon: #696969;\n",
2194
       "\n",
2195
       "  @media (prefers-color-scheme: dark) {\n",
2196
       "    /* Redefinition of color scheme for dark theme */\n",
2197
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
2198
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
2199
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
2200
       "    --sklearn-color-icon: #878787;\n",
2201
       "  }\n",
2202
       "}\n",
2203
       "\n",
2204
       "#sk-container-id-5 {\n",
2205
       "  color: var(--sklearn-color-text);\n",
2206
       "}\n",
2207
       "\n",
2208
       "#sk-container-id-5 pre {\n",
2209
       "  padding: 0;\n",
2210
       "}\n",
2211
       "\n",
2212
       "#sk-container-id-5 input.sk-hidden--visually {\n",
2213
       "  border: 0;\n",
2214
       "  clip: rect(1px 1px 1px 1px);\n",
2215
       "  clip: rect(1px, 1px, 1px, 1px);\n",
2216
       "  height: 1px;\n",
2217
       "  margin: -1px;\n",
2218
       "  overflow: hidden;\n",
2219
       "  padding: 0;\n",
2220
       "  position: absolute;\n",
2221
       "  width: 1px;\n",
2222
       "}\n",
2223
       "\n",
2224
       "#sk-container-id-5 div.sk-dashed-wrapped {\n",
2225
       "  border: 1px dashed var(--sklearn-color-line);\n",
2226
       "  margin: 0 0.4em 0.5em 0.4em;\n",
2227
       "  box-sizing: border-box;\n",
2228
       "  padding-bottom: 0.4em;\n",
2229
       "  background-color: var(--sklearn-color-background);\n",
2230
       "}\n",
2231
       "\n",
2232
       "#sk-container-id-5 div.sk-container {\n",
2233
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
2234
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
2235
       "     so we also need the `!important` here to be able to override the\n",
2236
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
2237
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
2238
       "  display: inline-block !important;\n",
2239
       "  position: relative;\n",
2240
       "}\n",
2241
       "\n",
2242
       "#sk-container-id-5 div.sk-text-repr-fallback {\n",
2243
       "  display: none;\n",
2244
       "}\n",
2245
       "\n",
2246
       "div.sk-parallel-item,\n",
2247
       "div.sk-serial,\n",
2248
       "div.sk-item {\n",
2249
       "  /* draw centered vertical line to link estimators */\n",
2250
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
2251
       "  background-size: 2px 100%;\n",
2252
       "  background-repeat: no-repeat;\n",
2253
       "  background-position: center center;\n",
2254
       "}\n",
2255
       "\n",
2256
       "/* Parallel-specific style estimator block */\n",
2257
       "\n",
2258
       "#sk-container-id-5 div.sk-parallel-item::after {\n",
2259
       "  content: \"\";\n",
2260
       "  width: 100%;\n",
2261
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
2262
       "  flex-grow: 1;\n",
2263
       "}\n",
2264
       "\n",
2265
       "#sk-container-id-5 div.sk-parallel {\n",
2266
       "  display: flex;\n",
2267
       "  align-items: stretch;\n",
2268
       "  justify-content: center;\n",
2269
       "  background-color: var(--sklearn-color-background);\n",
2270
       "  position: relative;\n",
2271
       "}\n",
2272
       "\n",
2273
       "#sk-container-id-5 div.sk-parallel-item {\n",
2274
       "  display: flex;\n",
2275
       "  flex-direction: column;\n",
2276
       "}\n",
2277
       "\n",
2278
       "#sk-container-id-5 div.sk-parallel-item:first-child::after {\n",
2279
       "  align-self: flex-end;\n",
2280
       "  width: 50%;\n",
2281
       "}\n",
2282
       "\n",
2283
       "#sk-container-id-5 div.sk-parallel-item:last-child::after {\n",
2284
       "  align-self: flex-start;\n",
2285
       "  width: 50%;\n",
2286
       "}\n",
2287
       "\n",
2288
       "#sk-container-id-5 div.sk-parallel-item:only-child::after {\n",
2289
       "  width: 0;\n",
2290
       "}\n",
2291
       "\n",
2292
       "/* Serial-specific style estimator block */\n",
2293
       "\n",
2294
       "#sk-container-id-5 div.sk-serial {\n",
2295
       "  display: flex;\n",
2296
       "  flex-direction: column;\n",
2297
       "  align-items: center;\n",
2298
       "  background-color: var(--sklearn-color-background);\n",
2299
       "  padding-right: 1em;\n",
2300
       "  padding-left: 1em;\n",
2301
       "}\n",
2302
       "\n",
2303
       "\n",
2304
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
2305
       "clickable and can be expanded/collapsed.\n",
2306
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
2307
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
2308
       "*/\n",
2309
       "\n",
2310
       "/* Pipeline and ColumnTransformer style (default) */\n",
2311
       "\n",
2312
       "#sk-container-id-5 div.sk-toggleable {\n",
2313
       "  /* Default theme specific background. It is overwritten whether we have a\n",
2314
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
2315
       "  background-color: var(--sklearn-color-background);\n",
2316
       "}\n",
2317
       "\n",
2318
       "/* Toggleable label */\n",
2319
       "#sk-container-id-5 label.sk-toggleable__label {\n",
2320
       "  cursor: pointer;\n",
2321
       "  display: block;\n",
2322
       "  width: 100%;\n",
2323
       "  margin-bottom: 0;\n",
2324
       "  padding: 0.5em;\n",
2325
       "  box-sizing: border-box;\n",
2326
       "  text-align: center;\n",
2327
       "}\n",
2328
       "\n",
2329
       "#sk-container-id-5 label.sk-toggleable__label-arrow:before {\n",
2330
       "  /* Arrow on the left of the label */\n",
2331
       "  content: \"▸\";\n",
2332
       "  float: left;\n",
2333
       "  margin-right: 0.25em;\n",
2334
       "  color: var(--sklearn-color-icon);\n",
2335
       "}\n",
2336
       "\n",
2337
       "#sk-container-id-5 label.sk-toggleable__label-arrow:hover:before {\n",
2338
       "  color: var(--sklearn-color-text);\n",
2339
       "}\n",
2340
       "\n",
2341
       "/* Toggleable content - dropdown */\n",
2342
       "\n",
2343
       "#sk-container-id-5 div.sk-toggleable__content {\n",
2344
       "  max-height: 0;\n",
2345
       "  max-width: 0;\n",
2346
       "  overflow: hidden;\n",
2347
       "  text-align: left;\n",
2348
       "  /* unfitted */\n",
2349
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
2350
       "}\n",
2351
       "\n",
2352
       "#sk-container-id-5 div.sk-toggleable__content.fitted {\n",
2353
       "  /* fitted */\n",
2354
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
2355
       "}\n",
2356
       "\n",
2357
       "#sk-container-id-5 div.sk-toggleable__content pre {\n",
2358
       "  margin: 0.2em;\n",
2359
       "  border-radius: 0.25em;\n",
2360
       "  color: var(--sklearn-color-text);\n",
2361
       "  /* unfitted */\n",
2362
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
2363
       "}\n",
2364
       "\n",
2365
       "#sk-container-id-5 div.sk-toggleable__content.fitted pre {\n",
2366
       "  /* unfitted */\n",
2367
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
2368
       "}\n",
2369
       "\n",
2370
       "#sk-container-id-5 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2371
       "  /* Expand drop-down */\n",
2372
       "  max-height: 200px;\n",
2373
       "  max-width: 100%;\n",
2374
       "  overflow: auto;\n",
2375
       "}\n",
2376
       "\n",
2377
       "#sk-container-id-5 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2378
       "  content: \"▾\";\n",
2379
       "}\n",
2380
       "\n",
2381
       "/* Pipeline/ColumnTransformer-specific style */\n",
2382
       "\n",
2383
       "#sk-container-id-5 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2384
       "  color: var(--sklearn-color-text);\n",
2385
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2386
       "}\n",
2387
       "\n",
2388
       "#sk-container-id-5 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2389
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2390
       "}\n",
2391
       "\n",
2392
       "/* Estimator-specific style */\n",
2393
       "\n",
2394
       "/* Colorize estimator box */\n",
2395
       "#sk-container-id-5 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2396
       "  /* unfitted */\n",
2397
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2398
       "}\n",
2399
       "\n",
2400
       "#sk-container-id-5 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2401
       "  /* fitted */\n",
2402
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2403
       "}\n",
2404
       "\n",
2405
       "#sk-container-id-5 div.sk-label label.sk-toggleable__label,\n",
2406
       "#sk-container-id-5 div.sk-label label {\n",
2407
       "  /* The background is the default theme color */\n",
2408
       "  color: var(--sklearn-color-text-on-default-background);\n",
2409
       "}\n",
2410
       "\n",
2411
       "/* On hover, darken the color of the background */\n",
2412
       "#sk-container-id-5 div.sk-label:hover label.sk-toggleable__label {\n",
2413
       "  color: var(--sklearn-color-text);\n",
2414
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2415
       "}\n",
2416
       "\n",
2417
       "/* Label box, darken color on hover, fitted */\n",
2418
       "#sk-container-id-5 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2419
       "  color: var(--sklearn-color-text);\n",
2420
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2421
       "}\n",
2422
       "\n",
2423
       "/* Estimator label */\n",
2424
       "\n",
2425
       "#sk-container-id-5 div.sk-label label {\n",
2426
       "  font-family: monospace;\n",
2427
       "  font-weight: bold;\n",
2428
       "  display: inline-block;\n",
2429
       "  line-height: 1.2em;\n",
2430
       "}\n",
2431
       "\n",
2432
       "#sk-container-id-5 div.sk-label-container {\n",
2433
       "  text-align: center;\n",
2434
       "}\n",
2435
       "\n",
2436
       "/* Estimator-specific */\n",
2437
       "#sk-container-id-5 div.sk-estimator {\n",
2438
       "  font-family: monospace;\n",
2439
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
2440
       "  border-radius: 0.25em;\n",
2441
       "  box-sizing: border-box;\n",
2442
       "  margin-bottom: 0.5em;\n",
2443
       "  /* unfitted */\n",
2444
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
2445
       "}\n",
2446
       "\n",
2447
       "#sk-container-id-5 div.sk-estimator.fitted {\n",
2448
       "  /* fitted */\n",
2449
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
2450
       "}\n",
2451
       "\n",
2452
       "/* on hover */\n",
2453
       "#sk-container-id-5 div.sk-estimator:hover {\n",
2454
       "  /* unfitted */\n",
2455
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2456
       "}\n",
2457
       "\n",
2458
       "#sk-container-id-5 div.sk-estimator.fitted:hover {\n",
2459
       "  /* fitted */\n",
2460
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2461
       "}\n",
2462
       "\n",
2463
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
2464
       "\n",
2465
       "/* Common style for \"i\" and \"?\" */\n",
2466
       "\n",
2467
       ".sk-estimator-doc-link,\n",
2468
       "a:link.sk-estimator-doc-link,\n",
2469
       "a:visited.sk-estimator-doc-link {\n",
2470
       "  float: right;\n",
2471
       "  font-size: smaller;\n",
2472
       "  line-height: 1em;\n",
2473
       "  font-family: monospace;\n",
2474
       "  background-color: var(--sklearn-color-background);\n",
2475
       "  border-radius: 1em;\n",
2476
       "  height: 1em;\n",
2477
       "  width: 1em;\n",
2478
       "  text-decoration: none !important;\n",
2479
       "  margin-left: 1ex;\n",
2480
       "  /* unfitted */\n",
2481
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
2482
       "  color: var(--sklearn-color-unfitted-level-1);\n",
2483
       "}\n",
2484
       "\n",
2485
       ".sk-estimator-doc-link.fitted,\n",
2486
       "a:link.sk-estimator-doc-link.fitted,\n",
2487
       "a:visited.sk-estimator-doc-link.fitted {\n",
2488
       "  /* fitted */\n",
2489
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
2490
       "  color: var(--sklearn-color-fitted-level-1);\n",
2491
       "}\n",
2492
       "\n",
2493
       "/* On hover */\n",
2494
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
2495
       ".sk-estimator-doc-link:hover,\n",
2496
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
2497
       ".sk-estimator-doc-link:hover {\n",
2498
       "  /* unfitted */\n",
2499
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
2500
       "  color: var(--sklearn-color-background);\n",
2501
       "  text-decoration: none;\n",
2502
       "}\n",
2503
       "\n",
2504
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
2505
       ".sk-estimator-doc-link.fitted:hover,\n",
2506
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
2507
       ".sk-estimator-doc-link.fitted:hover {\n",
2508
       "  /* fitted */\n",
2509
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
2510
       "  color: var(--sklearn-color-background);\n",
2511
       "  text-decoration: none;\n",
2512
       "}\n",
2513
       "\n",
2514
       "/* Span, style for the box shown on hovering the info icon */\n",
2515
       ".sk-estimator-doc-link span {\n",
2516
       "  display: none;\n",
2517
       "  z-index: 9999;\n",
2518
       "  position: relative;\n",
2519
       "  font-weight: normal;\n",
2520
       "  right: .2ex;\n",
2521
       "  padding: .5ex;\n",
2522
       "  margin: .5ex;\n",
2523
       "  width: min-content;\n",
2524
       "  min-width: 20ex;\n",
2525
       "  max-width: 50ex;\n",
2526
       "  color: var(--sklearn-color-text);\n",
2527
       "  box-shadow: 2pt 2pt 4pt #999;\n",
2528
       "  /* unfitted */\n",
2529
       "  background: var(--sklearn-color-unfitted-level-0);\n",
2530
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
2531
       "}\n",
2532
       "\n",
2533
       ".sk-estimator-doc-link.fitted span {\n",
2534
       "  /* fitted */\n",
2535
       "  background: var(--sklearn-color-fitted-level-0);\n",
2536
       "  border: var(--sklearn-color-fitted-level-3);\n",
2537
       "}\n",
2538
       "\n",
2539
       ".sk-estimator-doc-link:hover span {\n",
2540
       "  display: block;\n",
2541
       "}\n",
2542
       "\n",
2543
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
2544
       "\n",
2545
       "#sk-container-id-5 a.estimator_doc_link {\n",
2546
       "  float: right;\n",
2547
       "  font-size: 1rem;\n",
2548
       "  line-height: 1em;\n",
2549
       "  font-family: monospace;\n",
2550
       "  background-color: var(--sklearn-color-background);\n",
2551
       "  border-radius: 1rem;\n",
2552
       "  height: 1rem;\n",
2553
       "  width: 1rem;\n",
2554
       "  text-decoration: none;\n",
2555
       "  /* unfitted */\n",
2556
       "  color: var(--sklearn-color-unfitted-level-1);\n",
2557
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
2558
       "}\n",
2559
       "\n",
2560
       "#sk-container-id-5 a.estimator_doc_link.fitted {\n",
2561
       "  /* fitted */\n",
2562
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
2563
       "  color: var(--sklearn-color-fitted-level-1);\n",
2564
       "}\n",
2565
       "\n",
2566
       "/* On hover */\n",
2567
       "#sk-container-id-5 a.estimator_doc_link:hover {\n",
2568
       "  /* unfitted */\n",
2569
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
2570
       "  color: var(--sklearn-color-background);\n",
2571
       "  text-decoration: none;\n",
2572
       "}\n",
2573
       "\n",
2574
       "#sk-container-id-5 a.estimator_doc_link.fitted:hover {\n",
2575
       "  /* fitted */\n",
2576
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
2577
       "}\n",
2578
       "</style><div id=\"sk-container-id-5\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-7\" type=\"checkbox\" checked><label for=\"sk-estimator-id-7\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;LogisticRegression<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.LogisticRegression.html\">?<span>Documentation for LogisticRegression</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>LogisticRegression()</pre></div> </div></div></div></div>"
2579
      ],
2580
      "text/plain": [
2581
       "LogisticRegression()"
2582
      ]
2583
     },
2584
     "execution_count": 166,
2585
     "metadata": {},
2586
     "output_type": "execute_result"
2587
    }
2588
   ],
2589
   "source": [
2590
    "# fitting data to model\n",
2591
    "\n",
2592
    "from sklearn.linear_model import LogisticRegression\n",
2593
    "\n",
2594
    "lr = LogisticRegression()\n",
2595
    "lr.fit(X_train, y_train)"
2596
   ]
2597
  },
2598
  {
2599
   "cell_type": "code",
2600
   "execution_count": 167,
2601
   "id": "ccc5be4f-5129-467e-9ecd-72128e825b07",
2602
   "metadata": {},
2603
   "outputs": [
2604
    {
2605
     "name": "stdout",
2606
     "output_type": "stream",
2607
     "text": [
2608
      "0.6845014111006585\n",
2609
      "0.697119341563786\n"
2610
     ]
2611
    }
2612
   ],
2613
   "source": [
2614
    "# model predictions\n",
2615
    "\n",
2616
    "y_pred = lr.predict(X_test)\n",
2617
    "# accuracy score\n",
2618
    "\n",
2619
    "print(accuracy_score(y_train, lr.predict(X_train)))\n",
2620
    "\n",
2621
    "lr_acc = accuracy_score(y_test, lr.predict(X_test))\n",
2622
    "print(lr_acc)"
2623
   ]
2624
  },
2625
  {
2626
   "cell_type": "code",
2627
   "execution_count": 168,
2628
   "id": "4737a5e2-a9e1-4bdb-9921-01e419e985c9",
2629
   "metadata": {},
2630
   "outputs": [
2631
    {
2632
     "name": "stdout",
2633
     "output_type": "stream",
2634
     "text": [
2635
      "[[ 898  494]\n",
2636
      " [ 610 1643]]\n"
2637
     ]
2638
    }
2639
   ],
2640
   "source": [
2641
    "# confusion matrix\n",
2642
    "\n",
2643
    "print(confusion_matrix(y_test, y_pred))"
2644
   ]
2645
  },
2646
  {
2647
   "cell_type": "code",
2648
   "execution_count": 169,
2649
   "id": "6c9a3ad7-148e-40eb-99af-3853f0d2d883",
2650
   "metadata": {},
2651
   "outputs": [
2652
    {
2653
     "name": "stdout",
2654
     "output_type": "stream",
2655
     "text": [
2656
      "              precision    recall  f1-score   support\n",
2657
      "\n",
2658
      "           0       0.60      0.65      0.62      1392\n",
2659
      "           1       0.77      0.73      0.75      2253\n",
2660
      "\n",
2661
      "    accuracy                           0.70      3645\n",
2662
      "   macro avg       0.68      0.69      0.68      3645\n",
2663
      "weighted avg       0.70      0.70      0.70      3645\n",
2664
      "\n"
2665
     ]
2666
    }
2667
   ],
2668
   "source": [
2669
    "# classification report\n",
2670
    "\n",
2671
    "print(classification_report(y_test, y_pred))"
2672
   ]
2673
  },
2674
  {
2675
   "cell_type": "markdown",
2676
   "id": "242a7711-d64f-4762-8a00-093426a8fed2",
2677
   "metadata": {},
2678
   "source": [
2679
    "KNN"
2680
   ]
2681
  },
2682
  {
2683
   "cell_type": "code",
2684
   "execution_count": 170,
2685
   "id": "cf5f36d6-174f-467d-9119-ea777e0de770",
2686
   "metadata": {},
2687
   "outputs": [
2688
    {
2689
     "data": {
2690
      "text/html": [
2691
       "<style>#sk-container-id-6 {\n",
2692
       "  /* Definition of color scheme common for light and dark mode */\n",
2693
       "  --sklearn-color-text: black;\n",
2694
       "  --sklearn-color-line: gray;\n",
2695
       "  /* Definition of color scheme for unfitted estimators */\n",
2696
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
2697
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
2698
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
2699
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
2700
       "  /* Definition of color scheme for fitted estimators */\n",
2701
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
2702
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
2703
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
2704
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
2705
       "\n",
2706
       "  /* Specific color for light theme */\n",
2707
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
2708
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
2709
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
2710
       "  --sklearn-color-icon: #696969;\n",
2711
       "\n",
2712
       "  @media (prefers-color-scheme: dark) {\n",
2713
       "    /* Redefinition of color scheme for dark theme */\n",
2714
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
2715
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
2716
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
2717
       "    --sklearn-color-icon: #878787;\n",
2718
       "  }\n",
2719
       "}\n",
2720
       "\n",
2721
       "#sk-container-id-6 {\n",
2722
       "  color: var(--sklearn-color-text);\n",
2723
       "}\n",
2724
       "\n",
2725
       "#sk-container-id-6 pre {\n",
2726
       "  padding: 0;\n",
2727
       "}\n",
2728
       "\n",
2729
       "#sk-container-id-6 input.sk-hidden--visually {\n",
2730
       "  border: 0;\n",
2731
       "  clip: rect(1px 1px 1px 1px);\n",
2732
       "  clip: rect(1px, 1px, 1px, 1px);\n",
2733
       "  height: 1px;\n",
2734
       "  margin: -1px;\n",
2735
       "  overflow: hidden;\n",
2736
       "  padding: 0;\n",
2737
       "  position: absolute;\n",
2738
       "  width: 1px;\n",
2739
       "}\n",
2740
       "\n",
2741
       "#sk-container-id-6 div.sk-dashed-wrapped {\n",
2742
       "  border: 1px dashed var(--sklearn-color-line);\n",
2743
       "  margin: 0 0.4em 0.5em 0.4em;\n",
2744
       "  box-sizing: border-box;\n",
2745
       "  padding-bottom: 0.4em;\n",
2746
       "  background-color: var(--sklearn-color-background);\n",
2747
       "}\n",
2748
       "\n",
2749
       "#sk-container-id-6 div.sk-container {\n",
2750
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
2751
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
2752
       "     so we also need the `!important` here to be able to override the\n",
2753
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
2754
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
2755
       "  display: inline-block !important;\n",
2756
       "  position: relative;\n",
2757
       "}\n",
2758
       "\n",
2759
       "#sk-container-id-6 div.sk-text-repr-fallback {\n",
2760
       "  display: none;\n",
2761
       "}\n",
2762
       "\n",
2763
       "div.sk-parallel-item,\n",
2764
       "div.sk-serial,\n",
2765
       "div.sk-item {\n",
2766
       "  /* draw centered vertical line to link estimators */\n",
2767
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
2768
       "  background-size: 2px 100%;\n",
2769
       "  background-repeat: no-repeat;\n",
2770
       "  background-position: center center;\n",
2771
       "}\n",
2772
       "\n",
2773
       "/* Parallel-specific style estimator block */\n",
2774
       "\n",
2775
       "#sk-container-id-6 div.sk-parallel-item::after {\n",
2776
       "  content: \"\";\n",
2777
       "  width: 100%;\n",
2778
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
2779
       "  flex-grow: 1;\n",
2780
       "}\n",
2781
       "\n",
2782
       "#sk-container-id-6 div.sk-parallel {\n",
2783
       "  display: flex;\n",
2784
       "  align-items: stretch;\n",
2785
       "  justify-content: center;\n",
2786
       "  background-color: var(--sklearn-color-background);\n",
2787
       "  position: relative;\n",
2788
       "}\n",
2789
       "\n",
2790
       "#sk-container-id-6 div.sk-parallel-item {\n",
2791
       "  display: flex;\n",
2792
       "  flex-direction: column;\n",
2793
       "}\n",
2794
       "\n",
2795
       "#sk-container-id-6 div.sk-parallel-item:first-child::after {\n",
2796
       "  align-self: flex-end;\n",
2797
       "  width: 50%;\n",
2798
       "}\n",
2799
       "\n",
2800
       "#sk-container-id-6 div.sk-parallel-item:last-child::after {\n",
2801
       "  align-self: flex-start;\n",
2802
       "  width: 50%;\n",
2803
       "}\n",
2804
       "\n",
2805
       "#sk-container-id-6 div.sk-parallel-item:only-child::after {\n",
2806
       "  width: 0;\n",
2807
       "}\n",
2808
       "\n",
2809
       "/* Serial-specific style estimator block */\n",
2810
       "\n",
2811
       "#sk-container-id-6 div.sk-serial {\n",
2812
       "  display: flex;\n",
2813
       "  flex-direction: column;\n",
2814
       "  align-items: center;\n",
2815
       "  background-color: var(--sklearn-color-background);\n",
2816
       "  padding-right: 1em;\n",
2817
       "  padding-left: 1em;\n",
2818
       "}\n",
2819
       "\n",
2820
       "\n",
2821
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
2822
       "clickable and can be expanded/collapsed.\n",
2823
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
2824
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
2825
       "*/\n",
2826
       "\n",
2827
       "/* Pipeline and ColumnTransformer style (default) */\n",
2828
       "\n",
2829
       "#sk-container-id-6 div.sk-toggleable {\n",
2830
       "  /* Default theme specific background. It is overwritten whether we have a\n",
2831
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
2832
       "  background-color: var(--sklearn-color-background);\n",
2833
       "}\n",
2834
       "\n",
2835
       "/* Toggleable label */\n",
2836
       "#sk-container-id-6 label.sk-toggleable__label {\n",
2837
       "  cursor: pointer;\n",
2838
       "  display: block;\n",
2839
       "  width: 100%;\n",
2840
       "  margin-bottom: 0;\n",
2841
       "  padding: 0.5em;\n",
2842
       "  box-sizing: border-box;\n",
2843
       "  text-align: center;\n",
2844
       "}\n",
2845
       "\n",
2846
       "#sk-container-id-6 label.sk-toggleable__label-arrow:before {\n",
2847
       "  /* Arrow on the left of the label */\n",
2848
       "  content: \"▸\";\n",
2849
       "  float: left;\n",
2850
       "  margin-right: 0.25em;\n",
2851
       "  color: var(--sklearn-color-icon);\n",
2852
       "}\n",
2853
       "\n",
2854
       "#sk-container-id-6 label.sk-toggleable__label-arrow:hover:before {\n",
2855
       "  color: var(--sklearn-color-text);\n",
2856
       "}\n",
2857
       "\n",
2858
       "/* Toggleable content - dropdown */\n",
2859
       "\n",
2860
       "#sk-container-id-6 div.sk-toggleable__content {\n",
2861
       "  max-height: 0;\n",
2862
       "  max-width: 0;\n",
2863
       "  overflow: hidden;\n",
2864
       "  text-align: left;\n",
2865
       "  /* unfitted */\n",
2866
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
2867
       "}\n",
2868
       "\n",
2869
       "#sk-container-id-6 div.sk-toggleable__content.fitted {\n",
2870
       "  /* fitted */\n",
2871
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
2872
       "}\n",
2873
       "\n",
2874
       "#sk-container-id-6 div.sk-toggleable__content pre {\n",
2875
       "  margin: 0.2em;\n",
2876
       "  border-radius: 0.25em;\n",
2877
       "  color: var(--sklearn-color-text);\n",
2878
       "  /* unfitted */\n",
2879
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
2880
       "}\n",
2881
       "\n",
2882
       "#sk-container-id-6 div.sk-toggleable__content.fitted pre {\n",
2883
       "  /* unfitted */\n",
2884
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
2885
       "}\n",
2886
       "\n",
2887
       "#sk-container-id-6 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
2888
       "  /* Expand drop-down */\n",
2889
       "  max-height: 200px;\n",
2890
       "  max-width: 100%;\n",
2891
       "  overflow: auto;\n",
2892
       "}\n",
2893
       "\n",
2894
       "#sk-container-id-6 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
2895
       "  content: \"▾\";\n",
2896
       "}\n",
2897
       "\n",
2898
       "/* Pipeline/ColumnTransformer-specific style */\n",
2899
       "\n",
2900
       "#sk-container-id-6 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2901
       "  color: var(--sklearn-color-text);\n",
2902
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2903
       "}\n",
2904
       "\n",
2905
       "#sk-container-id-6 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2906
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2907
       "}\n",
2908
       "\n",
2909
       "/* Estimator-specific style */\n",
2910
       "\n",
2911
       "/* Colorize estimator box */\n",
2912
       "#sk-container-id-6 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2913
       "  /* unfitted */\n",
2914
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2915
       "}\n",
2916
       "\n",
2917
       "#sk-container-id-6 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
2918
       "  /* fitted */\n",
2919
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2920
       "}\n",
2921
       "\n",
2922
       "#sk-container-id-6 div.sk-label label.sk-toggleable__label,\n",
2923
       "#sk-container-id-6 div.sk-label label {\n",
2924
       "  /* The background is the default theme color */\n",
2925
       "  color: var(--sklearn-color-text-on-default-background);\n",
2926
       "}\n",
2927
       "\n",
2928
       "/* On hover, darken the color of the background */\n",
2929
       "#sk-container-id-6 div.sk-label:hover label.sk-toggleable__label {\n",
2930
       "  color: var(--sklearn-color-text);\n",
2931
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2932
       "}\n",
2933
       "\n",
2934
       "/* Label box, darken color on hover, fitted */\n",
2935
       "#sk-container-id-6 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
2936
       "  color: var(--sklearn-color-text);\n",
2937
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2938
       "}\n",
2939
       "\n",
2940
       "/* Estimator label */\n",
2941
       "\n",
2942
       "#sk-container-id-6 div.sk-label label {\n",
2943
       "  font-family: monospace;\n",
2944
       "  font-weight: bold;\n",
2945
       "  display: inline-block;\n",
2946
       "  line-height: 1.2em;\n",
2947
       "}\n",
2948
       "\n",
2949
       "#sk-container-id-6 div.sk-label-container {\n",
2950
       "  text-align: center;\n",
2951
       "}\n",
2952
       "\n",
2953
       "/* Estimator-specific */\n",
2954
       "#sk-container-id-6 div.sk-estimator {\n",
2955
       "  font-family: monospace;\n",
2956
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
2957
       "  border-radius: 0.25em;\n",
2958
       "  box-sizing: border-box;\n",
2959
       "  margin-bottom: 0.5em;\n",
2960
       "  /* unfitted */\n",
2961
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
2962
       "}\n",
2963
       "\n",
2964
       "#sk-container-id-6 div.sk-estimator.fitted {\n",
2965
       "  /* fitted */\n",
2966
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
2967
       "}\n",
2968
       "\n",
2969
       "/* on hover */\n",
2970
       "#sk-container-id-6 div.sk-estimator:hover {\n",
2971
       "  /* unfitted */\n",
2972
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
2973
       "}\n",
2974
       "\n",
2975
       "#sk-container-id-6 div.sk-estimator.fitted:hover {\n",
2976
       "  /* fitted */\n",
2977
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
2978
       "}\n",
2979
       "\n",
2980
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
2981
       "\n",
2982
       "/* Common style for \"i\" and \"?\" */\n",
2983
       "\n",
2984
       ".sk-estimator-doc-link,\n",
2985
       "a:link.sk-estimator-doc-link,\n",
2986
       "a:visited.sk-estimator-doc-link {\n",
2987
       "  float: right;\n",
2988
       "  font-size: smaller;\n",
2989
       "  line-height: 1em;\n",
2990
       "  font-family: monospace;\n",
2991
       "  background-color: var(--sklearn-color-background);\n",
2992
       "  border-radius: 1em;\n",
2993
       "  height: 1em;\n",
2994
       "  width: 1em;\n",
2995
       "  text-decoration: none !important;\n",
2996
       "  margin-left: 1ex;\n",
2997
       "  /* unfitted */\n",
2998
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
2999
       "  color: var(--sklearn-color-unfitted-level-1);\n",
3000
       "}\n",
3001
       "\n",
3002
       ".sk-estimator-doc-link.fitted,\n",
3003
       "a:link.sk-estimator-doc-link.fitted,\n",
3004
       "a:visited.sk-estimator-doc-link.fitted {\n",
3005
       "  /* fitted */\n",
3006
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
3007
       "  color: var(--sklearn-color-fitted-level-1);\n",
3008
       "}\n",
3009
       "\n",
3010
       "/* On hover */\n",
3011
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
3012
       ".sk-estimator-doc-link:hover,\n",
3013
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
3014
       ".sk-estimator-doc-link:hover {\n",
3015
       "  /* unfitted */\n",
3016
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
3017
       "  color: var(--sklearn-color-background);\n",
3018
       "  text-decoration: none;\n",
3019
       "}\n",
3020
       "\n",
3021
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
3022
       ".sk-estimator-doc-link.fitted:hover,\n",
3023
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
3024
       ".sk-estimator-doc-link.fitted:hover {\n",
3025
       "  /* fitted */\n",
3026
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
3027
       "  color: var(--sklearn-color-background);\n",
3028
       "  text-decoration: none;\n",
3029
       "}\n",
3030
       "\n",
3031
       "/* Span, style for the box shown on hovering the info icon */\n",
3032
       ".sk-estimator-doc-link span {\n",
3033
       "  display: none;\n",
3034
       "  z-index: 9999;\n",
3035
       "  position: relative;\n",
3036
       "  font-weight: normal;\n",
3037
       "  right: .2ex;\n",
3038
       "  padding: .5ex;\n",
3039
       "  margin: .5ex;\n",
3040
       "  width: min-content;\n",
3041
       "  min-width: 20ex;\n",
3042
       "  max-width: 50ex;\n",
3043
       "  color: var(--sklearn-color-text);\n",
3044
       "  box-shadow: 2pt 2pt 4pt #999;\n",
3045
       "  /* unfitted */\n",
3046
       "  background: var(--sklearn-color-unfitted-level-0);\n",
3047
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
3048
       "}\n",
3049
       "\n",
3050
       ".sk-estimator-doc-link.fitted span {\n",
3051
       "  /* fitted */\n",
3052
       "  background: var(--sklearn-color-fitted-level-0);\n",
3053
       "  border: var(--sklearn-color-fitted-level-3);\n",
3054
       "}\n",
3055
       "\n",
3056
       ".sk-estimator-doc-link:hover span {\n",
3057
       "  display: block;\n",
3058
       "}\n",
3059
       "\n",
3060
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
3061
       "\n",
3062
       "#sk-container-id-6 a.estimator_doc_link {\n",
3063
       "  float: right;\n",
3064
       "  font-size: 1rem;\n",
3065
       "  line-height: 1em;\n",
3066
       "  font-family: monospace;\n",
3067
       "  background-color: var(--sklearn-color-background);\n",
3068
       "  border-radius: 1rem;\n",
3069
       "  height: 1rem;\n",
3070
       "  width: 1rem;\n",
3071
       "  text-decoration: none;\n",
3072
       "  /* unfitted */\n",
3073
       "  color: var(--sklearn-color-unfitted-level-1);\n",
3074
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
3075
       "}\n",
3076
       "\n",
3077
       "#sk-container-id-6 a.estimator_doc_link.fitted {\n",
3078
       "  /* fitted */\n",
3079
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
3080
       "  color: var(--sklearn-color-fitted-level-1);\n",
3081
       "}\n",
3082
       "\n",
3083
       "/* On hover */\n",
3084
       "#sk-container-id-6 a.estimator_doc_link:hover {\n",
3085
       "  /* unfitted */\n",
3086
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
3087
       "  color: var(--sklearn-color-background);\n",
3088
       "  text-decoration: none;\n",
3089
       "}\n",
3090
       "\n",
3091
       "#sk-container-id-6 a.estimator_doc_link.fitted:hover {\n",
3092
       "  /* fitted */\n",
3093
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
3094
       "}\n",
3095
       "</style><div id=\"sk-container-id-6\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>KNeighborsClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-8\" type=\"checkbox\" checked><label for=\"sk-estimator-id-8\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;KNeighborsClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.neighbors.KNeighborsClassifier.html\">?<span>Documentation for KNeighborsClassifier</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>KNeighborsClassifier()</pre></div> </div></div></div></div>"
3096
      ],
3097
      "text/plain": [
3098
       "KNeighborsClassifier()"
3099
      ]
3100
     },
3101
     "execution_count": 170,
3102
     "metadata": {},
3103
     "output_type": "execute_result"
3104
    }
3105
   ],
3106
   "source": [
3107
    "from sklearn.neighbors import KNeighborsClassifier\n",
3108
    "\n",
3109
    "knn = KNeighborsClassifier()\n",
3110
    "knn.fit(X_train, y_train)"
3111
   ]
3112
  },
3113
  {
3114
   "cell_type": "code",
3115
   "execution_count": 171,
3116
   "id": "fc32ab9f-9b41-41f4-b5a9-4ac3680d3d5b",
3117
   "metadata": {},
3118
   "outputs": [
3119
    {
3120
     "name": "stdout",
3121
     "output_type": "stream",
3122
     "text": [
3123
      "0.9047507055503292\n",
3124
      "0.8153635116598079\n"
3125
     ]
3126
    }
3127
   ],
3128
   "source": [
3129
    "# model predictions \n",
3130
    "\n",
3131
    "y_pred = knn.predict(X_test)\n",
3132
    "# accuracy score\n",
3133
    "\n",
3134
    "print(accuracy_score(y_train, knn.predict(X_train)))\n",
3135
    "\n",
3136
    "knn_acc = accuracy_score(y_test, knn.predict(X_test))\n",
3137
    "print(knn_acc)"
3138
   ]
3139
  },
3140
  {
3141
   "cell_type": "code",
3142
   "execution_count": 172,
3143
   "id": "a9911e52-734d-4987-afe9-77073672dfb5",
3144
   "metadata": {},
3145
   "outputs": [
3146
    {
3147
     "name": "stdout",
3148
     "output_type": "stream",
3149
     "text": [
3150
      "[[1120  272]\n",
3151
      " [ 401 1852]]\n"
3152
     ]
3153
    }
3154
   ],
3155
   "source": [
3156
    "# confusion matrix\n",
3157
    "\n",
3158
    "print(confusion_matrix(y_test, y_pred))"
3159
   ]
3160
  },
3161
  {
3162
   "cell_type": "code",
3163
   "execution_count": 173,
3164
   "id": "9711d627-2120-41df-b9c5-4a4c796e07ac",
3165
   "metadata": {},
3166
   "outputs": [
3167
    {
3168
     "name": "stdout",
3169
     "output_type": "stream",
3170
     "text": [
3171
      "              precision    recall  f1-score   support\n",
3172
      "\n",
3173
      "           0       0.74      0.80      0.77      1392\n",
3174
      "           1       0.87      0.82      0.85      2253\n",
3175
      "\n",
3176
      "    accuracy                           0.82      3645\n",
3177
      "   macro avg       0.80      0.81      0.81      3645\n",
3178
      "weighted avg       0.82      0.82      0.82      3645\n",
3179
      "\n"
3180
     ]
3181
    }
3182
   ],
3183
   "source": [
3184
    "# classification report\n",
3185
    "\n",
3186
    "print(classification_report(y_test, y_pred))"
3187
   ]
3188
  },
3189
  {
3190
   "cell_type": "markdown",
3191
   "id": "45d2d477-dc9d-405d-869b-d62e4587ca51",
3192
   "metadata": {},
3193
   "source": [
3194
    "## Support Vector Machine (SVM)"
3195
   ]
3196
  },
3197
  {
3198
   "cell_type": "code",
3199
   "execution_count": 174,
3200
   "id": "9e36c0d7-b9a6-42a9-8ed9-797b1d44cedc",
3201
   "metadata": {},
3202
   "outputs": [],
3203
   "source": [
3204
    "from sklearn.svm import SVC"
3205
   ]
3206
  },
3207
  {
3208
   "cell_type": "code",
3209
   "execution_count": 175,
3210
   "id": "b7745b13-0d48-47cb-b76f-74fbf820e776",
3211
   "metadata": {},
3212
   "outputs": [
3213
    {
3214
     "data": {
3215
      "text/html": [
3216
       "<style>#sk-container-id-7 {\n",
3217
       "  /* Definition of color scheme common for light and dark mode */\n",
3218
       "  --sklearn-color-text: black;\n",
3219
       "  --sklearn-color-line: gray;\n",
3220
       "  /* Definition of color scheme for unfitted estimators */\n",
3221
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
3222
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
3223
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
3224
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
3225
       "  /* Definition of color scheme for fitted estimators */\n",
3226
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
3227
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
3228
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
3229
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
3230
       "\n",
3231
       "  /* Specific color for light theme */\n",
3232
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
3233
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
3234
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
3235
       "  --sklearn-color-icon: #696969;\n",
3236
       "\n",
3237
       "  @media (prefers-color-scheme: dark) {\n",
3238
       "    /* Redefinition of color scheme for dark theme */\n",
3239
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
3240
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
3241
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
3242
       "    --sklearn-color-icon: #878787;\n",
3243
       "  }\n",
3244
       "}\n",
3245
       "\n",
3246
       "#sk-container-id-7 {\n",
3247
       "  color: var(--sklearn-color-text);\n",
3248
       "}\n",
3249
       "\n",
3250
       "#sk-container-id-7 pre {\n",
3251
       "  padding: 0;\n",
3252
       "}\n",
3253
       "\n",
3254
       "#sk-container-id-7 input.sk-hidden--visually {\n",
3255
       "  border: 0;\n",
3256
       "  clip: rect(1px 1px 1px 1px);\n",
3257
       "  clip: rect(1px, 1px, 1px, 1px);\n",
3258
       "  height: 1px;\n",
3259
       "  margin: -1px;\n",
3260
       "  overflow: hidden;\n",
3261
       "  padding: 0;\n",
3262
       "  position: absolute;\n",
3263
       "  width: 1px;\n",
3264
       "}\n",
3265
       "\n",
3266
       "#sk-container-id-7 div.sk-dashed-wrapped {\n",
3267
       "  border: 1px dashed var(--sklearn-color-line);\n",
3268
       "  margin: 0 0.4em 0.5em 0.4em;\n",
3269
       "  box-sizing: border-box;\n",
3270
       "  padding-bottom: 0.4em;\n",
3271
       "  background-color: var(--sklearn-color-background);\n",
3272
       "}\n",
3273
       "\n",
3274
       "#sk-container-id-7 div.sk-container {\n",
3275
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
3276
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
3277
       "     so we also need the `!important` here to be able to override the\n",
3278
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
3279
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
3280
       "  display: inline-block !important;\n",
3281
       "  position: relative;\n",
3282
       "}\n",
3283
       "\n",
3284
       "#sk-container-id-7 div.sk-text-repr-fallback {\n",
3285
       "  display: none;\n",
3286
       "}\n",
3287
       "\n",
3288
       "div.sk-parallel-item,\n",
3289
       "div.sk-serial,\n",
3290
       "div.sk-item {\n",
3291
       "  /* draw centered vertical line to link estimators */\n",
3292
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
3293
       "  background-size: 2px 100%;\n",
3294
       "  background-repeat: no-repeat;\n",
3295
       "  background-position: center center;\n",
3296
       "}\n",
3297
       "\n",
3298
       "/* Parallel-specific style estimator block */\n",
3299
       "\n",
3300
       "#sk-container-id-7 div.sk-parallel-item::after {\n",
3301
       "  content: \"\";\n",
3302
       "  width: 100%;\n",
3303
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
3304
       "  flex-grow: 1;\n",
3305
       "}\n",
3306
       "\n",
3307
       "#sk-container-id-7 div.sk-parallel {\n",
3308
       "  display: flex;\n",
3309
       "  align-items: stretch;\n",
3310
       "  justify-content: center;\n",
3311
       "  background-color: var(--sklearn-color-background);\n",
3312
       "  position: relative;\n",
3313
       "}\n",
3314
       "\n",
3315
       "#sk-container-id-7 div.sk-parallel-item {\n",
3316
       "  display: flex;\n",
3317
       "  flex-direction: column;\n",
3318
       "}\n",
3319
       "\n",
3320
       "#sk-container-id-7 div.sk-parallel-item:first-child::after {\n",
3321
       "  align-self: flex-end;\n",
3322
       "  width: 50%;\n",
3323
       "}\n",
3324
       "\n",
3325
       "#sk-container-id-7 div.sk-parallel-item:last-child::after {\n",
3326
       "  align-self: flex-start;\n",
3327
       "  width: 50%;\n",
3328
       "}\n",
3329
       "\n",
3330
       "#sk-container-id-7 div.sk-parallel-item:only-child::after {\n",
3331
       "  width: 0;\n",
3332
       "}\n",
3333
       "\n",
3334
       "/* Serial-specific style estimator block */\n",
3335
       "\n",
3336
       "#sk-container-id-7 div.sk-serial {\n",
3337
       "  display: flex;\n",
3338
       "  flex-direction: column;\n",
3339
       "  align-items: center;\n",
3340
       "  background-color: var(--sklearn-color-background);\n",
3341
       "  padding-right: 1em;\n",
3342
       "  padding-left: 1em;\n",
3343
       "}\n",
3344
       "\n",
3345
       "\n",
3346
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
3347
       "clickable and can be expanded/collapsed.\n",
3348
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
3349
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
3350
       "*/\n",
3351
       "\n",
3352
       "/* Pipeline and ColumnTransformer style (default) */\n",
3353
       "\n",
3354
       "#sk-container-id-7 div.sk-toggleable {\n",
3355
       "  /* Default theme specific background. It is overwritten whether we have a\n",
3356
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
3357
       "  background-color: var(--sklearn-color-background);\n",
3358
       "}\n",
3359
       "\n",
3360
       "/* Toggleable label */\n",
3361
       "#sk-container-id-7 label.sk-toggleable__label {\n",
3362
       "  cursor: pointer;\n",
3363
       "  display: block;\n",
3364
       "  width: 100%;\n",
3365
       "  margin-bottom: 0;\n",
3366
       "  padding: 0.5em;\n",
3367
       "  box-sizing: border-box;\n",
3368
       "  text-align: center;\n",
3369
       "}\n",
3370
       "\n",
3371
       "#sk-container-id-7 label.sk-toggleable__label-arrow:before {\n",
3372
       "  /* Arrow on the left of the label */\n",
3373
       "  content: \"▸\";\n",
3374
       "  float: left;\n",
3375
       "  margin-right: 0.25em;\n",
3376
       "  color: var(--sklearn-color-icon);\n",
3377
       "}\n",
3378
       "\n",
3379
       "#sk-container-id-7 label.sk-toggleable__label-arrow:hover:before {\n",
3380
       "  color: var(--sklearn-color-text);\n",
3381
       "}\n",
3382
       "\n",
3383
       "/* Toggleable content - dropdown */\n",
3384
       "\n",
3385
       "#sk-container-id-7 div.sk-toggleable__content {\n",
3386
       "  max-height: 0;\n",
3387
       "  max-width: 0;\n",
3388
       "  overflow: hidden;\n",
3389
       "  text-align: left;\n",
3390
       "  /* unfitted */\n",
3391
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
3392
       "}\n",
3393
       "\n",
3394
       "#sk-container-id-7 div.sk-toggleable__content.fitted {\n",
3395
       "  /* fitted */\n",
3396
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
3397
       "}\n",
3398
       "\n",
3399
       "#sk-container-id-7 div.sk-toggleable__content pre {\n",
3400
       "  margin: 0.2em;\n",
3401
       "  border-radius: 0.25em;\n",
3402
       "  color: var(--sklearn-color-text);\n",
3403
       "  /* unfitted */\n",
3404
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
3405
       "}\n",
3406
       "\n",
3407
       "#sk-container-id-7 div.sk-toggleable__content.fitted pre {\n",
3408
       "  /* unfitted */\n",
3409
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
3410
       "}\n",
3411
       "\n",
3412
       "#sk-container-id-7 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
3413
       "  /* Expand drop-down */\n",
3414
       "  max-height: 200px;\n",
3415
       "  max-width: 100%;\n",
3416
       "  overflow: auto;\n",
3417
       "}\n",
3418
       "\n",
3419
       "#sk-container-id-7 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
3420
       "  content: \"▾\";\n",
3421
       "}\n",
3422
       "\n",
3423
       "/* Pipeline/ColumnTransformer-specific style */\n",
3424
       "\n",
3425
       "#sk-container-id-7 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
3426
       "  color: var(--sklearn-color-text);\n",
3427
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
3428
       "}\n",
3429
       "\n",
3430
       "#sk-container-id-7 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
3431
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
3432
       "}\n",
3433
       "\n",
3434
       "/* Estimator-specific style */\n",
3435
       "\n",
3436
       "/* Colorize estimator box */\n",
3437
       "#sk-container-id-7 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
3438
       "  /* unfitted */\n",
3439
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
3440
       "}\n",
3441
       "\n",
3442
       "#sk-container-id-7 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
3443
       "  /* fitted */\n",
3444
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
3445
       "}\n",
3446
       "\n",
3447
       "#sk-container-id-7 div.sk-label label.sk-toggleable__label,\n",
3448
       "#sk-container-id-7 div.sk-label label {\n",
3449
       "  /* The background is the default theme color */\n",
3450
       "  color: var(--sklearn-color-text-on-default-background);\n",
3451
       "}\n",
3452
       "\n",
3453
       "/* On hover, darken the color of the background */\n",
3454
       "#sk-container-id-7 div.sk-label:hover label.sk-toggleable__label {\n",
3455
       "  color: var(--sklearn-color-text);\n",
3456
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
3457
       "}\n",
3458
       "\n",
3459
       "/* Label box, darken color on hover, fitted */\n",
3460
       "#sk-container-id-7 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
3461
       "  color: var(--sklearn-color-text);\n",
3462
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
3463
       "}\n",
3464
       "\n",
3465
       "/* Estimator label */\n",
3466
       "\n",
3467
       "#sk-container-id-7 div.sk-label label {\n",
3468
       "  font-family: monospace;\n",
3469
       "  font-weight: bold;\n",
3470
       "  display: inline-block;\n",
3471
       "  line-height: 1.2em;\n",
3472
       "}\n",
3473
       "\n",
3474
       "#sk-container-id-7 div.sk-label-container {\n",
3475
       "  text-align: center;\n",
3476
       "}\n",
3477
       "\n",
3478
       "/* Estimator-specific */\n",
3479
       "#sk-container-id-7 div.sk-estimator {\n",
3480
       "  font-family: monospace;\n",
3481
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
3482
       "  border-radius: 0.25em;\n",
3483
       "  box-sizing: border-box;\n",
3484
       "  margin-bottom: 0.5em;\n",
3485
       "  /* unfitted */\n",
3486
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
3487
       "}\n",
3488
       "\n",
3489
       "#sk-container-id-7 div.sk-estimator.fitted {\n",
3490
       "  /* fitted */\n",
3491
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
3492
       "}\n",
3493
       "\n",
3494
       "/* on hover */\n",
3495
       "#sk-container-id-7 div.sk-estimator:hover {\n",
3496
       "  /* unfitted */\n",
3497
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
3498
       "}\n",
3499
       "\n",
3500
       "#sk-container-id-7 div.sk-estimator.fitted:hover {\n",
3501
       "  /* fitted */\n",
3502
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
3503
       "}\n",
3504
       "\n",
3505
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
3506
       "\n",
3507
       "/* Common style for \"i\" and \"?\" */\n",
3508
       "\n",
3509
       ".sk-estimator-doc-link,\n",
3510
       "a:link.sk-estimator-doc-link,\n",
3511
       "a:visited.sk-estimator-doc-link {\n",
3512
       "  float: right;\n",
3513
       "  font-size: smaller;\n",
3514
       "  line-height: 1em;\n",
3515
       "  font-family: monospace;\n",
3516
       "  background-color: var(--sklearn-color-background);\n",
3517
       "  border-radius: 1em;\n",
3518
       "  height: 1em;\n",
3519
       "  width: 1em;\n",
3520
       "  text-decoration: none !important;\n",
3521
       "  margin-left: 1ex;\n",
3522
       "  /* unfitted */\n",
3523
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
3524
       "  color: var(--sklearn-color-unfitted-level-1);\n",
3525
       "}\n",
3526
       "\n",
3527
       ".sk-estimator-doc-link.fitted,\n",
3528
       "a:link.sk-estimator-doc-link.fitted,\n",
3529
       "a:visited.sk-estimator-doc-link.fitted {\n",
3530
       "  /* fitted */\n",
3531
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
3532
       "  color: var(--sklearn-color-fitted-level-1);\n",
3533
       "}\n",
3534
       "\n",
3535
       "/* On hover */\n",
3536
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
3537
       ".sk-estimator-doc-link:hover,\n",
3538
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
3539
       ".sk-estimator-doc-link:hover {\n",
3540
       "  /* unfitted */\n",
3541
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
3542
       "  color: var(--sklearn-color-background);\n",
3543
       "  text-decoration: none;\n",
3544
       "}\n",
3545
       "\n",
3546
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
3547
       ".sk-estimator-doc-link.fitted:hover,\n",
3548
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
3549
       ".sk-estimator-doc-link.fitted:hover {\n",
3550
       "  /* fitted */\n",
3551
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
3552
       "  color: var(--sklearn-color-background);\n",
3553
       "  text-decoration: none;\n",
3554
       "}\n",
3555
       "\n",
3556
       "/* Span, style for the box shown on hovering the info icon */\n",
3557
       ".sk-estimator-doc-link span {\n",
3558
       "  display: none;\n",
3559
       "  z-index: 9999;\n",
3560
       "  position: relative;\n",
3561
       "  font-weight: normal;\n",
3562
       "  right: .2ex;\n",
3563
       "  padding: .5ex;\n",
3564
       "  margin: .5ex;\n",
3565
       "  width: min-content;\n",
3566
       "  min-width: 20ex;\n",
3567
       "  max-width: 50ex;\n",
3568
       "  color: var(--sklearn-color-text);\n",
3569
       "  box-shadow: 2pt 2pt 4pt #999;\n",
3570
       "  /* unfitted */\n",
3571
       "  background: var(--sklearn-color-unfitted-level-0);\n",
3572
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
3573
       "}\n",
3574
       "\n",
3575
       ".sk-estimator-doc-link.fitted span {\n",
3576
       "  /* fitted */\n",
3577
       "  background: var(--sklearn-color-fitted-level-0);\n",
3578
       "  border: var(--sklearn-color-fitted-level-3);\n",
3579
       "}\n",
3580
       "\n",
3581
       ".sk-estimator-doc-link:hover span {\n",
3582
       "  display: block;\n",
3583
       "}\n",
3584
       "\n",
3585
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
3586
       "\n",
3587
       "#sk-container-id-7 a.estimator_doc_link {\n",
3588
       "  float: right;\n",
3589
       "  font-size: 1rem;\n",
3590
       "  line-height: 1em;\n",
3591
       "  font-family: monospace;\n",
3592
       "  background-color: var(--sklearn-color-background);\n",
3593
       "  border-radius: 1rem;\n",
3594
       "  height: 1rem;\n",
3595
       "  width: 1rem;\n",
3596
       "  text-decoration: none;\n",
3597
       "  /* unfitted */\n",
3598
       "  color: var(--sklearn-color-unfitted-level-1);\n",
3599
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
3600
       "}\n",
3601
       "\n",
3602
       "#sk-container-id-7 a.estimator_doc_link.fitted {\n",
3603
       "  /* fitted */\n",
3604
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
3605
       "  color: var(--sklearn-color-fitted-level-1);\n",
3606
       "}\n",
3607
       "\n",
3608
       "/* On hover */\n",
3609
       "#sk-container-id-7 a.estimator_doc_link:hover {\n",
3610
       "  /* unfitted */\n",
3611
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
3612
       "  color: var(--sklearn-color-background);\n",
3613
       "  text-decoration: none;\n",
3614
       "}\n",
3615
       "\n",
3616
       "#sk-container-id-7 a.estimator_doc_link.fitted:hover {\n",
3617
       "  /* fitted */\n",
3618
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
3619
       "}\n",
3620
       "</style><div id=\"sk-container-id-7\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SVC(C=0.01, gamma=0.0001, probability=True)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-9\" type=\"checkbox\" checked><label for=\"sk-estimator-id-9\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;SVC<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.svm.SVC.html\">?<span>Documentation for SVC</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>SVC(C=0.01, gamma=0.0001, probability=True)</pre></div> </div></div></div></div>"
3621
      ],
3622
      "text/plain": [
3623
       "SVC(C=0.01, gamma=0.0001, probability=True)"
3624
      ]
3625
     },
3626
     "execution_count": 175,
3627
     "metadata": {},
3628
     "output_type": "execute_result"
3629
    }
3630
   ],
3631
   "source": [
3632
    "svc = SVC(C=0.01, gamma=0.0001, probability=True)\n",
3633
    "svc.fit(X=X_train, y=y_train)"
3634
   ]
3635
  },
3636
  {
3637
   "cell_type": "code",
3638
   "execution_count": 176,
3639
   "id": "fff63e68-bec8-450b-a147-7544e65e0083",
3640
   "metadata": {},
3641
   "outputs": [
3642
    {
3643
     "name": "stdout",
3644
     "output_type": "stream",
3645
     "text": [
3646
      "0.6181796801505174\n",
3647
      "0.6181069958847737\n"
3648
     ]
3649
    }
3650
   ],
3651
   "source": [
3652
    "# Predict on the test split once; y_pred is reused for the test accuracy here\n",
3653
    "# and by the confusion-matrix / classification-report cells that follow.\n",
3654
    "y_pred = svc.predict(X_test)\n",
3655
    "\n",
3656
    "# Training accuracy is printed first, then test accuracy (kept in svc_acc).\n",
3657
    "print(accuracy_score(y_train, svc.predict(X_train)))\n",
3658
    "\n",
3659
    "svc_acc = accuracy_score(y_test, y_pred)\n",
3660
    "print(svc_acc)"
3661
   ]
3662
  },
3663
  {
3664
   "cell_type": "code",
3665
   "execution_count": 177,
3666
   "id": "dcc62841-8a28-4802-a196-9548ec0f2b25",
3667
   "metadata": {},
3668
   "outputs": [
3669
    {
3670
     "name": "stdout",
3671
     "output_type": "stream",
3672
     "text": [
3673
      "[[   0 1392]\n",
3674
      " [   0 2253]]\n"
3675
     ]
3676
    }
3677
   ],
3678
   "source": [
3679
    "# Confusion matrix of the SVC on the test split (rows = true class, columns = predicted class).\n",
3680
    "# NOTE(review): the saved output has an all-zero first column, i.e. class 0 is never predicted - revisit C / gamma.\n",
3681
    "print(confusion_matrix(y_true=y_test, y_pred=y_pred))"
3682
   ]
3683
  },
3684
  {
3685
   "cell_type": "code",
3686
   "execution_count": 178,
3687
   "id": "8b94bf63-7488-47b6-901d-a4449b027414",
3688
   "metadata": {},
3689
   "outputs": [
3690
    {
3691
     "name": "stdout",
3692
     "output_type": "stream",
3693
     "text": [
3694
      "              precision    recall  f1-score   support\n",
3695
      "\n",
3696
      "           0       0.00      0.00      0.00      1392\n",
3697
      "           1       0.62      1.00      0.76      2253\n",
3698
      "\n",
3699
      "    accuracy                           0.62      3645\n",
3700
      "   macro avg       0.31      0.50      0.38      3645\n",
3701
      "weighted avg       0.38      0.62      0.47      3645\n",
3702
      "\n"
3703
     ]
3704
    }
3705
   ],
3706
   "source": [
3707
    "# Per-class precision / recall / F1 report for the SVC test predictions.\n",
3708
    "\n",
3709
    "print(classification_report(y_true=y_test, y_pred=y_pred))"
3710
   ]
3711
  },
3712
  {
3713
   "cell_type": "markdown",
3714
   "id": "b8da0359-9496-4444-8154-80cb6831fcd9",
3715
   "metadata": {},
3716
   "source": [
3717
    "Decision Tree Classifier (DT)"
3718
   ]
3719
  },
3720
  {
3721
   "cell_type": "code",
3722
   "execution_count": 179,
3723
   "id": "77c86f51-9a2c-41b0-b9bc-fc29d311ba59",
3724
   "metadata": {},
3725
   "outputs": [
3726
    {
3727
     "name": "stdout",
3728
     "output_type": "stream",
3729
     "text": [
3730
      "Training Accuracy of Decision Tree Classifier is 1.0\n",
3731
      "Test Accuracy of Decision Tree Classifier is 1.0 \n",
3732
      "\n",
3733
      "Confusion Matrix :- \n",
3734
      "[[1392    0]\n",
3735
      " [   0 2253]]\n",
3736
      "\n",
3737
      "Classification Report :- \n",
3738
      "               precision    recall  f1-score   support\n",
3739
      "\n",
3740
      "           0       1.00      1.00      1.00      1392\n",
3741
      "           1       1.00      1.00      1.00      2253\n",
3742
      "\n",
3743
      "    accuracy                           1.00      3645\n",
3744
      "   macro avg       1.00      1.00      1.00      3645\n",
3745
      "weighted avg       1.00      1.00      1.00      3645\n",
3746
      "\n"
3747
     ]
3748
    }
3749
   ],
3750
   "source": [
3751
    "from sklearn.tree import DecisionTreeClassifier\n",
3752
    "\n",
3753
    "# Trees permute candidate features at every split, so pin the seed to keep reruns reproducible.\n",
3754
    "dtc = DecisionTreeClassifier(random_state=42)\n",
3755
    "dtc.fit(X_train, y_train)\n",
3756
    "\n",
3757
    "# Predict once per split and reuse the arrays for every metric below.\n",
3758
    "dtc_train_pred = dtc.predict(X_train)\n",
3759
    "dtc_test_pred = dtc.predict(X_test)\n",
3760
    "dtc_acc = accuracy_score(y_test, dtc_test_pred)\n",
3761
    "# NOTE(review): the saved output shows 1.0 on both splits - check for duplicated rows / leakage between train and test.\n",
3762
    "print(f\"Training Accuracy of Decision Tree Classifier is {accuracy_score(y_train, dtc_train_pred)}\")\n",
3763
    "print(f\"Test Accuracy of Decision Tree Classifier is {dtc_acc} \\n\")\n",
3764
    "print(f\"Confusion Matrix :- \\n{confusion_matrix(y_test, dtc_test_pred)}\\n\")\n",
3765
    "print(f\"Classification Report :- \\n {classification_report(y_test, dtc_test_pred)}\")"
3766
   ]
3767
  },
3768
  {
3769
   "cell_type": "code",
3770
   "execution_count": 180,
3771
   "id": "54e581ec-708d-4fae-ba6b-dc815afe3ac1",
3772
   "metadata": {},
3773
   "outputs": [
3774
    {
3775
     "name": "stdout",
3776
     "output_type": "stream",
3777
     "text": [
3778
      "Fitting 24 folds for each of 1200 candidates, totalling 28800 fits\n"
3779
     ]
3780
    },
3781
    {
3782
     "data": {
3783
      "text/html": [
3784
       "<style>#sk-container-id-8 {\n",
3785
       "  /* Definition of color scheme common for light and dark mode */\n",
3786
       "  --sklearn-color-text: black;\n",
3787
       "  --sklearn-color-line: gray;\n",
3788
       "  /* Definition of color scheme for unfitted estimators */\n",
3789
       "  --sklearn-color-unfitted-level-0: #fff5e6;\n",
3790
       "  --sklearn-color-unfitted-level-1: #f6e4d2;\n",
3791
       "  --sklearn-color-unfitted-level-2: #ffe0b3;\n",
3792
       "  --sklearn-color-unfitted-level-3: chocolate;\n",
3793
       "  /* Definition of color scheme for fitted estimators */\n",
3794
       "  --sklearn-color-fitted-level-0: #f0f8ff;\n",
3795
       "  --sklearn-color-fitted-level-1: #d4ebff;\n",
3796
       "  --sklearn-color-fitted-level-2: #b3dbfd;\n",
3797
       "  --sklearn-color-fitted-level-3: cornflowerblue;\n",
3798
       "\n",
3799
       "  /* Specific color for light theme */\n",
3800
       "  --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
3801
       "  --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, white)));\n",
3802
       "  --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, black)));\n",
3803
       "  --sklearn-color-icon: #696969;\n",
3804
       "\n",
3805
       "  @media (prefers-color-scheme: dark) {\n",
3806
       "    /* Redefinition of color scheme for dark theme */\n",
3807
       "    --sklearn-color-text-on-default-background: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
3808
       "    --sklearn-color-background: var(--sg-background-color, var(--theme-background, var(--jp-layout-color0, #111)));\n",
3809
       "    --sklearn-color-border-box: var(--sg-text-color, var(--theme-code-foreground, var(--jp-content-font-color1, white)));\n",
3810
       "    --sklearn-color-icon: #878787;\n",
3811
       "  }\n",
3812
       "}\n",
3813
       "\n",
3814
       "#sk-container-id-8 {\n",
3815
       "  color: var(--sklearn-color-text);\n",
3816
       "}\n",
3817
       "\n",
3818
       "#sk-container-id-8 pre {\n",
3819
       "  padding: 0;\n",
3820
       "}\n",
3821
       "\n",
3822
       "#sk-container-id-8 input.sk-hidden--visually {\n",
3823
       "  border: 0;\n",
3824
       "  clip: rect(1px 1px 1px 1px);\n",
3825
       "  clip: rect(1px, 1px, 1px, 1px);\n",
3826
       "  height: 1px;\n",
3827
       "  margin: -1px;\n",
3828
       "  overflow: hidden;\n",
3829
       "  padding: 0;\n",
3830
       "  position: absolute;\n",
3831
       "  width: 1px;\n",
3832
       "}\n",
3833
       "\n",
3834
       "#sk-container-id-8 div.sk-dashed-wrapped {\n",
3835
       "  border: 1px dashed var(--sklearn-color-line);\n",
3836
       "  margin: 0 0.4em 0.5em 0.4em;\n",
3837
       "  box-sizing: border-box;\n",
3838
       "  padding-bottom: 0.4em;\n",
3839
       "  background-color: var(--sklearn-color-background);\n",
3840
       "}\n",
3841
       "\n",
3842
       "#sk-container-id-8 div.sk-container {\n",
3843
       "  /* jupyter's `normalize.less` sets `[hidden] { display: none; }`\n",
3844
       "     but bootstrap.min.css set `[hidden] { display: none !important; }`\n",
3845
       "     so we also need the `!important` here to be able to override the\n",
3846
       "     default hidden behavior on the sphinx rendered scikit-learn.org.\n",
3847
       "     See: https://github.com/scikit-learn/scikit-learn/issues/21755 */\n",
3848
       "  display: inline-block !important;\n",
3849
       "  position: relative;\n",
3850
       "}\n",
3851
       "\n",
3852
       "#sk-container-id-8 div.sk-text-repr-fallback {\n",
3853
       "  display: none;\n",
3854
       "}\n",
3855
       "\n",
3856
       "div.sk-parallel-item,\n",
3857
       "div.sk-serial,\n",
3858
       "div.sk-item {\n",
3859
       "  /* draw centered vertical line to link estimators */\n",
3860
       "  background-image: linear-gradient(var(--sklearn-color-text-on-default-background), var(--sklearn-color-text-on-default-background));\n",
3861
       "  background-size: 2px 100%;\n",
3862
       "  background-repeat: no-repeat;\n",
3863
       "  background-position: center center;\n",
3864
       "}\n",
3865
       "\n",
3866
       "/* Parallel-specific style estimator block */\n",
3867
       "\n",
3868
       "#sk-container-id-8 div.sk-parallel-item::after {\n",
3869
       "  content: \"\";\n",
3870
       "  width: 100%;\n",
3871
       "  border-bottom: 2px solid var(--sklearn-color-text-on-default-background);\n",
3872
       "  flex-grow: 1;\n",
3873
       "}\n",
3874
       "\n",
3875
       "#sk-container-id-8 div.sk-parallel {\n",
3876
       "  display: flex;\n",
3877
       "  align-items: stretch;\n",
3878
       "  justify-content: center;\n",
3879
       "  background-color: var(--sklearn-color-background);\n",
3880
       "  position: relative;\n",
3881
       "}\n",
3882
       "\n",
3883
       "#sk-container-id-8 div.sk-parallel-item {\n",
3884
       "  display: flex;\n",
3885
       "  flex-direction: column;\n",
3886
       "}\n",
3887
       "\n",
3888
       "#sk-container-id-8 div.sk-parallel-item:first-child::after {\n",
3889
       "  align-self: flex-end;\n",
3890
       "  width: 50%;\n",
3891
       "}\n",
3892
       "\n",
3893
       "#sk-container-id-8 div.sk-parallel-item:last-child::after {\n",
3894
       "  align-self: flex-start;\n",
3895
       "  width: 50%;\n",
3896
       "}\n",
3897
       "\n",
3898
       "#sk-container-id-8 div.sk-parallel-item:only-child::after {\n",
3899
       "  width: 0;\n",
3900
       "}\n",
3901
       "\n",
3902
       "/* Serial-specific style estimator block */\n",
3903
       "\n",
3904
       "#sk-container-id-8 div.sk-serial {\n",
3905
       "  display: flex;\n",
3906
       "  flex-direction: column;\n",
3907
       "  align-items: center;\n",
3908
       "  background-color: var(--sklearn-color-background);\n",
3909
       "  padding-right: 1em;\n",
3910
       "  padding-left: 1em;\n",
3911
       "}\n",
3912
       "\n",
3913
       "\n",
3914
       "/* Toggleable style: style used for estimator/Pipeline/ColumnTransformer box that is\n",
3915
       "clickable and can be expanded/collapsed.\n",
3916
       "- Pipeline and ColumnTransformer use this feature and define the default style\n",
3917
       "- Estimators will overwrite some part of the style using the `sk-estimator` class\n",
3918
       "*/\n",
3919
       "\n",
3920
       "/* Pipeline and ColumnTransformer style (default) */\n",
3921
       "\n",
3922
       "#sk-container-id-8 div.sk-toggleable {\n",
3923
       "  /* Default theme specific background. It is overwritten whether we have a\n",
3924
       "  specific estimator or a Pipeline/ColumnTransformer */\n",
3925
       "  background-color: var(--sklearn-color-background);\n",
3926
       "}\n",
3927
       "\n",
3928
       "/* Toggleable label */\n",
3929
       "#sk-container-id-8 label.sk-toggleable__label {\n",
3930
       "  cursor: pointer;\n",
3931
       "  display: block;\n",
3932
       "  width: 100%;\n",
3933
       "  margin-bottom: 0;\n",
3934
       "  padding: 0.5em;\n",
3935
       "  box-sizing: border-box;\n",
3936
       "  text-align: center;\n",
3937
       "}\n",
3938
       "\n",
3939
       "#sk-container-id-8 label.sk-toggleable__label-arrow:before {\n",
3940
       "  /* Arrow on the left of the label */\n",
3941
       "  content: \"▸\";\n",
3942
       "  float: left;\n",
3943
       "  margin-right: 0.25em;\n",
3944
       "  color: var(--sklearn-color-icon);\n",
3945
       "}\n",
3946
       "\n",
3947
       "#sk-container-id-8 label.sk-toggleable__label-arrow:hover:before {\n",
3948
       "  color: var(--sklearn-color-text);\n",
3949
       "}\n",
3950
       "\n",
3951
       "/* Toggleable content - dropdown */\n",
3952
       "\n",
3953
       "#sk-container-id-8 div.sk-toggleable__content {\n",
3954
       "  max-height: 0;\n",
3955
       "  max-width: 0;\n",
3956
       "  overflow: hidden;\n",
3957
       "  text-align: left;\n",
3958
       "  /* unfitted */\n",
3959
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
3960
       "}\n",
3961
       "\n",
3962
       "#sk-container-id-8 div.sk-toggleable__content.fitted {\n",
3963
       "  /* fitted */\n",
3964
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
3965
       "}\n",
3966
       "\n",
3967
       "#sk-container-id-8 div.sk-toggleable__content pre {\n",
3968
       "  margin: 0.2em;\n",
3969
       "  border-radius: 0.25em;\n",
3970
       "  color: var(--sklearn-color-text);\n",
3971
       "  /* unfitted */\n",
3972
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
3973
       "}\n",
3974
       "\n",
3975
       "#sk-container-id-8 div.sk-toggleable__content.fitted pre {\n",
3976
       "  /* unfitted */\n",
3977
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
3978
       "}\n",
3979
       "\n",
3980
       "#sk-container-id-8 input.sk-toggleable__control:checked~div.sk-toggleable__content {\n",
3981
       "  /* Expand drop-down */\n",
3982
       "  max-height: 200px;\n",
3983
       "  max-width: 100%;\n",
3984
       "  overflow: auto;\n",
3985
       "}\n",
3986
       "\n",
3987
       "#sk-container-id-8 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {\n",
3988
       "  content: \"▾\";\n",
3989
       "}\n",
3990
       "\n",
3991
       "/* Pipeline/ColumnTransformer-specific style */\n",
3992
       "\n",
3993
       "#sk-container-id-8 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
3994
       "  color: var(--sklearn-color-text);\n",
3995
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
3996
       "}\n",
3997
       "\n",
3998
       "#sk-container-id-8 div.sk-label.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
3999
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
4000
       "}\n",
4001
       "\n",
4002
       "/* Estimator-specific style */\n",
4003
       "\n",
4004
       "/* Colorize estimator box */\n",
4005
       "#sk-container-id-8 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
4006
       "  /* unfitted */\n",
4007
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
4008
       "}\n",
4009
       "\n",
4010
       "#sk-container-id-8 div.sk-estimator.fitted input.sk-toggleable__control:checked~label.sk-toggleable__label {\n",
4011
       "  /* fitted */\n",
4012
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
4013
       "}\n",
4014
       "\n",
4015
       "#sk-container-id-8 div.sk-label label.sk-toggleable__label,\n",
4016
       "#sk-container-id-8 div.sk-label label {\n",
4017
       "  /* The background is the default theme color */\n",
4018
       "  color: var(--sklearn-color-text-on-default-background);\n",
4019
       "}\n",
4020
       "\n",
4021
       "/* On hover, darken the color of the background */\n",
4022
       "#sk-container-id-8 div.sk-label:hover label.sk-toggleable__label {\n",
4023
       "  color: var(--sklearn-color-text);\n",
4024
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
4025
       "}\n",
4026
       "\n",
4027
       "/* Label box, darken color on hover, fitted */\n",
4028
       "#sk-container-id-8 div.sk-label.fitted:hover label.sk-toggleable__label.fitted {\n",
4029
       "  color: var(--sklearn-color-text);\n",
4030
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
4031
       "}\n",
4032
       "\n",
4033
       "/* Estimator label */\n",
4034
       "\n",
4035
       "#sk-container-id-8 div.sk-label label {\n",
4036
       "  font-family: monospace;\n",
4037
       "  font-weight: bold;\n",
4038
       "  display: inline-block;\n",
4039
       "  line-height: 1.2em;\n",
4040
       "}\n",
4041
       "\n",
4042
       "#sk-container-id-8 div.sk-label-container {\n",
4043
       "  text-align: center;\n",
4044
       "}\n",
4045
       "\n",
4046
       "/* Estimator-specific */\n",
4047
       "#sk-container-id-8 div.sk-estimator {\n",
4048
       "  font-family: monospace;\n",
4049
       "  border: 1px dotted var(--sklearn-color-border-box);\n",
4050
       "  border-radius: 0.25em;\n",
4051
       "  box-sizing: border-box;\n",
4052
       "  margin-bottom: 0.5em;\n",
4053
       "  /* unfitted */\n",
4054
       "  background-color: var(--sklearn-color-unfitted-level-0);\n",
4055
       "}\n",
4056
       "\n",
4057
       "#sk-container-id-8 div.sk-estimator.fitted {\n",
4058
       "  /* fitted */\n",
4059
       "  background-color: var(--sklearn-color-fitted-level-0);\n",
4060
       "}\n",
4061
       "\n",
4062
       "/* on hover */\n",
4063
       "#sk-container-id-8 div.sk-estimator:hover {\n",
4064
       "  /* unfitted */\n",
4065
       "  background-color: var(--sklearn-color-unfitted-level-2);\n",
4066
       "}\n",
4067
       "\n",
4068
       "#sk-container-id-8 div.sk-estimator.fitted:hover {\n",
4069
       "  /* fitted */\n",
4070
       "  background-color: var(--sklearn-color-fitted-level-2);\n",
4071
       "}\n",
4072
       "\n",
4073
       "/* Specification for estimator info (e.g. \"i\" and \"?\") */\n",
4074
       "\n",
4075
       "/* Common style for \"i\" and \"?\" */\n",
4076
       "\n",
4077
       ".sk-estimator-doc-link,\n",
4078
       "a:link.sk-estimator-doc-link,\n",
4079
       "a:visited.sk-estimator-doc-link {\n",
4080
       "  float: right;\n",
4081
       "  font-size: smaller;\n",
4082
       "  line-height: 1em;\n",
4083
       "  font-family: monospace;\n",
4084
       "  background-color: var(--sklearn-color-background);\n",
4085
       "  border-radius: 1em;\n",
4086
       "  height: 1em;\n",
4087
       "  width: 1em;\n",
4088
       "  text-decoration: none !important;\n",
4089
       "  margin-left: 1ex;\n",
4090
       "  /* unfitted */\n",
4091
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
4092
       "  color: var(--sklearn-color-unfitted-level-1);\n",
4093
       "}\n",
4094
       "\n",
4095
       ".sk-estimator-doc-link.fitted,\n",
4096
       "a:link.sk-estimator-doc-link.fitted,\n",
4097
       "a:visited.sk-estimator-doc-link.fitted {\n",
4098
       "  /* fitted */\n",
4099
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
4100
       "  color: var(--sklearn-color-fitted-level-1);\n",
4101
       "}\n",
4102
       "\n",
4103
       "/* On hover */\n",
4104
       "div.sk-estimator:hover .sk-estimator-doc-link:hover,\n",
4105
       ".sk-estimator-doc-link:hover,\n",
4106
       "div.sk-label-container:hover .sk-estimator-doc-link:hover,\n",
4107
       ".sk-estimator-doc-link:hover {\n",
4108
       "  /* unfitted */\n",
4109
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
4110
       "  color: var(--sklearn-color-background);\n",
4111
       "  text-decoration: none;\n",
4112
       "}\n",
4113
       "\n",
4114
       "div.sk-estimator.fitted:hover .sk-estimator-doc-link.fitted:hover,\n",
4115
       ".sk-estimator-doc-link.fitted:hover,\n",
4116
       "div.sk-label-container:hover .sk-estimator-doc-link.fitted:hover,\n",
4117
       ".sk-estimator-doc-link.fitted:hover {\n",
4118
       "  /* fitted */\n",
4119
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
4120
       "  color: var(--sklearn-color-background);\n",
4121
       "  text-decoration: none;\n",
4122
       "}\n",
4123
       "\n",
4124
       "/* Span, style for the box shown on hovering the info icon */\n",
4125
       ".sk-estimator-doc-link span {\n",
4126
       "  display: none;\n",
4127
       "  z-index: 9999;\n",
4128
       "  position: relative;\n",
4129
       "  font-weight: normal;\n",
4130
       "  right: .2ex;\n",
4131
       "  padding: .5ex;\n",
4132
       "  margin: .5ex;\n",
4133
       "  width: min-content;\n",
4134
       "  min-width: 20ex;\n",
4135
       "  max-width: 50ex;\n",
4136
       "  color: var(--sklearn-color-text);\n",
4137
       "  box-shadow: 2pt 2pt 4pt #999;\n",
4138
       "  /* unfitted */\n",
4139
       "  background: var(--sklearn-color-unfitted-level-0);\n",
4140
       "  border: .5pt solid var(--sklearn-color-unfitted-level-3);\n",
4141
       "}\n",
4142
       "\n",
4143
       ".sk-estimator-doc-link.fitted span {\n",
4144
       "  /* fitted */\n",
4145
       "  background: var(--sklearn-color-fitted-level-0);\n",
4146
       "  border: var(--sklearn-color-fitted-level-3);\n",
4147
       "}\n",
4148
       "\n",
4149
       ".sk-estimator-doc-link:hover span {\n",
4150
       "  display: block;\n",
4151
       "}\n",
4152
       "\n",
4153
       "/* \"?\"-specific style due to the `<a>` HTML tag */\n",
4154
       "\n",
4155
       "#sk-container-id-8 a.estimator_doc_link {\n",
4156
       "  float: right;\n",
4157
       "  font-size: 1rem;\n",
4158
       "  line-height: 1em;\n",
4159
       "  font-family: monospace;\n",
4160
       "  background-color: var(--sklearn-color-background);\n",
4161
       "  border-radius: 1rem;\n",
4162
       "  height: 1rem;\n",
4163
       "  width: 1rem;\n",
4164
       "  text-decoration: none;\n",
4165
       "  /* unfitted */\n",
4166
       "  color: var(--sklearn-color-unfitted-level-1);\n",
4167
       "  border: var(--sklearn-color-unfitted-level-1) 1pt solid;\n",
4168
       "}\n",
4169
       "\n",
4170
       "#sk-container-id-8 a.estimator_doc_link.fitted {\n",
4171
       "  /* fitted */\n",
4172
       "  border: var(--sklearn-color-fitted-level-1) 1pt solid;\n",
4173
       "  color: var(--sklearn-color-fitted-level-1);\n",
4174
       "}\n",
4175
       "\n",
4176
       "/* On hover */\n",
4177
       "#sk-container-id-8 a.estimator_doc_link:hover {\n",
4178
       "  /* unfitted */\n",
4179
       "  background-color: var(--sklearn-color-unfitted-level-3);\n",
4180
       "  color: var(--sklearn-color-background);\n",
4181
       "  text-decoration: none;\n",
4182
       "}\n",
4183
       "\n",
4184
       "#sk-container-id-8 a.estimator_doc_link.fitted:hover {\n",
4185
       "  /* fitted */\n",
4186
       "  background-color: var(--sklearn-color-fitted-level-3);\n",
4187
       "}\n",
4188
       "</style><div id=\"sk-container-id-8\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>GridSearchCV(cv=24, estimator=DecisionTreeClassifier(), n_jobs=-1,\n",
4189
       "             param_grid={&#x27;criterion&#x27;: [&#x27;gini&#x27;, &#x27;entropy&#x27;],\n",
4190
       "                         &#x27;max_depth&#x27;: [3, 5, 7, 10],\n",
4191
       "                         &#x27;max_features&#x27;: [&#x27;auto&#x27;, &#x27;sqrt&#x27;, &#x27;log2&#x27;],\n",
4192
       "                         &#x27;min_samples_leaf&#x27;: [1, 2, 3, 5, 7],\n",
4193
       "                         &#x27;min_samples_split&#x27;: [1, 2, 3, 5, 7],\n",
4194
       "                         &#x27;splitter&#x27;: [&#x27;best&#x27;, &#x27;random&#x27;]},\n",
4195
       "             verbose=1)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item sk-dashed-wrapped\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-10\" type=\"checkbox\" ><label for=\"sk-estimator-id-10\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;&nbsp;GridSearchCV<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.model_selection.GridSearchCV.html\">?<span>Documentation for GridSearchCV</span></a><span class=\"sk-estimator-doc-link fitted\">i<span>Fitted</span></span></label><div class=\"sk-toggleable__content fitted\"><pre>GridSearchCV(cv=24, estimator=DecisionTreeClassifier(), n_jobs=-1,\n",
4196
       "             param_grid={&#x27;criterion&#x27;: [&#x27;gini&#x27;, &#x27;entropy&#x27;],\n",
4197
       "                         &#x27;max_depth&#x27;: [3, 5, 7, 10],\n",
4198
       "                         &#x27;max_features&#x27;: [&#x27;auto&#x27;, &#x27;sqrt&#x27;, &#x27;log2&#x27;],\n",
4199
       "                         &#x27;min_samples_leaf&#x27;: [1, 2, 3, 5, 7],\n",
4200
       "                         &#x27;min_samples_split&#x27;: [1, 2, 3, 5, 7],\n",
4201
       "                         &#x27;splitter&#x27;: [&#x27;best&#x27;, &#x27;random&#x27;]},\n",
4202
       "             verbose=1)</pre></div> </div></div><div class=\"sk-parallel\"><div class=\"sk-parallel-item\"><div class=\"sk-item\"><div class=\"sk-label-container\"><div class=\"sk-label fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-11\" type=\"checkbox\" ><label for=\"sk-estimator-id-11\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">estimator: DecisionTreeClassifier</label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier()</pre></div> </div></div><div class=\"sk-serial\"><div class=\"sk-item\"><div class=\"sk-estimator fitted sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-12\" type=\"checkbox\" ><label for=\"sk-estimator-id-12\" class=\"sk-toggleable__label fitted sk-toggleable__label-arrow fitted\">&nbsp;DecisionTreeClassifier<a class=\"sk-estimator-doc-link fitted\" rel=\"noreferrer\" target=\"_blank\" href=\"https://scikit-learn.org/1.4/modules/generated/sklearn.tree.DecisionTreeClassifier.html\">?<span>Documentation for DecisionTreeClassifier</span></a></label><div class=\"sk-toggleable__content fitted\"><pre>DecisionTreeClassifier()</pre></div> </div></div></div></div></div></div></div></div></div>"
4203
      ],
4204
      "text/plain": [
4205
       "GridSearchCV(cv=24, estimator=DecisionTreeClassifier(), n_jobs=-1,\n",
4206
       "             param_grid={'criterion': ['gini', 'entropy'],\n",
4207
       "                         'max_depth': [3, 5, 7, 10],\n",
4208
       "                         'max_features': ['auto', 'sqrt', 'log2'],\n",
4209
       "                         'min_samples_leaf': [1, 2, 3, 5, 7],\n",
4210
       "                         'min_samples_split': [1, 2, 3, 5, 7],\n",
4211
       "                         'splitter': ['best', 'random']},\n",
4212
       "             verbose=1)"
4213
      ]
4214
     },
4215
     "execution_count": 180,
4216
     "metadata": {},
4217
     "output_type": "execute_result"
4218
    }
4219
   ],
4220
   "source": [
4221
    "# hyper parameter tuning of decision tree \n",
4222
    "\n",
4223
    "from sklearn.model_selection import GridSearchCV\n",
4224
    "grid_param = {\n",
4225
    "    'criterion' : ['gini', 'entropy'],\n",
4226
    "    'max_depth' : [3, 5, 7, 10],\n",
4227
    "    'splitter' : ['best', 'random'],\n",
4228
    "    'min_samples_leaf' : [1, 2, 3, 5, 7],\n",
4229
    "    'min_samples_split' : [1, 2, 3, 5, 7],\n",
4230
    "    'max_features' : ['auto', 'sqrt', 'log2']\n",
4231
    "}\n",
4232
    "\n",
4233
    "grid_search_dtc = GridSearchCV(dtc, grid_param, cv = 24, n_jobs = -1, verbose = 1)\n",
4234
    "grid_search_dtc.fit(X_train, y_train)"
4235
   ]
4236
  },
4237
  {
4238
   "cell_type": "code",
4239
   "execution_count": 181,
4240
   "id": "99f5a2e2-e069-4984-b10b-0d16cca9ca73",
4241
   "metadata": {},
4242
   "outputs": [
4243
    {
4244
     "name": "stdout",
4245
     "output_type": "stream",
4246
     "text": [
4247
      "{'criterion': 'gini', 'max_depth': 10, 'max_features': 'sqrt', 'min_samples_leaf': 5, 'min_samples_split': 5, 'splitter': 'best'}\n"
4248
     ]
4249
    }
4250
   ],
4251
   "source": [
4252
    "# best parameters \n",
4253
    "\n",
4254
    "print(grid_search_dtc.best_params_)\n"
4255
   ]
4256
  },
4257
  {
4258
   "cell_type": "code",
4259
   "execution_count": 182,
4260
   "id": "e5a2f0d1-c71a-474c-b7de-e3eae9d8df24",
4261
   "metadata": {},
4262
   "outputs": [
4263
    {
4264
     "name": "stdout",
4265
     "output_type": "stream",
4266
     "text": [
4267
      "Training Accuracy of Decision Tree Classifier is 0.8996942615239887\n",
4268
      "Test Accuracy of Decision Tree Classifier is 0.8880658436213992 \n",
4269
      "\n",
4270
      "Confusion Matrix :- \n",
4271
      "[[1184  208]\n",
4272
      " [ 200 2053]]\n",
4273
      "\n",
4274
      "Classification Report :- \n",
4275
      "               precision    recall  f1-score   support\n",
4276
      "\n",
4277
      "           0       0.86      0.85      0.85      1392\n",
4278
      "           1       0.91      0.91      0.91      2253\n",
4279
      "\n",
4280
      "    accuracy                           0.89      3645\n",
4281
      "   macro avg       0.88      0.88      0.88      3645\n",
4282
      "weighted avg       0.89      0.89      0.89      3645\n",
4283
      "\n"
4284
     ]
4285
    }
4286
   ],
4287
   "source": [
4288
    "# best estimator\n",
4289
    "\n",
4290
    "dtc = grid_search_dtc.best_estimator_\n",
4291
    "\n",
4292
    "# accuracy score, confusion matrix and classification report of decision tree\n",
4293
    "\n",
4294
    "dtc_acc = accuracy_score(y_test, dtc.predict(X_test))\n",
4295
    "\n",
4296
    "print(f\"Training Accuracy of Decision Tree Classifier is {accuracy_score(y_train, dtc.predict(X_train))}\")\n",
4297
    "print(f\"Test Accuracy of Decision Tree Classifier is {dtc_acc} \\n\")\n",
4298
    "\n",
4299
    "print(f\"Confusion Matrix :- \\n{confusion_matrix(y_test, dtc.predict(X_test))}\\n\")\n",
4300
    "print(f\"Classification Report :- \\n {classification_report(y_test, dtc.predict(X_test))}\")"
4301
   ]
4302
  },
4303
  {
4304
   "cell_type": "code",
4305
   "execution_count": 183,
4306
   "id": "72e8219b-e200-4672-bbd4-850b9ef03428",
4307
   "metadata": {},
4308
   "outputs": [
4309
    {
4310
     "data": {
4311
      "text/html": [
4312
       "<div>\n",
4313
       "<style scoped>\n",
4314
       "    .dataframe tbody tr th:only-of-type {\n",
4315
       "        vertical-align: middle;\n",
4316
       "    }\n",
4317
       "\n",
4318
       "    .dataframe tbody tr th {\n",
4319
       "        vertical-align: top;\n",
4320
       "    }\n",
4321
       "\n",
4322
       "    .dataframe thead th {\n",
4323
       "        text-align: right;\n",
4324
       "    }\n",
4325
       "</style>\n",
4326
       "<table border=\"1\" class=\"dataframe\">\n",
4327
       "  <thead>\n",
4328
       "    <tr style=\"text-align: right;\">\n",
4329
       "      <th></th>\n",
4330
       "      <th>Model</th>\n",
4331
       "      <th>Score</th>\n",
4332
       "    </tr>\n",
4333
       "  </thead>\n",
4334
       "  <tbody>\n",
4335
       "    <tr>\n",
4336
       "      <th>3</th>\n",
4337
       "      <td>Decision Tree Classifier</td>\n",
4338
       "      <td>88.81</td>\n",
4339
       "    </tr>\n",
4340
       "    <tr>\n",
4341
       "      <th>1</th>\n",
4342
       "      <td>KNN</td>\n",
4343
       "      <td>81.54</td>\n",
4344
       "    </tr>\n",
4345
       "    <tr>\n",
4346
       "      <th>0</th>\n",
4347
       "      <td>Logistic Regression</td>\n",
4348
       "      <td>69.71</td>\n",
4349
       "    </tr>\n",
4350
       "    <tr>\n",
4351
       "      <th>2</th>\n",
4352
       "      <td>SVM</td>\n",
4353
       "      <td>61.81</td>\n",
4354
       "    </tr>\n",
4355
       "  </tbody>\n",
4356
       "</table>\n",
4357
       "</div>"
4358
      ],
4359
      "text/plain": [
4360
       "                      Model  Score\n",
4361
       "3  Decision Tree Classifier  88.81\n",
4362
       "1                       KNN  81.54\n",
4363
       "0       Logistic Regression  69.71\n",
4364
       "2                       SVM  61.81"
4365
      ]
4366
     },
4367
     "execution_count": 183,
4368
     "metadata": {},
4369
     "output_type": "execute_result"
4370
    }
4371
   ],
4372
   "source": [
4373
    "models = pd.DataFrame({\n",
4374
    "    'Model': ['Logistic Regression', 'KNN', 'SVM', 'Decision Tree Classifier'],\n",
4375
    "    'Score': [100*round(lr_acc,4), 100*round(knn_acc,4), 100*round(svc_acc,4), 100*round(dtc_acc,4)]\n",
4376
    "})\n",
4377
    "models.sort_values(by = 'Score', ascending = False)"
4378
   ]
4379
  },
4380
  {
4381
   "cell_type": "code",
4382
   "execution_count": 184,
4383
   "id": "f394aff4-ce78-465b-98a9-aabfbc2b020b",
4384
   "metadata": {},
4385
   "outputs": [
4386
    {
4387
     "name": "stdout",
4388
     "output_type": "stream",
4389
     "text": [
4390
      "Model: LR\n",
4391
      "Mean Accuracy: 69.71%\n",
4392
      "Mean ROC AUC: 76.83%\n",
4393
      "------------------------------\n",
4394
      "Model: DT\n",
4395
      "Mean Accuracy: 87.24%\n",
4396
      "Mean ROC AUC: 96.03%\n",
4397
      "------------------------------\n",
4398
      "Model: SVM\n",
4399
      "Mean Accuracy: 61.81%\n",
4400
      "Mean ROC AUC: 74.32%\n",
4401
      "------------------------------\n",
4402
      "Model: KNN\n",
4403
      "Mean Accuracy: 81.54%\n",
4404
      "Mean ROC AUC: 90.04%\n",
4405
      "------------------------------\n"
4406
     ]
4407
    },
4408
    {
4409
     "data": {
4410
      "image/png": "",
4411
      "text/plain": [
4412
       "<Figure size 800x500 with 1 Axes>"
4413
      ]
4414
     },
4415
     "metadata": {},
4416
     "output_type": "display_data"
4417
    }
4418
   ],
4419
   "source": [
4420
    "from sklearn import metrics\n",
4421
    "import numpy as np\n",
4422
    "import matplotlib.pyplot as plt\n",
4423
    "\n",
4424
    "# Define models with labels\n",
4425
    "models = [\n",
4426
    "    {'label': 'LR', 'model': lr},\n",
4427
    "    {'label': 'DT', 'model': dtc},\n",
4428
    "    {'label': 'SVM', 'model': svc},\n",
4429
    "    {'label': 'KNN', 'model': knn}\n",
4430
    "]\n",
4431
    "\n",
4432
    "# Initialize lists to store performance metrics\n",
4433
    "means_roc = []\n",
4434
    "means_accuracy = []\n",
4435
    "\n",
4436
    "# Evaluate each model\n",
4437
    "for m in models:\n",
4438
    "    model = m['model']\n",
4439
    "    label = m['label']\n",
4440
    "    \n",
4441
    "    # Fit model and make predictions\n",
4442
    "    model.fit(X_train, y_train)\n",
4443
    "    y_pred = model.predict(X_test)\n",
4444
    "    \n",
4445
    "    # Calculate accuracy\n",
4446
    "    accuracy = metrics.accuracy_score(y_test, y_pred)\n",
4447
    "    mean_accuracy = 100 * round(accuracy, 4)\n",
4448
    "    means_accuracy.append(mean_accuracy)\n",
4449
    "    \n",
4450
    "    # Calculate ROC AUC\n",
4451
    "    if hasattr(model, 'predict_proba'):\n",
4452
    "        y_pred_prob = model.predict_proba(X_test)[:,1]\n",
4453
    "        mean_roc = 100 * round(metrics.roc_auc_score(y_test, y_pred_prob), 4)\n",
4454
    "    else:\n",
4455
    "        mean_roc = np.nan  # Use NaN if predict_proba is not available\n",
4456
    "    \n",
4457
    "    means_roc.append(mean_roc)\n",
4458
    "    \n",
4459
    "    # Display mean accuracy and mean ROC AUC for each model\n",
4460
    "    print(f\"Model: {label}\")\n",
4461
    "    print(f\"Mean Accuracy: {mean_accuracy:.2f}%\")\n",
4462
    "    print(f\"Mean ROC AUC: {mean_roc:.2f}%\")\n",
4463
    "    print(\"-\" * 30)\n",
4464
    "\n",
4465
    "# Convert lists to tuples for plotting\n",
4466
    "means_accuracy = tuple(means_accuracy)\n",
4467
    "means_roc = tuple(means_roc)\n",
4468
    "\n",
4469
    "# Create plot\n",
4470
    "index = np.arange(len(models))\n",
4471
    "bar_width = 0.35\n",
4472
    "\n",
4473
    "# Create a figure\n",
4474
    "fig, ax = plt.subplots(figsize=(8, 5))\n",
4475
    "\n",
4476
    "# Plot accuracy and ROC AUC as bar charts\n",
4477
    "rects1 = plt.bar(index, means_accuracy, bar_width, alpha=0.8, color='mediumpurple', label='Accuracy (%)')\n",
4478
    "rects2 = plt.bar(index + bar_width, means_roc, bar_width, alpha=0.8, color='rebeccapurple', label='ROC AUC (%)')\n",
4479
    "\n",
4480
    "# Set axis labels and title\n",
4481
    "ax.set_xlabel('Models')\n",
4482
    "ax.set_ylabel('Performance (%)')\n",
4483
    "ax.set_title('Performance Evaluation - Liver Disease Prediction')\n",
4484
    "\n",
4485
    "# Set x-ticks and labels\n",
4486
    "ax.set_xticks(index + bar_width / 2)\n",
4487
    "ax.set_xticklabels([model['label'] for model in models], rotation=40, ha='center')\n",
4488
    "\n",
4489
    "# Add legend\n",
4490
    "ax.legend()\n",
4491
    "\n",
4492
    "# Show the plot\n",
4493
    "plt.show()\n"
4494
   ]
4495
  },
4496
  {
4497
   "cell_type": "code",
4498
   "execution_count": 190,
4499
   "id": "4339012f",
4500
   "metadata": {},
4501
   "outputs": [
4502
    {
4503
     "data": {
4504
      "text/html": [
4505
       "<div>\n",
4506
       "<style scoped>\n",
4507
       "    .dataframe tbody tr th:only-of-type {\n",
4508
       "        vertical-align: middle;\n",
4509
       "    }\n",
4510
       "\n",
4511
       "    .dataframe tbody tr th {\n",
4512
       "        vertical-align: top;\n",
4513
       "    }\n",
4514
       "\n",
4515
       "    .dataframe thead th {\n",
4516
       "        text-align: right;\n",
4517
       "    }\n",
4518
       "</style>\n",
4519
       "<table border=\"1\" class=\"dataframe\">\n",
4520
       "  <thead>\n",
4521
       "    <tr style=\"text-align: right;\">\n",
4522
       "      <th></th>\n",
4523
       "      <th>Age of the patient</th>\n",
4524
       "      <th>Gender of the patient</th>\n",
4525
       "      <th>Total Bilirubin</th>\n",
4526
       "      <th>Direct Bilirubin</th>\n",
4527
       "      <th>Alkphos Alkaline Phosphotase</th>\n",
4528
       "      <th>Sgpt Alamine Aminotransferase</th>\n",
4529
       "      <th>Aspartate_Aminotransferase</th>\n",
4530
       "      <th>Total Protiens</th>\n",
4531
       "      <th>ALB Albumin</th>\n",
4532
       "      <th>A/G Ratio Albumin and Globulin Ratio</th>\n",
4533
       "      <th>Result</th>\n",
4534
       "    </tr>\n",
4535
       "  </thead>\n",
4536
       "  <tbody>\n",
4537
       "    <tr>\n",
4538
       "      <th>15940</th>\n",
4539
       "      <td>50.0</td>\n",
4540
       "      <td>1</td>\n",
4541
       "      <td>0.8</td>\n",
4542
       "      <td>0.2</td>\n",
4543
       "      <td>148.0</td>\n",
4544
       "      <td>23.0</td>\n",
4545
       "      <td>35.0</td>\n",
4546
       "      <td>6.0</td>\n",
4547
       "      <td>3.0</td>\n",
4548
       "      <td>1.0</td>\n",
4549
       "      <td>1</td>\n",
4550
       "    </tr>\n",
4551
       "    <tr>\n",
4552
       "      <th>11861</th>\n",
4553
       "      <td>30.0</td>\n",
4554
       "      <td>1</td>\n",
4555
       "      <td>0.8</td>\n",
4556
       "      <td>0.2</td>\n",
4557
       "      <td>650.0</td>\n",
4558
       "      <td>70.0</td>\n",
4559
       "      <td>138.0</td>\n",
4560
       "      <td>6.6</td>\n",
4561
       "      <td>3.1</td>\n",
4562
       "      <td>0.8</td>\n",
4563
       "      <td>1</td>\n",
4564
       "    </tr>\n",
4565
       "    <tr>\n",
4566
       "      <th>14768</th>\n",
4567
       "      <td>35.0</td>\n",
4568
       "      <td>1</td>\n",
4569
       "      <td>2.2</td>\n",
4570
       "      <td>1.0</td>\n",
4571
       "      <td>310.0</td>\n",
4572
       "      <td>119.0</td>\n",
4573
       "      <td>42.0</td>\n",
4574
       "      <td>7.9</td>\n",
4575
       "      <td>4.1</td>\n",
4576
       "      <td>1.0</td>\n",
4577
       "      <td>0</td>\n",
4578
       "    </tr>\n",
4579
       "    <tr>\n",
4580
       "      <th>17664</th>\n",
4581
       "      <td>26.0</td>\n",
4582
       "      <td>0</td>\n",
4583
       "      <td>0.7</td>\n",
4584
       "      <td>0.1</td>\n",
4585
       "      <td>162.0</td>\n",
4586
       "      <td>52.0</td>\n",
4587
       "      <td>41.0</td>\n",
4588
       "      <td>5.2</td>\n",
4589
       "      <td>2.5</td>\n",
4590
       "      <td>0.9</td>\n",
4591
       "      <td>0</td>\n",
4592
       "    </tr>\n",
4593
       "    <tr>\n",
4594
       "      <th>4052</th>\n",
4595
       "      <td>57.0</td>\n",
4596
       "      <td>0</td>\n",
4597
       "      <td>1.4</td>\n",
4598
       "      <td>0.8</td>\n",
4599
       "      <td>178.0</td>\n",
4600
       "      <td>13.0</td>\n",
4601
       "      <td>26.0</td>\n",
4602
       "      <td>8.0</td>\n",
4603
       "      <td>4.6</td>\n",
4604
       "      <td>1.3</td>\n",
4605
       "      <td>0</td>\n",
4606
       "    </tr>\n",
4607
       "  </tbody>\n",
4608
       "</table>\n",
4609
       "</div>"
4610
      ],
4611
      "text/plain": [
4612
       "       Age of the patient  Gender of the patient  Total Bilirubin  \\\n",
4613
       "15940                50.0                      1              0.8   \n",
4614
       "11861                30.0                      1              0.8   \n",
4615
       "14768                35.0                      1              2.2   \n",
4616
       "17664                26.0                      0              0.7   \n",
4617
       "4052                 57.0                      0              1.4   \n",
4618
       "\n",
4619
       "       Direct Bilirubin   Alkphos Alkaline Phosphotase  \\\n",
4620
       "15940               0.2                          148.0   \n",
4621
       "11861               0.2                          650.0   \n",
4622
       "14768               1.0                          310.0   \n",
4623
       "17664               0.1                          162.0   \n",
4624
       "4052                0.8                          178.0   \n",
4625
       "\n",
4626
       "        Sgpt Alamine Aminotransferase  Aspartate_Aminotransferase  \\\n",
4627
       "15940                            23.0                        35.0   \n",
4628
       "11861                            70.0                       138.0   \n",
4629
       "14768                           119.0                        42.0   \n",
4630
       "17664                            52.0                        41.0   \n",
4631
       "4052                             13.0                        26.0   \n",
4632
       "\n",
4633
       "       Total Protiens   ALB Albumin  A/G Ratio Albumin and Globulin Ratio  \\\n",
4634
       "15940             6.0           3.0                                   1.0   \n",
4635
       "11861             6.6           3.1                                   0.8   \n",
4636
       "14768             7.9           4.1                                   1.0   \n",
4637
       "17664             5.2           2.5                                   0.9   \n",
4638
       "4052              8.0           4.6                                   1.3   \n",
4639
       "\n",
4640
       "       Result  \n",
4641
       "15940       1  \n",
4642
       "11861       1  \n",
4643
       "14768       0  \n",
4644
       "17664       0  \n",
4645
       "4052        0  "
4646
      ]
4647
     },
4648
     "execution_count": 190,
4649
     "metadata": {},
4650
     "output_type": "execute_result"
4651
    }
4652
   ],
4653
   "source": [
4654
    "df.sample(5)"
4655
   ]
4656
  },
4657
  {
4658
   "cell_type": "code",
4659
   "execution_count": 192,
4660
   "id": "d7770e6a-f844-4c3c-8a8f-3c864d76f281",
4661
   "metadata": {},
4662
   "outputs": [
4663
    {
4664
     "name": "stdout",
4665
     "output_type": "stream",
4666
     "text": [
4667
      "[1]\n",
4668
      "M\n"
4669
     ]
4670
    }
4671
   ],
4672
   "source": [
4673
    "from sklearn.linear_model import LogisticRegression\n",
4674
    "input_data =(26.0,0,0.7,0.1,162.0,52.0,41.0,5.2,2.5,0.9)\n",
4675
    "#(26.0,0,0.7,0.2,185.0,16.0,22.0,7.3,3.7,1.00)\n",
4676
    "#100,12,1,0,1,1,1,1,1,1,1,1,1,1,0,0,1,1,0,0,0,1) \n",
4677
    "\n",
4678
    "'''(14.36,0.09779,0.08129,0.04781,0.1885,0.05766,0.7886,23.56,0.008462,\n",
4679
    "0.0146,0.02387,0.01315,0.0198,0.0023,15.11,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259\n",
4680
    ")'''\n",
4681
    "\n",
4682
    "input_data_as_numpy_array = np.asarray(input_data)\n",
4683
    "\n",
4684
    "# reshape the array as we are predicting for one instance\n",
4685
    "input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)\n",
4686
    "predictions = lr.predict(input_data_reshaped)\n",
4687
    "print(predictions)\n",
4688
    "if (predictions == 0):\n",
4689
    "  print('B')\n",
4690
    "else:\n",
4691
    "  print('M')"
4692
   ]
4693
  },
4694
  {
4695
   "cell_type": "code",
4696
   "execution_count": null,
4697
   "id": "e8337a7a-1d7d-4141-821c-ba6d4af45151",
4698
   "metadata": {},
4699
   "outputs": [],
4700
   "source": []
4701
  },
4702
  {
4703
   "cell_type": "code",
4704
   "execution_count": null,
4705
   "id": "1f597232-8f9c-4a8d-8edd-495539d95274",
4706
   "metadata": {},
4707
   "outputs": [],
4708
   "source": [
4709
    "\n",
4710
    "import pickle\n",
4711
    "model = dtc\n",
4712
    "filename = r'C:\\Users\\Pranshu Saini\\Desktop\\disease-prediction-main\\docpat\\model\\liver_prediction.pkl'\n",
4713
    "pickle.dump(model, open(filename,'wb'))"
4714
   ]
4715
  },
4716
  {
4717
   "cell_type": "code",
4718
   "execution_count": null,
4719
   "id": "10843248",
4720
   "metadata": {},
4721
   "outputs": [],
4722
   "source": []
4723
  },
4724
  {
4725
   "cell_type": "code",
4726
   "execution_count": null,
4727
   "id": "fd047969",
4728
   "metadata": {},
4729
   "outputs": [],
4730
   "source": []
4731
  }
4732
 ],
4733
 "metadata": {
4734
  "kernelspec": {
4735
   "display_name": "Python 3",
4736
   "language": "python",
4737
   "name": "python3"
4738
  },
4739
  "language_info": {
4740
   "codemirror_mode": {
4741
    "name": "ipython",
4742
    "version": 3
4743
   },
4744
   "file_extension": ".py",
4745
   "mimetype": "text/x-python",
4746
   "name": "python",
4747
   "nbconvert_exporter": "python",
4748
   "pygments_lexer": "ipython3",
4749
   "version": "3.12.4"
4750
  }
4751
 },
4752
 "nbformat": 4,
4753
 "nbformat_minor": 5
4754
}