Diff of /AdaBoost.ipynb [000000] .. [28fc72]

Switch to unified view

a b/AdaBoost.ipynb
1
{
2
  "cells": [
3
    {
4
      "cell_type": "code",
5
      "execution_count": 128,
6
      "metadata": {
7
        "colab": {
8
          "base_uri": "https://localhost:8080/"
9
        },
10
        "id": "SGXfU_7VEoUf",
11
        "outputId": "f7067405-aea3-45a2-d352-f6fce9b34267"
12
      },
13
      "outputs": [
14
        {
15
          "output_type": "stream",
16
          "name": "stdout",
17
          "text": [
18
            "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
19
          ]
20
        }
21
      ],
22
      "source": [
23
        "from google.colab import drive\n",
24
        "drive.mount('/content/drive')"
25
      ],
26
      "id": "SGXfU_7VEoUf"
27
    },
28
    {
29
      "cell_type": "code",
30
      "execution_count": 129,
31
      "metadata": {
32
        "id": "3fb9958c-527b-47ca-a515-fc2ffc560f48"
33
      },
34
      "outputs": [],
35
      "source": [
36
        "import pandas as pd\n",
37
        "import numpy as np\n",
38
        "import seaborn as sns\n",
39
        "import matplotlib.pyplot as plt\n",
40
        "#from imblearn.over_sampling import SMOTE\n",
41
        "from imblearn.under_sampling import NearMiss\n",
42
        "from sklearn.model_selection import train_test_split, RandomizedSearchCV\n",
43
        "from sklearn.preprocessing import StandardScaler\n",
44
        "from sklearn.ensemble import AdaBoostClassifier\n",
45
        "from imblearn.pipeline import Pipeline as ImbPipeline\n",
46
        "from sklearn.metrics import classification_report\n",
47
        "from sklearn.tree import DecisionTreeClassifier\n",
48
        "from collections import Counter\n",
49
        "import warnings\n",
50
        "warnings.filterwarnings('ignore')\n",
51
        "warnings.filterwarnings(\"ignore\", category=UserWarning, module=\"joblib\")\n",
52
        "warnings.filterwarnings(\"ignore\", category=UserWarning, module=\"sklearn\")\n",
53
        "warnings.filterwarnings('ignore', category=UserWarning, message=\"Line Search failed\")"
54
      ],
55
      "id": "3fb9958c-527b-47ca-a515-fc2ffc560f48"
56
    },
57
    {
58
      "cell_type": "code",
59
      "execution_count": 130,
60
      "metadata": {
61
        "colab": {
62
          "base_uri": "https://localhost:8080/"
63
        },
64
        "id": "23fa808c-e180-4f48-8cd9-09a00a32f799",
65
        "outputId": "c8c69249-26ad-4bdd-e634-bc1613365b41"
66
      },
67
      "outputs": [
68
        {
69
          "output_type": "stream",
70
          "name": "stdout",
71
          "text": [
72
            "      Name        FC     logFC    logCPM   P-Value       FDR  SCLC  NSCLC\n",
73
            "0  KRT16P5 -1.474275 -0.560006 -2.065784  0.423250  0.645529   0.0    0.0\n",
74
            "1  KRT16P3 -1.158475 -0.212227  0.698547  0.598622  0.779482   0.0    0.0\n",
75
            "2  KRT16P2  1.785481  0.836313  3.744968  0.060200  0.211667   0.0    0.0\n",
76
            "3  KRT16P6 -2.534136 -1.341494  0.404997  0.023716  0.123727   0.0    0.0\n",
77
            "4    CRHBP  1.441891  0.527962 -0.015277  0.034942  0.153404   0.0    0.0\n",
78
            "Name       0\n",
79
            "FC         0\n",
80
            "logFC      0\n",
81
            "logCPM     0\n",
82
            "P-Value    0\n",
83
            "FDR        0\n",
84
            "SCLC       0\n",
85
            "NSCLC      0\n",
86
            "dtype: int64\n"
87
          ]
88
        }
89
      ],
90
      "source": [
91
        "# Load the labelled CSV from the mounted Drive and replace missing values with 0\n",
92
        "file_path = \"/content/drive/MyDrive/ML_HW_4_5/labelled data.csv\"\n",
93
        "data = pd.read_csv(file_path,index_col=0).fillna(0)  # first CSV column becomes the index; NaNs become 0\n",
94
        "print(data.head())\n",
95
        "print(data.isnull().sum())  # NOTE(review): always all-zero here because fillna(0) already ran on the line above"
96
      ],
97
      "id": "23fa808c-e180-4f48-8cd9-09a00a32f799"
98
    },
99
    {
100
      "cell_type": "markdown",
101
      "source": [
102
        "EDA"
103
      ],
104
      "metadata": {
105
        "id": "MkHa06tdNJOb"
106
      },
107
      "id": "MkHa06tdNJOb"
108
    },
109
    {
110
      "cell_type": "code",
111
      "source": [
112
        "print(data.shape)\n",
113
        "print()\n",
114
        "print(data.describe)\n",
115
        "print()\n",
116
        "print(data.info)\n",
117
        "print()\n",
118
        "print(data.duplicated())\n",
119
        "print()\n",
120
        "print(data.dtypes)"
121
      ],
122
      "metadata": {
123
        "colab": {
124
          "base_uri": "https://localhost:8080/"
125
        },
126
        "id": "m1zN40S1Ma-m",
127
        "outputId": "cb3a97ed-66fc-4641-8a37-2d61e0b169ae"
128
      },
129
      "id": "m1zN40S1Ma-m",
130
      "execution_count": 131,
131
      "outputs": [
132
        {
133
          "output_type": "stream",
134
          "name": "stdout",
135
          "text": [
136
            "(19778, 8)\n",
137
            "\n",
138
            "<bound method NDFrame.describe of                Name        FC     logFC    logCPM   P-Value       FDR  SCLC  \\\n",
139
            "0           KRT16P5 -1.474275 -0.560006 -2.065784  0.423250  0.645529   0.0   \n",
140
            "1           KRT16P3 -1.158475 -0.212227  0.698547  0.598622  0.779482   0.0   \n",
141
            "2           KRT16P2  1.785481  0.836313  3.744968  0.060200  0.211667   0.0   \n",
142
            "3           KRT16P6 -2.534136 -1.341494  0.404997  0.023716  0.123727   0.0   \n",
143
            "4             CRHBP  1.441891  0.527962 -0.015277  0.034942  0.153404   0.0   \n",
144
            "...             ...       ...       ...       ...       ...       ...   ...   \n",
145
            "19773  LOC105369958  2.196994  1.135531  1.382694  0.003199  0.040235   0.0   \n",
146
            "19774         ABCC2  3.461301  1.791314  5.336636  0.000671  0.015864   0.0   \n",
147
            "19775         TRAV6  3.849574  1.944699 -0.401490  0.000016  0.001372   0.0   \n",
148
            "19776  LOC105369904  2.226049  1.154485 -1.006838  0.002616  0.035713   0.0   \n",
149
            "19777  LOC101928636  2.279563  1.188757  0.151617  0.000219  0.007721   0.0   \n",
150
            "\n",
151
            "       NSCLC  \n",
152
            "0        0.0  \n",
153
            "1        0.0  \n",
154
            "2        0.0  \n",
155
            "3        0.0  \n",
156
            "4        0.0  \n",
157
            "...      ...  \n",
158
            "19773    0.0  \n",
159
            "19774    0.0  \n",
160
            "19775    0.0  \n",
161
            "19776    0.0  \n",
162
            "19777    0.0  \n",
163
            "\n",
164
            "[19778 rows x 8 columns]>\n",
165
            "\n",
166
            "<bound method DataFrame.info of                Name        FC     logFC    logCPM   P-Value       FDR  SCLC  \\\n",
167
            "0           KRT16P5 -1.474275 -0.560006 -2.065784  0.423250  0.645529   0.0   \n",
168
            "1           KRT16P3 -1.158475 -0.212227  0.698547  0.598622  0.779482   0.0   \n",
169
            "2           KRT16P2  1.785481  0.836313  3.744968  0.060200  0.211667   0.0   \n",
170
            "3           KRT16P6 -2.534136 -1.341494  0.404997  0.023716  0.123727   0.0   \n",
171
            "4             CRHBP  1.441891  0.527962 -0.015277  0.034942  0.153404   0.0   \n",
172
            "...             ...       ...       ...       ...       ...       ...   ...   \n",
173
            "19773  LOC105369958  2.196994  1.135531  1.382694  0.003199  0.040235   0.0   \n",
174
            "19774         ABCC2  3.461301  1.791314  5.336636  0.000671  0.015864   0.0   \n",
175
            "19775         TRAV6  3.849574  1.944699 -0.401490  0.000016  0.001372   0.0   \n",
176
            "19776  LOC105369904  2.226049  1.154485 -1.006838  0.002616  0.035713   0.0   \n",
177
            "19777  LOC101928636  2.279563  1.188757  0.151617  0.000219  0.007721   0.0   \n",
178
            "\n",
179
            "       NSCLC  \n",
180
            "0        0.0  \n",
181
            "1        0.0  \n",
182
            "2        0.0  \n",
183
            "3        0.0  \n",
184
            "4        0.0  \n",
185
            "...      ...  \n",
186
            "19773    0.0  \n",
187
            "19774    0.0  \n",
188
            "19775    0.0  \n",
189
            "19776    0.0  \n",
190
            "19777    0.0  \n",
191
            "\n",
192
            "[19778 rows x 8 columns]>\n",
193
            "\n",
194
            "0        False\n",
195
            "1        False\n",
196
            "2        False\n",
197
            "3        False\n",
198
            "4        False\n",
199
            "         ...  \n",
200
            "19773    False\n",
201
            "19774    False\n",
202
            "19775    False\n",
203
            "19776    False\n",
204
            "19777    False\n",
205
            "Length: 19778, dtype: bool\n",
206
            "\n",
207
            "Name        object\n",
208
            "FC         float64\n",
209
            "logFC      float64\n",
210
            "logCPM     float64\n",
211
            "P-Value    float64\n",
212
            "FDR        float64\n",
213
            "SCLC       float64\n",
214
            "NSCLC      float64\n",
215
            "dtype: object\n"
216
          ]
217
        }
218
      ]
219
    },
220
    {
221
      "cell_type": "code",
222
      "source": [
223
        "print(data.columns[data.isna().any()])\n",
224
        "print()"
225
      ],
226
      "metadata": {
227
        "colab": {
228
          "base_uri": "https://localhost:8080/"
229
        },
230
        "id": "9A0vU-rYM47R",
231
        "outputId": "ddabfc5c-5387-4f20-8244-2152d061df03"
232
      },
233
      "id": "9A0vU-rYM47R",
234
      "execution_count": 132,
235
      "outputs": [
236
        {
237
          "output_type": "stream",
238
          "name": "stdout",
239
          "text": [
240
            "Index([], dtype='object')\n",
241
            "\n"
242
          ]
243
        }
244
      ]
245
    },
246
    {
247
      "cell_type": "code",
248
      "source": [
249
        "correlation_matrix = data.corr()\n",
250
        "# Create a heatmap using Seaborn\n",
251
        "plt.figure(figsize=(10, 8))\n",
252
        "sns.heatmap(correlation_matrix, annot=True, cmap=\"coolwarm\", fmt=\".2f\", linewidths=.5)\n",
253
        "plt.title(\"Correlation Matrix\")\n",
254
        "plt.show()"
255
      ],
256
      "metadata": {
257
        "colab": {
258
          "base_uri": "https://localhost:8080/",
259
          "height": 699
260
        },
261
        "id": "bXSyUBFmNkgr",
262
        "outputId": "6ca94eff-aa58-420b-a1d4-45a761cb4f3f"
263
      },
264
      "id": "bXSyUBFmNkgr",
265
      "execution_count": 134,
266
      "outputs": [
267
        {
268
          "output_type": "display_data",
269
          "data": {
270
            "text/plain": [
271
              "<Figure size 1000x800 with 2 Axes>"
272
            ],
273
            "image/png": "\n"
274
          },
275
          "metadata": {}
276
        }
277
      ]
278
    },
279
    {
280
      "cell_type": "code",
281
      "execution_count": 135,
282
      "metadata": {
283
        "id": "899e19db-549e-49e5-a0da-7fa1467afff9"
284
      },
285
      "outputs": [],
286
      "source": [
287
        "# Feature selection: keep three of the numeric columns as model inputs\n",
288
        "features = data[['FC', 'logFC', 'P-Value']]\n",
289
        "targets = {'NSCLC': data['NSCLC'], 'SCLC': data['SCLC']}  # NOTE(review): not referenced again in the cells shown; the next cell reads the two label columns from data directly"
290
      ],
291
      "id": "899e19db-549e-49e5-a0da-7fa1467afff9"
292
    },
293
    {
294
      "cell_type": "code",
295
      "source": [
296
        "nsclc = data['NSCLC']\n",
297
        "sclc = data['SCLC']"
298
      ],
299
      "metadata": {
300
        "id": "JOJMqGN3E1n0"
301
      },
302
      "id": "JOJMqGN3E1n0",
303
      "execution_count": 136,
304
      "outputs": []
305
    },
306
    {
307
      "cell_type": "code",
308
      "source": [
309
        "nm = NearMiss()  # imblearn undersampler with default settings, reused for both targets\n",
310
        "print('SCLC Original Shape:', Counter(sclc))\n",
311
        "features_nm_sclc, nm_sclc = nm.fit_resample(features, sclc)  # NOTE(review): resampling runs on the full dataset before the later train/test split, so the held-out test rows are undersampled too\n",
312
        "print('SCLC Resample Shape:', Counter(nm_sclc))\n",
313
        "print('NSCLC Original Shape:', Counter(nsclc))\n",
314
        "features_nm_nsclc, nm_nsclc = nm.fit_resample(features, nsclc)\n",
315
        "print('NSCLC Resample Shape:', Counter(nm_nsclc))"
316
      ],
317
      "metadata": {
318
        "colab": {
319
          "base_uri": "https://localhost:8080/"
320
        },
321
        "id": "HZNVKpm4E4jC",
322
        "outputId": "418338fe-44fd-4518-9ca8-d70197bf9766"
323
      },
324
      "id": "HZNVKpm4E4jC",
325
      "execution_count": 137,
326
      "outputs": [
327
        {
328
          "output_type": "stream",
329
          "name": "stdout",
330
          "text": [
331
            "SCLC Original Shape: Counter({0.0: 18857, 1.0: 921})\n",
332
            "SCLC Resample Shape: Counter({0.0: 921, 1.0: 921})\n",
333
            "NSCLC Original Shape: Counter({0.0: 19087, 1.0: 691})\n",
334
            "NSCLC Resample Shape: Counter({0.0: 691, 1.0: 691})\n"
335
          ]
336
        }
337
      ]
338
    },
339
    {
340
      "cell_type": "code",
341
      "source": [
342
        "features_sclc = features_nm_sclc  # resampled feature rows paired with the SCLC labels\n",
343
        "features_nsclc = features_nm_nsclc  # resampled feature rows paired with the NSCLC labels\n",
344
        "sclc = nm_sclc  # NOTE(review): rebinds sclc/nsclc to the resampled labels, whose length no longer matches features; re-run the cell that reads them from data before re-running the NearMiss cell\n",
345
        "nsclc = nm_nsclc"
346
      ],
347
      "metadata": {
348
        "id": "viq_jXlFE8o9"
349
      },
350
      "id": "viq_jXlFE8o9",
351
      "execution_count": 138,
352
      "outputs": []
353
    },
354
    {
355
      "cell_type": "code",
356
      "execution_count": 139,
357
      "metadata": {
358
        "colab": {
359
          "base_uri": "https://localhost:8080/",
360
          "height": 963
361
        },
362
        "id": "26952ab4-1311-48af-8ca9-3d327b96b9da",
363
        "outputId": "1c4123f6-2cc7-4813-ed36-fb9d1e86e3f3"
364
      },
365
      "outputs": [
366
        {
367
          "output_type": "display_data",
368
          "data": {
369
            "text/plain": [
370
              "<Figure size 1500x600 with 4 Axes>"
371
            ],
372
            "image/png": "\n"
373
          },
374
          "metadata": {}
375
        },
376
        {
377
          "output_type": "display_data",
378
          "data": {
379
            "text/plain": [
380
              "<Figure size 640x480 with 2 Axes>"
381
            ],
382
            "image/png": "\n"
383
          },
384
          "metadata": {}
385
        }
386
      ],
387
      "source": [
388
        "#EDA\n",
389
        "#histograms and correlation matrix\n",
390
        "features.hist(bins=15, figsize=(15, 6), layout=(2, 2))\n",
391
        "plt.show()\n",
392
        "sns.heatmap(features.corr(), annot=True)\n",
393
        "plt.show()"
394
      ],
395
      "id": "26952ab4-1311-48af-8ca9-3d327b96b9da"
396
    },
397
    {
398
      "cell_type": "code",
399
      "execution_count": 140,
400
      "metadata": {
401
        "id": "b2e91298-7a42-4fef-8c0f-a4a52e48cb01"
402
      },
403
      "outputs": [],
404
      "source": [
405
        "def train_test_and_standardize(features, target, test_size=0.2, random_state=42):\n",
406
        "    # Split the data into train and test sets\n",
407
        "    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=test_size, random_state=random_state)\n",
408
        "\n",
409
        "    # Standardize the features using StandardScaler\n",
410
        "    scaler = StandardScaler()\n",
411
        "    x_train = scaler.fit_transform(x_train)\n",
412
        "    x_test = scaler.transform(x_test)\n",
413
        "\n",
414
        "    return x_train, x_test, y_train, y_test\n",
415
        "\n",
416
        "xtrain_sclc, xtest_sclc, ytrain_sclc, ytest_sclc = train_test_and_standardize(features_sclc, sclc)\n",
417
        "xtrain_nsclc, xtest_nsclc, ytrain_nsclc, ytest_nsclc = train_test_and_standardize(features_nsclc, nsclc)"
418
      ],
419
      "id": "b2e91298-7a42-4fef-8c0f-a4a52e48cb01"
420
    },
421
    {
422
      "cell_type": "code",
423
      "execution_count": 141,
424
      "metadata": {
425
        "id": "784fa15e-b3fc-4fe2-b8bb-89dccd210191"
426
      },
427
      "outputs": [],
428
      "source": [
429
        "# AdaBoost Pipeline\n",
430
        "base_estimator = DecisionTreeClassifier(class_weight='balanced')  # base estimator\n",
431
        "pipeline = ImbPipeline([\n",
432
        "    ('scaler', StandardScaler()),\n",
433
        "    ('adaboost', AdaBoostClassifier(base_estimator=base_estimator))\n",
434
        "])\n",
435
        "\n",
436
        "# Parameter Grid for AdaBoost\n",
437
        "param_grid_adaboost = {\n",
438
        "    'adaboost__n_estimators': [50, 100, 200],\n",
439
        "    'adaboost__learning_rate': [0.05, 0.1, 0.5, 1.0],\n",
440
        "    'adaboost__algorithm': ['SAMME', 'SAMME.R'],\n",
441
        "    'adaboost__random_state': [None, 42, 100],\n",
442
        "    'adaboost__base_estimator__max_depth': [1, 2, 3, 4],\n",
443
        "    'adaboost__base_estimator__min_samples_split': [2, 5, 10],\n",
444
        "    'adaboost__base_estimator__min_samples_leaf': [1, 2, 4, 6],\n",
445
        "    'adaboost__base_estimator__criterion': ['gini', 'entropy'],\n",
446
        "    'adaboost__base_estimator__max_features': [None, 'sqrt', 'log2'],\n",
447
        "    'adaboost__base_estimator__splitter': ['best', 'random']\n",
448
        "}"
449
      ],
450
      "id": "784fa15e-b3fc-4fe2-b8bb-89dccd210191"
451
    },
452
    {
453
      "cell_type": "code",
454
      "execution_count": 142,
455
      "metadata": {
456
        "id": "45807704-f39e-46d8-9715-9e799c989fd6"
457
      },
458
      "outputs": [],
459
      "source": [
460
        "def evaluate_adaboost(x_train, y_train, x_test, y_test, param_grid):\n",
461
        "    grid_search = RandomizedSearchCV(estimator=pipeline, param_distributions=param_grid, scoring='f1',n_iter= 1000, cv=10, verbose=1, n_jobs=-1)\n",
462
        "    grid_search.fit(x_train, y_train)  # Fit on training data\n",
463
        "    best_params = grid_search.best_params_\n",
464
        "    best_score = grid_search.best_score_\n",
465
        "    best_adaboost = pipeline.set_params(**best_params)\n",
466
        "    best_adaboost.fit(x_train, y_train)  # Refit on training data\n",
467
        "    y_test_pred = best_adaboost.predict(x_test)  # Predict on test data\n",
468
        "    report = classification_report(y_test, y_test_pred)\n",
469
        "    return best_params, best_score, report"
470
      ],
471
      "id": "45807704-f39e-46d8-9715-9e799c989fd6"
472
    },
473
    {
474
      "cell_type": "code",
475
      "execution_count": 143,
476
      "metadata": {
477
        "colab": {
478
          "base_uri": "https://localhost:8080/"
479
        },
480
        "id": "c54aca08-253d-4da3-ac79-f5ab64ab64d9",
481
        "outputId": "479b54f1-3b24-4224-aa80-3a098587f1c0"
482
      },
483
      "outputs": [
484
        {
485
          "output_type": "stream",
486
          "name": "stdout",
487
          "text": [
488
            "Fitting 10 folds for each of 1000 candidates, totalling 10000 fits\n",
489
            "Best Parameters for NSCLC: {'adaboost__random_state': 100, 'adaboost__n_estimators': 50, 'adaboost__learning_rate': 0.1, 'adaboost__base_estimator__splitter': 'best', 'adaboost__base_estimator__min_samples_split': 2, 'adaboost__base_estimator__min_samples_leaf': 1, 'adaboost__base_estimator__max_features': 'sqrt', 'adaboost__base_estimator__max_depth': 3, 'adaboost__base_estimator__criterion': 'gini', 'adaboost__algorithm': 'SAMME'}\n",
490
            "Best F1 Score for NSCLC: 0.8498770054338733\n",
491
            "Classification Report for NSCLC (Test Data):\n",
492
            "               precision    recall  f1-score   support\n",
493
            "\n",
494
            "         0.0       0.82      0.82      0.82       142\n",
495
            "         1.0       0.81      0.81      0.81       135\n",
496
            "\n",
497
            "    accuracy                           0.82       277\n",
498
            "   macro avg       0.82      0.82      0.82       277\n",
499
            "weighted avg       0.82      0.82      0.82       277\n",
500
            "\n"
501
          ]
502
        }
503
      ],
504
      "source": [
505
        "best_params_nsclc, best_score_nsclc, report_nsclc = evaluate_adaboost(xtrain_nsclc, ytrain_nsclc, xtest_nsclc, ytest_nsclc, param_grid_adaboost)\n",
506
        "print(\"Best Parameters for NSCLC:\", best_params_nsclc)\n",
507
        "print(\"Best F1 Score for NSCLC:\", best_score_nsclc)\n",
508
        "print(\"Classification Report for NSCLC (Test Data):\\n\", report_nsclc)"
509
      ],
510
      "id": "c54aca08-253d-4da3-ac79-f5ab64ab64d9"
511
    },
512
    {
513
      "cell_type": "code",
514
      "execution_count": 127,
515
      "metadata": {
516
        "id": "7d7a9138-6d26-4ea8-a254-7cf99186473f",
517
        "colab": {
518
          "base_uri": "https://localhost:8080/"
519
        },
520
        "outputId": "2120bb04-d90d-453b-a7f5-d67a30eaf2b8"
521
      },
522
      "outputs": [
523
        {
524
          "output_type": "stream",
525
          "name": "stdout",
526
          "text": [
527
            "Fitting 10 folds for each of 500 candidates, totalling 5000 fits\n",
528
            "Best Parameters for NSCLC: {'adaboost__random_state': 100, 'adaboost__n_estimators': 100, 'adaboost__learning_rate': 0.05, 'adaboost__base_estimator__splitter': 'best', 'adaboost__base_estimator__min_samples_split': 10, 'adaboost__base_estimator__min_samples_leaf': 2, 'adaboost__base_estimator__max_features': 'sqrt', 'adaboost__base_estimator__max_depth': 3, 'adaboost__base_estimator__criterion': 'entropy', 'adaboost__algorithm': 'SAMME.R'}\n",
529
            "Best F1 Score for NSCLC: 0.8115145193902691\n",
530
            "Classification Report for NSCLC (Test Data):\n",
531
            "               precision    recall  f1-score   support\n",
532
            "\n",
533
            "         0.0       0.79      0.88      0.83       188\n",
534
            "         1.0       0.86      0.76      0.80       181\n",
535
            "\n",
536
            "    accuracy                           0.82       369\n",
537
            "   macro avg       0.82      0.82      0.82       369\n",
538
            "weighted avg       0.82      0.82      0.82       369\n",
539
            "\n"
540
          ]
541
        }
542
      ],
543
      "source": [
544
        "best_params_nsclc, best_score_nsclc, report_nsclc = evaluate_adaboost(xtrain_sclc, ytrain_sclc, xtest_sclc, ytest_sclc, param_grid_adaboost)\n",
545
        "print(\"Best Parameters for NSCLC:\", best_params_nsclc)\n",
546
        "print(\"Best F1 Score for NSCLC:\", best_score_nsclc)\n",
547
        "print(\"Classification Report for NSCLC (Test Data):\\n\", report_nsclc)"
548
      ],
549
      "id": "7d7a9138-6d26-4ea8-a254-7cf99186473f"
550
    }
551
  ],
552
  "metadata": {
553
    "colab": {
554
      "provenance": []
555
    },
556
    "kernelspec": {
557
      "display_name": "Python 3",
558
      "name": "python3"
559
    },
560
    "language_info": {
561
      "codemirror_mode": {
562
        "name": "ipython",
563
        "version": 3
564
      },
565
      "file_extension": ".py",
566
      "mimetype": "text/x-python",
567
      "name": "python",
568
      "nbconvert_exporter": "python",
569
      "pygments_lexer": "ipython3",
570
      "version": "3.9.18"
571
    }
572
  },
573
  "nbformat": 4,
574
  "nbformat_minor": 5
575
}