{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "dd2d8358", "metadata": {}, "outputs": [], "source": [ "import sys\n", "import warnings\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "# Hide all warnings by default to keep cell output readable, but only when no\n", "# warning filters were passed to the interpreter (e.g. via -W): a non-empty\n", "# sys.warnoptions means the user asked for specific warning behaviour.\n", "if not sys.warnoptions:\n", "    warnings.simplefilter(\"ignore\")" ] }, { "cell_type": "code", "execution_count": 2, "id": "071adfe5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | sample_id | \n", "patient_cohort | \n", "sample_origin | \n", "age | \n", "sex | \n", "diagnosis | \n", "stage | \n", "benign_sample_diagnosis | \n", "plasma_CA19_9 | \n", "creatinine | \n", "LYVE1 | \n", "REG1B | \n", "TFF1 | \n", "REG1A | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "S1 | \n", "Cohort1 | \n", "BPTB | \n", "33 | \n", "F | \n", "1 | \n", "NaN | \n", "NaN | \n", "11.7 | \n", "1.83222 | \n", "0.893219 | \n", "52.94884 | \n", "654.282174 | \n", "1262.000 | \n", "
1 | \n", "S10 | \n", "Cohort1 | \n", "BPTB | \n", "81 | \n", "F | \n", "1 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.97266 | \n", "2.037585 | \n", "94.46703 | \n", "209.488250 | \n", "228.407 | \n", "
2 | \n", "S100 | \n", "Cohort2 | \n", "BPTB | \n", "51 | \n", "M | \n", "1 | \n", "NaN | \n", "NaN | \n", "7.0 | \n", "0.78039 | \n", "0.145589 | \n", "102.36600 | \n", "461.141000 | \n", "NaN | \n", "
3 | \n", "S101 | \n", "Cohort2 | \n", "BPTB | \n", "61 | \n", "M | \n", "1 | \n", "NaN | \n", "NaN | \n", "8.0 | \n", "0.70122 | \n", "0.002805 | \n", "60.57900 | \n", "142.950000 | \n", "NaN | \n", "
4 | \n", "S102 | \n", "Cohort2 | \n", "BPTB | \n", "62 | \n", "M | \n", "1 | \n", "NaN | \n", "NaN | \n", "9.0 | \n", "0.21489 | \n", "0.000860 | \n", "65.54000 | \n", "41.088000 | \n", "NaN | \n", "
\n", " | count | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
---|---|---|---|---|---|---|---|---|
diagnosis | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1 | \n", "79.0 | \n", "227.871886 | \n", "288.272476 | \n", "0.0 | \n", "62.15350 | \n", "113.000 | \n", "285.60700 | \n", "1617.142 | \n", "
2 | \n", "87.0 | \n", "547.458092 | \n", "1210.159890 | \n", "0.0 | \n", "63.54550 | \n", "127.174 | \n", "405.43800 | \n", "8083.492 | \n", "
3 | \n", "140.0 | \n", "1138.323721 | \n", "1871.810807 | \n", "0.0 | \n", "152.33625 | \n", "411.006 | \n", "1435.60625 | \n", "13200.000 | \n", "
\n", " | count | \n", "mean | \n", "std | \n", "min | \n", "25% | \n", "50% | \n", "75% | \n", "max | \n", "
---|---|---|---|---|---|---|---|---|
diagnosis | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
1 | \n", "92.0 | \n", "8.749569 | \n", "12.189355 | \n", "0.0 | \n", "1.707332 | \n", "5.334598 | \n", "9.15 | \n", "84.3 | \n", "
2 | \n", "108.0 | \n", "61.785741 | \n", "235.816534 | \n", "1.0 | \n", "9.750000 | \n", "17.000000 | \n", "29.25 | \n", "1913.0 | \n", "
3 | \n", "150.0 | \n", "1476.154733 | \n", "3550.038158 | \n", "0.6 | \n", "99.700000 | \n", "427.500000 | \n", "1457.50 | \n", "31000.0 | \n", "
\n", " | age | \n", "sex | \n", "diagnosis | \n", "plasma_CA19_9 | \n", "creatinine | \n", "LYVE1 | \n", "REG1B | \n", "TFF1 | \n", "REG1A | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "33 | \n", "0 | \n", "1 | \n", "11.700000 | \n", "1.83222 | \n", "0.893219 | \n", "52.94884 | \n", "654.282174 | \n", "1262.000000 | \n", "
1 | \n", "81 | \n", "0 | \n", "1 | \n", "8.749569 | \n", "0.97266 | \n", "2.037585 | \n", "94.46703 | \n", "209.488250 | \n", "228.407000 | \n", "
2 | \n", "51 | \n", "1 | \n", "1 | \n", "7.000000 | \n", "0.78039 | \n", "0.145589 | \n", "102.36600 | \n", "461.141000 | \n", "227.871886 | \n", "
3 | \n", "61 | \n", "1 | \n", "1 | \n", "8.000000 | \n", "0.70122 | \n", "0.002805 | \n", "60.57900 | \n", "142.950000 | \n", "227.871886 | \n", "
4 | \n", "62 | \n", "1 | \n", "1 | \n", "9.000000 | \n", "0.21489 | \n", "0.000860 | \n", "65.54000 | \n", "41.088000 | \n", "227.871886 | \n", "
\n", " | age | \n", "sex | \n", "diagnosis | \n", "plasma_CA19_9 | \n", "creatinine | \n", "LYVE1 | \n", "REG1B | \n", "TFF1 | \n", "REG1A | \n", "
---|---|---|---|---|---|---|---|---|---|
0 | \n", "33 | \n", "F | \n", "1 | \n", "11.700000 | \n", "1.83222 | \n", "0.893219 | \n", "52.94884 | \n", "654.282174 | \n", "1262.000000 | \n", "
1 | \n", "81 | \n", "F | \n", "1 | \n", "8.749569 | \n", "0.97266 | \n", "2.037585 | \n", "94.46703 | \n", "209.488250 | \n", "228.407000 | \n", "
2 | \n", "51 | \n", "M | \n", "1 | \n", "7.000000 | \n", "0.78039 | \n", "0.145589 | \n", "102.36600 | \n", "461.141000 | \n", "227.871886 | \n", "
3 | \n", "61 | \n", "M | \n", "1 | \n", "8.000000 | \n", "0.70122 | \n", "0.002805 | \n", "60.57900 | \n", "142.950000 | \n", "227.871886 | \n", "
4 | \n", "62 | \n", "M | \n", "1 | \n", "9.000000 | \n", "0.21489 | \n", "0.000860 | \n", "65.54000 | \n", "41.088000 | \n", "227.871886 | \n", "
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),\n", " param_grid={'bootstrap': [True, False],\n", " 'criterion': ['gini', 'entropy'],\n", " 'max_depth': [4, 5, 6, 7, 8],\n", " 'min_samples_leaf': [1, 2, 4],\n", " 'min_samples_split': [2, 5, 10],\n", " 'n_estimators': [100, 200, 500]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42),\n", " param_grid={'bootstrap': [True, False],\n", " 'criterion': ['gini', 'entropy'],\n", " 'max_depth': [4, 5, 6, 7, 8],\n", " 'min_samples_leaf': [1, 2, 4],\n", " 'min_samples_split': [2, 5, 10],\n", " 'n_estimators': [100, 200, 500]})
RandomForestClassifier(random_state=42)
RandomForestClassifier(random_state=42)