{ "cells": [ { "cell_type": "markdown", "id": "24fda7b6-4e16-44c9-abef-9c790cf286d0", "metadata": {}, "source": [ "Breast cancer" ] }, { "cell_type": "code", "execution_count": 59, "id": "6853550c-51ef-4fe6-8e66-b51eb64eb4c4", "metadata": {}, "outputs": [], "source": [ "# Importing libraries: standard library first, then third-party packages\n", "import warnings\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "# NOTE(review): this hides every warning for the whole session, not only\n", "# library deprecation noise; consider passing category=... to narrow it.\n", "warnings.filterwarnings('ignore')\n", "\n", "# sns.set() is only an alias of set_theme() that seaborn documents as possibly\n", "# going away; set_theme() applies the same default theme.\n", "sns.set_theme()\n", "# The ggplot style is applied second, so its settings win wherever the two overlap.\n", "plt.style.use('ggplot')" ] }, { "cell_type": "code", "execution_count": 60, "id": "fbe6e81a-874c-4df0-a5b4-5382d1c4b644", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | id | \n", "diagnosis | \n", "radius_mean | \n", "texture_mean | \n", "perimeter_mean | \n", "area_mean | \n", "smoothness_mean | \n", "compactness_mean | \n", "concavity_mean | \n", "concave points_mean | \n", "... | \n", "texture_worst | \n", "perimeter_worst | \n", "area_worst | \n", "smoothness_worst | \n", "compactness_worst | \n", "concavity_worst | \n", "concave points_worst | \n", "symmetry_worst | \n", "fractal_dimension_worst | \n", "Unnamed: 32 | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "842302 | \n", "M | \n", "17.99 | \n", "10.38 | \n", "122.80 | \n", "1001.0 | \n", "0.11840 | \n", "0.27760 | \n", "0.3001 | \n", "0.14710 | \n", "... | \n", "17.33 | \n", "184.60 | \n", "2019.0 | \n", "0.1622 | \n", "0.6656 | \n", "0.7119 | \n", "0.2654 | \n", "0.4601 | \n", "0.11890 | \n", "NaN | \n", "
1 | \n", "842517 | \n", "M | \n", "20.57 | \n", "17.77 | \n", "132.90 | \n", "1326.0 | \n", "0.08474 | \n", "0.07864 | \n", "0.0869 | \n", "0.07017 | \n", "... | \n", "23.41 | \n", "158.80 | \n", "1956.0 | \n", "0.1238 | \n", "0.1866 | \n", "0.2416 | \n", "0.1860 | \n", "0.2750 | \n", "0.08902 | \n", "NaN | \n", "
2 | \n", "84300903 | \n", "M | \n", "19.69 | \n", "21.25 | \n", "130.00 | \n", "1203.0 | \n", "0.10960 | \n", "0.15990 | \n", "0.1974 | \n", "0.12790 | \n", "... | \n", "25.53 | \n", "152.50 | \n", "1709.0 | \n", "0.1444 | \n", "0.4245 | \n", "0.4504 | \n", "0.2430 | \n", "0.3613 | \n", "0.08758 | \n", "NaN | \n", "
3 | \n", "84348301 | \n", "M | \n", "11.42 | \n", "20.38 | \n", "77.58 | \n", "386.1 | \n", "0.14250 | \n", "0.28390 | \n", "0.2414 | \n", "0.10520 | \n", "... | \n", "26.50 | \n", "98.87 | \n", "567.7 | \n", "0.2098 | \n", "0.8663 | \n", "0.6869 | \n", "0.2575 | \n", "0.6638 | \n", "0.17300 | \n", "NaN | \n", "
4 | \n", "84358402 | \n", "M | \n", "20.29 | \n", "14.34 | \n", "135.10 | \n", "1297.0 | \n", "0.10030 | \n", "0.13280 | \n", "0.1980 | \n", "0.10430 | \n", "... | \n", "16.67 | \n", "152.20 | \n", "1575.0 | \n", "0.1374 | \n", "0.2050 | \n", "0.4000 | \n", "0.1625 | \n", "0.2364 | \n", "0.07678 | \n", "NaN | \n", "
5 rows × 33 columns
\n", "\n", " | diagnosis | \n", "radius_mean | \n", "texture_mean | \n", "perimeter_mean | \n", "area_mean | \n", "smoothness_mean | \n", "compactness_mean | \n", "concavity_mean | \n", "concave points_mean | \n", "symmetry_mean | \n", "... | \n", "radius_worst | \n", "texture_worst | \n", "perimeter_worst | \n", "area_worst | \n", "smoothness_worst | \n", "compactness_worst | \n", "concavity_worst | \n", "concave points_worst | \n", "symmetry_worst | \n", "fractal_dimension_worst | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "... | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "
mean | \n", "0.372583 | \n", "14.127292 | \n", "19.289649 | \n", "91.969033 | \n", "654.889104 | \n", "0.096360 | \n", "0.104341 | \n", "0.088799 | \n", "0.048919 | \n", "0.181162 | \n", "... | \n", "16.269190 | \n", "25.677223 | \n", "107.261213 | \n", "880.583128 | \n", "0.132369 | \n", "0.254265 | \n", "0.272188 | \n", "0.114606 | \n", "0.290076 | \n", "0.083946 | \n", "
std | \n", "0.483918 | \n", "3.524049 | \n", "4.301036 | \n", "24.298981 | \n", "351.914129 | \n", "0.014064 | \n", "0.052813 | \n", "0.079720 | \n", "0.038803 | \n", "0.027414 | \n", "... | \n", "4.833242 | \n", "6.146258 | \n", "33.602542 | \n", "569.356993 | \n", "0.022832 | \n", "0.157336 | \n", "0.208624 | \n", "0.065732 | \n", "0.061867 | \n", "0.018061 | \n", "
min | \n", "0.000000 | \n", "6.981000 | \n", "9.710000 | \n", "43.790000 | \n", "143.500000 | \n", "0.052630 | \n", "0.019380 | \n", "0.000000 | \n", "0.000000 | \n", "0.106000 | \n", "... | \n", "7.930000 | \n", "12.020000 | \n", "50.410000 | \n", "185.200000 | \n", "0.071170 | \n", "0.027290 | \n", "0.000000 | \n", "0.000000 | \n", "0.156500 | \n", "0.055040 | \n", "
25% | \n", "0.000000 | \n", "11.700000 | \n", "16.170000 | \n", "75.170000 | \n", "420.300000 | \n", "0.086370 | \n", "0.064920 | \n", "0.029560 | \n", "0.020310 | \n", "0.161900 | \n", "... | \n", "13.010000 | \n", "21.080000 | \n", "84.110000 | \n", "515.300000 | \n", "0.116600 | \n", "0.147200 | \n", "0.114500 | \n", "0.064930 | \n", "0.250400 | \n", "0.071460 | \n", "
50% | \n", "0.000000 | \n", "13.370000 | \n", "18.840000 | \n", "86.240000 | \n", "551.100000 | \n", "0.095870 | \n", "0.092630 | \n", "0.061540 | \n", "0.033500 | \n", "0.179200 | \n", "... | \n", "14.970000 | \n", "25.410000 | \n", "97.660000 | \n", "686.500000 | \n", "0.131300 | \n", "0.211900 | \n", "0.226700 | \n", "0.099930 | \n", "0.282200 | \n", "0.080040 | \n", "
75% | \n", "1.000000 | \n", "15.780000 | \n", "21.800000 | \n", "104.100000 | \n", "782.700000 | \n", "0.105300 | \n", "0.130400 | \n", "0.130700 | \n", "0.074000 | \n", "0.195700 | \n", "... | \n", "18.790000 | \n", "29.720000 | \n", "125.400000 | \n", "1084.000000 | \n", "0.146000 | \n", "0.339100 | \n", "0.382900 | \n", "0.161400 | \n", "0.317900 | \n", "0.092080 | \n", "
max | \n", "1.000000 | \n", "28.110000 | \n", "39.280000 | \n", "188.500000 | \n", "2501.000000 | \n", "0.163400 | \n", "0.345400 | \n", "0.426800 | \n", "0.201200 | \n", "0.304000 | \n", "... | \n", "36.040000 | \n", "49.540000 | \n", "251.200000 | \n", "4254.000000 | \n", "0.222600 | \n", "1.058000 | \n", "1.252000 | \n", "0.291000 | \n", "0.663800 | \n", "0.207500 | \n", "
8 rows × 31 columns
\n", "LogisticRegression()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
LogisticRegression()
KNeighborsClassifier()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
KNeighborsClassifier()
GridSearchCV(estimator=SVC(probability=True),\n", " param_grid={'C': [0.01, 0.05, 0.5, 0.1, 1, 10, 15, 20],\n", " 'gamma': [0.0001, 0.001, 0.01, 0.1]})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(estimator=SVC(probability=True),\n", " param_grid={'C': [0.01, 0.05, 0.5, 0.1, 1, 10, 15, 20],\n", " 'gamma': [0.0001, 0.001, 0.01, 0.1]})
SVC(probability=True)
SVC(probability=True)
SVC(C=10, gamma=0.01, probability=True)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(C=10, gamma=0.01, probability=True)
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", " param_grid={'criterion': ['gini', 'entropy'],\n", " 'max_depth': range(2, 32),\n", " 'min_samples_leaf': range(1, 10),\n", " 'min_samples_split': range(2, 10),\n", " 'splitter': ['best', 'random']},\n", " verbose=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5, estimator=DecisionTreeClassifier(), n_jobs=-1,\n", " param_grid={'criterion': ['gini', 'entropy'],\n", " 'max_depth': range(2, 32),\n", " 'min_samples_leaf': range(1, 10),\n", " 'min_samples_split': range(2, 10),\n", " 'splitter': ['best', 'random']},\n", " verbose=1)
DecisionTreeClassifier()
DecisionTreeClassifier()
DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", " min_samples_split=6, splitter='random')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(criterion='entropy', max_depth=19, min_samples_leaf=4,\n", " min_samples_split=6, splitter='random')
\n", " | Model | \n", "Score | \n", "
---|---|---|
2 | \n", "SVM | \n", "97.66 | \n", "
0 | \n", "Logistic Regression | \n", "95.91 | \n", "
1 | \n", "KNN | \n", "93.57 | \n", "
3 | \n", "Decision Tree Classifier | \n", "92.98 | \n", "