2761 lines (2761 with data), 130.2 kB
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
}
},
"cells": [
{
"cell_type": "markdown",
"source": [
"## Import the Dependencies"
],
"metadata": {
"id": "B-PIUdwcHQmC"
}
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "S7nY2W5HDw72"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import sklearn.datasets\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score"
]
},
{
"cell_type": "markdown",
"source": [
"## Data Analysis & Pre-processing"
],
"metadata": {
"id": "YSt0F5USHUSw"
}
},
{
"cell_type": "code",
"source": [
"# Load the Breast Cancer Wisconsin (diagnostic) dataset bundled with scikit-learn.\n",
"breast_cancer_dataset = sklearn.datasets.load_breast_cancer()"
],
"metadata": {
"id": "MZI0C3YoHPOp"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Summarize the loaded dataset instead of printing the whole object:\n",
"# print() on the full Bunch dumps every array and the entire DESCR text.\n",
"print('keys:', list(breast_cancer_dataset.keys()))\n",
"print('data shape:', breast_cancer_dataset.data.shape)\n",
"print('target names:', breast_cancer_dataset.target_names.tolist())"
],
"metadata": {
"id": "im2LfysVHcLC"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Wrap the feature matrix in a DataFrame, using the dataset's feature names as column labels.\n",
"data_frame = pd.DataFrame(\n",
"    data=breast_cancer_dataset.data,\n",
"    columns=breast_cancer_dataset.feature_names,\n",
")"
],
"metadata": {
"id": "kGvI0YZRHgIW"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Peek at the first five rows to confirm the feature columns loaded as expected.\n",
"data_frame.head(n=5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 290
},
"id": "Y4noEwymHi4U",
"outputId": "8918c766-1f11-4683-9365-71f2019fb9e7"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.3001 0.14710 0.2419 \n",
"1 0.07864 0.0869 0.07017 0.1812 \n",
"2 0.15990 0.1974 0.12790 0.2069 \n",
"3 0.28390 0.2414 0.10520 0.2597 \n",
"4 0.13280 0.1980 0.10430 0.1809 \n",
"\n",
" mean fractal dimension ... worst radius worst texture worst perimeter \\\n",
"0 0.07871 ... 25.38 17.33 184.60 \n",
"1 0.05667 ... 24.99 23.41 158.80 \n",
"2 0.05999 ... 23.57 25.53 152.50 \n",
"3 0.09744 ... 14.91 26.50 98.87 \n",
"4 0.05883 ... 22.54 16.67 152.20 \n",
"\n",
" worst area worst smoothness worst compactness worst concavity \\\n",
"0 2019.0 0.1622 0.6656 0.7119 \n",
"1 1956.0 0.1238 0.1866 0.2416 \n",
"2 1709.0 0.1444 0.4245 0.4504 \n",
"3 567.7 0.2098 0.8663 0.6869 \n",
"4 1575.0 0.1374 0.2050 0.4000 \n",
"\n",
" worst concave points worst symmetry worst fractal dimension \n",
"0 0.2654 0.4601 0.11890 \n",
"1 0.1860 0.2750 0.08902 \n",
"2 0.2430 0.3613 0.08758 \n",
"3 0.2575 0.6638 0.17300 \n",
"4 0.1625 0.2364 0.07678 \n",
"\n",
"[5 rows x 30 columns]"
],
"text/html": [
"\n",
" <div id=\"df-fde37930-9a0b-46c7-990a-71c18cc188d5\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst radius</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.99</td>\n",
" <td>10.38</td>\n",
" <td>122.80</td>\n",
" <td>1001.0</td>\n",
" <td>0.11840</td>\n",
" <td>0.27760</td>\n",
" <td>0.3001</td>\n",
" <td>0.14710</td>\n",
" <td>0.2419</td>\n",
" <td>0.07871</td>\n",
" <td>...</td>\n",
" <td>25.38</td>\n",
" <td>17.33</td>\n",
" <td>184.60</td>\n",
" <td>2019.0</td>\n",
" <td>0.1622</td>\n",
" <td>0.6656</td>\n",
" <td>0.7119</td>\n",
" <td>0.2654</td>\n",
" <td>0.4601</td>\n",
" <td>0.11890</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>20.57</td>\n",
" <td>17.77</td>\n",
" <td>132.90</td>\n",
" <td>1326.0</td>\n",
" <td>0.08474</td>\n",
" <td>0.07864</td>\n",
" <td>0.0869</td>\n",
" <td>0.07017</td>\n",
" <td>0.1812</td>\n",
" <td>0.05667</td>\n",
" <td>...</td>\n",
" <td>24.99</td>\n",
" <td>23.41</td>\n",
" <td>158.80</td>\n",
" <td>1956.0</td>\n",
" <td>0.1238</td>\n",
" <td>0.1866</td>\n",
" <td>0.2416</td>\n",
" <td>0.1860</td>\n",
" <td>0.2750</td>\n",
" <td>0.08902</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>19.69</td>\n",
" <td>21.25</td>\n",
" <td>130.00</td>\n",
" <td>1203.0</td>\n",
" <td>0.10960</td>\n",
" <td>0.15990</td>\n",
" <td>0.1974</td>\n",
" <td>0.12790</td>\n",
" <td>0.2069</td>\n",
" <td>0.05999</td>\n",
" <td>...</td>\n",
" <td>23.57</td>\n",
" <td>25.53</td>\n",
" <td>152.50</td>\n",
" <td>1709.0</td>\n",
" <td>0.1444</td>\n",
" <td>0.4245</td>\n",
" <td>0.4504</td>\n",
" <td>0.2430</td>\n",
" <td>0.3613</td>\n",
" <td>0.08758</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>11.42</td>\n",
" <td>20.38</td>\n",
" <td>77.58</td>\n",
" <td>386.1</td>\n",
" <td>0.14250</td>\n",
" <td>0.28390</td>\n",
" <td>0.2414</td>\n",
" <td>0.10520</td>\n",
" <td>0.2597</td>\n",
" <td>0.09744</td>\n",
" <td>...</td>\n",
" <td>14.91</td>\n",
" <td>26.50</td>\n",
" <td>98.87</td>\n",
" <td>567.7</td>\n",
" <td>0.2098</td>\n",
" <td>0.8663</td>\n",
" <td>0.6869</td>\n",
" <td>0.2575</td>\n",
" <td>0.6638</td>\n",
" <td>0.17300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>20.29</td>\n",
" <td>14.34</td>\n",
" <td>135.10</td>\n",
" <td>1297.0</td>\n",
" <td>0.10030</td>\n",
" <td>0.13280</td>\n",
" <td>0.1980</td>\n",
" <td>0.10430</td>\n",
" <td>0.1809</td>\n",
" <td>0.05883</td>\n",
" <td>...</td>\n",
" <td>22.54</td>\n",
" <td>16.67</td>\n",
" <td>152.20</td>\n",
" <td>1575.0</td>\n",
" <td>0.1374</td>\n",
" <td>0.2050</td>\n",
" <td>0.4000</td>\n",
" <td>0.1625</td>\n",
" <td>0.2364</td>\n",
" <td>0.07678</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 30 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-fde37930-9a0b-46c7-990a-71c18cc188d5')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-fde37930-9a0b-46c7-990a-71c18cc188d5 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-fde37930-9a0b-46c7-990a-71c18cc188d5');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-203d84bf-9157-44b6-bc3a-3f53d71d5a1d\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-203d84bf-9157-44b6-bc3a-3f53d71d5a1d')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-203d84bf-9157-44b6-bc3a-3f53d71d5a1d button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "data_frame"
}
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"source": [
"# Add the class labels (breast_cancer_dataset.target) as a new 'label' column (0 = malignant, 1 = benign)\n",
"data_frame['label'] = breast_cancer_dataset.target"
],
"metadata": {
"id": "1X5OJYjyHkll"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Look at the last five rows; the added 'label' column appears at the far right.\n",
"data_frame.tail(n=5)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 290
},
"id": "RZQ_tywdHqP9",
"outputId": "e6be35b8-db7c-4374-a696-0bb3bc2d088f"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"564 21.56 22.39 142.00 1479.0 0.11100 \n",
"565 20.13 28.25 131.20 1261.0 0.09780 \n",
"566 16.60 28.08 108.30 858.1 0.08455 \n",
"567 20.60 29.33 140.10 1265.0 0.11780 \n",
"568 7.76 24.54 47.92 181.0 0.05263 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"564 0.11590 0.24390 0.13890 0.1726 \n",
"565 0.10340 0.14400 0.09791 0.1752 \n",
"566 0.10230 0.09251 0.05302 0.1590 \n",
"567 0.27700 0.35140 0.15200 0.2397 \n",
"568 0.04362 0.00000 0.00000 0.1587 \n",
"\n",
" mean fractal dimension ... worst texture worst perimeter worst area \\\n",
"564 0.05623 ... 26.40 166.10 2027.0 \n",
"565 0.05533 ... 38.25 155.00 1731.0 \n",
"566 0.05648 ... 34.12 126.70 1124.0 \n",
"567 0.07016 ... 39.42 184.60 1821.0 \n",
"568 0.05884 ... 30.37 59.16 268.6 \n",
"\n",
" worst smoothness worst compactness worst concavity \\\n",
"564 0.14100 0.21130 0.4107 \n",
"565 0.11660 0.19220 0.3215 \n",
"566 0.11390 0.30940 0.3403 \n",
"567 0.16500 0.86810 0.9387 \n",
"568 0.08996 0.06444 0.0000 \n",
"\n",
" worst concave points worst symmetry worst fractal dimension label \n",
"564 0.2216 0.2060 0.07115 0 \n",
"565 0.1628 0.2572 0.06637 0 \n",
"566 0.1418 0.2218 0.07820 0 \n",
"567 0.2650 0.4087 0.12400 0 \n",
"568 0.0000 0.2871 0.07039 1 \n",
"\n",
"[5 rows x 31 columns]"
],
"text/html": [
"\n",
" <div id=\"df-f651a602-1ced-48de-b671-af3d808327b8\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>564</th>\n",
" <td>21.56</td>\n",
" <td>22.39</td>\n",
" <td>142.00</td>\n",
" <td>1479.0</td>\n",
" <td>0.11100</td>\n",
" <td>0.11590</td>\n",
" <td>0.24390</td>\n",
" <td>0.13890</td>\n",
" <td>0.1726</td>\n",
" <td>0.05623</td>\n",
" <td>...</td>\n",
" <td>26.40</td>\n",
" <td>166.10</td>\n",
" <td>2027.0</td>\n",
" <td>0.14100</td>\n",
" <td>0.21130</td>\n",
" <td>0.4107</td>\n",
" <td>0.2216</td>\n",
" <td>0.2060</td>\n",
" <td>0.07115</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>565</th>\n",
" <td>20.13</td>\n",
" <td>28.25</td>\n",
" <td>131.20</td>\n",
" <td>1261.0</td>\n",
" <td>0.09780</td>\n",
" <td>0.10340</td>\n",
" <td>0.14400</td>\n",
" <td>0.09791</td>\n",
" <td>0.1752</td>\n",
" <td>0.05533</td>\n",
" <td>...</td>\n",
" <td>38.25</td>\n",
" <td>155.00</td>\n",
" <td>1731.0</td>\n",
" <td>0.11660</td>\n",
" <td>0.19220</td>\n",
" <td>0.3215</td>\n",
" <td>0.1628</td>\n",
" <td>0.2572</td>\n",
" <td>0.06637</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>566</th>\n",
" <td>16.60</td>\n",
" <td>28.08</td>\n",
" <td>108.30</td>\n",
" <td>858.1</td>\n",
" <td>0.08455</td>\n",
" <td>0.10230</td>\n",
" <td>0.09251</td>\n",
" <td>0.05302</td>\n",
" <td>0.1590</td>\n",
" <td>0.05648</td>\n",
" <td>...</td>\n",
" <td>34.12</td>\n",
" <td>126.70</td>\n",
" <td>1124.0</td>\n",
" <td>0.11390</td>\n",
" <td>0.30940</td>\n",
" <td>0.3403</td>\n",
" <td>0.1418</td>\n",
" <td>0.2218</td>\n",
" <td>0.07820</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>567</th>\n",
" <td>20.60</td>\n",
" <td>29.33</td>\n",
" <td>140.10</td>\n",
" <td>1265.0</td>\n",
" <td>0.11780</td>\n",
" <td>0.27700</td>\n",
" <td>0.35140</td>\n",
" <td>0.15200</td>\n",
" <td>0.2397</td>\n",
" <td>0.07016</td>\n",
" <td>...</td>\n",
" <td>39.42</td>\n",
" <td>184.60</td>\n",
" <td>1821.0</td>\n",
" <td>0.16500</td>\n",
" <td>0.86810</td>\n",
" <td>0.9387</td>\n",
" <td>0.2650</td>\n",
" <td>0.4087</td>\n",
" <td>0.12400</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>568</th>\n",
" <td>7.76</td>\n",
" <td>24.54</td>\n",
" <td>47.92</td>\n",
" <td>181.0</td>\n",
" <td>0.05263</td>\n",
" <td>0.04362</td>\n",
" <td>0.00000</td>\n",
" <td>0.00000</td>\n",
" <td>0.1587</td>\n",
" <td>0.05884</td>\n",
" <td>...</td>\n",
" <td>30.37</td>\n",
" <td>59.16</td>\n",
" <td>268.6</td>\n",
" <td>0.08996</td>\n",
" <td>0.06444</td>\n",
" <td>0.0000</td>\n",
" <td>0.0000</td>\n",
" <td>0.2871</td>\n",
" <td>0.07039</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 31 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f651a602-1ced-48de-b671-af3d808327b8')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-f651a602-1ced-48de-b671-af3d808327b8 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-f651a602-1ced-48de-b671-af3d808327b8');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-63e96ee7-81d3-4b5f-979c-9d9355b2b83b\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-63e96ee7-81d3-4b5f-979c-9d9355b2b83b')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-63e96ee7-81d3-4b5f-979c-9d9355b2b83b button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe"
}
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"data_frame.shape"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "W7SAp1pFHsXd",
"outputId": "0b626a93-8596-400a-8250-12d361a38400"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"(569, 31)"
]
},
"metadata": {},
"execution_count": 8
}
]
},
{
"cell_type": "code",
"source": [
"data_frame.info()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "grVoKuMyHuaB",
"outputId": "f594916e-cf29-49a5-881c-c68a7fb2e9b5"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 569 entries, 0 to 568\n",
"Data columns (total 31 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 mean radius 569 non-null float64\n",
" 1 mean texture 569 non-null float64\n",
" 2 mean perimeter 569 non-null float64\n",
" 3 mean area 569 non-null float64\n",
" 4 mean smoothness 569 non-null float64\n",
" 5 mean compactness 569 non-null float64\n",
" 6 mean concavity 569 non-null float64\n",
" 7 mean concave points 569 non-null float64\n",
" 8 mean symmetry 569 non-null float64\n",
" 9 mean fractal dimension 569 non-null float64\n",
" 10 radius error 569 non-null float64\n",
" 11 texture error 569 non-null float64\n",
" 12 perimeter error 569 non-null float64\n",
" 13 area error 569 non-null float64\n",
" 14 smoothness error 569 non-null float64\n",
" 15 compactness error 569 non-null float64\n",
" 16 concavity error 569 non-null float64\n",
" 17 concave points error 569 non-null float64\n",
" 18 symmetry error 569 non-null float64\n",
" 19 fractal dimension error 569 non-null float64\n",
" 20 worst radius 569 non-null float64\n",
" 21 worst texture 569 non-null float64\n",
" 22 worst perimeter 569 non-null float64\n",
" 23 worst area 569 non-null float64\n",
" 24 worst smoothness 569 non-null float64\n",
" 25 worst compactness 569 non-null float64\n",
" 26 worst concavity 569 non-null float64\n",
" 27 worst concave points 569 non-null float64\n",
" 28 worst symmetry 569 non-null float64\n",
" 29 worst fractal dimension 569 non-null float64\n",
" 30 label 569 non-null int64 \n",
"dtypes: float64(30), int64(1)\n",
"memory usage: 137.9 KB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"data_frame.isnull().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 1000
},
"id": "vtmbLHIWHw2D",
"outputId": "e67e3fce-43f7-4c77-c517-27d337b0b1ea"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"mean radius 0\n",
"mean texture 0\n",
"mean perimeter 0\n",
"mean area 0\n",
"mean smoothness 0\n",
"mean compactness 0\n",
"mean concavity 0\n",
"mean concave points 0\n",
"mean symmetry 0\n",
"mean fractal dimension 0\n",
"radius error 0\n",
"texture error 0\n",
"perimeter error 0\n",
"area error 0\n",
"smoothness error 0\n",
"compactness error 0\n",
"concavity error 0\n",
"concave points error 0\n",
"symmetry error 0\n",
"fractal dimension error 0\n",
"worst radius 0\n",
"worst texture 0\n",
"worst perimeter 0\n",
"worst area 0\n",
"worst smoothness 0\n",
"worst compactness 0\n",
"worst concavity 0\n",
"worst concave points 0\n",
"worst symmetry 0\n",
"worst fractal dimension 0\n",
"label 0\n",
"dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>mean radius</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean texture</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean perimeter</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean area</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean smoothness</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean compactness</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean concavity</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean concave points</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean symmetry</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean fractal dimension</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>radius error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>texture error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>perimeter error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>area error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>smoothness error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>compactness error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concavity error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>concave points error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>symmetry error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>fractal dimension error</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst radius</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst texture</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst perimeter</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst area</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst smoothness</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst compactness</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst concavity</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst concave points</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst symmetry</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>worst fractal dimension</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>label</th>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 10
}
]
},
{
"cell_type": "code",
"source": [
"data_frame.describe()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 384
},
"id": "8q4q1hLuHzPU",
"outputId": "1b761bc2-da60-4fa9-84ad-c0ad7630a621"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" mean radius mean texture mean perimeter mean area \\\n",
"count 569.000000 569.000000 569.000000 569.000000 \n",
"mean 14.127292 19.289649 91.969033 654.889104 \n",
"std 3.524049 4.301036 24.298981 351.914129 \n",
"min 6.981000 9.710000 43.790000 143.500000 \n",
"25% 11.700000 16.170000 75.170000 420.300000 \n",
"50% 13.370000 18.840000 86.240000 551.100000 \n",
"75% 15.780000 21.800000 104.100000 782.700000 \n",
"max 28.110000 39.280000 188.500000 2501.000000 \n",
"\n",
" mean smoothness mean compactness mean concavity mean concave points \\\n",
"count 569.000000 569.000000 569.000000 569.000000 \n",
"mean 0.096360 0.104341 0.088799 0.048919 \n",
"std 0.014064 0.052813 0.079720 0.038803 \n",
"min 0.052630 0.019380 0.000000 0.000000 \n",
"25% 0.086370 0.064920 0.029560 0.020310 \n",
"50% 0.095870 0.092630 0.061540 0.033500 \n",
"75% 0.105300 0.130400 0.130700 0.074000 \n",
"max 0.163400 0.345400 0.426800 0.201200 \n",
"\n",
" mean symmetry mean fractal dimension ... worst texture \\\n",
"count 569.000000 569.000000 ... 569.000000 \n",
"mean 0.181162 0.062798 ... 25.677223 \n",
"std 0.027414 0.007060 ... 6.146258 \n",
"min 0.106000 0.049960 ... 12.020000 \n",
"25% 0.161900 0.057700 ... 21.080000 \n",
"50% 0.179200 0.061540 ... 25.410000 \n",
"75% 0.195700 0.066120 ... 29.720000 \n",
"max 0.304000 0.097440 ... 49.540000 \n",
"\n",
" worst perimeter worst area worst smoothness worst compactness \\\n",
"count 569.000000 569.000000 569.000000 569.000000 \n",
"mean 107.261213 880.583128 0.132369 0.254265 \n",
"std 33.602542 569.356993 0.022832 0.157336 \n",
"min 50.410000 185.200000 0.071170 0.027290 \n",
"25% 84.110000 515.300000 0.116600 0.147200 \n",
"50% 97.660000 686.500000 0.131300 0.211900 \n",
"75% 125.400000 1084.000000 0.146000 0.339100 \n",
"max 251.200000 4254.000000 0.222600 1.058000 \n",
"\n",
" worst concavity worst concave points worst symmetry \\\n",
"count 569.000000 569.000000 569.000000 \n",
"mean 0.272188 0.114606 0.290076 \n",
"std 0.208624 0.065732 0.061867 \n",
"min 0.000000 0.000000 0.156500 \n",
"25% 0.114500 0.064930 0.250400 \n",
"50% 0.226700 0.099930 0.282200 \n",
"75% 0.382900 0.161400 0.317900 \n",
"max 1.252000 0.291000 0.663800 \n",
"\n",
" worst fractal dimension label \n",
"count 569.000000 569.000000 \n",
"mean 0.083946 0.627417 \n",
"std 0.018061 0.483918 \n",
"min 0.055040 0.000000 \n",
"25% 0.071460 0.000000 \n",
"50% 0.080040 1.000000 \n",
"75% 0.092080 1.000000 \n",
"max 0.207500 1.000000 \n",
"\n",
"[8 rows x 31 columns]"
],
"text/html": [
"\n",
" <div id=\"df-6d0b8fca-3002-4047-89d4-c6b7f4267c0a\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" <th>label</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>...</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" <td>569.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>14.127292</td>\n",
" <td>19.289649</td>\n",
" <td>91.969033</td>\n",
" <td>654.889104</td>\n",
" <td>0.096360</td>\n",
" <td>0.104341</td>\n",
" <td>0.088799</td>\n",
" <td>0.048919</td>\n",
" <td>0.181162</td>\n",
" <td>0.062798</td>\n",
" <td>...</td>\n",
" <td>25.677223</td>\n",
" <td>107.261213</td>\n",
" <td>880.583128</td>\n",
" <td>0.132369</td>\n",
" <td>0.254265</td>\n",
" <td>0.272188</td>\n",
" <td>0.114606</td>\n",
" <td>0.290076</td>\n",
" <td>0.083946</td>\n",
" <td>0.627417</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>3.524049</td>\n",
" <td>4.301036</td>\n",
" <td>24.298981</td>\n",
" <td>351.914129</td>\n",
" <td>0.014064</td>\n",
" <td>0.052813</td>\n",
" <td>0.079720</td>\n",
" <td>0.038803</td>\n",
" <td>0.027414</td>\n",
" <td>0.007060</td>\n",
" <td>...</td>\n",
" <td>6.146258</td>\n",
" <td>33.602542</td>\n",
" <td>569.356993</td>\n",
" <td>0.022832</td>\n",
" <td>0.157336</td>\n",
" <td>0.208624</td>\n",
" <td>0.065732</td>\n",
" <td>0.061867</td>\n",
" <td>0.018061</td>\n",
" <td>0.483918</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>6.981000</td>\n",
" <td>9.710000</td>\n",
" <td>43.790000</td>\n",
" <td>143.500000</td>\n",
" <td>0.052630</td>\n",
" <td>0.019380</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.106000</td>\n",
" <td>0.049960</td>\n",
" <td>...</td>\n",
" <td>12.020000</td>\n",
" <td>50.410000</td>\n",
" <td>185.200000</td>\n",
" <td>0.071170</td>\n",
" <td>0.027290</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.156500</td>\n",
" <td>0.055040</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>11.700000</td>\n",
" <td>16.170000</td>\n",
" <td>75.170000</td>\n",
" <td>420.300000</td>\n",
" <td>0.086370</td>\n",
" <td>0.064920</td>\n",
" <td>0.029560</td>\n",
" <td>0.020310</td>\n",
" <td>0.161900</td>\n",
" <td>0.057700</td>\n",
" <td>...</td>\n",
" <td>21.080000</td>\n",
" <td>84.110000</td>\n",
" <td>515.300000</td>\n",
" <td>0.116600</td>\n",
" <td>0.147200</td>\n",
" <td>0.114500</td>\n",
" <td>0.064930</td>\n",
" <td>0.250400</td>\n",
" <td>0.071460</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>13.370000</td>\n",
" <td>18.840000</td>\n",
" <td>86.240000</td>\n",
" <td>551.100000</td>\n",
" <td>0.095870</td>\n",
" <td>0.092630</td>\n",
" <td>0.061540</td>\n",
" <td>0.033500</td>\n",
" <td>0.179200</td>\n",
" <td>0.061540</td>\n",
" <td>...</td>\n",
" <td>25.410000</td>\n",
" <td>97.660000</td>\n",
" <td>686.500000</td>\n",
" <td>0.131300</td>\n",
" <td>0.211900</td>\n",
" <td>0.226700</td>\n",
" <td>0.099930</td>\n",
" <td>0.282200</td>\n",
" <td>0.080040</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>15.780000</td>\n",
" <td>21.800000</td>\n",
" <td>104.100000</td>\n",
" <td>782.700000</td>\n",
" <td>0.105300</td>\n",
" <td>0.130400</td>\n",
" <td>0.130700</td>\n",
" <td>0.074000</td>\n",
" <td>0.195700</td>\n",
" <td>0.066120</td>\n",
" <td>...</td>\n",
" <td>29.720000</td>\n",
" <td>125.400000</td>\n",
" <td>1084.000000</td>\n",
" <td>0.146000</td>\n",
" <td>0.339100</td>\n",
" <td>0.382900</td>\n",
" <td>0.161400</td>\n",
" <td>0.317900</td>\n",
" <td>0.092080</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>28.110000</td>\n",
" <td>39.280000</td>\n",
" <td>188.500000</td>\n",
" <td>2501.000000</td>\n",
" <td>0.163400</td>\n",
" <td>0.345400</td>\n",
" <td>0.426800</td>\n",
" <td>0.201200</td>\n",
" <td>0.304000</td>\n",
" <td>0.097440</td>\n",
" <td>...</td>\n",
" <td>49.540000</td>\n",
" <td>251.200000</td>\n",
" <td>4254.000000</td>\n",
" <td>0.222600</td>\n",
" <td>1.058000</td>\n",
" <td>1.252000</td>\n",
" <td>0.291000</td>\n",
" <td>0.663800</td>\n",
" <td>0.207500</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8 rows × 31 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6d0b8fca-3002-4047-89d4-c6b7f4267c0a')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-6d0b8fca-3002-4047-89d4-c6b7f4267c0a button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-6d0b8fca-3002-4047-89d4-c6b7f4267c0a');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-2bcff5fc-35a6-46f5-b2d1-cee6f6a0c8bd\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-2bcff5fc-35a6-46f5-b2d1-cee6f6a0c8bd')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-2bcff5fc-35a6-46f5-b2d1-cee6f6a0c8bd button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe"
}
},
"metadata": {},
"execution_count": 11
}
]
},
{
"cell_type": "code",
"source": [
"data_frame['label'].value_counts()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 178
},
"id": "b6xgHPVwH1ae",
"outputId": "eeab5cd4-6019-4e0e-df69-312943bf6743"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"label\n",
"1 357\n",
"0 212\n",
"Name: count, dtype: int64"
],
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <th>label</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>357</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>212</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div><br><label><b>dtype:</b> int64</label>"
]
},
"metadata": {},
"execution_count": 12
}
]
},
{
"cell_type": "markdown",
"source": [
"Malignant = 0\n",
"Benign = 1"
],
"metadata": {
"id": "YSienVVxH6Fh"
}
},
{
"cell_type": "code",
"source": [
"data_frame.groupby('label').mean()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 227
},
"id": "4vSVKXroH3M8",
"outputId": "db8ef6e7-e86c-408f-8256-eaf42bdf4c81"
},
"execution_count": null,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"label \n",
"0 17.462830 21.604906 115.365377 978.376415 0.102898 \n",
"1 12.146524 17.914762 78.075406 462.790196 0.092478 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"label \n",
"0 0.145188 0.160775 0.087990 0.192909 \n",
"1 0.080085 0.046058 0.025717 0.174186 \n",
"\n",
" mean fractal dimension ... worst radius worst texture \\\n",
"label ... \n",
"0 0.062680 ... 21.134811 29.318208 \n",
"1 0.062867 ... 13.379801 23.515070 \n",
"\n",
" worst perimeter worst area worst smoothness worst compactness \\\n",
"label \n",
"0 141.370330 1422.286321 0.144845 0.374824 \n",
"1 87.005938 558.899440 0.124959 0.182673 \n",
"\n",
" worst concavity worst concave points worst symmetry \\\n",
"label \n",
"0 0.450606 0.182237 0.323468 \n",
"1 0.166238 0.074444 0.270246 \n",
"\n",
" worst fractal dimension \n",
"label \n",
"0 0.091530 \n",
"1 0.079442 \n",
"\n",
"[2 rows x 30 columns]"
],
"text/html": [
"\n",
" <div id=\"df-66356f80-4bcf-462d-ab26-b5aad17cbce9\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>mean radius</th>\n",
" <th>mean texture</th>\n",
" <th>mean perimeter</th>\n",
" <th>mean area</th>\n",
" <th>mean smoothness</th>\n",
" <th>mean compactness</th>\n",
" <th>mean concavity</th>\n",
" <th>mean concave points</th>\n",
" <th>mean symmetry</th>\n",
" <th>mean fractal dimension</th>\n",
" <th>...</th>\n",
" <th>worst radius</th>\n",
" <th>worst texture</th>\n",
" <th>worst perimeter</th>\n",
" <th>worst area</th>\n",
" <th>worst smoothness</th>\n",
" <th>worst compactness</th>\n",
" <th>worst concavity</th>\n",
" <th>worst concave points</th>\n",
" <th>worst symmetry</th>\n",
" <th>worst fractal dimension</th>\n",
" </tr>\n",
" <tr>\n",
" <th>label</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>17.462830</td>\n",
" <td>21.604906</td>\n",
" <td>115.365377</td>\n",
" <td>978.376415</td>\n",
" <td>0.102898</td>\n",
" <td>0.145188</td>\n",
" <td>0.160775</td>\n",
" <td>0.087990</td>\n",
" <td>0.192909</td>\n",
" <td>0.062680</td>\n",
" <td>...</td>\n",
" <td>21.134811</td>\n",
" <td>29.318208</td>\n",
" <td>141.370330</td>\n",
" <td>1422.286321</td>\n",
" <td>0.144845</td>\n",
" <td>0.374824</td>\n",
" <td>0.450606</td>\n",
" <td>0.182237</td>\n",
" <td>0.323468</td>\n",
" <td>0.091530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>12.146524</td>\n",
" <td>17.914762</td>\n",
" <td>78.075406</td>\n",
" <td>462.790196</td>\n",
" <td>0.092478</td>\n",
" <td>0.080085</td>\n",
" <td>0.046058</td>\n",
" <td>0.025717</td>\n",
" <td>0.174186</td>\n",
" <td>0.062867</td>\n",
" <td>...</td>\n",
" <td>13.379801</td>\n",
" <td>23.515070</td>\n",
" <td>87.005938</td>\n",
" <td>558.899440</td>\n",
" <td>0.124959</td>\n",
" <td>0.182673</td>\n",
" <td>0.166238</td>\n",
" <td>0.074444</td>\n",
" <td>0.270246</td>\n",
" <td>0.079442</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 30 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-66356f80-4bcf-462d-ab26-b5aad17cbce9')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-66356f80-4bcf-462d-ab26-b5aad17cbce9 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-66356f80-4bcf-462d-ab26-b5aad17cbce9');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-a91a93dd-e166-43d9-a1c2-d542b7bcf680\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-a91a93dd-e166-43d9-a1c2-d542b7bcf680')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-a91a93dd-e166-43d9-a1c2-d542b7bcf680 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe"
}
},
"metadata": {},
"execution_count": 13
}
]
},
{
"cell_type": "markdown",
"source": [
"Separate the Features and Target"
],
"metadata": {
"id": "gf7QdK6jIGk2"
}
},
{
"cell_type": "code",
"source": [
"# Feature matrix: every column of data_frame except the 'label' column\n",
"X = data_frame.drop(columns='label')\n",
"# Target vector: the 'label' column on its own\n",
"Y = data_frame['label']"
],
"metadata": {
"id": "HlCTLAA_IEXf"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print(X)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "55a0OSDRILgs",
"outputId": "04d4c561-1c38-4bf3-ae74-8293e9c062e4"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
" mean radius mean texture mean perimeter mean area mean smoothness \\\n",
"0 17.99 10.38 122.80 1001.0 0.11840 \n",
"1 20.57 17.77 132.90 1326.0 0.08474 \n",
"2 19.69 21.25 130.00 1203.0 0.10960 \n",
"3 11.42 20.38 77.58 386.1 0.14250 \n",
"4 20.29 14.34 135.10 1297.0 0.10030 \n",
".. ... ... ... ... ... \n",
"564 21.56 22.39 142.00 1479.0 0.11100 \n",
"565 20.13 28.25 131.20 1261.0 0.09780 \n",
"566 16.60 28.08 108.30 858.1 0.08455 \n",
"567 20.60 29.33 140.10 1265.0 0.11780 \n",
"568 7.76 24.54 47.92 181.0 0.05263 \n",
"\n",
" mean compactness mean concavity mean concave points mean symmetry \\\n",
"0 0.27760 0.30010 0.14710 0.2419 \n",
"1 0.07864 0.08690 0.07017 0.1812 \n",
"2 0.15990 0.19740 0.12790 0.2069 \n",
"3 0.28390 0.24140 0.10520 0.2597 \n",
"4 0.13280 0.19800 0.10430 0.1809 \n",
".. ... ... ... ... \n",
"564 0.11590 0.24390 0.13890 0.1726 \n",
"565 0.10340 0.14400 0.09791 0.1752 \n",
"566 0.10230 0.09251 0.05302 0.1590 \n",
"567 0.27700 0.35140 0.15200 0.2397 \n",
"568 0.04362 0.00000 0.00000 0.1587 \n",
"\n",
" mean fractal dimension ... worst radius worst texture \\\n",
"0 0.07871 ... 25.380 17.33 \n",
"1 0.05667 ... 24.990 23.41 \n",
"2 0.05999 ... 23.570 25.53 \n",
"3 0.09744 ... 14.910 26.50 \n",
"4 0.05883 ... 22.540 16.67 \n",
".. ... ... ... ... \n",
"564 0.05623 ... 25.450 26.40 \n",
"565 0.05533 ... 23.690 38.25 \n",
"566 0.05648 ... 18.980 34.12 \n",
"567 0.07016 ... 25.740 39.42 \n",
"568 0.05884 ... 9.456 30.37 \n",
"\n",
" worst perimeter worst area worst smoothness worst compactness \\\n",
"0 184.60 2019.0 0.16220 0.66560 \n",
"1 158.80 1956.0 0.12380 0.18660 \n",
"2 152.50 1709.0 0.14440 0.42450 \n",
"3 98.87 567.7 0.20980 0.86630 \n",
"4 152.20 1575.0 0.13740 0.20500 \n",
".. ... ... ... ... \n",
"564 166.10 2027.0 0.14100 0.21130 \n",
"565 155.00 1731.0 0.11660 0.19220 \n",
"566 126.70 1124.0 0.11390 0.30940 \n",
"567 184.60 1821.0 0.16500 0.86810 \n",
"568 59.16 268.6 0.08996 0.06444 \n",
"\n",
" worst concavity worst concave points worst symmetry \\\n",
"0 0.7119 0.2654 0.4601 \n",
"1 0.2416 0.1860 0.2750 \n",
"2 0.4504 0.2430 0.3613 \n",
"3 0.6869 0.2575 0.6638 \n",
"4 0.4000 0.1625 0.2364 \n",
".. ... ... ... \n",
"564 0.4107 0.2216 0.2060 \n",
"565 0.3215 0.1628 0.2572 \n",
"566 0.3403 0.1418 0.2218 \n",
"567 0.9387 0.2650 0.4087 \n",
"568 0.0000 0.0000 0.2871 \n",
"\n",
" worst fractal dimension \n",
"0 0.11890 \n",
"1 0.08902 \n",
"2 0.08758 \n",
"3 0.17300 \n",
"4 0.07678 \n",
".. ... \n",
"564 0.07115 \n",
"565 0.06637 \n",
"566 0.07820 \n",
"567 0.12400 \n",
"568 0.07039 \n",
"\n",
"[569 rows x 30 columns]\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"print(Y)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "b96DPjWIIN6G",
"outputId": "3b8e7956-24bf-4ac4-d70c-5e48e80fc3d6"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"0 0\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
" ..\n",
"564 0\n",
"565 0\n",
"566 0\n",
"567 0\n",
"568 1\n",
"Name: label, Length: 569, dtype: int64\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Splitting the data into Training & Testing sets"
],
"metadata": {
"id": "kooJBvgZIhDr"
}
},
{
"cell_type": "code",
"source": [
"# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible\n",
"X_train, X_test, Y_train, Y_test = train_test_split(\n",
"    X,\n",
"    Y,\n",
"    test_size=0.2,\n",
"    random_state=2,\n",
")"
],
"metadata": {
"id": "iT9WmioDIQPC"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Sanity-check the split: shapes of the full, training and test feature matrices\n",
"print(*(frame.shape for frame in (X, X_train, X_test)))"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "qvYOHnAFImkY",
"outputId": "a936b458-1142-40f2-f169-88436433969f"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"(569, 30) (455, 30) (114, 30)\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Model Training with Logistic Regression"
],
"metadata": {
"id": "czy8VUU5Iqcp"
}
},
{
"cell_type": "code",
"source": [
"# With the default max_iter=100 the lbfgs solver stops at the iteration limit on\n",
"# these unscaled features (ConvergenceWarning), so give it a larger budget.\n",
"model = LogisticRegression(max_iter=10000)"
],
"metadata": {
"id": "KOQVSpwuIpSz"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Fit the classifier on the training split only; the test split stays unseen\n",
"model.fit(X=X_train, y=Y_train)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 233
},
"id": "Gx8E2Xi3IwA3",
"outputId": "a7ad2e71-5968-463d-da3f-728f5a0cbede"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:460: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
"Please also refer to the documentation for alternative solver options:\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
" n_iter_i = _check_optimize_result(\n"
]
},
{
"output_type": "execute_result",
"data": {
"text/plain": [
"LogisticRegression()"
],
"text/html": [
"<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label 
{background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>LogisticRegression()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">LogisticRegression</label><div class=\"sk-toggleable__content\"><pre>LogisticRegression()</pre></div></div></div></div></div>"
]
},
"metadata": {},
"execution_count": 20
}
]
},
{
"cell_type": "markdown",
"source": [
"Model Evaluation\n",
"\n",
"Checking the accuracy score on the training and test data"
],
"metadata": {
"id": "xBI9e3TDI2U9"
}
},
{
"cell_type": "code",
"source": [
"# Training accuracy: predict on the rows the model was fitted on and compare with their labels\n",
"X_train_prediction = model.predict(X=X_train)\n",
"training_data_accuracy = accuracy_score(y_true=Y_train, y_pred=X_train_prediction)"
],
"metadata": {
"id": "4DUfVsBkI0tJ"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print('Accuracy on training data = ', training_data_accuracy)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "8iF4BrQtI_J9",
"outputId": "31086752-26a9-4052-cc09-cae17a635cdd"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy on training data = 0.9472527472527472\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"# Test accuracy: predict on the held-out rows and compare with their labels\n",
"X_test_prediction = model.predict(X=X_test)\n",
"test_data_accuracy = accuracy_score(y_true=Y_test, y_pred=X_test_prediction)"
],
"metadata": {
"id": "DazhoemIJBAA"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"print('Accuracy on test data = ', test_data_accuracy)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "o0mUizMaJD24",
"outputId": "13aa3c19-f8b5-43bf-c4e9-1068d92c8c73"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Accuracy on test data = 0.9298245614035088\n"
]
}
]
},
{
"cell_type": "markdown",
"source": [
"Building a Predictive System: classify a single new sample with the trained model"
],
"metadata": {
"id": "XhPKsGRIJHYh"
}
},
{
"cell_type": "code",
"source": [
"# One sample to classify: 30 feature values in the same column order as X\n",
"input_data = (13.54,14.36,87.46,566.3,0.09779,0.08129,0.06664,0.04781,0.1885,0.05766,0.2699,0.7886,2.058,23.56,0.008462,0.0146,0.02387,0.01315,0.0198,0.0023,15.11,19.26,99.7,711.2,0.144,0.1773,0.239,0.1288,0.2977,0.07259)\n",
"\n",
"# Wrap the sample in a one-row DataFrame that carries the training column names.\n",
"# The model was fitted on a DataFrame, so predicting on a bare NumPy array makes\n",
"# scikit-learn warn that X does not have valid feature names.\n",
"input_data_frame = pd.DataFrame([input_data], columns=X.columns)\n",
"\n",
"prediction = model.predict(input_data_frame)\n",
"print(prediction)\n",
"\n",
"# scikit-learn's breast cancer dataset encodes 0 as malignant and 1 as benign\n",
"if prediction[0] == 0:\n",
"    print('The Breast cancer is Malignant')\n",
"else:\n",
"    print('The Breast Cancer is Benign')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xROZrsNhJF4-",
"outputId": "7b8be292-8d8d-4a49-9b3f-f70eeb169d1d"
},
"execution_count": null,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"[1]\n",
"The Breast Cancer is Benign\n"
]
},
{
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/sklearn/base.py:465: UserWarning: X does not have valid feature names, but LogisticRegression was fitted with feature names\n",
" warnings.warn(\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "9nP-Mb9SJQIo"
},
"execution_count": null,
"outputs": []
}
]
}