{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "a5Vf7VhQNGDs"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "\n",
        "# Load the dataset. The path is absolute under /content, so the CSV must\n",
        "# exist there before this cell is run.\n",
        "df = pd.read_csv('/content/CS279 Data - Sheet1.csv')"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Drop every row that has a missing value in any column.\n",
        "df = df.dropna()"
      ],
      "metadata": {
        "id": "TkOLhVxqNmNR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.preprocessing import LabelEncoder\n",
        "\n",
        "# One encoder per column: refitting a single shared encoder overwrites its\n",
        "# classes_, so only the last column's mapping could be inspected or inverted.\n",
        "shape_encoder = LabelEncoder()\n",
        "color_encoder = LabelEncoder()\n",
        "label_encoder = LabelEncoder()\n",
        "\n",
        "df['Shape_Encoded'] = shape_encoder.fit_transform(df['Shape'])\n",
        "df['Color_Encoded'] = color_encoder.fit_transform(df['Color'])\n",
        "# label_encoder holds the Infected -> Diagnosis mapping (label_encoder.classes_).\n",
        "df['Diagnosis'] = label_encoder.fit_transform(df['Infected'])"
      ],
      "metadata": {
        "id": "3iLH8iynNppc"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# X stays 2-D (one feature column); y must be 1-D, otherwise scikit-learn\n",
        "# emits a DataConversionWarning when fitting.\n",
        "X = df[['Area']]\n",
        "y = df['Diagnosis']"
      ],
      "metadata": {
        "id": "9MrbV8gdOgAR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
      ],
      "metadata": {
        "id": "yKmSraTfO300"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.linear_model import LogisticRegression\n",
        "\n",
        "# Fit a logistic regression of Diagnosis on the single Area feature.\n",
        "log_reg_model = LogisticRegression(random_state=42)\n",
        "log_reg_model.fit(X_train, y_train)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 136
        },
        "id": "pRAanHQZO--R",
        "outputId": "b7e6c4ae-8364-4cb7-f2c7-0495bc22058c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "LogisticRegression(random_state=42)"
            ],
            "text/html": [
              "\n",
              "LogisticRegression(random_state=42)\n",
              "In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.\n",
              "On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.\n"
            ]
          },
          "metadata": {},
          "execution_count": 17
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix\n",
        "from sklearn.metrics import roc_auc_score"
      ],
      "metadata": {
        "id": "0J50xt-YPlio"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Hard class predictions on the held-out rows.\n",
        "y_pred = log_reg_model.predict(X_test)\n",
        "\n",
        "# precision/recall/F1 use scikit-learn's default pos_label=1, i.e. the class\n",
        "# stored at label_encoder.classes_[1].\n",
        "accuracy = accuracy_score(y_test, y_pred)\n",
        "precision = precision_score(y_test, y_pred)\n",
        "recall = recall_score(y_test, y_pred)\n",
        "f1 = f1_score(y_test, y_pred)"
      ],
      "metadata": {
        "id": "WgLjC2AfPnZg"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Column 1 of predict_proba is the predicted probability of the class encoded as 1.\n",
        "y_scores = log_reg_model.predict_proba(X_test)[:, 1]\n",
        "\n",
        "# Compute the ROC AUC score\n",
        "roc_auc = roc_auc_score(y_test, y_scores)\n",
        "\n",
        "print(f\"ROC AUC Score: {roc_auc:.2f}\")"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "JKbm1CT6PwDa",
        "outputId": "fa7ef655-f7df-438c-a022-e6427c0549d7"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "ROC AUC Score: 0.94\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Report the threshold-based metrics as percentages.\n",
        "print(f'Accuracy: {accuracy * 100:.2f}%')\n",
        "print(f'Precision: {precision * 100:.2f}%')\n",
        "print(f'Recall: {recall * 100:.2f}%')\n",
        "print(f'F1 Score: {f1 * 100:.2f}%')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "bkUrSfC0Pzul",
        "outputId": "29a0e64c-40f3-417e-8300-b841403ccaea"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Accuracy: 91.67%\n",
            "Precision: 66.67%\n",
            "Recall: 80.00%\n",
            "F1 Score: 72.73%\n"
          ]
        }
      ]
    }
  ]
}