Switch to side-by-side view

--- a
+++ b/Classification pCR/Classification_Dev.ipynb
@@ -0,0 +1,2083 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "b348fcda-1cd7-424f-aafb-f0fb5f4fe94d",
+   "metadata": {},
+   "source": [
+    "### Loading dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "564c5a25-bc61-4d3d-acca-d54d998bdb4a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>ID</th>\n",
+       "      <th>pCR (outcome)</th>\n",
+       "      <th>RelapseFreeSurvival (outcome)</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ER</th>\n",
+       "      <th>PgR</th>\n",
+       "      <th>HER2</th>\n",
+       "      <th>TrippleNegative</th>\n",
+       "      <th>ChemoGrade</th>\n",
+       "      <th>Proliferation</th>\n",
+       "      <th>...</th>\n",
+       "      <th>original_glszm_SmallAreaHighGrayLevelEmphasis</th>\n",
+       "      <th>original_glszm_SmallAreaLowGrayLevelEmphasis</th>\n",
+       "      <th>original_glszm_ZoneEntropy</th>\n",
+       "      <th>original_glszm_ZonePercentage</th>\n",
+       "      <th>original_glszm_ZoneVariance</th>\n",
+       "      <th>original_ngtdm_Busyness</th>\n",
+       "      <th>original_ngtdm_Coarseness</th>\n",
+       "      <th>original_ngtdm_Complexity</th>\n",
+       "      <th>original_ngtdm_Contrast</th>\n",
+       "      <th>original_ngtdm_Strength</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>TRG002174</td>\n",
+       "      <td>1</td>\n",
+       "      <td>144.0</td>\n",
+       "      <td>41.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.517172</td>\n",
+       "      <td>0.375126</td>\n",
+       "      <td>3.325332</td>\n",
+       "      <td>0.002314</td>\n",
+       "      <td>3880771.500</td>\n",
+       "      <td>473.464852</td>\n",
+       "      <td>0.000768</td>\n",
+       "      <td>0.182615</td>\n",
+       "      <td>0.030508</td>\n",
+       "      <td>0.000758</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>TRG002178</td>\n",
+       "      <td>0</td>\n",
+       "      <td>142.0</td>\n",
+       "      <td>39.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.444391</td>\n",
+       "      <td>0.444391</td>\n",
+       "      <td>3.032144</td>\n",
+       "      <td>0.005612</td>\n",
+       "      <td>2372009.744</td>\n",
+       "      <td>59.459710</td>\n",
+       "      <td>0.004383</td>\n",
+       "      <td>0.032012</td>\n",
+       "      <td>0.001006</td>\n",
+       "      <td>0.003685</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>TRG002204</td>\n",
+       "      <td>1</td>\n",
+       "      <td>135.0</td>\n",
+       "      <td>31.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.534549</td>\n",
+       "      <td>0.534549</td>\n",
+       "      <td>2.485848</td>\n",
+       "      <td>0.006752</td>\n",
+       "      <td>1540027.421</td>\n",
+       "      <td>33.935384</td>\n",
+       "      <td>0.007584</td>\n",
+       "      <td>0.024062</td>\n",
+       "      <td>0.000529</td>\n",
+       "      <td>0.006447</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>TRG002206</td>\n",
+       "      <td>0</td>\n",
+       "      <td>12.0</td>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>3</td>\n",
+       "      <td>3</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.506185</td>\n",
+       "      <td>0.506185</td>\n",
+       "      <td>2.606255</td>\n",
+       "      <td>0.003755</td>\n",
+       "      <td>6936740.794</td>\n",
+       "      <td>46.859265</td>\n",
+       "      <td>0.005424</td>\n",
+       "      <td>0.013707</td>\n",
+       "      <td>0.000178</td>\n",
+       "      <td>0.004543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>TRG002210</td>\n",
+       "      <td>0</td>\n",
+       "      <td>109.0</td>\n",
+       "      <td>61.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.462282</td>\n",
+       "      <td>0.462282</td>\n",
+       "      <td>2.809279</td>\n",
+       "      <td>0.006521</td>\n",
+       "      <td>1265399.054</td>\n",
+       "      <td>39.621023</td>\n",
+       "      <td>0.006585</td>\n",
+       "      <td>0.034148</td>\n",
+       "      <td>0.001083</td>\n",
+       "      <td>0.005626</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 121 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          ID  pCR (outcome)  RelapseFreeSurvival (outcome)   Age  ER  PgR  \\\n",
+       "0  TRG002174              1                          144.0  41.0   0    0   \n",
+       "1  TRG002178              0                          142.0  39.0   1    1   \n",
+       "2  TRG002204              1                          135.0  31.0   0    0   \n",
+       "3  TRG002206              0                           12.0  35.0   0    0   \n",
+       "4  TRG002210              0                          109.0  61.0   1    0   \n",
+       "\n",
+       "   HER2  TrippleNegative  ChemoGrade  Proliferation  ...  \\\n",
+       "0     0                1           3              3  ...   \n",
+       "1     0                0           3              3  ...   \n",
+       "2     0                1           2              1  ...   \n",
+       "3     0                1           3              3  ...   \n",
+       "4     0                0           2              1  ...   \n",
+       "\n",
+       "   original_glszm_SmallAreaHighGrayLevelEmphasis  \\\n",
+       "0                                       0.517172   \n",
+       "1                                       0.444391   \n",
+       "2                                       0.534549   \n",
+       "3                                       0.506185   \n",
+       "4                                       0.462282   \n",
+       "\n",
+       "   original_glszm_SmallAreaLowGrayLevelEmphasis  original_glszm_ZoneEntropy  \\\n",
+       "0                                      0.375126                    3.325332   \n",
+       "1                                      0.444391                    3.032144   \n",
+       "2                                      0.534549                    2.485848   \n",
+       "3                                      0.506185                    2.606255   \n",
+       "4                                      0.462282                    2.809279   \n",
+       "\n",
+       "   original_glszm_ZonePercentage  original_glszm_ZoneVariance  \\\n",
+       "0                       0.002314                  3880771.500   \n",
+       "1                       0.005612                  2372009.744   \n",
+       "2                       0.006752                  1540027.421   \n",
+       "3                       0.003755                  6936740.794   \n",
+       "4                       0.006521                  1265399.054   \n",
+       "\n",
+       "   original_ngtdm_Busyness  original_ngtdm_Coarseness  \\\n",
+       "0               473.464852                   0.000768   \n",
+       "1                59.459710                   0.004383   \n",
+       "2                33.935384                   0.007584   \n",
+       "3                46.859265                   0.005424   \n",
+       "4                39.621023                   0.006585   \n",
+       "\n",
+       "   original_ngtdm_Complexity  original_ngtdm_Contrast  original_ngtdm_Strength  \n",
+       "0                   0.182615                 0.030508                 0.000758  \n",
+       "1                   0.032012                 0.001006                 0.003685  \n",
+       "2                   0.024062                 0.000529                 0.006447  \n",
+       "3                   0.013707                 0.000178                 0.004543  \n",
+       "4                   0.034148                 0.001083                 0.005626  \n",
+       "\n",
+       "[5 rows x 121 columns]"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "df= pd.read_excel(\"TrainDataset2024.xls\")\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6cecd481-9f9e-45a6-9ece-6b43a71b2fb1",
+   "metadata": {},
+   "source": [
+    "## Data Preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "52341c70-e6d4-4738-8817-eda5398912d8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.rename(columns={\"pCR (outcome)\": \"PCR\"})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "8457af35-f78b-4595-b162-77fa6f62621d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of null values:  105\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "df.replace(999, np.nan, inplace= True)\n",
+    "null = df.isna().sum().sum()\n",
+    "print(\"Number of null values: \",null)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "cf17629c-72ca-4fb7-a094-8c26cfd34dfa",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Features with Missing Values ===\n",
+      "PCR                 5\n",
+      "PgR                 1\n",
+      "HER2                1\n",
+      "TrippleNegative     1\n",
+      "ChemoGrade          3\n",
+      "Proliferation       2\n",
+      "HistologyType       3\n",
+      "LNStatus            1\n",
+      "Gene               88\n",
+      "dtype: int64\n"
+     ]
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1200x600 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "=== Overall Missing Data Summary ===\n",
+      "Total number of missing values: 105\n",
+      "Total number of cells in dataset: 48400\n",
+      "Percentage of missing data: 0.22%\n"
+     ]
+    }
+   ],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Replace 999 with NaN for further analysis\n",
+    "df.replace(999, np.nan, inplace=True)\n",
+    "\n",
+    "# Count missing (999) values in each column\n",
+    "missing_counts = (df.isna().sum())\n",
+    "\n",
+    "# Filter out columns with no missing values\n",
+    "missing_counts = missing_counts[missing_counts > 0]\n",
+    "\n",
+    "# Display feature names and missing value counts\n",
+    "print(\"\\n=== Features with Missing Values ===\")\n",
+    "print(missing_counts)\n",
+    "\n",
+    "# Visualize the distribution of missing values\n",
+    "plt.figure(figsize=(12, 6))\n",
+    "missing_counts.plot(kind='bar', color='skyblue', edgecolor='black')\n",
+    "plt.title('Number of Missing Values by Feature')\n",
+    "plt.xlabel('Features')\n",
+    "plt.ylabel('Count of Missing Values')\n",
+    "plt.xticks(rotation=45, ha='right')\n",
+    "plt.tight_layout()\n",
+    "plt.show()\n",
+    "\n",
+    "# Summary statistics\n",
+    "total_missing = missing_counts.sum()\n",
+    "total_cells = df.size\n",
+    "missing_percentage = (total_missing / total_cells) * 100\n",
+    "\n",
+    "print(\"\\n=== Overall Missing Data Summary ===\")\n",
+    "print(f\"Total number of missing values: {total_missing}\")\n",
+    "print(f\"Total number of cells in dataset: {total_cells}\")\n",
+    "print(f\"Percentage of missing data: {missing_percentage:.2f}%\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "68cedf35-6386-426c-ac64-1127dd4c9935",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PCR                0\n",
+      "PgR                0\n",
+      "HER2               0\n",
+      "TrippleNegative    0\n",
+      "ChemoGrade         0\n",
+      "Proliferation      0\n",
+      "HistologyType      0\n",
+      "LNStatus           0\n",
+      "Gene               0\n",
+      "dtype: int64\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\LLR User\\AppData\\Local\\Temp\\ipykernel_14388\\4226577187.py:12: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
+      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
+      "\n",
+      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
+      "\n",
+      "\n",
+      "  df[feature].fillna(mode_value, inplace=True)  # Replace NaN with the mode\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# List of features you want to modify\n",
+    "features_to_modify = [\n",
+    "    \"PCR\", \"PgR\", \"HER2\", \"TrippleNegative\", \"ChemoGrade\", \n",
+    "    \"Proliferation\", \"HistologyType\", \"LNStatus\", \"Gene\"\n",
+    "]\n",
+    "\n",
+    "# Loop through the columns and replace NaN values with the mode of each column\n",
+    "for feature in features_to_modify:\n",
+    "    mode_value = df[feature].mode()[0]  # Get the mode of the feature\n",
+    "    df[feature].fillna(mode_value, inplace=True)  # Replace NaN with the mode\n",
+    "\n",
+    "# Check the changes\n",
+    "print(df[features_to_modify].isna().sum())  # Check how many NaN values are left\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "995b86b2-4475-4b94-8a7e-044bebaf8556",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df = df.drop('ID', axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "2f8b60cc-c014-4424-ba37-d7705812d8bc",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of outliers: 87\n",
+      "Number of outliers: PCR                                 65.000000\n",
+      "RelapseFreeSurvival (outcome)    17185.083333\n",
+      "Age                              16080.576595\n",
+      "ER                                 166.000000\n",
+      "PgR                                126.000000\n",
+      "                                     ...     \n",
+      "original_ngtdm_Busyness          36656.435955\n",
+      "original_ngtdm_Coarseness            4.608712\n",
+      "original_ngtdm_Complexity           18.424557\n",
+      "original_ngtdm_Contrast              1.797244\n",
+      "original_ngtdm_Strength              4.089319\n",
+      "Length: 120, dtype: float64\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Calculate Z-scores for the entire dataset\n",
+    "z_scores = np.abs((df - df.mean()) / df.std())\n",
+    "\n",
+    "# Identify rows that have z-scores above the threshold (3 in this case)\n",
+    "outliers = (z_scores > 3).any(axis=1)\n",
+    "\n",
+    "# Print the number of outliers\n",
+    "print(f\"Number of outliers: {outliers.sum()}\")\n",
+    "\n",
+    "# Remove outliers from the dataset\n",
+    "df_no_outliers = df[~outliers]\n",
+    "\n",
+    "print(f\"Number of outliers: {df_no_outliers.sum()}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "37f55653-9445-404e-8730-c1c93c45e00f",
+   "metadata": {},
+   "source": [
+    "### Feature Selection"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "70899e82-33c6-495d-baaf-e8d6d5b4afa1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "correlated features:  82\n"
+     ]
+    }
+   ],
+   "source": [
+    "df1 = df.copy()\n",
+    "# checking for correlated features of dataset\n",
+    "def correlation(data, threshold):\n",
+    "    col_corr = {}  # Dictionary to store correlated features\n",
+    "    corr_matrix = data.corr()\n",
+    "    for i in range(len(corr_matrix.columns)):\n",
+    "        for j in range(i):\n",
+    "            if abs(corr_matrix.iloc[i, j]) > threshold:  # We are interested in absolute coefficient value\n",
+    "                colname = corr_matrix.columns[i]\n",
+    "                if colname not in col_corr:\n",
+    "                    col_corr[colname] = set()\n",
+    "                col_corr[colname].add(corr_matrix.columns[j])\n",
+    "\n",
+    "    return col_corr\n",
+    "\n",
+    "corr_features = correlation(df1, 0.8)\n",
+    "print('correlated features: ', len(corr_features))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "e8695d75-11cd-4f48-b301-d019ce147584",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(400, 38)"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# removing the correlated features\n",
+    "df_corr= df1.drop(labels=corr_features, axis=1)\n",
+    "df_corr.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "164fcddb-b51c-41b7-b23a-a4f6c7725c84",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 640x480 with 1 Axes>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "# checking the count of target values after data processing and feature selection\n",
+    "outcomes = df_corr['PCR'].value_counts()\n",
+    "outcome_labels =['0','1']\n",
+    "outcome_values = outcomes.values\n",
+    "plt.bar(outcome_labels, outcome_values)\n",
+    "plt.xlabel('Outcome')\n",
+    "plt.ylabel('Count')\n",
+    "plt.title('Distribution of Outcomes in PCR')\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "8f4b2fc4-664b-4c47-af59-e5646acb9aed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dataset using correlated feature selection\n",
+    "X_corr = df_corr.drop([\"PCR\",\"RelapseFreeSurvival (outcome)\"],axis=1)\n",
+    "y_corr = df_corr[\"PCR\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "683c242f-c3ba-4268-8318-eb01d4f1d9aa",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Age</th>\n",
+       "      <th>ER</th>\n",
+       "      <th>PgR</th>\n",
+       "      <th>HER2</th>\n",
+       "      <th>TrippleNegative</th>\n",
+       "      <th>ChemoGrade</th>\n",
+       "      <th>HistologyType</th>\n",
+       "      <th>LNStatus</th>\n",
+       "      <th>TumourStage</th>\n",
+       "      <th>Gene</th>\n",
+       "      <th>...</th>\n",
+       "      <th>original_gldm_SmallDependenceEmphasis</th>\n",
+       "      <th>original_glrlm_LongRunLowGrayLevelEmphasis</th>\n",
+       "      <th>original_glrlm_ShortRunHighGrayLevelEmphasis</th>\n",
+       "      <th>original_glszm_GrayLevelNonUniformity</th>\n",
+       "      <th>original_glszm_GrayLevelNonUniformityNormalized</th>\n",
+       "      <th>original_glszm_LargeAreaEmphasis</th>\n",
+       "      <th>original_glszm_SizeZoneNonUniformityNormalized</th>\n",
+       "      <th>original_glszm_SmallAreaEmphasis</th>\n",
+       "      <th>original_ngtdm_Busyness</th>\n",
+       "      <th>original_ngtdm_Strength</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>41.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.005563</td>\n",
+       "      <td>10.779989</td>\n",
+       "      <td>0.789987</td>\n",
+       "      <td>27.545455</td>\n",
+       "      <td>0.834711</td>\n",
+       "      <td>4067578.818</td>\n",
+       "      <td>0.180900</td>\n",
+       "      <td>0.403535</td>\n",
+       "      <td>473.464852</td>\n",
+       "      <td>0.000758</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>39.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.006518</td>\n",
+       "      <td>27.650685</td>\n",
+       "      <td>0.442279</td>\n",
+       "      <td>78.025000</td>\n",
+       "      <td>0.975313</td>\n",
+       "      <td>2403756.075</td>\n",
+       "      <td>0.198125</td>\n",
+       "      <td>0.444391</td>\n",
+       "      <td>59.459710</td>\n",
+       "      <td>0.003685</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>31.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.007181</td>\n",
+       "      <td>25.338218</td>\n",
+       "      <td>0.503046</td>\n",
+       "      <td>72.027027</td>\n",
+       "      <td>0.973338</td>\n",
+       "      <td>1561963.432</td>\n",
+       "      <td>0.275749</td>\n",
+       "      <td>0.534549</td>\n",
+       "      <td>33.935384</td>\n",
+       "      <td>0.006447</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>35.0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.004902</td>\n",
+       "      <td>31.461354</td>\n",
+       "      <td>0.399896</td>\n",
+       "      <td>99.019802</td>\n",
+       "      <td>0.980394</td>\n",
+       "      <td>7007670.723</td>\n",
+       "      <td>0.253014</td>\n",
+       "      <td>0.506185</td>\n",
+       "      <td>46.859265</td>\n",
+       "      <td>0.004543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>61.0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.007222</td>\n",
+       "      <td>27.916261</td>\n",
+       "      <td>0.473278</td>\n",
+       "      <td>56.034483</td>\n",
+       "      <td>0.966112</td>\n",
+       "      <td>1288913.690</td>\n",
+       "      <td>0.216409</td>\n",
+       "      <td>0.462282</td>\n",
+       "      <td>39.621023</td>\n",
+       "      <td>0.005626</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 36 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "    Age  ER  PgR  HER2  TrippleNegative  ChemoGrade  HistologyType  LNStatus  \\\n",
+       "0  41.0   0  0.0   0.0              1.0         3.0            1.0       1.0   \n",
+       "1  39.0   1  1.0   0.0              0.0         3.0            1.0       1.0   \n",
+       "2  31.0   0  0.0   0.0              1.0         2.0            1.0       0.0   \n",
+       "3  35.0   0  0.0   0.0              1.0         3.0            1.0       1.0   \n",
+       "4  61.0   1  0.0   0.0              0.0         2.0            1.0       0.0   \n",
+       "\n",
+       "   TumourStage  Gene  ...  original_gldm_SmallDependenceEmphasis  \\\n",
+       "0            2   1.0  ...                               0.005563   \n",
+       "1            2   0.0  ...                               0.006518   \n",
+       "2            2   1.0  ...                               0.007181   \n",
+       "3            3   1.0  ...                               0.004902   \n",
+       "4            2   1.0  ...                               0.007222   \n",
+       "\n",
+       "   original_glrlm_LongRunLowGrayLevelEmphasis  \\\n",
+       "0                                   10.779989   \n",
+       "1                                   27.650685   \n",
+       "2                                   25.338218   \n",
+       "3                                   31.461354   \n",
+       "4                                   27.916261   \n",
+       "\n",
+       "   original_glrlm_ShortRunHighGrayLevelEmphasis  \\\n",
+       "0                                      0.789987   \n",
+       "1                                      0.442279   \n",
+       "2                                      0.503046   \n",
+       "3                                      0.399896   \n",
+       "4                                      0.473278   \n",
+       "\n",
+       "   original_glszm_GrayLevelNonUniformity  \\\n",
+       "0                              27.545455   \n",
+       "1                              78.025000   \n",
+       "2                              72.027027   \n",
+       "3                              99.019802   \n",
+       "4                              56.034483   \n",
+       "\n",
+       "   original_glszm_GrayLevelNonUniformityNormalized  \\\n",
+       "0                                         0.834711   \n",
+       "1                                         0.975313   \n",
+       "2                                         0.973338   \n",
+       "3                                         0.980394   \n",
+       "4                                         0.966112   \n",
+       "\n",
+       "   original_glszm_LargeAreaEmphasis  \\\n",
+       "0                       4067578.818   \n",
+       "1                       2403756.075   \n",
+       "2                       1561963.432   \n",
+       "3                       7007670.723   \n",
+       "4                       1288913.690   \n",
+       "\n",
+       "   original_glszm_SizeZoneNonUniformityNormalized  \\\n",
+       "0                                        0.180900   \n",
+       "1                                        0.198125   \n",
+       "2                                        0.275749   \n",
+       "3                                        0.253014   \n",
+       "4                                        0.216409   \n",
+       "\n",
+       "   original_glszm_SmallAreaEmphasis  original_ngtdm_Busyness  \\\n",
+       "0                          0.403535               473.464852   \n",
+       "1                          0.444391                59.459710   \n",
+       "2                          0.534549                33.935384   \n",
+       "3                          0.506185                46.859265   \n",
+       "4                          0.462282                39.621023   \n",
+       "\n",
+       "   original_ngtdm_Strength  \n",
+       "0                 0.000758  \n",
+       "1                 0.003685  \n",
+       "2                 0.006447  \n",
+       "3                 0.004543  \n",
+       "4                 0.005626  \n",
+       "\n",
+       "[5 rows x 36 columns]"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_corr.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "b361c4c4-54c2-4de3-b5d3-c9a276019f36",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# saving the feature in text file for future use in model training and prediction\n",
+    "with open('30cor.txt', 'w') as f:\n",
+    "    for feature in X_corr.columns:\n",
+    "        f.write(feature + '\\n')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "766f1c86-ea45-4a3d-be13-c1335f27268b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "# Split the correlated feature into training and testing sets ( correlated features)\n",
+    "X_train1, X_test1, y_train1, y_test1 = train_test_split(X_corr, y_corr, test_size=0.2,shuffle=True,random_state=42)\n",
+    "# Standardize the features\n",
+    "scaler = StandardScaler()\n",
+    "X_train_sc1 = scaler.fit_transform(X_train1)\n",
+    "X_test_sc1 = scaler.fit_transform(X_test1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "id": "790b0a2c-e36e-4f70-ac0f-a2bc8a2f30ce",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((320, 36),)"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X_train1.shape, "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "08017321-a5fe-4ae4-82c8-8f65b5b9191c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# dataset using correlated feature selection for forward feature selection\n",
+    "X_for = df_corr.drop([\"PCR\",\"RelapseFreeSurvival (outcome)\"],axis=1)\n",
+    "y_for = df_corr[\"PCR\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "9e854be0-bf8e-4ad2-9d12-85584abe4f16",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Selected features: Index(['ER', 'PgR', 'HER2', 'TrippleNegative', 'ChemoGrade', 'HistologyType',\n",
+      "       'LNStatus', 'Gene', 'original_shape_Elongation',\n",
+      "       'original_shape_MeshVolume', 'original_firstorder_InterquartileRange',\n",
+      "       'original_firstorder_Kurtosis', 'original_glcm_Imc1',\n",
+      "       'original_gldm_SmallDependenceEmphasis',\n",
+      "       'original_glrlm_LongRunLowGrayLevelEmphasis',\n",
+      "       'original_glszm_GrayLevelNonUniformity',\n",
+      "       'original_glszm_GrayLevelNonUniformityNormalized',\n",
+      "       'original_glszm_SizeZoneNonUniformityNormalized'],\n",
+      "      dtype='object')\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.feature_selection import SequentialFeatureSelector\n",
+    "\n",
+    "# Handle missing values in y_for\n",
+    "y_for = y_for.dropna()  # Alternatively, use fillna()\n",
+    "\n",
+    "# Ensure X_for matches the cleaned y_for\n",
+    "X_for = X_for.loc[y_for.index]\n",
+    "\n",
+    "# Sequential Feature Selector\n",
+    "selector = SequentialFeatureSelector(\n",
+    "    estimator=RandomForestClassifier(n_estimators=100),\n",
+    "    n_features_to_select='auto'\n",
+    ")\n",
+    "\n",
+    "# Fit the selector to the data\n",
+    "selector.fit(X_for, y_for)\n",
+    "\n",
+    "# Get the selected features\n",
+    "selected_features1 = X_for.columns[selector.get_support()]\n",
+    "print(\"Selected features:\", selected_features1)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "662c1167-d3b1-4f56-ab3d-de46dfaf915e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 400 entries, 0 to 399\n",
+      "Data columns (total 18 columns):\n",
+      " #   Column                                           Non-Null Count  Dtype  \n",
+      "---  ------                                           --------------  -----  \n",
+      " 0   ER                                               400 non-null    int64  \n",
+      " 1   PgR                                              400 non-null    float64\n",
+      " 2   HER2                                             400 non-null    float64\n",
+      " 3   TrippleNegative                                  400 non-null    float64\n",
+      " 4   ChemoGrade                                       400 non-null    float64\n",
+      " 5   HistologyType                                    400 non-null    float64\n",
+      " 6   LNStatus                                         400 non-null    float64\n",
+      " 7   Gene                                             400 non-null    float64\n",
+      " 8   original_shape_Elongation                        400 non-null    float64\n",
+      " 9   original_shape_MeshVolume                        400 non-null    float64\n",
+      " 10  original_firstorder_InterquartileRange           400 non-null    float64\n",
+      " 11  original_firstorder_Kurtosis                     400 non-null    float64\n",
+      " 12  original_glcm_Imc1                               400 non-null    float64\n",
+      " 13  original_gldm_SmallDependenceEmphasis            400 non-null    float64\n",
+      " 14  original_glrlm_LongRunLowGrayLevelEmphasis       400 non-null    float64\n",
+      " 15  original_glszm_GrayLevelNonUniformity            400 non-null    float64\n",
+      " 16  original_glszm_GrayLevelNonUniformityNormalized  400 non-null    float64\n",
+      " 17  original_glszm_SizeZoneNonUniformityNormalized   400 non-null    float64\n",
+      "dtypes: float64(17), int64(1)\n",
+      "memory usage: 56.4 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Create a copy of the dataset with the selected features\n",
+    "X_selected_for1 = df_corr[selected_features1]\n",
+    "X_selected_for1.info()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "fd51f64a-7f57-493b-8485-41f3e313a61f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# X and y for forward feature selection\n",
+    "df3 = df.copy()\n",
+    "X2 = df3[selected_features1]\n",
+    "y2 = df3[\"PCR\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "c6f0e344-3a90-4418-8f20-2646f5e66f13",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(400, 18)"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "X2.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "d81d0dcb-5b32-4a06-93aa-9c8d97a7d575",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Splitting the data into training and testing sets ( forward feature selection)\n",
+    "X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.2,shuffle=True,random_state=42)\n",
+    "# Standardize the features\n",
+    "scaler = StandardScaler()\n",
+    "X_train_sc2 = scaler.fit_transform(X_train2)\n",
+    "X_test_sc2 = scaler.fit_transform(X_test2)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ed7db500-58aa-4326-9dd4-d8c4589bac06",
+   "metadata": {},
+   "source": [
+    "## Models"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "97247500-54a1-4046-bc15-4d8c2d511e97",
+   "metadata": {},
+   "source": [
+    "### Linear Regression "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "id": "b17e1da1-7a02-493f-a2a1-4907be57276b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Initial Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.82      0.87      0.85        94\n",
+      "         1.0       0.40      0.31      0.35        26\n",
+      "\n",
+      "    accuracy                           0.75       120\n",
+      "   macro avg       0.61      0.59      0.60       120\n",
+      "weighted avg       0.73      0.75      0.74       120\n",
+      "\n",
+      "\n",
+      "Initial Balanced Accuracy Score:\n",
+      "0.5900163666121113\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Import required libraries\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, balanced_accuracy_score\n",
+    "\n",
+    "# Splitting the data into training and testing sets\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_corr, y_corr, test_size=0.3, shuffle=True, random_state=64)\n",
+    "\n",
+    "# Standardizing the features\n",
+    "scaler = StandardScaler()\n",
+    "X_train_norm = scaler.fit_transform(X_train)\n",
+    "X_test_norm = scaler.transform(X_test)\n",
+    "\n",
+    "# Initial training of Logistic Regression model (without SMOTE)\n",
+    "log_reg = LogisticRegression(random_state=42)\n",
+    "log_reg.fit(X_train_norm, y_train)\n",
+    "\n",
+    "# Predicting class labels for testing data (initial prediction)\n",
+    "y_pred = log_reg.predict(X_test_norm)\n",
+    "\n",
+    "# Calculating classification report and balanced accuracy score (initial performance)\n",
+    "report = classification_report(y_test, y_pred)\n",
+    "balanced_accuracy = balanced_accuracy_score(y_test, y_pred)\n",
+    "\n",
+    "print('Initial Classification Report:')\n",
+    "print(report)\n",
+    "\n",
+    "print('\\nInitial Balanced Accuracy Score:')\n",
+    "print(balanced_accuracy)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "315e1a8f-b4d6-49f1-b7a3-704178f5074f",
+   "metadata": {},
+   "source": [
+    "### Random forest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5a51ba9-666c-4dfd-b370-a9f84555a35f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "from sklearn.metrics import balanced_accuracy_score, classification_report\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Handling missing values in features\n",
+    "imputer = SimpleImputer(strategy='mean')\n",
+    "X_train_sc1 = imputer.fit_transform(X_train_sc1)\n",
+    "X_train_sc2 = imputer.fit_transform(X_train_sc2)\n",
+    "\n",
+    "# Handling missing values in targets\n",
+    "y_train1 = pd.Series(y_train1).fillna(pd.Series(y_train1).mode()[0])\n",
+    "y_train2 = pd.Series(y_train2).fillna(pd.Series(y_train2).mode()[0])\n",
+    "\n",
+    "# Defining hyperparameter grid for RandomForestClassifier\n",
+    "param_grid = {\n",
+    "    'n_estimators': [100, 200, 300],\n",
+    "    'max_depth': [2, 5, 10],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 4],\n",
+    "    'random_state': [42]\n",
+    "}\n",
+    "\n",
+    "# Instantiate RandomForestClassifier\n",
+    "model1 = RandomForestClassifier()\n",
+    "\n",
+    "# Creating GridSearchCV objects for dataset1 and dataset2\n",
+    "grid_search1 = GridSearchCV(estimator=model1, param_grid=param_grid, scoring='balanced_accuracy', cv=5)\n",
+    "grid_search2 = GridSearchCV(estimator=model1, param_grid=param_grid, scoring='balanced_accuracy', cv=5)\n",
+    "\n",
+    "# Fitting the GridSearchCV objects for dataset1 and dataset2\n",
+    "grid_search1.fit(X_train_sc1, y_train1)\n",
+    "grid_search2.fit(X_train_sc2, y_train2)\n",
+    "\n",
+    "# Getting the best parameters from GridSearchCV for dataset1 and dataset2\n",
+    "best_params1 = grid_search1.best_params_\n",
+    "best_params2 = grid_search2.best_params_\n",
+    "\n",
+    "# Printing the best parameters for each dataset\n",
+    "print(\"Best Parameters for Dataset 1:\", best_params1)\n",
+    "print(\"Best Parameters for Dataset 2:\", best_params2)\n",
+    "\n",
+    "# Getting the balanced accuracy for the best model from GridSearchCV for each dataset\n",
+    "best_model1 = RandomForestClassifier(**best_params1)\n",
+    "best_model1.fit(X_train_sc1, y_train1)\n",
+    "y_pred1 = best_model1.predict(X_test_sc1)\n",
+    "balanced_accuracy1 = balanced_accuracy_score(y_test1, y_pred1)\n",
+    "\n",
+    "best_model2 = RandomForestClassifier(**best_params2)\n",
+    "best_model2.fit(X_train_sc2, y_train2)\n",
+    "y_pred2 = best_model2.predict(X_test_sc2)\n",
+    "balanced_accuracy2 = balanced_accuracy_score(y_test2, y_pred2)\n",
+    "\n",
+    "# Printing the balanced accuracy for each dataset\n",
+    "print(\"Dataset 1 Balanced Accuracy:\", balanced_accuracy1)\n",
+    "print(\"Dataset 2 Balanced Accuracy:\", balanced_accuracy2)\n",
+    "\n",
+    "# Getting the classification report for the best model from GridSearchCV for each dataset\n",
+    "print(\"Dataset 1 Classification Report:\\n\", classification_report(y_test1, y_pred1))\n",
+    "print(\"Dataset 2 Classification Report:\\n\", classification_report(y_test2, y_pred2))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "da95f0ab-9167-4bdf-a459-27a1a5caa8c9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l1)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:1197: UserWarning: l1_ratio parameter is only used when penalty is 'elasticnet'. Got (penalty=l2)\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n",
+      "C:\\Users\\LLR User\\miniconda3\\envs\\MLE\\Lib\\site-packages\\sklearn\\linear_model\\_sag.py:349: ConvergenceWarning: The max_iter was reached which means the coef_ did not converge\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Best Parameters: {'C': 1, 'l1_ratio': 0.9, 'penalty': 'elasticnet', 'solver': 'saga'}\n",
+      "Balanced Accuracy: 0.671875\n",
+      "Classification Report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.87      0.91      0.89        64\n",
+      "         1.0       0.54      0.44      0.48        16\n",
+      "\n",
+      "    accuracy                           0.81        80\n",
+      "   macro avg       0.70      0.67      0.68        80\n",
+      "weighted avg       0.80      0.81      0.80        80\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "from sklearn.metrics import balanced_accuracy_score, classification_report\n",
+    "\n",
+    "# Simplified hyperparameter grid\n",
+    "param_grid = {\n",
+    "    'penalty': ['l1', 'l2', 'elasticnet'],  # Common penalties\n",
+    "    'solver': ['saga'],  # 'saga' supports all penalties including 'elasticnet'\n",
+    "    'C': [0.01, 0.1, 1, 10],  # Regularization strength\n",
+    "    'l1_ratio': [0.1, 0.5, 0.9]  # Required only for 'elasticnet'\n",
+    "}\n",
+    "\n",
+    "# Instantiate Logistic Regression model\n",
+    "model = LogisticRegression(max_iter=1000, random_state=42)  # Increased max_iter for convergence\n",
+    "\n",
+    "# Create GridSearchCV\n",
+    "grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring='balanced_accuracy', cv=5)\n",
+    "\n",
+    "# Fit the GridSearchCV to the data\n",
+    "grid_search.fit(X_train_sc1, y_train1)\n",
+    "\n",
+    "# Get the best parameters and score\n",
+    "best_params = grid_search.best_params_\n",
+    "print(\"Best Parameters:\", best_params)\n",
+    "\n",
+    "# Evaluate on the test set\n",
+    "best_model = LogisticRegression(**best_params, max_iter=1000, random_state=42)\n",
+    "best_model.fit(X_train_sc1, y_train1)\n",
+    "y_pred = best_model.predict(X_test_sc1)\n",
+    "\n",
+    "# Balanced accuracy and classification report\n",
+    "balanced_accuracy = balanced_accuracy_score(y_test1, y_pred)\n",
+    "print(\"Balanced Accuracy:\", balanced_accuracy)\n",
+    "print(\"Classification Report:\\n\", classification_report(y_test1, y_pred))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2f818089-499c-41bc-a8e7-1f79940f18a9",
+   "metadata": {},
+   "source": [
+    "### SVC"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "7e32d5dd-48fe-415a-8ab5-69a8c5716144",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset1: {'C': 100, 'gamma': 0.001, 'kernel': 'linear'}\n",
+      "Dataset2: {'C': 150, 'gamma': 0.01, 'kernel': 'rbf'}\n",
+      "correlation balanced accuracy: 0.640625\n",
+      "forward balanced accuracy: 0.546875\n",
+      "correlation classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.86      0.84      0.85        64\n",
+      "         1.0       0.41      0.44      0.42        16\n",
+      "\n",
+      "    accuracy                           0.76        80\n",
+      "   macro avg       0.63      0.64      0.64        80\n",
+      "weighted avg       0.77      0.76      0.77        80\n",
+      "\n",
+      "forward classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.82      0.84      0.83        64\n",
+      "         1.0       0.29      0.25      0.27        16\n",
+      "\n",
+      "    accuracy                           0.72        80\n",
+      "   macro avg       0.55      0.55      0.55        80\n",
+      "weighted avg       0.71      0.72      0.72        80\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.svm import SVC\n",
+    "from sklearn.model_selection import GridSearchCV\n",
+    "from sklearn.metrics import balanced_accuracy_score, classification_report\n",
+    "\n",
+    "# Defining hyperparameter grid for SVC\n",
+    "param_grid = {\n",
+    "    'kernel': ['linear', 'rbf', 'poly'],\n",
+    "    'C': [0.01, 0.1, 1, 10, 100, 150],\n",
+    "    'gamma': [0.001, 0.01, 0.1, 1]\n",
+    "}\n",
+    "\n",
+    "# Instantiating SVC classifier \n",
+    "model1 = SVC()\n",
+    "\n",
+    "# Creating the GridSearchCV objects for dataset 1 and 2 (correlation and forward selection datasets)\n",
+    "grid_search1 = GridSearchCV(estimator=model1, param_grid=param_grid, scoring='balanced_accuracy', cv=5)\n",
+    "grid_search2 = GridSearchCV(estimator=model1, param_grid=param_grid, scoring='balanced_accuracy', cv=5)\n",
+    "\n",
+    "# Fitting the GridSearchCV objects for dataset 1 and 2\n",
+    "grid_search1.fit(X_train_sc1, y_train1)\n",
+    "grid_search2.fit(X_train_sc2, y_train2)\n",
+    "\n",
+    "# Getting the best parameters from GridSearchCV for dataset 1 and 2\n",
+    "best_params1 = grid_search1.best_params_\n",
+    "best_params2 = grid_search2.best_params_\n",
+    "\n",
+    "# Printing the best parameters for each dataset\n",
+    "print(\"Dataset1:\", best_params1)\n",
+    "print(\"Dataset2:\", best_params2)\n",
+    "\n",
+    "# Getting the balanced accuracy for the best model from GridSearchCV for each dataset\n",
+    "best_model1 = SVC(**best_params1)\n",
+    "best_model1.fit(X_train_sc1, y_train1)\n",
+    "y_pred1 = best_model1.predict(X_test_sc1)\n",
+    "balanced_accuracy1 = balanced_accuracy_score(y_test1, y_pred1)\n",
+    "\n",
+    "best_model2 = SVC(**best_params2)\n",
+    "best_model2.fit(X_train_sc2, y_train2)\n",
+    "y_pred2 = best_model2.predict(X_test_sc2)\n",
+    "balanced_accuracy2 = balanced_accuracy_score(y_test2, y_pred2)\n",
+    "\n",
+    "# Printing the balanced accuracy for each dataset\n",
+    "print(\"correlation balanced accuracy:\", balanced_accuracy1)\n",
+    "print(\"forward balanced accuracy:\", balanced_accuracy2)\n",
+    "\n",
+    "# Getting the classification report for the best model from GridSearchCV for each dataset\n",
+    "print(\"correlation classification report:\\n\", classification_report(y_test1, y_pred1))\n",
+    "print(\"forward classification report:\\n\", classification_report(y_test2, y_pred2))\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "936900ac-9890-449e-a8e7-f1d7aef72777",
+   "metadata": {},
+   "source": [
+    "### Decision Tree"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "360fab3f-6fd3-488b-a6af-9432216714d1",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset1: {'criterion': 'gini', 'max_depth': 5, 'min_samples_leaf': 4, 'min_samples_split': 2, 'random_state': 42}\n",
+      "Dataset2: {'criterion': 'gini', 'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 5, 'random_state': 42}\n",
+      "correlation balanced accuracy: 0.515625\n",
+      "forward balanced accuracy: 0.53125\n",
+      "correlation classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.81      0.91      0.85        64\n",
+      "         1.0       0.25      0.12      0.17        16\n",
+      "\n",
+      "    accuracy                           0.75        80\n",
+      "   macro avg       0.53      0.52      0.51        80\n",
+      "weighted avg       0.69      0.75      0.72        80\n",
+      "\n",
+      "forward classification report:\n",
+      "               precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.81      0.91      0.85        64\n",
+      "         1.0       0.25      0.12      0.17        16\n",
+      "\n",
+      "    accuracy                           0.75        80\n",
+      "   macro avg       0.53      0.52      0.51        80\n",
+      "weighted avg       0.69      0.75      0.72        80\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.tree import DecisionTreeClassifier\n",
+    "\n",
+    "# Defining hyperparameter grid for DecisionTreeClassifier\n",
+    "param_grid = {\n",
+    "    'criterion': ['entropy', 'gini'],\n",
+    "    'max_depth': [2, 5, 10],\n",
+    "    'min_samples_split': [2, 5, 10],\n",
+    "    'min_samples_leaf': [1, 2, 4],\n",
+    "    'random_state': [42]\n",
+    "}\n",
+    "\n",
+    "# Instantiating DecisionTreeClassifier \n",
+    "model1 = DecisionTreeClassifier()\n",
+    "\n",
+    "# Creating GridSearchCV objects for dataset 1 and 2\n",
+    "grid_search1 = GridSearchCV(estimator=model1, param_grid=param_grid, scoring='balanced_accuracy', cv=5)\n",
+    "grid_search2 = GridSearchCV(estimator=model1, param_grid=param_grid, scoring='balanced_accuracy', cv=5)\n",
+    "\n",
+    "# Fitting the GridSearchCV objects for dataset 1 and 2\n",
+    "grid_search1.fit(X_train_sc1, y_train1)\n",
+    "grid_search2.fit(X_train_sc2, y_train2)\n",
+    "\n",
+    "# Getting the best parameters from GridSearchCV for dataset 1 and 2\n",
+    "best_params1 = grid_search1.best_params_\n",
+    "best_params2 = grid_search2.best_params_\n",
+    "\n",
+    "# Printting the best parameters for each dataset\n",
+    "print(\"Dataset1:\", best_params1)\n",
+    "print(\"Dataset2:\", best_params2)\n",
+    "\n",
+    "# Getting the balanced accuracy for the best model from GridSearchCV for each dataset\n",
+    "best_model1 = DecisionTreeClassifier(**best_params1)\n",
+    "best_model1.fit(X_train_sc1, y_train1)\n",
+    "y_pred1 = best_model1.predict(X_test_sc1)\n",
+    "balanced_accuracy1 = balanced_accuracy_score(y_test1, y_pred1)\n",
+    "\n",
+    "best_model2 = DecisionTreeClassifier(**best_params2)\n",
+    "best_model2.fit(X_train_sc2, y_train2)\n",
+    "y_pred2 = best_model2.predict(X_test_sc2)\n",
+    "balanced_accuracy2 = balanced_accuracy_score(y_test2, y_pred2)\n",
+    "\n",
+    "# Printting the balanced accuracy for each dataset\n",
+    "print(\"correlation balanced accuracy:\", balanced_accuracy1)\n",
+    "print(\"forward balanced accuracy:\", balanced_accuracy2)\n",
+    "\n",
+    "# Getting the classification report for the best model from GridSearchCV for each dataset\n",
+    "print(\"correlation classification report:\\n\", classification_report(y_test1, y_pred1))\n",
+    "print(\"forward classification report:\\n\", classification_report(y_test2, y_pred1))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "a26e6775-c605-4dd2-8ef2-5d89a255a8f5",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.79      1.00      0.88        94\n",
+      "         1.0       1.00      0.04      0.07        26\n",
+      "\n",
+      "    accuracy                           0.79       120\n",
+      "   macro avg       0.89      0.52      0.48       120\n",
+      "weighted avg       0.84      0.79      0.71       120\n",
+      "\n",
+      "\n",
+      "Balanced Accuracy Score:\n",
+      "0.5192307692307693\n"
+     ]
+    }
+   ],
+   "source": [
+    "# training the random forest model for correlated features dataset\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_corr, y_corr, test_size=0.3,shuffle= True, random_state=64)\n",
+    "scaler = StandardScaler()\n",
+    "X_train_norm = scaler.fit_transform(X_train)\n",
+    "X_test_norm = scaler.transform(X_test)\n",
+    "\n",
+    "# Creating and train Random Forest classifier with specific parameters got from gridsearch\n",
+    "rf_classifier = RandomForestClassifier(max_depth=10, min_samples_leaf=3, min_samples_split=15, n_estimators=1000, random_state=42)\n",
+    "rf_classifier.fit(X_train_norm, y_train)\n",
+    "\n",
+    "# Predicting class labels for testing data\n",
+    "y_pred = rf_classifier.predict(X_test_norm)\n",
+    "\n",
+    "# Calculating classification report and balanced accuracy score\n",
+    "classification_report = classification_report(y_test, y_pred)\n",
+    "balanced_accuracy = balanced_accuracy_score(y_test, y_pred)\n",
+    "\n",
+    "print('Classification Report:')\n",
+    "print(classification_report)\n",
+    "\n",
+    "print('\\nBalanced Accuracy Score:')\n",
+    "print(balanced_accuracy)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b2491b03-0e49-4f7b-934c-300f6bfd5fa9",
+   "metadata": {},
+   "source": [
+    "### Smote for Random forest"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "95e1c8ab-bec2-4eee-a4d3-ff255ff2461d",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "Classification Reports for each fold:\n",
+      "\n",
+      "Fold 1 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.84      0.97      0.90        33\n",
+      "         1.0       0.50      0.14      0.22         7\n",
+      "\n",
+      "    accuracy                           0.82        40\n",
+      "   macro avg       0.67      0.56      0.56        40\n",
+      "weighted avg       0.78      0.82      0.78        40\n",
+      "\n",
+      "\n",
+      "Fold 2 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.82      0.90      0.86        31\n",
+      "         1.0       0.50      0.33      0.40         9\n",
+      "\n",
+      "    accuracy                           0.78        40\n",
+      "   macro avg       0.66      0.62      0.63        40\n",
+      "weighted avg       0.75      0.78      0.76        40\n",
+      "\n",
+      "\n",
+      "Fold 3 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.79      0.97      0.87        31\n",
+      "         1.0       0.50      0.11      0.18         9\n",
+      "\n",
+      "    accuracy                           0.78        40\n",
+      "   macro avg       0.64      0.54      0.53        40\n",
+      "weighted avg       0.72      0.78      0.71        40\n",
+      "\n",
+      "\n",
+      "Fold 4 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.86      0.91      0.88        33\n",
+      "         1.0       0.40      0.29      0.33         7\n",
+      "\n",
+      "    accuracy                           0.80        40\n",
+      "   macro avg       0.63      0.60      0.61        40\n",
+      "weighted avg       0.78      0.80      0.79        40\n",
+      "\n",
+      "\n",
+      "Fold 5 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.79      0.79      0.79        29\n",
+      "         1.0       0.45      0.45      0.45        11\n",
+      "\n",
+      "    accuracy                           0.70        40\n",
+      "   macro avg       0.62      0.62      0.62        40\n",
+      "weighted avg       0.70      0.70      0.70        40\n",
+      "\n",
+      "\n",
+      "Fold 6 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.81      0.94      0.87        31\n",
+      "         1.0       0.50      0.22      0.31         9\n",
+      "\n",
+      "    accuracy                           0.78        40\n",
+      "   macro avg       0.65      0.58      0.59        40\n",
+      "weighted avg       0.74      0.78      0.74        40\n",
+      "\n",
+      "\n",
+      "Fold 7 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.78      1.00      0.88        29\n",
+      "         1.0       1.00      0.27      0.43        11\n",
+      "\n",
+      "    accuracy                           0.80        40\n",
+      "   macro avg       0.89      0.64      0.65        40\n",
+      "weighted avg       0.84      0.80      0.75        40\n",
+      "\n",
+      "\n",
+      "Fold 8 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.94      0.86      0.90        37\n",
+      "         1.0       0.17      0.33      0.22         3\n",
+      "\n",
+      "    accuracy                           0.82        40\n",
+      "   macro avg       0.55      0.60      0.56        40\n",
+      "weighted avg       0.88      0.82      0.85        40\n",
+      "\n",
+      "\n",
+      "Fold 9 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.83      0.94      0.88        32\n",
+      "         1.0       0.50      0.25      0.33         8\n",
+      "\n",
+      "    accuracy                           0.80        40\n",
+      "   macro avg       0.67      0.59      0.61        40\n",
+      "weighted avg       0.77      0.80      0.77        40\n",
+      "\n",
+      "\n",
+      "Fold 10 Classification Report:\n",
+      "              precision    recall  f1-score   support\n",
+      "\n",
+      "         0.0       0.84      0.90      0.87        30\n",
+      "         1.0       0.62      0.50      0.56        10\n",
+      "\n",
+      "    accuracy                           0.80        40\n",
+      "   macro avg       0.73      0.70      0.71        40\n",
+      "weighted avg       0.79      0.80      0.79        40\n",
+      "\n",
+      "\n",
+      "Balanced Accuracy Scores for each fold:\n",
+      "Fold 1 Balanced Accuracy: 0.5563\n",
+      "Fold 2 Balanced Accuracy: 0.6183\n",
+      "Fold 3 Balanced Accuracy: 0.5394\n",
+      "Fold 4 Balanced Accuracy: 0.5974\n",
+      "Fold 5 Balanced Accuracy: 0.6238\n",
+      "Fold 6 Balanced Accuracy: 0.5789\n",
+      "Fold 7 Balanced Accuracy: 0.6364\n",
+      "Fold 8 Balanced Accuracy: 0.5991\n",
+      "Fold 9 Balanced Accuracy: 0.5938\n",
+      "Fold 10 Balanced Accuracy: 0.7000\n",
+      "\n",
+      "Mean Balanced Accuracy Score across all folds: 0.6043275980337994\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import KFold\n",
+    "from imblearn.over_sampling import SMOTE\n",
+    "from sklearn.preprocessing import StandardScaler\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.metrics import classification_report as clf_report, balanced_accuracy_score\n",
+    "import numpy as np\n",
+    "\n",
+    "# Initialize KFold with 10 splits\n",
+    "k = KFold(n_splits=10, shuffle=True, random_state=42)\n",
+    "\n",
+    "# Initialize SMOTE\n",
+    "sm = SMOTE(random_state=42)\n",
+    "\n",
+    "# Initialize Random Forest model\n",
+    "rf_classifier = RandomForestClassifier(max_depth=10, min_samples_leaf=3, min_samples_split=10, n_estimators=1000, random_state=42)\n",
+    "\n",
+    "# Lists to hold the results\n",
+    "all_classification_reports = []\n",
+    "all_balanced_accuracies = []\n",
+    "\n",
+    "# K-Fold Cross-Validation Loop\n",
+    "for train_idx, test_idx in k.split(X_corr, y_corr):\n",
+    "    # Split the data into train and test based on the indices\n",
+    "    X_train, X_test = X_corr.iloc[train_idx], X_corr.iloc[test_idx]\n",
+    "    y_train, y_test = y_corr.iloc[train_idx], y_corr.iloc[test_idx]\n",
+    "    \n",
+    "    # Resampling the data using SMOTE only for the training data\n",
+    "    X_train_resampled, y_train_resampled = sm.fit_resample(X_train, y_train)\n",
+    "    \n",
+    "    # Scaling data with StandardScaler\n",
+    "    scaler = StandardScaler()\n",
+    "    X_train_norm = scaler.fit_transform(X_train_resampled)\n",
+    "    X_test_norm = scaler.transform(X_test)  # Only transform test set\n",
+    "    \n",
+    "    # Train the Random Forest model\n",
+    "    rf_classifier.fit(X_train_norm, y_train_resampled)\n",
+    "    \n",
+    "    # Predicting\n",
+    "    y_pred = rf_classifier.predict(X_test_norm)\n",
+    "    \n",
+    "    # Generating the classification report and balanced accuracy score for each fold\n",
+    "    report = clf_report(y_test, y_pred, zero_division=0)\n",
+    "    balanced_accuracy = balanced_accuracy_score(y_test, y_pred)\n",
+    "    \n",
+    "    # Append results\n",
+    "    all_classification_reports.append(report)\n",
+    "    all_balanced_accuracies.append(balanced_accuracy)\n",
+    "\n",
+    "# Displaying the results\n",
+    "print(\"\\nClassification Reports for each fold:\")\n",
+    "for i, report in enumerate(all_classification_reports, 1):\n",
+    "    print(f\"\\nFold {i} Classification Report:\")\n",
+    "    print(report)\n",
+    "\n",
+    "print(\"\\nBalanced Accuracy Scores for each fold:\")\n",
+    "for i, score in enumerate(all_balanced_accuracies, 1):\n",
+    "    print(f\"Fold {i} Balanced Accuracy: {score:.4f}\")\n",
+    "\n",
+    "# Optionally, you can also display the mean of the balanced accuracies\n",
+    "print(\"\\nMean Balanced Accuracy Score across all folds:\", np.mean(all_balanced_accuracies))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2b182154-61c7-48ec-b601-7edf2537fd0c",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}