--- a +++ b/03-Experiments/04-XGBoost_With_Optuna.ipynb @@ -0,0 +1,1091 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Global Experiment Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<Experiment: artifact_location='/Users/arham/Downloads/Projects/03-Experiments/mlruns/2', creation_time=1713912394972, experiment_id='2', last_update_time=1713912394972, lifecycle_stage='active', name='XGBoost', tags={}>" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import mlflow\n", + "\n", + "\n", + "# Set the MLflow tracking URI to a new SQLite URI\n", + "mlflow.set_tracking_uri(\"sqlite:///new_mlflow.db\")\n", + "mlflow.set_experiment(\"XGBoost\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import numpy as np\n", + "from sklearn.preprocessing import MinMaxScaler\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "import lightgbm as lgb\n", + "from sklearn.metrics import accuracy_score\n", + "import warnings\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score\n", + "import xgboost as xgb\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "\n", + "def load_data(path):\n", + " df = pd.read_csv(path)\n", + " train_df, test_df = train_test_split(df, test_size=0.35, random_state=42)\n", + " train_df, val_df, = train_test_split(train_df, test_size=0.20, random_state=42)\n", + " train_df = train_df.drop(['id'], axis=1).drop_duplicates().reset_index(drop=True)\n", + " test_df = test_df.drop(['id'], axis=1).drop_duplicates().reset_index(drop=True)\n", + " val_df = val_df.drop(['id'], axis=1).drop_duplicates().reset_index(drop=True)\n", + " return train_df, val_df, test_df\n", + "\n", + "def encode_target(train):\n", + " target_key = {'Insufficient_Weight': 0, 'Normal_Weight': 1, 'Overweight_Level_I': 2, 'Overweight_Level_II': 3, 'Obesity_Type_I': 4,'Obesity_Type_II' : 5, 'Obesity_Type_III': 6}\n", + " train['NObeyesdad'] = train['NObeyesdad'].map(target_key)\n", + " return train\n", + "\n", + "def make_gender_binary(train):\n", + " train['Gender'] = train['Gender'].map({'Male':0, 'Female':1})\n", + "\n", + "def datatypes(train):\n", + " train['Weight'] = train['Weight'].astype(float)\n", + " train['Age'] = train['Age'].astype(float)\n", + " train['Height'] = train['Height'].astype(float)\n", + " return train\n", + "\n", + "# def age_binning(train_df):\n", + "# # train_df['Age_Group'] = pd.cut(train_df['Age'], bins=[0, 20, 30, 40, 50, train_df['Age'].max()], labels=['0-20', '21-30', '31-40', '41-50', '50+'])\n", + "# train_df['Age_Group'] = pd.cut(train_df['Age'], bins=[0, 20, 30, 40, 50, train_df['Age'].max()], labels=[1, 2, 3, 4, 5])\n", + "# train_df['Age_Group'] = train_df['Age_Group'].astype(int)\n", + "# return train_df\n", + "\n", + "def age_binning(df):\n", + " age_groups = []\n", + " for age in df['Age']:\n", + " if age <= 20:\n", + " age_group = 1\n", + " elif age <= 30:\n", + " age_group = 2\n", + " elif age <= 40:\n", + " age_group = 3\n", + " elif age <= 50:\n", + " age_group = 4\n", + " else:\n", + " age_group = 5\n", + " age_groups.append(age_group)\n", + " df['Age_Group'] = age_groups\n", + " return df\n", + "\n", + "def age_scaling_log(train_df):\n", + " train_df['Age'] = train_df['Age'].astype(float)\n", + " train_df['Log_Age'] = np.log1p(train_df['Age'])\n", + " return train_df\n", + "\n", + "def age_scaling_minmax(train_df):\n", + " train_df['Age'] = train_df['Age'].astype(float)\n", + " scaler_age = MinMaxScaler()\n", + " train_df['Scaled_Age'] = scaler_age.fit_transform(train_df['Age'].values.reshape(-1, 1))\n", + " return train_df, scaler_age\n", + "\n", + "def weight_scaling_log(train_df):\n", + " train_df['Weight'] = train_df['Weight'].astype(float)\n", + " train_df['Log_Weight'] = np.log1p(train_df['Weight'])\n", + " return train_df\n", + "\n", + "def weight_scaling_minmax(train_df):\n", + " train_df['Weight'] = train_df['Weight'].astype(float)\n", + " scaler_weight = MinMaxScaler()\n", + " train_df['Scaled_Weight'] = scaler_weight.fit_transform(train_df['Weight'].values.reshape(-1, 1))\n", + " return train_df, scaler_weight\n", + "\n", + "def height_scaling_log(train_df):\n", + " train_df['Log_Height'] = np.log1p(train_df['Height'])\n", + " return train_df\n", + "\n", + "def height_scaling_minmax(train_df):\n", + " scaler_height = MinMaxScaler()\n", + " train_df['Scaled_Height'] = scaler_height.fit_transform(train_df['Height'].values.reshape(-1, 1))\n", + " return train_df, scaler_height\n", + "\n", + "def make_gender_binary(train):\n", + " train['Gender'] = train['Gender'].map({'Female':1, 'Male':0})\n", + " return train\n", + "\n", + "def fix_binary_columns(train):\n", + " Binary_Cols = ['family_history_with_overweight','FAVC', 'SCC','SMOKE']\n", + " # if yes then 1 else 0\n", + " for col in Binary_Cols:\n", + " train[col] = train[col].map({'yes': 1, 'no': 0})\n", + " # column datatype integer\n", + " train[col] = train[col].astype(int)\n", + " return train\n", + "\n", + "def freq_cat_cols(train):\n", + " # One hot encoding\n", + " cat_cols = ['CAEC', 'CALC']\n", + " for col in cat_cols:\n", + " train[col] = train[col].map({'no': 0, 'Sometimes': 1, 'Frequently': 2, 'Always': 3})\n", + " return train\n", + "\n", + "def Mtrans(train):\n", + " \"\"\"\n", + " Public_Transportation 8692\n", + " Automobile 1835\n", + " Walking 231\n", + " Motorbike 19\n", + " Bike 16\n", + " \"\"\"\n", + " # train['MTRANS'] = train['MTRANS'].map({'Public_Transportation': 3, 'Automobile': 5, 'Walking': 1, 'Motorbike': 4, 'Bike': 2})\n", + " # dummify column\n", + " train = pd.get_dummies(train, columns=['MTRANS'])\n", + " # convert these columns to integer\n", + " train['MTRANS_Automobile'] = train['MTRANS_Automobile'].astype(int)\n", + " train['MTRANS_Walking'] = train['MTRANS_Walking'].astype(int)\n", + " train['MTRANS_Motorbike'] = train['MTRANS_Motorbike'].astype(int)\n", + " train['MTRANS_Bike'] = train['MTRANS_Bike'].astype(int)\n", + " train['MTRANS_Public_Transportation'] = train['MTRANS_Public_Transportation'].astype(int)\n", + " return train\n", + "\n", + "\n", + "def other_features(train):\n", + " train['BMI'] = train['Weight'] / (train['Height'] ** 2)\n", + " # train['Age'*'Gender'] = train['Age'] * train['Gender']\n", + " polynomial_features = PolynomialFeatures(degree=2)\n", + " X_poly = polynomial_features.fit_transform(train[['Age', 'BMI']])\n", + " poly_features_df = pd.DataFrame(X_poly, columns=['Age^2', 'Age^3', 'BMI^2', 'Age * BMI', 'Age * BMI^2', 'Age^2 * BMI^2'])\n", + " train = pd.concat([train, poly_features_df], axis=1)\n", + " return train\n", + "\n", + "\n", + "def test_pipeline(test, scaler_age, scaler_weight, scaler_height):\n", + " test = datatypes(test)\n", + " test = encode_target(test)\n", + " test = age_binning(test)\n", + " test = age_scaling_log(test)\n", + " test['Scaled_Age'] = scaler_age.transform(test['Age'].values.reshape(-1, 1))\n", + " test = weight_scaling_log(test)\n", + " test['Scaled_Weight'] = scaler_weight.transform(test['Weight'].values.reshape(-1, 1))\n", + " test = height_scaling_log(test)\n", + " test['Scaled_Height'] = scaler_height.transform(test['Height'].values.reshape(-1, 1))\n", + " test = make_gender_binary(test)\n", + " test = fix_binary_columns(test)\n", + " test = freq_cat_cols(test)\n", + " test = Mtrans(test)\n", + " test = other_features(test)\n", + "\n", + " return test\n", + "\n", + "def train_model(params, X_train, y_train):\n", + " lgb_train = lgb.Dataset(X_train, y_train)\n", + " model = lgb.train(params, lgb_train, num_boost_round=1000)\n", + " return model\n", + "\n", + "def evaluate_model(model, X_val, y_val):\n", + " y_pred = model.predict(X_val)\n", + " y_pred = [np.argmax(y) for y in y_pred]\n", + " accuracy = accuracy_score(y_val, y_pred)\n", + " return accuracy\n", + "\n", + "def objective(trial, X_train, y_train):\n", + " params = {\n", + " 'objective': 'multiclass',\n", + " 'num_class': 7,\n", + " 'metric': 'multi_logloss',\n", + " 'boosting_type': 'gbdt',\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.005, 0.5),\n", + " 'num_leaves': trial.suggest_int('num_leaves', 10, 1000),\n", + " 'max_depth': trial.suggest_int('max_depth', -1, 20),\n", + " 'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.6, 0.95),\n", + " 'feature_fraction': trial.suggest_uniform('feature_fraction', 0.6, 0.95),\n", + " 'verbosity': -1\n", + " }\n", + "\n", + " n_splits = 5\n", + " kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)\n", + " scores = []\n", + "\n", + " for train_index, val_index in kf.split(X_train, y_train):\n", + " X_tr, X_val = X_train.iloc[train_index], X_train.iloc[val_index]\n", + " y_tr, y_val = y_train.iloc[train_index], y_train.iloc[val_index]\n", + "\n", + " model = train_model(params, X_tr, y_tr)\n", + " accuracy = evaluate_model(model, X_val, y_val)\n", + " scores.append(accuracy)\n", + "\n", + " return np.mean(scores)\n", + "\n", + "def optimize_hyperparameters(X_train, y_train, n_trials=2):\n", + " study = optuna.create_study(direction='maximize')\n", + " study.optimize(lambda trial: objective(trial, X_train, y_train), n_trials=n_trials)\n", + " return study.best_params\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### XGB With Optuna" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "path = '/Users/arham/Downloads/Projects/01-Dataset/01-Data-for-model-building/train.csv'\n", + "train_df, val_df, test_df = load_data(path)\n", + "\n", + "train_df = datatypes(train_df)\n", + "train_df = encode_target(train_df)\n", + "train_df = age_binning(train_df)\n", + "train_df, scaler_age = age_scaling_minmax(train_df)\n", + "train_df = age_scaling_log(train_df)\n", + "train_df, scaler_weight = weight_scaling_minmax(train_df)\n", + "train_df = weight_scaling_log(train_df)\n", + "train_df, scaler_height = height_scaling_minmax(train_df)\n", + "train_df = height_scaling_log(train_df)\n", + "train_df = make_gender_binary(train_df)\n", + "train_df = fix_binary_columns(train_df)\n", + "train_df = freq_cat_cols(train_df)\n", + "train_df = Mtrans(train_df)\n", + "train_df = other_features(train_df)\n", + "\n", + "val_df = test_pipeline(val_df, scaler_age, scaler_weight, scaler_height)\n", + "test_df = test_pipeline(test_df, scaler_age, scaler_weight, scaler_height)\n", + "\n", + "Target = 'NObeyesdad'\n", + "# features = train_df.columns.drop(Target)\n", + "features = ['Gender', 'Age', 'Height', 'Weight', 'family_history_with_overweight',\n", + " 'FAVC', 'FCVC', 'NCP', 'CAEC', 'SMOKE', 'CH2O', 'SCC', 'FAF', 'TUE',\n", + " 'CALC', 'Age_Group', \n", + " 'MTRANS_Automobile', 'MTRANS_Bike', 'MTRANS_Motorbike',\n", + " 'MTRANS_Public_Transportation', 'MTRANS_Walking', 'BMI', 'Age^2',\n", + " 'Age^3', 'BMI^2', 'Age * BMI', 'Age * BMI^2', 'Age^2 * BMI^2'] \n", + "\n", + " #'Scaled_Age', 'Log_Age', 'Scaled_Weight', 'Log_Weight', 'Scaled_Height', 'Log_Height',\n", + "\n", + "\n", + "X_train = train_df[features]\n", + "y_train = train_df[Target]\n", + "X_val = val_df[features]\n", + "y_val = val_df[Target]\n", + "X_test = test_df[features]\n", + "y_test = test_df[Target]\n", + "\n", + "# save X_train, y_train, X_val, X_test, y_test\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:07:02,465] A new study created in memory with name: no-name-64002971-707f-4151-be7c-8ed470ee8bc1\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.88003705 0.88235294 0.86984715 0.86376274 0.87673772]\n", + "Mean CV Accuracy (XGBoost): 0.8745475220438529\n", + "\n", + "Accuracy (XGBoost): 0.8684698036309745\n", + "Precision (XGBoost): 0.8684692480457514\n", + "Recall (XGBoost): 0.8684698036309745\n", + "F1 (XGBoost): 0.8684695258382741\n", + "Recall for class 0: 0.9113924050632911\n", + "Recall for class 1: 0.8770053475935828\n", + "Recall for class 2: 0.7432835820895523\n", + "Recall for class 3: 0.7355623100303952\n", + "Recall for class 4: 0.8218527315914489\n", + "Recall for class 5: 0.9330143540669856\n", + "Recall for class 6: 0.9901185770750988\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:07:19,496] Trial 0 finished with value: 0.8745475220438529 and parameters: {'max_depth': 5, 'learning_rate': 0.013394988506670473, 'n_estimators': 77, 'min_child_weight': 10, 'subsample': 1.0, 'colsample_bytree': 0.7, 'gamma': 6.905297068817435e-07, 'reg_alpha': 0.678891339417352, 'reg_lambda': 2.8230926058910324}. Best is trial 0 with value: 0.8745475220438529.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.90504863 0.90690134 0.90273275 0.8952734 0.90037071]\n", + "Mean CV Accuracy (XGBoost): 0.9020653676808635\n", + "\n", + "Accuracy (XGBoost): 0.9018154872174879\n", + "Precision (XGBoost): 0.9021161905127771\n", + "Recall (XGBoost): 0.9018154872174879\n", + "F1 (XGBoost): 0.9019658138025226\n", + "Recall for class 0: 0.9272151898734177\n", + "Recall for class 1: 0.9010695187165776\n", + "Recall for class 2: 0.7761194029850746\n", + "Recall for class 3: 0.8237082066869301\n", + "Recall for class 4: 0.8646080760095012\n", + "Recall for class 5: 0.9688995215311005\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:10:12,141] Trial 1 finished with value: 0.9020653676808635 and parameters: {'max_depth': 10, 'learning_rate': 0.09249377976464467, 'n_estimators': 721, 'min_child_weight': 7, 'subsample': 0.9, 'colsample_bytree': 0.8, 'gamma': 0.00012452373684121342, 'reg_alpha': 0.71016682383988, 'reg_lambda': 1.2412790195572406e-06}. Best is trial 1 with value: 0.9020653676808635.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.90504863 0.91014358 0.90736452 0.89944393 0.90222428]\n", + "Mean CV Accuracy (XGBoost): 0.9048449901075782\n", + "\n", + "Accuracy (XGBoost): 0.9040385327899222\n", + "Precision (XGBoost): 0.9045766402024644\n", + "Recall (XGBoost): 0.9040385327899222\n", + "F1 (XGBoost): 0.9043075064461108\n", + "Recall for class 0: 0.9335443037974683\n", + "Recall for class 1: 0.9037433155080213\n", + "Recall for class 2: 0.7611940298507462\n", + "Recall for class 3: 0.8328267477203647\n", + "Recall for class 4: 0.8741092636579573\n", + "Recall for class 5: 0.9712918660287081\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:10:55,359] Trial 2 finished with value: 0.9048449901075782 and parameters: {'max_depth': 8, 'learning_rate': 0.1480724813458801, 'n_estimators': 201, 'min_child_weight': 10, 'subsample': 0.7, 'colsample_bytree': 0.6, 'gamma': 4.241734906017588e-07, 'reg_alpha': 1.1757709861728217e-05, 'reg_lambda': 3.398219063416917}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.89717462 0.8994905 0.89161649 0.88878591 0.89712697]\n", + "Mean CV Accuracy (XGBoost): 0.8948388988311532\n", + "\n", + "Accuracy (XGBoost): 0.889218228973694\n", + "Precision (XGBoost): 0.8896005944786827\n", + "Recall (XGBoost): 0.889218228973694\n", + "F1 (XGBoost): 0.8894093706305529\n", + "Recall for class 0: 0.9208860759493671\n", + "Recall for class 1: 0.8823529411764706\n", + "Recall for class 2: 0.7522388059701492\n", + "Recall for class 3: 0.8206686930091185\n", + "Recall for class 4: 0.838479809976247\n", + "Recall for class 5: 0.9593301435406698\n", + "Recall for class 6: 0.9940711462450593\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:11:45,267] Trial 3 finished with value: 0.8948388988311532 and parameters: {'max_depth': 6, 'learning_rate': 0.0915177926130075, 'n_estimators': 339, 'min_child_weight': 5, 'subsample': 0.9, 'colsample_bytree': 0.7, 'gamma': 0.0813995583239973, 'reg_alpha': 20.619635618013948, 'reg_lambda': 3.974196714305311e-07}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.89995368 0.90736452 0.89810097 0.89712697 0.89759036]\n", + "Mean CV Accuracy (XGBoost): 0.9000273012812284\n", + "\n", + "Accuracy (XGBoost): 0.8984809188588366\n", + "Precision (XGBoost): 0.898791107190973\n", + "Recall (XGBoost): 0.8984809188588366\n", + "F1 (XGBoost): 0.8986359862574483\n", + "Recall for class 0: 0.9272151898734177\n", + "Recall for class 1: 0.8903743315508021\n", + "Recall for class 2: 0.7522388059701492\n", + "Recall for class 3: 0.8358662613981763\n", + "Recall for class 4: 0.8669833729216152\n", + "Recall for class 5: 0.9641148325358851\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:13:55,859] Trial 4 finished with value: 0.9000273012812284 and parameters: {'max_depth': 9, 'learning_rate': 0.030870513505594194, 'n_estimators': 288, 'min_child_weight': 1, 'subsample': 1.0, 'colsample_bytree': 1.0, 'gamma': 0.9549119955309026, 'reg_alpha': 0.007762320297440327, 'reg_lambda': 2.0978432882529218e-05}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.90829088 0.90736452 0.90504863 0.89759036 0.90315107]\n", + "Mean CV Accuracy (XGBoost): 0.9042890913781608\n", + "\n", + "Accuracy (XGBoost): 0.9058910707669507\n", + "Precision (XGBoost): 0.9059794693613187\n", + "Recall (XGBoost): 0.9058910707669507\n", + "F1 (XGBoost): 0.9059352679077136\n", + "Recall for class 0: 0.930379746835443\n", + "Recall for class 1: 0.9090909090909091\n", + "Recall for class 2: 0.7791044776119403\n", + "Recall for class 3: 0.8358662613981763\n", + "Recall for class 4: 0.8669833729216152\n", + "Recall for class 5: 0.9712918660287081\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:15:44,305] Trial 5 finished with value: 0.9042890913781608 and parameters: {'max_depth': 8, 'learning_rate': 0.04624235053164599, 'n_estimators': 385, 'min_child_weight': 3, 'subsample': 0.6, 'colsample_bytree': 0.6, 'gamma': 2.4010178478007346e-07, 'reg_alpha': 1.2972918999055175e-08, 'reg_lambda': 0.010144470159191883}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.89671144 0.90458546 0.89671144 0.89388323 0.89851715]\n", + "Mean CV Accuracy (XGBoost): 0.8980817415813537\n", + "\n", + "Accuracy (XGBoost): 0.8969988884772138\n", + "Precision (XGBoost): 0.897313793211423\n", + "Recall (XGBoost): 0.8969988884772138\n", + "F1 (XGBoost): 0.8971563132111772\n", + "Recall for class 0: 0.9208860759493671\n", + "Recall for class 1: 0.8983957219251337\n", + "Recall for class 2: 0.7731343283582089\n", + "Recall for class 3: 0.7993920972644377\n", + "Recall for class 4: 0.8622327790973872\n", + "Recall for class 5: 0.9688995215311005\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:18:36,520] Trial 6 finished with value: 0.8980817415813537 and parameters: {'max_depth': 6, 'learning_rate': 0.21361268583130158, 'n_estimators': 989, 'min_child_weight': 6, 'subsample': 0.6, 'colsample_bytree': 0.7, 'gamma': 0.15715908126940034, 'reg_alpha': 0.002876421704879931, 'reg_lambda': 0.00020071476895386069}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.89532191 0.90273275 0.8994905 0.8952734 0.90176089]\n", + "Mean CV Accuracy (XGBoost): 0.8989158901612793\n", + "\n", + "Accuracy (XGBoost): 0.9007039644312709\n", + "Precision (XGBoost): 0.9010058877096547\n", + "Recall (XGBoost): 0.9007039644312709\n", + "F1 (XGBoost): 0.9008549007729195\n", + "Recall for class 0: 0.9272151898734177\n", + "Recall for class 1: 0.9037433155080213\n", + "Recall for class 2: 0.7761194029850746\n", + "Recall for class 3: 0.817629179331307\n", + "Recall for class 4: 0.8598574821852731\n", + "Recall for class 5: 0.9688995215311005\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:19:40,518] Trial 7 finished with value: 0.8989158901612793 and parameters: {'max_depth': 7, 'learning_rate': 0.2701374472461074, 'n_estimators': 220, 'min_child_weight': 4, 'subsample': 0.7, 'colsample_bytree': 1.0, 'gamma': 6.952295076389228e-06, 'reg_alpha': 4.7746605073070416e-08, 'reg_lambda': 0.010461996036680242}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.88652154 0.89022696 0.87633164 0.87395737 0.88693234]\n", + "Mean CV Accuracy (XGBoost): 0.8827939684773225\n", + "\n", + "Accuracy (XGBoost): 0.8806965542793628\n", + "Precision (XGBoost): 0.8807374877985908\n", + "Recall (XGBoost): 0.8806965542793628\n", + "F1 (XGBoost): 0.880717020563355\n", + "Recall for class 0: 0.9335443037974683\n", + "Recall for class 1: 0.8663101604278075\n", + "Recall for class 2: 0.7432835820895523\n", + "Recall for class 3: 0.7993920972644377\n", + "Recall for class 4: 0.8218527315914489\n", + "Recall for class 5: 0.9545454545454546\n", + "Recall for class 6: 0.9901185770750988\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:20:20,776] Trial 8 finished with value: 0.8827939684773225 and parameters: {'max_depth': 10, 'learning_rate': 0.030824226564898152, 'n_estimators': 161, 'min_child_weight': 10, 'subsample': 1.0, 'colsample_bytree': 0.8, 'gamma': 0.0002761971902093278, 'reg_alpha': 34.12323171902436, 'reg_lambda': 1.1963423659959008e-06}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.90180639 0.90690134 0.90180639 0.89341983 0.90176089]\n", + "Mean CV Accuracy (XGBoost): 0.9011389699604345\n", + "\n", + "Accuracy (XGBoost): 0.9021859948128936\n", + "Precision (XGBoost): 0.9026219001901948\n", + "Recall (XGBoost): 0.9021859948128936\n", + "F1 (XGBoost): 0.9024038948606238\n", + "Recall for class 0: 0.9367088607594937\n", + "Recall for class 1: 0.8983957219251337\n", + "Recall for class 2: 0.7582089552238805\n", + "Recall for class 3: 0.8237082066869301\n", + "Recall for class 4: 0.8812351543942993\n", + "Recall for class 5: 0.9641148325358851\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:20:52,333] Trial 9 finished with value: 0.9011389699604345 and parameters: {'max_depth': 6, 'learning_rate': 0.30841942106618764, 'n_estimators': 132, 'min_child_weight': 7, 'subsample': 0.9, 'colsample_bytree': 0.7, 'gamma': 5.749686185921016e-07, 'reg_alpha': 1.953546914108059e-08, 'reg_lambda': 0.003027552093067664}. Best is trial 2 with value: 0.9048449901075782.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.91014358 0.91014358 0.90736452 0.90083411 0.90222428]\n", + "Mean CV Accuracy (XGBoost): 0.9061420155986472\n", + "\n", + "Accuracy (XGBoost): 0.9029270100037051\n", + "Precision (XGBoost): 0.9034148150281903\n", + "Recall (XGBoost): 0.9029270100037051\n", + "F1 (XGBoost): 0.9031708466497482\n", + "Recall for class 0: 0.930379746835443\n", + "Recall for class 1: 0.9117647058823529\n", + "Recall for class 2: 0.764179104477612\n", + "Recall for class 3: 0.8328267477203647\n", + "Recall for class 4: 0.8622327790973872\n", + "Recall for class 5: 0.9688995215311005\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:22:05,710] Trial 10 finished with value: 0.9061420155986472 and parameters: {'max_depth': 3, 'learning_rate': 0.14385928468253742, 'n_estimators': 574, 'min_child_weight': 8, 'subsample': 0.7, 'colsample_bytree': 0.6, 'gamma': 1.5922499564605758e-08, 'reg_alpha': 7.445790014646167e-06, 'reg_lambda': 95.07956676133519}. Best is trial 10 with value: 0.9061420155986472.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.91014358 0.91060676 0.90736452 0.90500463 0.90315107]\n", + "Mean CV Accuracy (XGBoost): 0.9072541135432812\n", + "\n", + "Accuracy (XGBoost): 0.9047795479807336\n", + "Precision (XGBoost): 0.9054304429990271\n", + "Recall (XGBoost): 0.9047795479807336\n", + "F1 (XGBoost): 0.9051048784691129\n", + "Recall for class 0: 0.9367088607594937\n", + "Recall for class 1: 0.9064171122994652\n", + "Recall for class 2: 0.7701492537313432\n", + "Recall for class 3: 0.8389057750759878\n", + "Recall for class 4: 0.8622327790973872\n", + "Recall for class 5: 0.9712918660287081\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:23:19,966] Trial 11 finished with value: 0.9072541135432812 and parameters: {'max_depth': 3, 'learning_rate': 0.14050588188423715, 'n_estimators': 601, 'min_child_weight': 9, 'subsample': 0.7, 'colsample_bytree': 0.6, 'gamma': 1.9866742273835196e-08, 'reg_alpha': 1.1945121332468573e-05, 'reg_lambda': 71.93048807006664}. Best is trial 11 with value: 0.9072541135432812.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.90968041 0.91060676 0.91060676 0.90454124 0.90083411]\n", + "Mean CV Accuracy (XGBoost): 0.9072538559840245\n", + "\n", + "Accuracy (XGBoost): 0.9044090403853279\n", + "Precision (XGBoost): 0.9053349503998932\n", + "Recall (XGBoost): 0.9044090403853279\n", + "F1 (XGBoost): 0.9048717585333201\n", + "Recall for class 0: 0.9367088607594937\n", + "Recall for class 1: 0.9117647058823529\n", + "Recall for class 2: 0.764179104477612\n", + "Recall for class 3: 0.8358662613981763\n", + "Recall for class 4: 0.8646080760095012\n", + "Recall for class 5: 0.9688995215311005\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:24:34,779] Trial 12 finished with value: 0.9072538559840245 and parameters: {'max_depth': 3, 'learning_rate': 0.13577039129548693, 'n_estimators': 604, 'min_child_weight': 8, 'subsample': 0.7, 'colsample_bytree': 0.6, 'gamma': 2.3371777592215328e-08, 'reg_alpha': 8.403097374567035e-06, 'reg_lambda': 41.38139839851449}. Best is trial 11 with value: 0.9072541135432812.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.8994905 0.90597499 0.90319592 0.89759036 0.89805375]\n", + "Mean CV Accuracy (XGBoost): 0.9008611064488117\n", + "\n", + "Accuracy (XGBoost): 0.9036680251945165\n", + "Precision (XGBoost): 0.9043871882647396\n", + "Recall (XGBoost): 0.9036680251945165\n", + "F1 (XGBoost): 0.9040274637042609\n", + "Recall for class 0: 0.9272151898734177\n", + "Recall for class 1: 0.9010695187165776\n", + "Recall for class 2: 0.7731343283582089\n", + "Recall for class 3: 0.8267477203647416\n", + "Recall for class 4: 0.8693586698337292\n", + "Recall for class 5: 0.9760765550239234\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:26:22,855] Trial 13 finished with value: 0.9008611064488117 and parameters: {'max_depth': 3, 'learning_rate': 0.45330520785367046, 'n_estimators': 709, 'min_child_weight': 8, 'subsample': 0.8, 'colsample_bytree': 0.9, 'gamma': 1.050198550207745e-08, 'reg_alpha': 1.4096829741801802e-05, 'reg_lambda': 54.59517406603631}. Best is trial 11 with value: 0.9072541135432812.\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:12: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:15: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:16: FutureWarning: suggest_discrete_uniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., step=...) instead.\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:17: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:18: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + "/var/folders/2t/c7s0z0zs4698zw0k9pj4f2r80000gn/T/ipykernel_20288/2035532747.py:19: FutureWarning: suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use suggest_float(..., log=True) instead.\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target Drift For Each Class [0.004943133623686147, 0.011990707821925795, -0.0087675011457998, -0.001077949504617301, -0.017190035106736085, -0.00032756263090533144, 0.01042920694244659]\n", + "Cross-validation Scores (XGBoost): [0.90690134 0.91060676 0.90551181 0.90454124 0.90037071]\n", + "Mean CV Accuracy (XGBoost): 0.9055863744284867\n", + "\n", + "Accuracy (XGBoost): 0.9044090403853279\n", + "Precision (XGBoost): 0.9048073892271471\n", + "Recall (XGBoost): 0.9044090403853279\n", + "F1 (XGBoost): 0.9046081709525012\n", + "Recall for class 0: 0.9335443037974683\n", + "Recall for class 1: 0.9117647058823529\n", + "Recall for class 2: 0.7611940298507462\n", + "Recall for class 3: 0.8297872340425532\n", + "Recall for class 4: 0.8717339667458432\n", + "Recall for class 5: 0.9712918660287081\n", + "Recall for class 6: 0.9960474308300395\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[I 2024-04-25 15:27:41,681] Trial 14 finished with value: 0.9055863744284867 and parameters: {'max_depth': 4, 'learning_rate': 0.05892316102120539, 'n_estimators': 515, 'min_child_weight': 8, 'subsample': 0.8, 'colsample_bytree': 0.6, 'gamma': 0.002339216477344003, 'reg_alpha': 1.5648083825482848e-06, 'reg_lambda': 0.3956617258269376}. Best is trial 11 with value: 0.9072541135432812.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Number of finished trials: 15\n", + "Best trial:\n", + " Value: 0.90725\n", + " Params: \n", + " max_depth: 3\n", + " learning_rate: 0.14050588188423715\n", + " n_estimators: 601\n", + " min_child_weight: 9\n", + " subsample: 0.7\n", + " colsample_bytree: 0.6\n", + " gamma: 1.9866742273835196e-08\n", + " reg_alpha: 1.1945121332468573e-05\n", + " reg_lambda: 71.93048807006664\n" + ] + } + ], + "source": [ + "import optuna\n", + "from sklearn.model_selection import cross_val_score\n", + "from xgboost import XGBClassifier\n", + "from sklearn.metrics import accuracy_score, precision_score, recall_score, precision_recall_fscore_support\n", + "import mlflow\n", + "\n", + "mlflow.sklearn.autolog(disable=True)\n", + "\n", + "def objective(trial):\n", + " params = {\n", + " 'max_depth': trial.suggest_int('max_depth', 3, 10),\n", + " 'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.5),\n", + " 'n_estimators': trial.suggest_int('n_estimators', 50, 1000),\n", + " 'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),\n", + " 'subsample': trial.suggest_discrete_uniform('subsample', 0.6, 1, 0.1),\n", + " 'colsample_bytree': trial.suggest_discrete_uniform('colsample_bytree', 0.6, 1, 0.1),\n", + " 'gamma': trial.suggest_loguniform('gamma', 1e-8, 1.0),\n", + " 'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-8, 100.0),\n", + " 'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-8, 100.0),\n", + " 'random_state': 42,\n", + " 'objective': 'multi:softmax',\n", + " 'num_class': 7,\n", + " 'eval_metric': 'mlogloss'\n", + " }\n", + "\n", + " with mlflow.start_run(run_name=\"XGB_with_Feature_Engineering_optuna_\" + str(trial.number), nested=True):\n", + " # Log hyperparameters as a single dictionary\n", + " mlflow.log_params(params)\n", + "\n", + " # Calculate target drift and log\n", + " class_counts_train = [y_train[y_train == i].count() / y_train.count() for i in range(7)]\n", + " class_counts_val = [y_val[y_val == i].count() / y_val.count() for i in range(7)]\n", + " target_drift = [(train_count - val_count) for train_count, val_count in zip(class_counts_train, class_counts_val)]\n", + " mlflow.log_params({'Target_Drift_' + str(i): freq for i, freq in enumerate(target_drift)})\n", + " print(f\"Target Drift For Each Class {target_drift}\")\n", + "\n", + " # Train XGBoost model\n", + " model = XGBClassifier(**params)\n", + " cv_scores_xgb = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')\n", + " print(\"Cross-validation Scores (XGBoost):\", cv_scores_xgb)\n", + " print(\"Mean CV Accuracy (XGBoost):\", cv_scores_xgb.mean())\n", + " \n", + "\n", + " # Log CV metrics\n", + " mlflow.log_metric('mean_cv_accuracy', cv_scores_xgb.mean())\n", + "\n", + " # Fit model on full training data\n", + " model.fit(X_train, y_train)\n", + " y_val_pred_xgb = model.predict(X_val)\n", + "\n", + " # Calculate evaluation metrics\n", + " accuracy_xgb = accuracy_score(y_val, y_val_pred_xgb)\n", + " precision_xgb = precision_score(y_val, y_val_pred_xgb, average='weighted')\n", + " recall_xgb = recall_score(y_val, y_val_pred_xgb, average='weighted')\n", + " f1_xgb = 2 * (precision_xgb * recall_xgb) / (precision_xgb + recall_xgb)\n", + " print(\"\\nAccuracy (XGBoost):\", accuracy_xgb)\n", + " print(\"Precision (XGBoost):\", precision_xgb)\n", + " print(\"Recall (XGBoost):\", recall_xgb)\n", + " print(\"F1 (XGBoost):\", f1_xgb)\n", + "\n", + " # Log evaluation metrics\n", + " mlflow.log_metric('accuracy', accuracy_xgb)\n", + " mlflow.log_metric('precision', precision_xgb)\n", + " mlflow.log_metric('recall', recall_xgb)\n", + " mlflow.log_metric('f1', f1_xgb)\n", + "\n", + " # Calculate and log precision, recall for each class\n", + " precision_per_class, recall_per_class, _, _ = precision_recall_fscore_support(y_val, y_val_pred_xgb, average=None)\n", + " for i in range(len(recall_per_class)):\n", + " print(f\"Recall for class {i}: {recall_per_class[i]}\")\n", + " mlflow.log_metric(f'recall_class_{i}', recall_per_class[i])\n", + "\n", + " # Log XGBoost model\n", + " mlflow.xgboost.log_model(model, 'model')\n", + "\n", + " # Set tags\n", + " mlflow.set_tag('experiments', 'Arham A.')\n", + " mlflow.set_tag('model_name', 'XGBoost')\n", + " mlflow.set_tag('preprocessing', 'Yes')\n", + "\n", + " return cv_scores_xgb.mean()\n", + "\n", + "study = optuna.create_study(direction='maximize')\n", + "study.optimize(objective, n_trials=15)\n", + "\n", + "print('Number of finished trials:', len(study.trials))\n", + "print('Best trial:')\n", + "trial = study.best_trial\n", + "print(' Value: {:.5f}'.format(trial.value))\n", + "print(' Params: ')\n", + "for key, value in trial.params.items():\n", + " print(' {}: {}'.format(key, value))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[2024-04-26 12:25:15 -0400] [60478] [INFO] Starting gunicorn 21.2.0\n", + "[2024-04-26 12:25:15 -0400] [60478] [INFO] Listening at: http://127.0.0.1:5000 (60478)\n", + "[2024-04-26 12:25:15 -0400] [60478] [INFO] Using worker: sync\n", + "[2024-04-26 12:25:15 -0400] [60479] [INFO] Booting worker with pid: 60479\n", + "[2024-04-26 12:25:15 -0400] [60480] [INFO] Booting worker with pid: 60480\n", + "[2024-04-26 12:25:15 -0400] [60481] [INFO] Booting worker with pid: 60481\n", + "[2024-04-26 12:25:16 -0400] [60482] [INFO] Booting worker with pid: 60482\n", + "^C\n", + "[2024-04-26 14:01:01 -0400] [60478] [INFO] Handling signal: int\n", + "[2024-04-26 14:01:01 -0400] [60482] [INFO] Worker exiting (pid: 60482)\n", + "[2024-04-26 14:01:01 -0400] [60480] [INFO] Worker exiting (pid: 60480)\n", + "[2024-04-26 14:01:01 -0400] [60479] [INFO] Worker exiting (pid: 60479)\n", + "[2024-04-26 14:01:01 -0400] [60481] [INFO] Worker exiting (pid: 60481)\n" + ] + } + ], + "source": [ + "!mlflow ui --backend-store-uri \"sqlite:////Users/arham/Downloads/Projects/03-Experiments/new_mlflow.db\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "DataScience", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}