2 lines (1 with data), 366.6 kB
{"cells":[{"cell_type":"code","execution_count":2,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:02.183007Z","iopub.status.busy":"2024-06-14T17:14:02.182568Z","iopub.status.idle":"2024-06-14T17:14:06.187080Z","shell.execute_reply":"2024-06-14T17:14:06.185673Z","shell.execute_reply.started":"2024-06-14T17:14:02.182972Z"},"trusted":true},"outputs":[],"source":["# Configuration du chemin d'accès\n","import sys\n","import os\n","sys.path.append(os.path.abspath(os.path.join('..', 'src')))\n","\n","# Configuration du chemin d'accès\n","import sys\n","import os\n","sys.path.append(os.path.abspath(os.path.join('..', 'src')))\n","\n","# PANDAS\n","import pandas as pd \n","pd.set_option(\"display.max_rows\", None, \"display.max_columns\", None) \n","\n","# WARNINGS\n","import warnings\n","warnings.filterwarnings('ignore')\n","\n","# NUMPY\n","import numpy as np\n","\n","# STATS\n","import scipy.stats as stats\n","from scipy.stats import norm, skew\n","import scipy as sp\n","from scipy.stats import chi2_contingency\n","\n","# MATPLOTLIB\n","import matplotlib as mlp\n","import matplotlib.pyplot as plt\n","plt.style.use('fivethirtyeight') \n","%matplotlib inline\n","\n","# PANDAS\n","import pandas as pd \n","pd.set_option(\"display.max_rows\", None, \"display.max_columns\", None) \n","\n","# SEABORN\n","import seaborn as sns\n","\n","# SCIKIT-LEARN: MODELES\n","from sklearn.linear_model import LogisticRegression # Régression logistique\n","from sklearn.svm import SVC # Support Vector Classifier\n","from sklearn.ensemble import RandomForestClassifier # Random Forest\n","from sklearn.ensemble import GradientBoostingClassifier # Gradient Boosting\n","from sklearn.ensemble import AdaBoostClassifier # AdaBoost\n","from sklearn.ensemble import BaggingClassifier # Bagging\n","\n","\n","# SCIKIT-LEARN: VALIDATION CROISEE + OPTIMISATION\n","from sklearn.model_selection import train_test_split # Séparer en données train et test\n","from sklearn.model_selection import cross_val_score # Validation croisée pour comparison entre modèles\n","from sklearn.model_selection import validation_curve # Courbe de validation : visulaisr des scores lors du choix d'un hyper-paramètre\n","from sklearn.model_selection import GridSearchCV # Tester plusieurs hyper-paramètres\n","from sklearn.model_selection import learning_curve # Courbe d'apprentissage : visualisation des scores du train et du validation sets en fonction des quanitiés des données\n","from sklearn.impute import SimpleImputer\n","from sklearn.preprocessing import OrdinalEncoder\n","\n"," ## YellowBrick\n","from yellowbrick.model_selection import LearningCurve\n","from yellowbrick.model_selection import ValidationCurve\n","\n","## EVALUATION\n","from sklearn.metrics import accuracy_score\n","from sklearn.metrics import f1_score\n","from sklearn.metrics import confusion_matrix\n","from sklearn.metrics import ConfusionMatrixDisplay\n","from sklearn.metrics import classification_report\n","\n","# SCHIKIT-LEARN: PIPELINE AND TRANSFORMATEURll\n","from sklearn.pipeline import make_pipeline\n","from sklearn.compose import make_column_transformer"]},{"cell_type":"code","execution_count":3,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:06.189932Z","iopub.status.busy":"2024-06-14T17:14:06.189341Z","iopub.status.idle":"2024-06-14T17:14:06.836996Z","shell.execute_reply":"2024-06-14T17:14:06.835598Z","shell.execute_reply.started":"2024-06-14T17:14:06.189891Z"},"trusted":true},"outputs":[],"source":["data = pd.read_csv('/kaggle/input/smoking-drinking-dataset/smoking_driking_dataset_Ver01.csv'\n"," , nrows=100000\n"," )\n","df_smoking_drinking = data.copy()"]},{"cell_type":"code","execution_count":4,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:08.670551Z","iopub.status.busy":"2024-06-14T17:14:08.670120Z","iopub.status.idle":"2024-06-14T17:14:08.678701Z","shell.execute_reply":"2024-06-14T17:14:08.677478Z","shell.execute_reply.started":"2024-06-14T17:14:08.670511Z"},"trusted":true},"outputs":[],"source":["def preprocess_data(data, target):\n"," X = data.drop(columns=[target])\n"," y = data[target]\n"," return X, y\n","\n","def split_data(data, target, test_size=0.2, val_size=0.1):\n"," X, y = preprocess_data(data, target)\n"," X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=test_size + val_size, stratify=y)\n"," X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=test_size / (test_size + val_size), stratify=y_temp)\n"," return X_train, X_val, X_test, y_train, y_val, y_test"]},{"cell_type":"code","execution_count":5,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:11.727056Z","iopub.status.busy":"2024-06-14T17:14:11.726604Z","iopub.status.idle":"2024-06-14T17:14:11.859708Z","shell.execute_reply":"2024-06-14T17:14:11.858423Z","shell.execute_reply.started":"2024-06-14T17:14:11.727026Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["X_train shape: (69999, 23)\n","X_val shape: (10000, 23)\n","X_test shape: (20001, 23)\n"]}],"source":["X_train, X_val, X_test, y_train, y_val, y_test = split_data(df_smoking_drinking, 'SMK_stat_type_cd')\n","\n","print('X_train shape:', X_train.shape)\n","print('X_val shape:', X_val.shape)\n","print('X_test shape:', X_test.shape)"]},{"attachments":{},"cell_type":"markdown","metadata":{},"source":["# Séparation variables continue/catégorielles"]},{"cell_type":"code","execution_count":6,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:14.433968Z","iopub.status.busy":"2024-06-14T17:14:14.433497Z","iopub.status.idle":"2024-06-14T17:14:14.454777Z","shell.execute_reply":"2024-06-14T17:14:14.453533Z","shell.execute_reply.started":"2024-06-14T17:14:14.433932Z"},"trusted":true},"outputs":[],"source":["# Normalisation des variables continues\n","from sklearn.preprocessing import StandardScaler\n","\n","cont_features = df_smoking_drinking.select_dtypes('float64').columns\n","cont_features = cont_features.drop('SMK_stat_type_cd')\n","\n","cat_features = df_smoking_drinking.select_dtypes(include = ['int64', 'object']).columns\n","cat_features = cat_features.drop(['DRK_YN'])"]},{"cell_type":"code","execution_count":7,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:16.513598Z","iopub.status.busy":"2024-06-14T17:14:16.513171Z","iopub.status.idle":"2024-06-14T17:14:16.522450Z","shell.execute_reply":"2024-06-14T17:14:16.521146Z","shell.execute_reply.started":"2024-06-14T17:14:16.513564Z"},"trusted":true},"outputs":[],"source":["def create_model_pipeline(cat_features, cont_features, model_class, **model_params):\n"," # Créer les pipelines pour les caractéristiques catégorielles et numériques\n"," categorical_pipeline = make_pipeline(\n"," SimpleImputer(strategy='most_frequent'),\n"," OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1) # Gérer les catégories inconnues\n"," )\n","\n"," numeric_pipeline = make_pipeline(\n"," SimpleImputer(),\n"," StandardScaler()\n"," )\n","\n"," # Créer le préprocesseur\n"," preprocessor_robust = make_column_transformer(\n"," (categorical_pipeline, cat_features),\n"," (numeric_pipeline, cont_features)\n"," )\n","\n"," # Créer le pipeline avec le modèle spécifié et ses paramètres\n"," model_pipeline = make_pipeline(preprocessor_robust, model_class(**model_params))\n"," return model_pipeline\n"]},{"cell_type":"code","execution_count":8,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:20.653547Z","iopub.status.busy":"2024-06-14T17:14:20.653080Z","iopub.status.idle":"2024-06-14T17:14:20.665796Z","shell.execute_reply":"2024-06-14T17:14:20.664301Z","shell.execute_reply.started":"2024-06-14T17:14:20.653507Z"},"trusted":true},"outputs":[],"source":["def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, model_name):\n"," print('\\n\\n')\n"," print(f\"--- Evaluation du modèle : {model_name} ---\")\n","\n"," # Entraînement du modèle\n"," model.fit(X_train, y_train)\n","\n"," # Prédiction sur le jeu de test\n"," y_test_pred = model.predict(X_test)\n","\n"," # Évaluation du modèle\n"," accuracy = accuracy_score(y_test, y_test_pred)\n"," f1 = f1_score(y_test, y_test_pred, average='weighted')\n"," print('Accuracy:', accuracy)\n"," print('F1:', f1)\n","\n"," # Matrice de confusion\n"," cm = confusion_matrix(y_test, y_test_pred)\n"," disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n"," fig, ax = plt.subplots(figsize=(10, 10))\n"," plt.title(f\"Confusion Matrix for {model_name}\") # Add model name to the title\n"," disp.plot(ax=ax, values_format='d') # Utiliser le format 'd' pour afficher les nombres entiers\n","\n","\n"," # Rapport de classification\n"," print(classification_report(y_test, y_test_pred))\n","\n"," # Score du modèle\n"," score = model.score(X_test, y_test)\n"," print('Score :', score)\n"," \n"," return accuracy, f1, score"]},{"cell_type":"code","execution_count":9,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:32.251994Z","iopub.status.busy":"2024-06-14T17:14:32.251539Z","iopub.status.idle":"2024-06-14T17:14:32.261867Z","shell.execute_reply":"2024-06-14T17:14:32.260385Z","shell.execute_reply.started":"2024-06-14T17:14:32.251961Z"},"trusted":true},"outputs":[],"source":["import numpy as np\n","import matplotlib.pyplot as plt\n","from yellowbrick.model_selection import LearningCurve\n","\n","def plot_learning_curve(pipeline, X_train, y_train, model_name=\"Model\", cv=3, scoring='accuracy'):\n"," # Extract the last estimator from the pipeline\n"," model_step_name = list(pipeline.named_steps.keys())[-1]\n"," model = pipeline.named_steps[model_step_name]\n"," \n"," plt.figure()\n"," visualizer = LearningCurve(\n"," model,\n"," cv=cv,\n"," scoring=scoring,\n"," train_sizes=np.linspace(0.1, 1.0, 10),\n"," n_jobs=-1\n"," )\n"," visualizer.fit(pipeline[:-1].fit_transform(X_train, y_train), y_train)\n"," visualizer.finalize()\n"," visualizer.show(title=f\"Learning Curve for {model_name}\")\n"," plt.show()\n"]},{"cell_type":"code","execution_count":10,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T17:14:34.607948Z","iopub.status.busy":"2024-06-14T17:14:34.607487Z","iopub.status.idle":"2024-06-14T17:14:34.617990Z","shell.execute_reply":"2024-06-14T17:14:34.616292Z","shell.execute_reply.started":"2024-06-14T17:14:34.607911Z"},"trusted":true},"outputs":[],"source":["import numpy as np\n","import matplotlib.pyplot as plt\n","from yellowbrick.model_selection import ValidationCurve\n","\n","def plot_validation_curve(pipeline, X_train, y_train, param_name, param_range, model_name=\"Model\", cv=3, scoring='accuracy'):\n"," # Extract the last estimator from the pipeline\n"," model_step_name = list(pipeline.named_steps.keys())[-1]\n"," model = pipeline.named_steps[model_step_name]\n"," \n"," plt.figure()\n"," visualizer = ValidationCurve(\n"," model, param_name=param_name, param_range=param_range,\n"," cv=cv, scoring=scoring, n_jobs=-1\n"," )\n"," visualizer.fit(pipeline[:-1].fit_transform(X_train, y_train), y_train)\n"," visualizer.finalize()\n"," visualizer.show(title=f\"Validation Curve for {model_name} with parameter {param_name}\")\n"," plt.show()\n"]},{"attachments":{},"cell_type":"markdown","metadata":{},"source":["## Apprentissage par ensemble - Pipeline"]},{"cell_type":"code","execution_count":27,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T18:08:00.045955Z","iopub.status.busy":"2024-06-14T18:08:00.045512Z","iopub.status.idle":"2024-06-14T18:08:00.053062Z","shell.execute_reply":"2024-06-14T18:08:00.051706Z","shell.execute_reply.started":"2024-06-14T18:08:00.045921Z"},"trusted":true},"outputs":[],"source":["# Création du pipeline RandomForest\n","rf_pipeline = create_model_pipeline(\n"," cat_features, cont_features, RandomForestClassifier, random_state=42, class_weight='balanced')\n","\n","# Création du pipeline GradientBoosting\n","gb_pipeline = create_model_pipeline(\n"," cat_features, cont_features, GradientBoostingClassifier, n_estimators=300, learning_rate=0.1, max_depth=3,random_state=42)\n"]},{"cell_type":"code","execution_count":13,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T09:41:58.379841Z","iopub.status.busy":"2024-06-14T09:41:58.379519Z","iopub.status.idle":"2024-06-14T09:45:22.175731Z","shell.execute_reply":"2024-06-14T09:45:22.174757Z","shell.execute_reply.started":"2024-06-14T09:41:58.379816Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","\n","\n","--- Evaluation du modèle : RandomForest ---\n","Accuracy: 0.6785660716964151\n","F1: 0.6997764771537723\n"," precision recall f1-score support\n","\n"," 1.0 0.94 0.73 0.82 12112\n"," 2.0 0.42 0.58 0.49 3551\n"," 3.0 0.48 0.62 0.54 4338\n","\n"," accuracy 0.68 20001\n"," macro avg 0.61 0.64 0.61 20001\n","weighted avg 0.75 0.68 0.70 20001\n","\n","Score : 0.6785660716964151\n","\n","\n","\n","--- Evaluation du modèle : GradientBoosting ---\n","Accuracy: 0.7000649967501625\n","F1: 0.7012485169618449\n"," precision recall f1-score support\n","\n"," 1.0 0.84 0.83 0.83 12112\n"," 2.0 0.45 0.42 0.43 3551\n"," 3.0 0.52 0.58 0.55 4338\n","\n"," accuracy 0.70 20001\n"," macro avg 0.60 0.61 0.61 20001\n","weighted avg 0.70 0.70 0.70 20001\n","\n","Score : 0.7000649967501625\n","CPU times: user 3min 22s, sys: 65.8 ms, total: 3min 23s\n","Wall time: 3min 23s\n"]},{"data":{"text/plain":["(0.7000649967501625, 0.7012485169618449, 0.7000649967501625)"]},"execution_count":13,"metadata":{},"output_type":"execute_result"},{"data":{"image/png":"","text/plain":["<Figure size 1000x1000 with 2 Axes>"]},"metadata":{},"output_type":"display_data"},{"data":{"image/png":"","text/plain":["<Figure size 1000x1000 with 2 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["%%time\n","train_and_evaluate_model(rf_pipeline, X_train, y_train, X_test, y_test, model_name='RandomForest')\n","train_and_evaluate_model(gb_pipeline, X_train, y_train, X_test, y_test, model_name='GradientBoosting')"]},{"cell_type":"code","execution_count":17,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T09:46:54.996725Z","iopub.status.busy":"2024-06-14T09:46:54.995604Z","iopub.status.idle":"2024-06-14T10:06:05.459503Z","shell.execute_reply":"2024-06-14T10:06:05.458405Z","shell.execute_reply.started":"2024-06-14T09:46:54.996690Z"},"trusted":true},"outputs":[{"data":{"image/png":"","text/plain":["<Figure size 800x550 with 1 Axes>"]},"metadata":{},"output_type":"display_data"},{"data":{"image/png":"","text/plain":["<Figure size 800x550 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["# Affichage de la courbe d'apprentissage\n","plot_learning_curve(rf_pipeline, X_train, y_train, model_name=\"RandomForest\")\n","plot_learning_curve(gb_pipeline, X_train, y_train, model_name=\"GradientBoosting\")\n"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T10:06:05.462494Z","iopub.status.busy":"2024-06-14T10:06:05.462050Z"},"trusted":true},"outputs":[{"data":{"image/png":"","text/plain":["<Figure size 800x550 with 1 Axes>"]},"metadata":{},"output_type":"display_data"}],"source":["# Affichage de la courbe de validation pour le paramètre 'n_estimators'\n","param_range_n_estimators = np.arange(50, 1501, 50) # Valeurs pour n_estimators\n","plot_validation_curve(rf_pipeline, X_train, y_train, param_name='n_estimators', param_range=param_range_n_estimators, model_name=\"RandomForest\")"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T12:46:11.686495Z","iopub.status.busy":"2024-06-14T12:46:11.685755Z"},"trusted":true},"outputs":[],"source":["# Affichage de la courbe de validation pour le paramètre 'n_estimators'\n","param_range_n_estimators = np.arange(50, 1501, 50) # Valeurs pour n_estimators\n","plot_validation_curve(gb_pipeline, X_train, y_train, param_name='n_estimators', param_range=param_range_n_estimators, model_name=\"GradientBoosting\")\n"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["import optuna\n","from sklearn.ensemble import GradientBoostingClassifier\n","from sklearn.model_selection import cross_val_score\n","from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report\n","import matplotlib.pyplot as plt\n","\n","def hyperoptimize_gb_model(pipeline, X_train, y_train, X_test, y_test, n_trials=5, timeout=600):\n"," # Définissez la fonction objective pour Optuna\n"," def objective(trial):\n"," n_estimators = trial.suggest_int('gradientboostingclassifier__n_estimators',5, 2000)\n"," learning_rate = trial.suggest_loguniform('gradientboostingclassifier__learning_rate', 0.01, 0.3)\n"," max_depth = trial.suggest_int('gradientboostingclassifier__max_depth', 2, 32)\n"," min_samples_split = trial.suggest_int('gradientboostingclassifier__min_samples_split', 2, 10)\n"," min_samples_leaf = trial.suggest_int('gradientboostingclassifier__min_samples_leaf', 2, 10)\n"," \n"," pipeline.set_params(\n"," gradientboostingclassifier__n_estimators=n_estimators,\n"," gradientboostingclassifier__learning_rate=learning_rate,\n"," gradientboostingclassifier__max_depth=max_depth,\n"," gradientboostingclassifier__min_samples_split=min_samples_split,\n"," gradientboostingclassifier__min_samples_leaf=min_samples_leaf\n"," )\n","\n"," # Apply the preprocessing steps except the last step\n"," X_train_transformed = pipeline[:-1].fit_transform(X_train, y_train)\n"," score = cross_val_score(pipeline.named_steps['gradientboostingclassifier'], X_train_transformed, y_train, cv=3, scoring='accuracy').mean()\n"," return score\n","\n"," # Créez un objet study et optimisez la fonction objective\n"," study = optuna.create_study(direction='maximize')\n"," study.optimize(objective, n_trials=n_trials, timeout=timeout)\n","\n"," # Affichez les meilleurs hyperparamètres\n"," print('Best parameters:', study.best_params)\n","\n"," # Entraînez le modèle avec les meilleurs hyperparamètres\n"," pipeline.set_params(**study.best_params)\n"," pipeline.fit(X_train, y_train)\n","\n"," # Prédiction sur le jeu de test\n"," y_test_pred = pipeline.predict(X_test)\n","\n"," # Évaluation du modèle\n"," accuracy = accuracy_score(y_test, y_test_pred)\n"," f1 = f1_score(y_test, y_test_pred, average='weighted')\n"," print('Accuracy on test set:', accuracy)\n"," print('F1 score on test set:', f1)\n","\n"," # Matrice de confusion\n"," cm = confusion_matrix(y_test, y_test_pred)\n"," disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n"," fig, ax = plt.subplots(figsize=(10, 10))\n"," disp.plot(ax=ax, values_format='d') # Utiliser le format 'd' pour afficher les nombres entiers\n"," plt.title(\"Confusion Matrix for GradientBoostingClassifier\")\n"," plt.show()\n","\n","\n"," # Rapport de classification\n"," print(classification_report(y_test, y_test_pred))\n","\n"," return study.best_params, accuracy, f1\n"]},{"cell_type":"code","execution_count":null,"metadata":{"trusted":true},"outputs":[],"source":["%%time\n","# Hyper-optimisation pour GradientBoosting\n","best_params_gb, accuracy_gb, f1_gb = hyperoptimize_gb_model(\n"," gb_pipeline, X_train, y_train, X_test, y_test, n_trials=5, timeout=600)"]},{"cell_type":"code","execution_count":24,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T18:07:19.743354Z","iopub.status.busy":"2024-06-14T18:07:19.742210Z","iopub.status.idle":"2024-06-14T18:07:20.147821Z","shell.execute_reply":"2024-06-14T18:07:20.146580Z","shell.execute_reply.started":"2024-06-14T18:07:19.743312Z"},"trusted":true},"outputs":[],"source":["import optuna\n","from sklearn.ensemble import RandomForestClassifier\n","from sklearn.model_selection import cross_val_score\n","from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report\n","import matplotlib.pyplot as plt\n","\n","def hyperoptimize_rf_model(pipeline, X_train, y_train, X_test, y_test, n_trials=5, timeout=600):\n"," # Définissez la fonction objective pour Optuna\n"," def objective(trial):\n"," n_estimators = trial.suggest_int('randomforestclassifier__n_estimators', 50, 2000)\n"," max_depth = trial.suggest_int('randomforestclassifier__max_depth', 2, 32)\n"," min_samples_split = trial.suggest_int('randomforestclassifier__min_samples_split', 2, 15)\n"," min_samples_leaf = trial.suggest_int('randomforestclassifier__min_samples_leaf', 3, 10)\n"," \n"," pipeline.set_params(\n"," randomforestclassifier__n_estimators=n_estimators,\n"," randomforestclassifier__max_depth=max_depth,\n"," randomforestclassifier__min_samples_split=min_samples_split,\n"," randomforestclassifier__min_samples_leaf=min_samples_leaf\n"," )\n","\n"," # Apply the preprocessing steps except the last step\n"," X_train_transformed = pipeline[:-1].fit_transform(X_train, y_train)\n"," score = cross_val_score(pipeline.named_steps['randomforestclassifier'], X_train_transformed, y_train, cv=3, scoring='accuracy').mean()\n"," return score\n","\n"," # Créez un objet study et optimisez la fonction objective\n"," study = optuna.create_study(direction='maximize')\n"," study.optimize(objective, n_trials=n_trials, timeout=timeout)\n","\n"," # Affichez les meilleurs hyperparamètres\n"," print('Best parameters:', study.best_params)\n","\n"," # Entraînez le modèle avec les meilleurs hyperparamètres\n"," pipeline.set_params(**study.best_params)\n"," pipeline.fit(X_train, y_train)\n","\n"," # Prédiction sur le jeu de test\n"," y_test_pred = pipeline.predict(X_test)\n","\n"," # Évaluation du modèle\n"," accuracy = accuracy_score(y_test, y_test_pred)\n"," f1 = f1_score(y_test, y_test_pred, average='weighted')\n"," print('Accuracy on test set:', accuracy)\n"," print('F1 score on test set:', f1)\n","\n"," # Matrice de confusion\n"," cm = confusion_matrix(y_test, y_test_pred)\n"," disp = ConfusionMatrixDisplay(confusion_matrix=cm)\n"," fig, ax = plt.subplots(figsize=(10, 10))\n"," disp.plot(ax=ax, values_format='d') # Utiliser le format 'd' pour afficher les nombres entiers\n"," plt.title(\"Confusion Matrix for RandomForestClassifier\")\n"," plt.show()\n","\n"," # Rapport de classification\n"," print(classification_report(y_test, y_test_pred))\n","\n"," return study.best_params, accuracy, f1\n"]},{"cell_type":"code","execution_count":28,"metadata":{"collapsed":true,"execution":{"iopub.execute_input":"2024-06-14T18:08:13.657862Z","iopub.status.busy":"2024-06-14T18:08:13.657426Z","iopub.status.idle":"2024-06-14T18:14:14.141088Z","shell.execute_reply":"2024-06-14T18:14:14.139640Z","shell.execute_reply.started":"2024-06-14T18:08:13.657828Z"},"jupyter":{"outputs_hidden":true},"trusted":true},"outputs":[{"name":"stderr","output_type":"stream","text":["[I 2024-06-14 18:08:13,660] A new study created in memory with name: no-name-3ebd0091-70f3-4ad3-bfe2-ca7559ea5b30\n","[W 2024-06-14 18:14:13,339] Trial 0 failed with parameters: {'randomforestclassifier__n_estimators': 1171, 'randomforestclassifier__max_depth': 31, 'randomforestclassifier__min_samples_split': 11, 'randomforestclassifier__min_samples_leaf': 7} because of the following error: KeyboardInterrupt().\n","Traceback (most recent call last):\n"," File \"/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py\", line 196, in _run_trial\n"," value_or_values = func(trial)\n"," File \"/tmp/ipykernel_33/1879812907.py\", line 24, in objective\n"," score = cross_val_score(pipeline.named_steps['randomforestclassifier'], X_train_transformed, y_train, cv=3, scoring='accuracy').mean()\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py\", line 515, in cross_val_score\n"," cv_results = cross_validate(\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py\", line 266, in cross_validate\n"," results = parallel(\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py\", line 63, in __call__\n"," return super().__call__(iterable_with_config)\n"," File \"/opt/conda/lib/python3.10/site-packages/joblib/parallel.py\", line 1918, in __call__\n"," return output if self.return_generator else list(output)\n"," File \"/opt/conda/lib/python3.10/site-packages/joblib/parallel.py\", line 1847, in _get_sequential_output\n"," res = func(*args, **kwargs)\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py\", line 123, in __call__\n"," return self.function(*args, **kwargs)\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py\", line 686, in _fit_and_score\n"," estimator.fit(X_train, y_train, **fit_params)\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_forest.py\", line 473, in fit\n"," trees = Parallel(\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py\", line 63, in __call__\n"," return super().__call__(iterable_with_config)\n"," File \"/opt/conda/lib/python3.10/site-packages/joblib/parallel.py\", line 1918, in __call__\n"," return output if self.return_generator else list(output)\n"," File \"/opt/conda/lib/python3.10/site-packages/joblib/parallel.py\", line 1847, in _get_sequential_output\n"," res = func(*args, **kwargs)\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py\", line 123, in __call__\n"," return self.function(*args, **kwargs)\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_forest.py\", line 184, in _parallel_build_trees\n"," tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/tree/_classes.py\", line 889, in fit\n"," super().fit(\n"," File \"/opt/conda/lib/python3.10/site-packages/sklearn/tree/_classes.py\", line 379, in fit\n"," builder.build(self.tree_, X, y, sample_weight)\n","KeyboardInterrupt\n","[W 2024-06-14 18:14:13,344] Trial 0 failed with value None.\n"]},{"ename":"KeyboardInterrupt","evalue":"","output_type":"error","traceback":["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m","\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)","File \u001b[0;32m<timed exec>:2\u001b[0m\n","Cell \u001b[0;32mIn[24], line 29\u001b[0m, in \u001b[0;36mhyperoptimize_rf_model\u001b[0;34m(pipeline, X_train, y_train, X_test, y_test, n_trials, timeout)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;66;03m# Créez un objet study et optimisez la fonction objective\u001b[39;00m\n\u001b[1;32m 28\u001b[0m study \u001b[38;5;241m=\u001b[39m optuna\u001b[38;5;241m.\u001b[39mcreate_study(direction\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmaximize\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 29\u001b[0m \u001b[43mstudy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptimize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobjective\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_trials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_trials\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# Affichez les meilleurs hyperparamètres\u001b[39;00m\n\u001b[1;32m 32\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mBest parameters:\u001b[39m\u001b[38;5;124m'\u001b[39m, study\u001b[38;5;241m.\u001b[39mbest_params)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/optuna/study/study.py:451\u001b[0m, in \u001b[0;36mStudy.optimize\u001b[0;34m(self, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m 348\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21moptimize\u001b[39m(\n\u001b[1;32m 349\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 350\u001b[0m func: ObjectiveFuncType,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 357\u001b[0m show_progress_bar: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 358\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Optimize an objective function.\u001b[39;00m\n\u001b[1;32m 360\u001b[0m \n\u001b[1;32m 361\u001b[0m \u001b[38;5;124;03m Optimization is done by choosing a suitable set of hyperparameter values from a given\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 449\u001b[0m \u001b[38;5;124;03m If nested invocation of this method occurs.\u001b[39;00m\n\u001b[1;32m 450\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 451\u001b[0m \u001b[43m_optimize\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 452\u001b[0m \u001b[43m \u001b[49m\u001b[43mstudy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 453\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 454\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_trials\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_trials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 455\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 456\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_jobs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_jobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 457\u001b[0m \u001b[43m \u001b[49m\u001b[43mcatch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mtuple\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcatch\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43misinstance\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcatch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mIterable\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mcatch\u001b[49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 458\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 459\u001b[0m \u001b[43m \u001b[49m\u001b[43mgc_after_trial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgc_after_trial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 460\u001b[0m \u001b[43m \u001b[49m\u001b[43mshow_progress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mshow_progress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 461\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:62\u001b[0m, in \u001b[0;36m_optimize\u001b[0;34m(study, func, n_trials, timeout, n_jobs, catch, callbacks, gc_after_trial, show_progress_bar)\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m n_jobs \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m---> 62\u001b[0m \u001b[43m_optimize_sequential\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 63\u001b[0m \u001b[43m \u001b[49m\u001b[43mstudy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 64\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 65\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_trials\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 66\u001b[0m \u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 67\u001b[0m \u001b[43m \u001b[49m\u001b[43mcatch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 68\u001b[0m \u001b[43m \u001b[49m\u001b[43mcallbacks\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 69\u001b[0m \u001b[43m \u001b[49m\u001b[43mgc_after_trial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 70\u001b[0m \u001b[43m \u001b[49m\u001b[43mreseed_sampler_rng\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 71\u001b[0m \u001b[43m \u001b[49m\u001b[43mtime_start\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 72\u001b[0m \u001b[43m \u001b[49m\u001b[43mprogress_bar\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mprogress_bar\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 73\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 74\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 75\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m n_jobs \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m:\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:159\u001b[0m, in \u001b[0;36m_optimize_sequential\u001b[0;34m(study, func, n_trials, timeout, catch, callbacks, gc_after_trial, reseed_sampler_rng, time_start, progress_bar)\u001b[0m\n\u001b[1;32m 156\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 158\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 159\u001b[0m frozen_trial \u001b[38;5;241m=\u001b[39m \u001b[43m_run_trial\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstudy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcatch\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 160\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 161\u001b[0m \u001b[38;5;66;03m# The following line mitigates memory problems that can be occurred in some\u001b[39;00m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;66;03m# environments (e.g., services that use computing containers such as GitHub Actions).\u001b[39;00m\n\u001b[1;32m 163\u001b[0m \u001b[38;5;66;03m# Please refer to the following PR for further details:\u001b[39;00m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;66;03m# https://github.com/optuna/optuna/pull/325.\u001b[39;00m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m gc_after_trial:\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:247\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m 240\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mShould not reach.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 242\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 243\u001b[0m frozen_trial\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;241m==\u001b[39m TrialState\u001b[38;5;241m.\u001b[39mFAIL\n\u001b[1;32m 244\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m func_err \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 245\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(func_err, catch)\n\u001b[1;32m 246\u001b[0m ):\n\u001b[0;32m--> 247\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m func_err\n\u001b[1;32m 248\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m frozen_trial\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/optuna/study/_optimize.py:196\u001b[0m, in \u001b[0;36m_run_trial\u001b[0;34m(study, func, catch)\u001b[0m\n\u001b[1;32m 194\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m get_heartbeat_thread(trial\u001b[38;5;241m.\u001b[39m_trial_id, study\u001b[38;5;241m.\u001b[39m_storage):\n\u001b[1;32m 195\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 196\u001b[0m value_or_values \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrial\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m exceptions\u001b[38;5;241m.\u001b[39mTrialPruned \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 198\u001b[0m \u001b[38;5;66;03m# TODO(mamu): Handle multi-objective cases.\u001b[39;00m\n\u001b[1;32m 199\u001b[0m state \u001b[38;5;241m=\u001b[39m TrialState\u001b[38;5;241m.\u001b[39mPRUNED\n","Cell \u001b[0;32mIn[24], line 24\u001b[0m, in \u001b[0;36mhyperoptimize_rf_model.<locals>.objective\u001b[0;34m(trial)\u001b[0m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;66;03m# Apply the preprocessing steps except the last step\u001b[39;00m\n\u001b[1;32m 23\u001b[0m X_train_transformed \u001b[38;5;241m=\u001b[39m pipeline[:\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\u001b[38;5;241m.\u001b[39mfit_transform(X_train, y_train)\n\u001b[0;32m---> 24\u001b[0m score \u001b[38;5;241m=\u001b[39m \u001b[43mcross_val_score\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mnamed_steps\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrandomforestclassifier\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX_train_transformed\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscoring\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43maccuracy\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mmean()\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m score\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:515\u001b[0m, in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, error_score)\u001b[0m\n\u001b[1;32m 512\u001b[0m \u001b[38;5;66;03m# To ensure multimetric format is not supported\u001b[39;00m\n\u001b[1;32m 513\u001b[0m scorer \u001b[38;5;241m=\u001b[39m check_scoring(estimator, scoring\u001b[38;5;241m=\u001b[39mscoring)\n\u001b[0;32m--> 515\u001b[0m cv_results \u001b[38;5;241m=\u001b[39m \u001b[43mcross_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 516\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 517\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 518\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 519\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 520\u001b[0m \u001b[43m \u001b[49m\u001b[43mscoring\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mscore\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mscorer\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 521\u001b[0m \u001b[43m \u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcv\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 522\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_jobs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_jobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 523\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 524\u001b[0m \u001b[43m \u001b[49m\u001b[43mfit_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfit_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 525\u001b[0m \u001b[43m \u001b[49m\u001b[43mpre_dispatch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpre_dispatch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 526\u001b[0m \u001b[43m \u001b[49m\u001b[43merror_score\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merror_score\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 527\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 528\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cv_results[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_score\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:266\u001b[0m, in \u001b[0;36mcross_validate\u001b[0;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch, return_train_score, return_estimator, error_score)\u001b[0m\n\u001b[1;32m 263\u001b[0m \u001b[38;5;66;03m# We clone the estimator to make sure that all the folds are\u001b[39;00m\n\u001b[1;32m 264\u001b[0m \u001b[38;5;66;03m# independent, and that it is pickle-able.\u001b[39;00m\n\u001b[1;32m 265\u001b[0m parallel \u001b[38;5;241m=\u001b[39m Parallel(n_jobs\u001b[38;5;241m=\u001b[39mn_jobs, verbose\u001b[38;5;241m=\u001b[39mverbose, pre_dispatch\u001b[38;5;241m=\u001b[39mpre_dispatch)\n\u001b[0;32m--> 266\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mparallel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 267\u001b[0m \u001b[43m \u001b[49m\u001b[43mdelayed\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_fit_and_score\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 268\u001b[0m \u001b[43m \u001b[49m\u001b[43mclone\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 269\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 270\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 271\u001b[0m \u001b[43m \u001b[49m\u001b[43mscorers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 272\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 273\u001b[0m \u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 274\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 275\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 276\u001b[0m \u001b[43m \u001b[49m\u001b[43mfit_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 277\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_train_score\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_train_score\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 278\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_times\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 279\u001b[0m \u001b[43m \u001b[49m\u001b[43mreturn_estimator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mreturn_estimator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 280\u001b[0m \u001b[43m \u001b[49m\u001b[43merror_score\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merror_score\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 281\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 282\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mgroups\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 283\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 285\u001b[0m _warn_or_raise_about_fit_failures(results, error_score)\n\u001b[1;32m 287\u001b[0m \u001b[38;5;66;03m# For callabe scoring, the return type is only know after calling. If the\u001b[39;00m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;66;03m# return type is a dictionary, the error scores can now be inserted with\u001b[39;00m\n\u001b[1;32m 289\u001b[0m \u001b[38;5;66;03m# the correct key.\u001b[39;00m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py:63\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 58\u001b[0m config \u001b[38;5;241m=\u001b[39m get_config()\n\u001b[1;32m 59\u001b[0m iterable_with_config \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 60\u001b[0m (_with_config(delayed_func, config), args, kwargs)\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m delayed_func, args, kwargs \u001b[38;5;129;01min\u001b[39;00m iterable\n\u001b[1;32m 62\u001b[0m )\n\u001b[0;32m---> 63\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43miterable_with_config\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/joblib/parallel.py:1918\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1916\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_sequential_output(iterable)\n\u001b[1;32m 1917\u001b[0m \u001b[38;5;28mnext\u001b[39m(output)\n\u001b[0;32m-> 1918\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_generator \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1920\u001b[0m \u001b[38;5;66;03m# Let's create an ID that uniquely identifies the current call. If the\u001b[39;00m\n\u001b[1;32m 1921\u001b[0m \u001b[38;5;66;03m# call is interrupted early and that the same instance is immediately\u001b[39;00m\n\u001b[1;32m 1922\u001b[0m \u001b[38;5;66;03m# re-used, this id will be used to prevent workers that were\u001b[39;00m\n\u001b[1;32m 1923\u001b[0m \u001b[38;5;66;03m# concurrently finalizing a task from the previous call to run the\u001b[39;00m\n\u001b[1;32m 1924\u001b[0m \u001b[38;5;66;03m# callback.\u001b[39;00m\n\u001b[1;32m 1925\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock:\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/joblib/parallel.py:1847\u001b[0m, in \u001b[0;36mParallel._get_sequential_output\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1845\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_dispatched_batches \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 1846\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_dispatched_tasks \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m-> 1847\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1848\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_completed_tasks \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 1849\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_progress()\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py:123\u001b[0m, in \u001b[0;36m_FuncWrapper.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 121\u001b[0m config \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig):\n\u001b[0;32m--> 123\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/model_selection/_validation.py:686\u001b[0m, in \u001b[0;36m_fit_and_score\u001b[0;34m(estimator, X, y, scorer, train, test, verbose, parameters, fit_params, return_train_score, return_parameters, return_n_test_samples, return_times, return_estimator, split_progress, candidate_progress, error_score)\u001b[0m\n\u001b[1;32m 684\u001b[0m estimator\u001b[38;5;241m.\u001b[39mfit(X_train, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\n\u001b[1;32m 685\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 686\u001b[0m \u001b[43mestimator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfit_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 688\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m:\n\u001b[1;32m 689\u001b[0m \u001b[38;5;66;03m# Note fit time as time until error\u001b[39;00m\n\u001b[1;32m 690\u001b[0m fit_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mtime() \u001b[38;5;241m-\u001b[39m start_time\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_forest.py:473\u001b[0m, in \u001b[0;36mBaseForest.fit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 462\u001b[0m trees \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m 463\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_estimator(append\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, random_state\u001b[38;5;241m=\u001b[39mrandom_state)\n\u001b[1;32m 464\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(n_more_estimators)\n\u001b[1;32m 465\u001b[0m ]\n\u001b[1;32m 467\u001b[0m \u001b[38;5;66;03m# Parallel loop: we prefer the threading backend as the Cython code\u001b[39;00m\n\u001b[1;32m 468\u001b[0m \u001b[38;5;66;03m# for fitting the trees is internally releasing the Python GIL\u001b[39;00m\n\u001b[1;32m 469\u001b[0m \u001b[38;5;66;03m# making threading more efficient than multiprocessing in\u001b[39;00m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;66;03m# that case. However, for joblib 0.12+ we respect any\u001b[39;00m\n\u001b[1;32m 471\u001b[0m \u001b[38;5;66;03m# parallel_backend contexts set at a higher level,\u001b[39;00m\n\u001b[1;32m 472\u001b[0m \u001b[38;5;66;03m# since correctness does not rely on using threads.\u001b[39;00m\n\u001b[0;32m--> 473\u001b[0m trees \u001b[38;5;241m=\u001b[39m \u001b[43mParallel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 474\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_jobs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_jobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 475\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 476\u001b[0m \u001b[43m \u001b[49m\u001b[43mprefer\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mthreads\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 477\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 478\u001b[0m \u001b[43m \u001b[49m\u001b[43mdelayed\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_parallel_build_trees\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 479\u001b[0m \u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 480\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbootstrap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 481\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 482\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 483\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 484\u001b[0m \u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 485\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtrees\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 486\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 487\u001b[0m \u001b[43m \u001b[49m\u001b[43mclass_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mclass_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 488\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_samples_bootstrap\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_samples_bootstrap\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 489\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 490\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mt\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43menumerate\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mtrees\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 491\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 493\u001b[0m \u001b[38;5;66;03m# Collect newly grown trees\u001b[39;00m\n\u001b[1;32m 494\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimators_\u001b[38;5;241m.\u001b[39mextend(trees)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py:63\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 58\u001b[0m config \u001b[38;5;241m=\u001b[39m get_config()\n\u001b[1;32m 59\u001b[0m iterable_with_config \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 60\u001b[0m (_with_config(delayed_func, config), args, kwargs)\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m delayed_func, args, kwargs \u001b[38;5;129;01min\u001b[39;00m iterable\n\u001b[1;32m 62\u001b[0m )\n\u001b[0;32m---> 63\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43miterable_with_config\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/joblib/parallel.py:1918\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1916\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_sequential_output(iterable)\n\u001b[1;32m 1917\u001b[0m \u001b[38;5;28mnext\u001b[39m(output)\n\u001b[0;32m-> 1918\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_generator \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1920\u001b[0m \u001b[38;5;66;03m# Let's create an ID that uniquely identifies the current call. If the\u001b[39;00m\n\u001b[1;32m 1921\u001b[0m \u001b[38;5;66;03m# call is interrupted early and that the same instance is immediately\u001b[39;00m\n\u001b[1;32m 1922\u001b[0m \u001b[38;5;66;03m# re-used, this id will be used to prevent workers that were\u001b[39;00m\n\u001b[1;32m 1923\u001b[0m \u001b[38;5;66;03m# concurrently finalizing a task from the previous call to run the\u001b[39;00m\n\u001b[1;32m 1924\u001b[0m \u001b[38;5;66;03m# callback.\u001b[39;00m\n\u001b[1;32m 1925\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock:\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/joblib/parallel.py:1847\u001b[0m, in \u001b[0;36mParallel._get_sequential_output\u001b[0;34m(self, iterable)\u001b[0m\n\u001b[1;32m 1845\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_dispatched_batches \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 1846\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_dispatched_tasks \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m-> 1847\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1848\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_completed_tasks \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[1;32m 1849\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_progress()\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/utils/parallel.py:123\u001b[0m, in \u001b[0;36m_FuncWrapper.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 121\u001b[0m config \u001b[38;5;241m=\u001b[39m {}\n\u001b[1;32m 122\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mconfig):\n\u001b[0;32m--> 123\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunction\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_forest.py:184\u001b[0m, in \u001b[0;36m_parallel_build_trees\u001b[0;34m(tree, bootstrap, X, y, sample_weight, tree_idx, n_trees, verbose, class_weight, n_samples_bootstrap)\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m class_weight \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbalanced_subsample\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 182\u001b[0m curr_sample_weight \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m=\u001b[39m compute_sample_weight(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mbalanced\u001b[39m\u001b[38;5;124m\"\u001b[39m, y, indices\u001b[38;5;241m=\u001b[39mindices)\n\u001b[0;32m--> 184\u001b[0m \u001b[43mtree\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcurr_sample_weight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 185\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 186\u001b[0m tree\u001b[38;5;241m.\u001b[39mfit(X, y, sample_weight\u001b[38;5;241m=\u001b[39msample_weight, check_input\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/tree/_classes.py:889\u001b[0m, in \u001b[0;36mDecisionTreeClassifier.fit\u001b[0;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[1;32m 859\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfit\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, y, sample_weight\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, check_input\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[1;32m 860\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Build a decision tree classifier from the training set (X, y).\u001b[39;00m\n\u001b[1;32m 861\u001b[0m \n\u001b[1;32m 862\u001b[0m \u001b[38;5;124;03m Parameters\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 886\u001b[0m \u001b[38;5;124;03m Fitted estimator.\u001b[39;00m\n\u001b[1;32m 887\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 889\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 890\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 891\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 892\u001b[0m \u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 893\u001b[0m \u001b[43m \u001b[49m\u001b[43mcheck_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcheck_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 894\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 895\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\n","File \u001b[0;32m/opt/conda/lib/python3.10/site-packages/sklearn/tree/_classes.py:379\u001b[0m, in \u001b[0;36mBaseDecisionTree.fit\u001b[0;34m(self, X, y, sample_weight, check_input)\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 369\u001b[0m builder \u001b[38;5;241m=\u001b[39m BestFirstTreeBuilder(\n\u001b[1;32m 370\u001b[0m splitter,\n\u001b[1;32m 371\u001b[0m min_samples_split,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 376\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmin_impurity_decrease,\n\u001b[1;32m 377\u001b[0m )\n\u001b[0;32m--> 379\u001b[0m \u001b[43mbuilder\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbuild\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtree_\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 381\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_outputs_ \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m is_classifier(\u001b[38;5;28mself\u001b[39m):\n\u001b[1;32m 382\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_classes_ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_classes_[\u001b[38;5;241m0\u001b[39m]\n","\u001b[0;31mKeyboardInterrupt\u001b[0m: "]}],"source":["%%time\n","# Hyper-optimisation pour RandomForest\n","best_params_rf, accuracy_rf, f1_rf = hyperoptimize_rf_model(\n"," rf_pipeline, X_train, y_train, X_test, y_test, n_trials=5, timeout=600)"]},{"cell_type":"code","execution_count":33,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T18:16:56.075934Z","iopub.status.busy":"2024-06-14T18:16:56.075396Z","iopub.status.idle":"2024-06-14T18:16:56.082830Z","shell.execute_reply":"2024-06-14T18:16:56.081142Z","shell.execute_reply.started":"2024-06-14T18:16:56.075896Z"},"trusted":true},"outputs":[],"source":["gb_pipeline_2ND = create_model_pipeline(\n"," cat_features, cont_features, GradientBoostingClassifier, n_estimators=3000, learning_rate=0.1, max_depth=5, min_samples_leaf=4,random_state=42)"]},{"cell_type":"code","execution_count":null,"metadata":{"execution":{"iopub.execute_input":"2024-06-14T18:16:57.527130Z","iopub.status.busy":"2024-06-14T18:16:57.526655Z"},"trusted":true},"outputs":[{"name":"stdout","output_type":"stream","text":["\n","\n","\n","--- Evaluation du modèle : GradientBoostingClassifier 2nd ---\n"]}],"source":["train_and_evaluate_model(gb_pipeline_2ND, X_train, y_train, X_test, y_test, model_name='GradientBoostingClassifier 2nd')"]}],"metadata":{"kaggle":{"accelerator":"none","dataSources":[{"datasetId":3679617,"sourceId":6386941,"sourceType":"datasetVersion"}],"dockerImageVersionId":30732,"isGpuEnabled":false,"isInternetEnabled":false,"language":"python","sourceType":"notebook"},"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.10.13"}},"nbformat":4,"nbformat_minor":4}