{ "cells": [ { "cell_type": "markdown", "id": "3eefedc4", "metadata": {}, "source": [ "**MACHINE LEARNING COURSEWORK - REGRESSION**" ] }, { "cell_type": "code", "execution_count": 1, "id": "d8f1f6b6", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from sklearn.ensemble import RandomForestRegressor\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import warnings\n", "import plotly.offline as py\n", "import plotly.graph_objs as go\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score\n", "from sklearn.metrics import r2_score\n", "from sklearn.svm import SVR \n", "from sklearn.linear_model import Lasso\n", "from sklearn.feature_selection import RFE\n", "import xgboost as xgb\n", "from sklearn.metrics import mean_absolute_error\n", "from sklearn.feature_selection import SelectKBest, f_regression\n", "import seaborn as sns\n", "from sklearn.preprocessing import MinMaxScaler\n", "from sklearn.linear_model import SGDRegressor\n", "from sklearn.neural_network import MLPRegressor\n", "import pickle\n", "\n", "warnings.filterwarnings('ignore') #ignore warning messages" ] }, { "cell_type": "code", "execution_count": 2, "id": "b376f248", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ID | \n", "pCR (outcome) | \n", "RelapseFreeSurvival (outcome) | \n", "Age | \n", "ER | \n", "PgR | \n", "HER2 | \n", "TrippleNegative | \n", "ChemoGrade | \n", "Proliferation | \n", "... | \n", "original_glszm_SmallAreaHighGrayLevelEmphasis | \n", "original_glszm_SmallAreaLowGrayLevelEmphasis | \n", "original_glszm_ZoneEntropy | \n", "original_glszm_ZonePercentage | \n", "original_glszm_ZoneVariance | \n", "original_ngtdm_Busyness | \n", "original_ngtdm_Coarseness | \n", "original_ngtdm_Complexity | \n", "original_ngtdm_Contrast | \n", "original_ngtdm_Strength | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "TRG002174 | \n", "1 | \n", "144.0 | \n", "41.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "3 | \n", "3 | \n", "... | \n", "0.517172 | \n", "0.375126 | \n", "3.325332 | \n", "0.002314 | \n", "3880771.500 | \n", "473.464852 | \n", "0.000768 | \n", "0.182615 | \n", "0.030508 | \n", "0.000758 | \n", "
1 | \n", "TRG002178 | \n", "0 | \n", "142.0 | \n", "39.0 | \n", "1 | \n", "1 | \n", "0 | \n", "0 | \n", "3 | \n", "3 | \n", "... | \n", "0.444391 | \n", "0.444391 | \n", "3.032144 | \n", "0.005612 | \n", "2372009.744 | \n", "59.459710 | \n", "0.004383 | \n", "0.032012 | \n", "0.001006 | \n", "0.003685 | \n", "
2 | \n", "TRG002204 | \n", "1 | \n", "135.0 | \n", "31.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "2 | \n", "1 | \n", "... | \n", "0.534549 | \n", "0.534549 | \n", "2.485848 | \n", "0.006752 | \n", "1540027.421 | \n", "33.935384 | \n", "0.007584 | \n", "0.024062 | \n", "0.000529 | \n", "0.006447 | \n", "
3 | \n", "TRG002206 | \n", "0 | \n", "12.0 | \n", "35.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "3 | \n", "3 | \n", "... | \n", "0.506185 | \n", "0.506185 | \n", "2.606255 | \n", "0.003755 | \n", "6936740.794 | \n", "46.859265 | \n", "0.005424 | \n", "0.013707 | \n", "0.000178 | \n", "0.004543 | \n", "
4 | \n", "TRG002210 | \n", "0 | \n", "109.0 | \n", "61.0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "2 | \n", "1 | \n", "... | \n", "0.462282 | \n", "0.462282 | \n", "2.809279 | \n", "0.006521 | \n", "1265399.054 | \n", "39.621023 | \n", "0.006585 | \n", "0.034148 | \n", "0.001083 | \n", "0.005626 | \n", "
5 rows × 120 columns
\n", "\n", " | pCR (outcome) | \n", "RelapseFreeSurvival (outcome) | \n", "Age | \n", "ER | \n", "PgR | \n", "HER2 | \n", "TrippleNegative | \n", "ChemoGrade | \n", "Proliferation | \n", "HistologyType | \n", "... | \n", "original_glszm_SmallAreaHighGrayLevelEmphasis | \n", "original_glszm_SmallAreaLowGrayLevelEmphasis | \n", "original_glszm_ZoneEntropy | \n", "original_glszm_ZonePercentage | \n", "original_glszm_ZoneVariance | \n", "original_ngtdm_Busyness | \n", "original_ngtdm_Coarseness | \n", "original_ngtdm_Complexity | \n", "original_ngtdm_Contrast | \n", "original_ngtdm_Strength | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.00000 | \n", "... | \n", "4.000000e+02 | \n", "4.000000e+02 | \n", "4.000000e+02 | \n", "400.000000 | \n", "4.000000e+02 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "
mean | \n", "12.697500 | \n", "56.000208 | \n", "51.804674 | \n", "0.547500 | \n", "2.902500 | \n", "2.797500 | \n", "2.830000 | \n", "9.875000 | \n", "6.562500 | \n", "8.63250 | \n", "... | \n", "3.957637e-01 | \n", "3.911005e-01 | \n", "2.722189e+00 | \n", "0.003347 | \n", "5.679717e+07 | \n", "178.311246 | \n", "32500.032620 | \n", "0.056935 | \n", "0.005965 | \n", "0.029322 | \n", "
std | \n", "111.107417 | \n", "27.137584 | \n", "10.948522 | \n", "0.498362 | \n", "49.932114 | \n", "49.937068 | \n", "49.935558 | \n", "86.092911 | \n", "70.444284 | \n", "86.20034 | \n", "... | \n", "1.666319e-01 | \n", "1.615922e-01 | \n", "7.648849e-01 | \n", "0.002419 | \n", "7.063846e+08 | \n", "1045.453432 | \n", "177545.921568 | \n", "0.047179 | \n", "0.008379 | \n", "0.115915 | \n", "
min | \n", "0.000000 | \n", "0.000000 | \n", "23.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.00000 | \n", "... | \n", "7.050000e-11 | \n", "7.050000e-11 | \n", "-3.200000e-16 | \n", "0.000008 | \n", "0.000000e+00 | \n", "0.000000 | \n", "0.000248 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
25% | \n", "0.000000 | \n", "38.000000 | \n", "44.516769 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.00000 | \n", "... | \n", "3.199017e-01 | \n", "3.184398e-01 | \n", "2.340783e+00 | \n", "0.001389 | \n", "1.030473e+06 | \n", "18.760570 | \n", "0.001826 | \n", "0.018628 | \n", "0.000310 | \n", "0.001464 | \n", "
50% | \n", "0.000000 | \n", "55.000000 | \n", "51.019507 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.00000 | \n", "... | \n", "4.095627e-01 | \n", "4.054695e-01 | \n", "2.814884e+00 | \n", "0.002944 | \n", "3.277334e+06 | \n", "67.929659 | \n", "0.004383 | \n", "0.047740 | \n", "0.002330 | \n", "0.003276 | \n", "
75% | \n", "0.000000 | \n", "73.000000 | \n", "60.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "3.000000 | \n", "2.000000 | \n", "1.00000 | \n", "... | \n", "5.000049e-01 | \n", "4.956920e-01 | \n", "3.304411e+00 | \n", "0.004798 | \n", "9.079686e+06 | \n", "157.370294 | \n", "0.013769 | \n", "0.085321 | \n", "0.007962 | \n", "0.009479 | \n", "
max | \n", "999.000000 | \n", "144.000000 | \n", "79.603012 | \n", "1.000000 | \n", "999.000000 | \n", "999.000000 | \n", "999.000000 | \n", "999.000000 | \n", "999.000000 | \n", "999.00000 | \n", "... | \n", "8.773779e-01 | \n", "8.571429e-01 | \n", "4.947427e+00 | \n", "0.011301 | \n", "1.390001e+10 | \n", "20764.693790 | \n", "1000000.000000 | \n", "0.285100 | \n", "0.060742 | \n", "1.145601 | \n", "
8 rows × 119 columns
\n", "\n", " | Age | \n", "ER | \n", "PgR | \n", "HER2 | \n", "TrippleNegative | \n", "ChemoGrade | \n", "Proliferation | \n", "HistologyType | \n", "LNStatus | \n", "TumourStage | \n", "
---|---|---|---|---|---|---|---|---|---|---|
count | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "
mean | \n", "51.804674 | \n", "0.547500 | \n", "0.405000 | \n", "0.300000 | \n", "0.332500 | \n", "2.397500 | \n", "1.572500 | \n", "1.147500 | \n", "0.535000 | \n", "2.607500 | \n", "
std | \n", "10.948522 | \n", "0.498362 | \n", "0.491507 | \n", "0.458831 | \n", "0.471699 | \n", "0.500119 | \n", "0.765643 | \n", "0.355048 | \n", "0.499398 | \n", "0.897473 | \n", "
min | \n", "23.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.000000 | \n", "1.000000 | \n", "
25% | \n", "44.516769 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.000000 | \n", "2.000000 | \n", "
50% | \n", "51.019507 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "2.000000 | \n", "
75% | \n", "60.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "3.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.000000 | \n", "3.000000 | \n", "
max | \n", "79.603012 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "3.000000 | \n", "3.000000 | \n", "2.000000 | \n", "1.000000 | \n", "4.000000 | \n", "
\n", " | RelapseFreeSurvival (outcome) | \n", "Age | \n", "ER | \n", "PgR | \n", "HER2 | \n", "TrippleNegative | \n", "ChemoGrade | \n", "Proliferation | \n", "HistologyType | \n", "LNStatus | \n", "... | \n", "original_glszm_SmallAreaHighGrayLevelEmphasis | \n", "original_glszm_SmallAreaLowGrayLevelEmphasis | \n", "original_glszm_ZoneEntropy | \n", "original_glszm_ZonePercentage | \n", "original_glszm_ZoneVariance | \n", "original_ngtdm_Busyness | \n", "original_ngtdm_Coarseness | \n", "original_ngtdm_Complexity | \n", "original_ngtdm_Contrast | \n", "original_ngtdm_Strength | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "... | \n", "4.000000e+02 | \n", "4.000000e+02 | \n", "4.000000e+02 | \n", "400.000000 | \n", "4.000000e+02 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "400.000000 | \n", "
mean | \n", "56.000208 | \n", "51.804674 | \n", "0.547500 | \n", "0.405000 | \n", "0.300000 | \n", "0.332500 | \n", "2.397500 | \n", "1.572500 | \n", "1.147500 | \n", "0.535000 | \n", "... | \n", "3.957637e-01 | \n", "3.911005e-01 | \n", "2.722189e+00 | \n", "0.003347 | \n", "5.679717e+07 | \n", "178.311246 | \n", "32500.032620 | \n", "0.056935 | \n", "0.005965 | \n", "0.029322 | \n", "
std | \n", "27.137584 | \n", "10.948522 | \n", "0.498362 | \n", "0.491507 | \n", "0.458831 | \n", "0.471699 | \n", "0.500119 | \n", "0.765643 | \n", "0.355048 | \n", "0.499398 | \n", "... | \n", "1.666319e-01 | \n", "1.615922e-01 | \n", "7.648849e-01 | \n", "0.002419 | \n", "7.063846e+08 | \n", "1045.453432 | \n", "177545.921568 | \n", "0.047179 | \n", "0.008379 | \n", "0.115915 | \n", "
min | \n", "0.000000 | \n", "23.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.000000 | \n", "... | \n", "7.050000e-11 | \n", "7.050000e-11 | \n", "-3.200000e-16 | \n", "0.000008 | \n", "0.000000e+00 | \n", "0.000000 | \n", "0.000248 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "
25% | \n", "38.000000 | \n", "44.516769 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.000000 | \n", "0.000000 | \n", "... | \n", "3.199017e-01 | \n", "3.184398e-01 | \n", "2.340783e+00 | \n", "0.001389 | \n", "1.030473e+06 | \n", "18.760570 | \n", "0.001826 | \n", "0.018628 | \n", "0.000310 | \n", "0.001464 | \n", "
50% | \n", "55.000000 | \n", "51.019507 | \n", "1.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "... | \n", "4.095627e-01 | \n", "4.054695e-01 | \n", "2.814884e+00 | \n", "0.002944 | \n", "3.277334e+06 | \n", "67.929659 | \n", "0.004383 | \n", "0.047740 | \n", "0.002330 | \n", "0.003276 | \n", "
75% | \n", "73.000000 | \n", "60.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "3.000000 | \n", "2.000000 | \n", "1.000000 | \n", "1.000000 | \n", "... | \n", "5.000049e-01 | \n", "4.956920e-01 | \n", "3.304411e+00 | \n", "0.004798 | \n", "9.079686e+06 | \n", "157.370294 | \n", "0.013769 | \n", "0.085321 | \n", "0.007962 | \n", "0.009479 | \n", "
max | \n", "144.000000 | \n", "79.603012 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1.000000 | \n", "3.000000 | \n", "3.000000 | \n", "2.000000 | \n", "1.000000 | \n", "... | \n", "8.773779e-01 | \n", "8.571429e-01 | \n", "4.947427e+00 | \n", "0.011301 | \n", "1.390001e+10 | \n", "20764.693790 | \n", "1000000.000000 | \n", "0.285100 | \n", "0.060742 | \n", "1.145601 | \n", "
8 rows × 118 columns
\n", "RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor()
\n", " | Age | \n", "original_shape_MinorAxisLength | \n", "original_firstorder_Kurtosis | \n", "original_firstorder_Range | \n", "original_firstorder_RobustMeanAbsoluteDeviation | \n", "original_firstorder_Skewness | \n", "original_firstorder_Variance | \n", "original_glszm_SizeZoneNonUniformity | \n", "
---|---|---|---|---|---|---|---|---|
Age | \n", "1.000000 | \n", "-0.039905 | \n", "-0.237100 | \n", "0.003989 | \n", "0.184021 | \n", "0.046479 | \n", "0.120923 | \n", "-0.005217 | \n", "
original_shape_MinorAxisLength | \n", "-0.039905 | \n", "1.000000 | \n", "0.213189 | \n", "0.566847 | \n", "0.120234 | \n", "0.251225 | \n", "0.222845 | \n", "0.488662 | \n", "
original_firstorder_Kurtosis | \n", "-0.237100 | \n", "0.213189 | \n", "1.000000 | \n", "0.073074 | \n", "-0.528313 | \n", "-0.006455 | \n", "-0.304241 | \n", "0.012039 | \n", "
original_firstorder_Range | \n", "0.003989 | \n", "0.566847 | \n", "0.073074 | \n", "1.000000 | \n", "0.662713 | \n", "0.532942 | \n", "0.784092 | \n", "0.332502 | \n", "
original_firstorder_RobustMeanAbsoluteDeviation | \n", "0.184021 | \n", "0.120234 | \n", "-0.528313 | \n", "0.662713 | \n", "1.000000 | \n", "0.417489 | \n", "0.938349 | \n", "0.107124 | \n", "
original_firstorder_Skewness | \n", "0.046479 | \n", "0.251225 | \n", "-0.006455 | \n", "0.532942 | \n", "0.417489 | \n", "1.000000 | \n", "0.453037 | \n", "0.121859 | \n", "
original_firstorder_Variance | \n", "0.120923 | \n", "0.222845 | \n", "-0.304241 | \n", "0.784092 | \n", "0.938349 | \n", "0.453037 | \n", "1.000000 | \n", "0.128130 | \n", "
original_glszm_SizeZoneNonUniformity | \n", "-0.005217 | \n", "0.488662 | \n", "0.012039 | \n", "0.332502 | \n", "0.107124 | \n", "0.121859 | \n", "0.128130 | \n", "1.000000 | \n", "
GridSearchCV(cv=30, estimator=SVR(),\n", " param_grid={'C': [0.1, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15,\n", " 20, 25],\n", " 'gamma': ['scale', 'auto'],\n", " 'kernel': ['linear', 'rbf']})In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=30, estimator=SVR(),\n", " param_grid={'C': [0.1, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15,\n", " 20, 25],\n", " 'gamma': ['scale', 'auto'],\n", " 'kernel': ['linear', 'rbf']})
SVR()
SVR()
RandomForestRegressor()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
RandomForestRegressor()
GridSearchCV(cv=5, estimator=RandomForestRegressor(), n_jobs=-1,\n", " param_grid={'max_depth': [None, 4, 5, 7, 10, 20],\n", " 'min_samples_leaf': [1, 2, 4, 5, 7, 20],\n", " 'min_samples_split': [2, 5, 5, 7, 10, 20],\n", " 'n_estimators': [100, 200, 300, 400, 500]},\n", " scoring='neg_mean_absolute_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5, estimator=RandomForestRegressor(), n_jobs=-1,\n", " param_grid={'max_depth': [None, 4, 5, 7, 10, 20],\n", " 'min_samples_leaf': [1, 2, 4, 5, 7, 20],\n", " 'min_samples_split': [2, 5, 5, 7, 10, 20],\n", " 'n_estimators': [100, 200, 300, 400, 500]},\n", " scoring='neg_mean_absolute_error')
RandomForestRegressor()
RandomForestRegressor()
GridSearchCV(estimator=MLPRegressor(random_state=42),\n", " param_grid={'alpha': [0.0001, 0.001, 0.01, 0.1],\n", " 'hidden_layer_sizes': [(50,), (100,), (50, 50),\n", " (70, 30)]},\n", " scoring='neg_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(estimator=MLPRegressor(random_state=42),\n", " param_grid={'alpha': [0.0001, 0.001, 0.01, 0.1],\n", " 'hidden_layer_sizes': [(50,), (100,), (50, 50),\n", " (70, 30)]},\n", " scoring='neg_mean_squared_error')
MLPRegressor(random_state=42)
MLPRegressor(random_state=42)
MLPRegressor(alpha=0.01, hidden_layer_sizes=(50, 50), random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MLPRegressor(alpha=0.01, hidden_layer_sizes=(50, 50), random_state=42)
GridSearchCV(cv=5,\n", " estimator=XGBRegressor(base_score=None, booster=None,\n", " callbacks=None, colsample_bylevel=None,\n", " colsample_bynode=None,\n", " colsample_bytree=None, device=None,\n", " early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None,\n", " feature_types=None, gamma=None,\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=None, m...\n", " monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None,\n", " n_jobs=None, num_parallel_tree=None,\n", " random_state=None, ...),\n", " n_jobs=-1,\n", " param_grid={'colsample_bytree': [0.6, 0.8, 1.0],\n", " 'gamma': [0, 0.1, 0.2],\n", " 'learning_rate': [0.1, 0.01, 0.001, 0.0001],\n", " 'max_depth': [2, 3, 5, 7, 9],\n", " 'n_estimators': [100, 200, 300],\n", " 'subsample': [0.6, 0.8, 1.0]},\n", " scoring='neg_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5,\n", " estimator=XGBRegressor(base_score=None, booster=None,\n", " callbacks=None, colsample_bylevel=None,\n", " colsample_bynode=None,\n", " colsample_bytree=None, device=None,\n", " early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None,\n", " feature_types=None, gamma=None,\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=None, m...\n", " monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None,\n", " n_jobs=None, num_parallel_tree=None,\n", " random_state=None, ...),\n", " n_jobs=-1,\n", " param_grid={'colsample_bytree': [0.6, 0.8, 1.0],\n", " 'gamma': [0, 0.1, 0.2],\n", " 'learning_rate': [0.1, 0.01, 0.001, 0.0001],\n", " 'max_depth': [2, 3, 5, 7, 9],\n", " 'n_estimators': [100, 200, 300],\n", " 'subsample': [0.6, 0.8, 1.0]},\n", " scoring='neg_mean_squared_error')
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None, n_jobs=None,\n", " num_parallel_tree=None, random_state=None, ...)
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None, n_jobs=None,\n", " num_parallel_tree=None, random_state=None, ...)
GridSearchCV(cv=3,\n", " estimator=XGBRegressor(base_score=None, booster=None,\n", " callbacks=None, colsample_bylevel=None,\n", " colsample_bynode=None,\n", " colsample_bytree=None, device=None,\n", " early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None,\n", " feature_types=None, gamma=None,\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=None, m...\n", " multi_strategy=None, n_estimators=None,\n", " n_jobs=None, num_parallel_tree=None,\n", " random_state=None, ...),\n", " n_jobs=-1,\n", " param_grid={'colsample_bytree': [0.6, 0.8, 1.0],\n", " 'gamma': [0, 0.1, 0.2],\n", " 'learning_rate': [0.1, 0.01, 0.001],\n", " 'max_depth': [2, 3, 5, 7, 9],\n", " 'n_estimators': [50, 100, 200, 300],\n", " 'reg_alpha': [0, 0.1, 0.5],\n", " 'reg_lambda': [0, 0.1, 0.5],\n", " 'subsample': [0.6, 0.8, 1.0]},\n", " scoring='neg_mean_absolute_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=3,\n", " estimator=XGBRegressor(base_score=None, booster=None,\n", " callbacks=None, colsample_bylevel=None,\n", " colsample_bynode=None,\n", " colsample_bytree=None, device=None,\n", " early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None,\n", " feature_types=None, gamma=None,\n", " grow_policy=None, importance_type=None,\n", " interaction_constraints=None,\n", " learning_rate=None, m...\n", " multi_strategy=None, n_estimators=None,\n", " n_jobs=None, num_parallel_tree=None,\n", " random_state=None, ...),\n", " n_jobs=-1,\n", " param_grid={'colsample_bytree': [0.6, 0.8, 1.0],\n", " 'gamma': [0, 0.1, 0.2],\n", " 'learning_rate': [0.1, 0.01, 0.001],\n", " 'max_depth': [2, 3, 5, 7, 9],\n", " 'n_estimators': [50, 100, 200, 300],\n", " 'reg_alpha': [0, 0.1, 0.5],\n", " 'reg_lambda': [0, 0.1, 0.5],\n", " 'subsample': [0.6, 0.8, 1.0]},\n", " scoring='neg_mean_absolute_error')
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None, n_jobs=None,\n", " num_parallel_tree=None, random_state=None, ...)
XGBRegressor(base_score=None, booster=None, callbacks=None,\n", " colsample_bylevel=None, colsample_bynode=None,\n", " colsample_bytree=None, device=None, early_stopping_rounds=None,\n", " enable_categorical=False, eval_metric=None, feature_types=None,\n", " gamma=None, grow_policy=None, importance_type=None,\n", " interaction_constraints=None, learning_rate=None, max_bin=None,\n", " max_cat_threshold=None, max_cat_to_onehot=None,\n", " max_delta_step=None, max_depth=None, max_leaves=None,\n", " min_child_weight=None, missing=nan, monotone_constraints=None,\n", " multi_strategy=None, n_estimators=None, n_jobs=None,\n", " num_parallel_tree=None, random_state=None, ...)
Lasso(alpha=0.1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Lasso(alpha=0.1)
Lasso(alpha=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Lasso(alpha=1)
GridSearchCV(cv=5, estimator=RandomForestRegressor(), n_jobs=-1,\n", " param_grid={'bootstrap': [True, False],\n", " 'criterion': ['friedman_mse', 'friedman_mse'],\n", " 'max_depth': [None, 5, 10],\n", " 'max_features': ['auto', 'sqrt', 'log2'],\n", " 'min_samples_leaf': [1, 2, 4],\n", " 'min_samples_split': [2, 5, 10],\n", " 'n_estimators': [100, 200, 300]},\n", " scoring='neg_mean_squared_error')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
GridSearchCV(cv=5, estimator=RandomForestRegressor(), n_jobs=-1,\n", " param_grid={'bootstrap': [True, False],\n", " 'criterion': ['friedman_mse', 'friedman_mse'],\n", " 'max_depth': [None, 5, 10],\n", " 'max_features': ['auto', 'sqrt', 'log2'],\n", " 'min_samples_leaf': [1, 2, 4],\n", " 'min_samples_split': [2, 5, 10],\n", " 'n_estimators': [100, 200, 300]},\n", " scoring='neg_mean_squared_error')
RandomForestRegressor()
RandomForestRegressor()