|
src/scpanel/SVMRFECV.py
|
|
"""Recursive feature elimination for feature ranking."""

import math
import numbers
from typing import Callable, Dict, List, Optional

import numpy as np
from joblib import Parallel, effective_n_jobs
from numpy import ndarray
from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier
from sklearn.feature_selection._base import SelectorMixin, _get_feature_importances
from sklearn.metrics import check_scoring
from sklearn.model_selection import check_cv
from sklearn.model_selection._validation import _score
from sklearn.svm._classes import SVC
from sklearn.utils._tags import _safe_tags
from sklearn.utils.deprecation import deprecated
from sklearn.utils.fixes import delayed
from sklearn.utils.metaestimators import _safe_split, if_delegate_has_method
from sklearn.utils.validation import check_is_fitted
|
|
# NOTE: the upstream single-return version of _rfe_single_fit (which only
# returned scores_) was replaced by the variant below, which also returns
# the fold's feature ranking and supports per-fold sample weights.
|
|
def _rfe_single_fit(
    rfe, estimator, X, y, train_idx, val_idx, scorer, sample_weight=None
):
    """Return the step scores and final feature ranking for one CV fold."""
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[val_idx], y[val_idx]

    def step_score(estimator, features):
        # Score the candidate feature subset on the held-out samples.
        return _score(estimator, X_test[:, features], y_test, scorer)

    # Fit once and read both attributes off the fitted RFE; the previous
    # version ran the full elimination twice per fold (once for scores_ and
    # once for ranking_), doubling the cost for identical results.
    if sample_weight is not None:
        fitted = rfe._fit(X_train, y_train, step_score, sample_weight=sample_weight)
    else:
        fitted = rfe._fit(X_train, y_train, step_score)
    return fitted.scores_, fitted.ranking_
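
# A minimal per-fold usage sketch (illustrative only: the synthetic data,
# fold split, and "accuracy" scorer below are assumptions, not part of this
# module):
#
#     import numpy as np
#     from sklearn.datasets import make_classification
#     from sklearn.metrics import check_scoring
#     from sklearn.svm import SVC
#
#     X, y = make_classification(n_samples=40, n_features=8, random_state=0)
#     est = SVC(kernel="linear")
#     rfe = RFE(est, n_features_to_select=2, step=0.2)
#     scorer = check_scoring(est, scoring="accuracy")
#     scores, ranking = _rfe_single_fit(
#         rfe, est, X, y, np.arange(30), np.arange(30, 40), scorer
#     )
#     # scores: one entry per elimination round (plus the final subset);
#     # ranking: rank per feature, 1 = kept to the end.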


class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
    """Feature ranking with recursive feature elimination.

    Given an external estimator that assigns weights to features (e.g., the
    coefficients of a linear model), the goal of recursive feature elimination
    (RFE) is to select features by recursively considering smaller and smaller
    sets of features. First, the estimator is trained on the initial set of
    features and the importance of each feature is obtained either through
    any specific attribute or callable. Then, the least important features
    are pruned from the current set of features. That procedure is
    recursively repeated on the pruned set until the desired number of
    features to select is eventually reached.

    Read more in the :ref:`User Guide <rfe>`.

    Parameters
    ----------
    estimator : ``Estimator`` instance
        A supervised learning estimator with a ``fit`` method that provides
        information about feature importance
        (e.g. `coef_`, `feature_importances_`).
    n_features_to_select : int or float, default=None
        The number of features to select. If `None`, half of the features are
        selected. If integer, the parameter is the absolute number of features
        to select. If float between 0 and 1, it is the fraction of features to
        select.

        .. versionchanged:: 0.24
           Added float values for fractions.
    step : float, default=1
        The fraction of the *currently remaining* features to remove at each
        iteration: ``ceil(step * n_remaining)`` features are eliminated per
        round, capped so the selection never drops below
        ``n_features_to_select``. Note that this differs from scikit-learn's
        ``RFE``, where an integer ``step`` is an absolute count; with the
        default ``step=1`` all excess features are removed in a single round.
    verbose : int, default=0
        Controls verbosity of output.
    importance_getter : str or callable, default='auto'
        If 'auto', uses the feature importance either through a `coef_`
        or `feature_importances_` attribute of the estimator.
        Also accepts a string that specifies an attribute name/path
        for extracting feature importance (implemented with `attrgetter`).
        For example, give `regressor_.coef_` in case of
        :class:`~sklearn.compose.TransformedTargetRegressor` or
        `named_steps.clf.feature_importances_` in case of
        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.
        If `callable`, overrides the default feature importance getter.
        The callable is passed the fitted estimator and it should
        return importance for each feature.

        .. versionadded:: 0.24

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        The class labels. Only available when `estimator` is a classifier.
    estimator_ : ``Estimator`` instance
        The fitted estimator used to select features.
    n_features_ : int
        The number of selected features.
    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24
    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0
    ranking_ : ndarray of shape (n_features,)
        The feature ranking, such that ``ranking_[i]`` corresponds to the
        ranking position of the i-th feature. Selected (i.e., estimated
        best) features are assigned rank 1.
    support_ : ndarray of shape (n_features,)
        The mask of selected features.

    See Also
    --------
    RFECV : Recursive feature elimination with built-in cross-validated
        selection of the best number of features.
    SelectFromModel : Feature selection based on thresholds of importance
        weights.
    SequentialFeatureSelector : Sequential cross-validation based feature
        selection. Does not rely on importance weights.

    Notes
    -----
    Allows NaN/Inf in the input if the underlying estimator does as well.

    References
    ----------
    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
           for cancer classification using support vector machines",
           Mach. Learn., 46(1-3), 389--422, 2002.

    Examples
    --------
    The following example shows how to retrieve the 5 most informative
    features in the Friedman #1 dataset (``step=0.1`` removes one of the ten
    features per round, matching upstream RFE with ``step=1``).

    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.svm import SVR
    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
    >>> estimator = SVR(kernel="linear")
    >>> selector = RFE(estimator, n_features_to_select=5, step=0.1)
    >>> selector = selector.fit(X, y)
    >>> selector.support_
    array([ True,  True,  True,  True,  True, False, False, False, False,
           False])
    >>> selector.ranking_
    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
    """

    def __init__(
        self,
        estimator: SVC,
        *,
        n_features_to_select=None,
        step=1,
        verbose=0,
        importance_getter="auto",
    ) -> None:
        self.estimator = estimator
        self.n_features_to_select = n_features_to_select
        self.step = step
        self.importance_getter = importance_getter
        self.verbose = verbose

    @property
    def _estimator_type(self):
        return self.estimator._estimator_type

    @property
    def classes_(self):
        """Class labels, available when `estimator` is a classifier.

        Returns
        -------
        ndarray of shape (n_classes,)
        """
        return self.estimator_.classes_

    def fit(self, X: ndarray, y: ndarray, **fit_params) -> "RFE":
        """Fit the RFE model and then the underlying estimator on the selected features.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples.
        y : array-like of shape (n_samples,)
            The target values.
        **fit_params : dict
            Additional parameters passed to the `fit` method of the underlying
            estimator.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        return self._fit(X, y, **fit_params)

    def _fit(
        self, X: ndarray, y: ndarray, step_score: Optional[Callable] = None, **fit_params
    ) -> "RFE":
        # Parameter step_score controls the calculation of self.scores_.
        # step_score is not exposed to users and is used when implementing
        # RFECV; self.scores_ is not calculated when _fit is reached via fit.

        tags = self._get_tags()
        X, y = self._validate_data(
            X,
            y,
            accept_sparse="csc",
            ensure_min_features=2,
            force_all_finite=not tags.get("allow_nan", True),
            multi_output=True,
        )
        error_msg = (
            "n_features_to_select must be either None, a "
            "positive integer representing the absolute "
            "number of features or a float in (0.0, 1.0] "
            "representing a percentage of features to "
            f"select. Got {self.n_features_to_select}"
        )

        # Initialization
        n_features = X.shape[1]
        if self.n_features_to_select is None:
            n_features_to_select = n_features // 2
        elif self.n_features_to_select < 0:
            raise ValueError(error_msg)
        elif isinstance(self.n_features_to_select, numbers.Integral):  # int
            n_features_to_select = self.n_features_to_select
        elif self.n_features_to_select > 1.0:  # float > 1
            raise ValueError(error_msg)
        else:  # float in (0.0, 1.0]
            n_features_to_select = int(n_features * self.n_features_to_select)

        # Unlike upstream RFE, ``step`` is not converted to an absolute count
        # here: it is applied each round as a fraction of the features that
        # are still in play (see ``nstep`` below).

        support_ = np.ones(n_features, dtype=bool)
        ranking_ = np.ones(n_features, dtype=int)

        if step_score:
            self.scores_ = []

        # Collect the feature importance scores from each round of
        # elimination.
        self.importances_ = []

        # Elimination
        while np.sum(support_) > n_features_to_select:
            # Remaining features
            features = np.arange(n_features)[support_]

            # Rank the remaining features
            estimator = clone(self.estimator)
            if self.verbose > 0:
                print("Fitting estimator with %d features." % np.sum(support_))

            estimator.fit(X[:, features], y, **fit_params)

            # Get importances and rank them
            importances = _get_feature_importances(
                estimator,
                self.importance_getter,
                transform_func="square",
            )
            ranks = np.argsort(importances)

            # for sparse case ranks is matrix
            ranks = np.ravel(ranks)

            # Number of features to drop this round: a ``step`` fraction of
            # the features that currently remain.
            nstep = self.step * np.sum(support_)

            # Eliminate the worst features, never dropping below
            # n_features_to_select.
            threshold = min(math.ceil(nstep), np.sum(support_) - n_features_to_select)

            # Compute step score on the previous selection iteration
            # because 'estimator' must use features
            # that have not been eliminated yet
            if step_score:
                self.scores_.append(step_score(estimator, features))

            self.importances_.append(importances)

            support_[features[ranks][:threshold]] = False
            ranking_[np.logical_not(support_)] += 1

        # Set final attributes
        features = np.arange(n_features)[support_]
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X[:, features], y, **fit_params)

        # Compute step score when only n_features_to_select features are left
        if step_score:
            self.scores_.append(step_score(self.estimator_, features))
        self.n_features_ = support_.sum()
        self.support_ = support_
        self.ranking_ = ranking_

        return self
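
    # Worked example of the elimination schedule (a sketch, not executed):
    # with n_features=100, n_features_to_select=10, and step=0.2, successive
    # rounds remove ceil(0.2 * 100) = 20, ceil(0.2 * 80) = 16,
    # ceil(0.2 * 64) = 13, ... features (a roughly geometric schedule) until
    # the ``remaining - n_features_to_select`` cap stops the loop at exactly
    # 10 features.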

    @if_delegate_has_method(delegate="estimator")
    def predict(self, X):
        """Reduce X to the selected features and predict with the underlying estimator.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : array of shape (n_samples,)
            The predicted target values.
        """
        check_is_fitted(self)
        return self.estimator_.predict(self.transform(X))

    @if_delegate_has_method(delegate="estimator")
    def score(self, X, y, **fit_params):
        """Reduce X to the selected features and return the score of the underlying estimator.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            The input samples.
        y : array of shape (n_samples,)
            The target values.
        **fit_params : dict
            Parameters to pass to the `score` method of the underlying
            estimator.

            .. versionadded:: 1.0

        Returns
        -------
        score : float
            Score of the underlying base estimator computed with the selected
            features returned by `rfe.transform(X)` and `y`.
        """
        check_is_fitted(self)
        return self.estimator_.score(self.transform(X), y, **fit_params)

    def _get_support_mask(self):
        check_is_fitted(self)
        return self.support_

    @if_delegate_has_method(delegate="estimator")
    def decision_function(self, X):
        """Compute the decision function of ``X``.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        score : array, shape = [n_samples, n_classes] or [n_samples]
            The decision function of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
            Regression and binary classification produce an array of shape
            [n_samples].
        """
        check_is_fitted(self)
        return self.estimator_.decision_function(self.transform(X))

    @if_delegate_has_method(delegate="estimator")
    def predict_proba(self, X):
        """Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        p : array of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        return self.estimator_.predict_proba(self.transform(X))

    @if_delegate_has_method(delegate="estimator")
    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        Parameters
        ----------
        X : array of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        p : array of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of
            the classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        return self.estimator_.predict_log_proba(self.transform(X))

    def _more_tags(self) -> Dict[str, bool]:
        return {
            "poor_score": True,
            "allow_nan": _safe_tags(self.estimator, key="allow_nan"),
            "requires_y": True,
        }


class RFECV(RFE):
    """Recursive feature elimination with cross-validation scoring of feature subsets.

    See glossary entry for :term:`cross-validation estimator`.

    Read more in the :ref:`User Guide <rfe>`.

    Unlike :class:`sklearn.feature_selection.RFECV`, this variant takes
    explicit per-fold train/validation index lists in :meth:`fit`, supports
    per-fold sample weights, and aggregates the per-fold scores and feature
    rankings instead of refitting a final RFE on the full dataset.

    Parameters
    ----------
    estimator : ``Estimator`` instance
        A supervised learning estimator with a ``fit`` method that provides
        information about feature importance either through a ``coef_``
        attribute or through a ``feature_importances_`` attribute.
    step : float, default=1
        The fraction of the currently remaining features to remove at each
        iteration (see :class:`RFE`). Note that the last iteration may remove
        fewer features in order to reach ``min_features_to_select``.
    min_features_to_select : int, default=1
        The minimum number of features to be selected. This number of features
        will always be scored, even if the difference between the original
        feature count and ``min_features_to_select`` isn't divisible by
        ``step``.

        .. versionadded:: 0.20
    cv : int, cross-validation generator or an iterable, default=None
        Kept for API compatibility with scikit-learn but unused here: fold
        membership is determined by the ``train_idx_list`` and
        ``val_idx_list`` arguments of :meth:`fit`.
    scoring : str, callable or None, default=None
        A string (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.
    verbose : int, default=0
        Controls verbosity of output.
    n_jobs : int or None, default=None
        Number of cores to run in parallel while fitting across folds.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

        .. versionadded:: 0.18
    importance_getter : str or callable, default='auto'
        If 'auto', uses the feature importance either through a `coef_`
        or `feature_importances_` attribute of the estimator.
        Also accepts a string that specifies an attribute name/path
        for extracting feature importance.
        For example, give `regressor_.coef_` in case of
        :class:`~sklearn.compose.TransformedTargetRegressor` or
        `named_steps.clf.feature_importances_` in case of
        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.
        If `callable`, overrides the default feature importance getter.
        The callable is passed the fitted estimator and it should
        return importance for each feature.

        .. versionadded:: 0.24

    Attributes
    ----------
    grid_scores_ : ndarray of shape (n_subsets_of_features, n_splits)
        The per-fold cross-validation scores, stacked from ``cv_results_``.

        .. deprecated:: 1.0
            The `grid_scores_` attribute is deprecated in version 1.0 in favor
            of `cv_results_` and will be removed in version 1.2.
    cv_results_ : dict of ndarrays
        A dict with keys:

        split(k)_test_score : ndarray of shape (n_subsets_of_features,)
            The scores of the (k)-th fold, ordered from the smallest feature
            subset to the full feature set.
        mean_test_score : ndarray of shape (n_subsets_of_features,)
            Mean of scores over the folds.
        std_test_score : ndarray of shape (n_subsets_of_features,)
            Standard deviation of scores over the folds.

        .. versionadded:: 1.0
    cv_ranking_ : dict of ndarrays
        Per-fold feature rankings (under ``split(k)_test_score`` keys,
        mirroring ``cv_results_``) plus their mean
        (``mean_feature_ranking``).
    top_features : dict of ndarrays
        Contains ``mean_feature_ranking``, the per-feature ranking averaged
        over the folds (lower is better).
    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24
    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0
    ranking_ : ndarray of shape (n_splits, n_features)
        The per-fold feature rankings, such that ``ranking_[k, i]``
        corresponds to the ranking position of the i-th feature in the k-th
        fold. Selected (i.e., estimated best) features are assigned rank 1.

    See Also
    --------
    RFE : Recursive feature elimination.

    Notes
    -----
    Unlike scikit-learn's ``RFECV``, :meth:`fit` does not set ``support_``,
    ``n_features_``, or ``estimator_``: no final model is refit on the whole
    dataset. Each ``split*_test_score`` array holds one score per elimination
    round plus one for the final subset of ``min_features_to_select``
    features.

    Allows NaN/Inf in the input if the underlying estimator does as well.

    References
    ----------
    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
           for cancer classification using support vector machines",
           Mach. Learn., 46(1-3), 389--422, 2002.

    Examples
    --------
    The following example scores every feature-subset size of the Friedman #1
    dataset across two explicitly supplied folds.

    >>> import numpy as np
    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.svm import SVR
    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
    >>> idx = np.arange(50)
    >>> selector = RFECV(SVR(kernel="linear"), step=0.1)
    >>> selector = selector.fit(
    ...     X, y, train_idx_list=[idx[:40], idx[10:]], val_idx_list=[idx[40:], idx[:10]]
    ... )
    >>> sorted(selector.cv_results_)
    ['mean_test_score', 'split0_test_score', 'split1_test_score', 'std_test_score']
    >>> selector.ranking_.shape
    (2, 10)
    """

    def __init__(
        self,
        estimator: SVC,
        *,
        step=1,
        min_features_to_select=1,
        cv=None,
        scoring=None,
        verbose=0,
        n_jobs=None,
        importance_getter="auto",
    ) -> None:
        self.estimator = estimator
        self.step = step
        self.importance_getter = importance_getter
        self.cv = cv
        self.scoring = scoring
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.min_features_to_select = min_features_to_select

    def fit(
        self,
        X: ndarray,
        y: ndarray,
        train_idx_list: List[List[int]],
        val_idx_list: List[List[int]],
        groups=None,
        sample_weight_list: Optional[List[List[float]]] = None,
    ) -> "RFECV":
        """Fit the RFE model on each fold and record per-fold scores and rankings.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the total number of features.
        y : array-like of shape (n_samples,)
            Target values (integers for classification, real numbers for
            regression).
        train_idx_list : list of array-like of int
            Training-sample indices for each fold.
        val_idx_list : list of array-like of int
            Validation-sample indices for each fold, aligned with
            `train_idx_list`.
        groups : None, default=None
            Unused; kept for signature compatibility with
            :class:`~sklearn.feature_selection.RFECV`. Fold membership is
            fully determined by `train_idx_list` and `val_idx_list`.
        sample_weight_list : list of array-like of float or None, default=None
            Per-fold sample weights passed to the underlying estimator's
            ``fit``; if None, all samples are weighted equally.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        tags = self._get_tags()
        X, y = self._validate_data(
            X,
            y,
            accept_sparse="csr",
            ensure_min_features=2,
            force_all_finite=not tags.get("allow_nan", True),
            multi_output=True,
        )

        # Initialization. Note that, unlike upstream RFECV, no CV splitter is
        # built here: the folds are supplied explicitly by the caller.
        scorer = check_scoring(self.estimator, scoring=self.scoring)
        n_features = X.shape[1]

        if sample_weight_list is None:
            # One "no weights" entry per fold so the zip below stays aligned;
            # zipping with a bare None would raise a TypeError.
            sample_weight_list = [None] * len(train_idx_list)

        # Build an RFE object, which will evaluate and score each possible
        # feature count, down to self.min_features_to_select
        rfe = RFE(
            estimator=self.estimator,
            n_features_to_select=self.min_features_to_select,
            importance_getter=self.importance_getter,
            step=self.step,
            verbose=self.verbose,
        )

        # Score each candidate number of features by fitting an elimination
        # path on every supplied fold.

        # Note that joblib raises a non-picklable error for bound methods
        # even if n_jobs is set to 1 with the default multiprocessing
        # backend.
        # This branching is done so that user code that sets n_jobs to 1
        # and provides bound methods as scorers is not broken by the
        # n_jobs parameter added in version 0.18.
        if effective_n_jobs(self.n_jobs) == 1:
            parallel, func = list, _rfe_single_fit
        else:
            parallel = Parallel(n_jobs=self.n_jobs)
            func = delayed(_rfe_single_fit)

        res = parallel(
            func(rfe, self.estimator, X, y, train_idx, val_idx, scorer, sample_weight)
            for train_idx, val_idx, sample_weight in zip(
                train_idx_list, val_idx_list, sample_weight_list
            )
        )

        # Each fold yields (step scores, final feature ranking).
        scores = []
        ranking_ = []
        for fold_scores, fold_ranking in res:
            scores.append(fold_scores)
            ranking_.append(fold_ranking)

        scores = np.array(scores)
        ranking_ = np.array(ranking_)

        # Upstream RFECV would pick the best feature count here and re-run
        # the elimination on the whole dataset to set ``support_`` and
        # ``estimator_``; this variant instead keeps the per-fold scores and
        # rankings for downstream aggregation.

        # Reverse the score axis so index 0 corresponds to the smallest
        # feature subset, consistent with upstream cv_results_.
        scores_rev = scores[:, ::-1]
        self.cv_results_ = {}
        self.cv_ranking_ = {}
        self.cv_results_["mean_test_score"] = np.mean(scores_rev, axis=0)
        self.cv_results_["std_test_score"] = np.std(scores_rev, axis=0)
        self.cv_ranking_["mean_feature_ranking"] = np.mean(ranking_, axis=0)

        self.top_features = {}
        self.top_features["mean_feature_ranking"] = np.mean(ranking_, axis=0)

        for i in range(scores.shape[0]):
            self.cv_results_[f"split{i}_test_score"] = scores_rev[i]
            # Key name mirrors cv_results_, but the value is the fold's
            # feature ranking, not a score.
            self.cv_ranking_[f"split{i}_test_score"] = ranking_[i]

        self.ranking_ = ranking_
        self.scores = scores

        return self
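
    # Reading the results (an illustrative sketch, not part of the original
    # module): after the reversal in ``fit``, index 0 of
    # ``cv_results_["mean_test_score"]`` is the score of the smallest subset
    # (``min_features_to_select`` features) and the last index is the full
    # feature set; for example,
    # ``int(np.argmax(selector.cv_results_["mean_test_score"]))`` gives the
    # position of the best-scoring subset size along that path.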

    # TODO: Remove in v1.2 when grid_scores_ is removed
    # mypy error: Decorated property not supported
    @deprecated(  # type: ignore
        "The `grid_scores_` attribute is deprecated in version 1.0 in favor "
        "of `cv_results_` and will be removed in version 1.2."
    )
    @property
    def grid_scores_(self):
        # Exclude the two aggregate entries (mean_test_score and
        # std_test_score); the remaining split{i}_test_score arrays form the
        # grid, transposed to shape (n_subsets_of_features, n_splits).
        grid_size = len(self.cv_results_) - 2
        return np.asarray(
            [self.cv_results_[f"split{i}_test_score"] for i in range(grid_size)]
        ).T
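

if __name__ == "__main__":
    # Minimal smoke test: an illustrative sketch, not part of the original
    # module; the synthetic data and every parameter choice below are
    # assumptions.
    from sklearn.datasets import make_classification
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=60, n_features=12, random_state=0)
    idx = np.arange(60)
    selector = RFECV(SVC(kernel="linear"), step=0.2, scoring="accuracy")
    selector.fit(
        X, y, train_idx_list=[idx[:40], idx[20:]], val_idx_list=[idx[40:], idx[:20]]
    )
    print(selector.cv_results_["mean_test_score"])
    print(selector.cv_ranking_["mean_feature_ranking"])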