Diff of /src/scpanel/SVMRFECV.py [000000] .. [d90ecf]

"""Recursive feature elimination for feature ranking"""

import math
import numbers
from typing import Dict, List, Optional

import numpy as np
from joblib import Parallel, effective_n_jobs
from numpy import ndarray
from sklearn.base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier
from sklearn.feature_selection._base import SelectorMixin, _get_feature_importances
from sklearn.metrics import check_scoring
from sklearn.model_selection import check_cv
from sklearn.model_selection._validation import _score
from sklearn.svm._classes import SVC
from sklearn.utils._tags import _safe_tags
from sklearn.utils.deprecation import deprecated
from sklearn.utils.fixes import delayed
from sklearn.utils.metaestimators import _safe_split, if_delegate_has_method
from sklearn.utils.validation import check_is_fitted
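
# NOTE: several imports above are private scikit-learn helpers (_score,
# _get_feature_importances, _safe_tags, if_delegate_has_method,
# sklearn.utils.fixes.delayed). They exist in the ~1.0/1.1 release series;
# later releases move or remove them, so pin scikit-learn accordingly.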

# Reference copy of scikit-learn's original single-fold helper; the version
# below takes explicit index arrays and optional sample weights instead of
# using _safe_split.
# def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer):
#     """
#     Return the score for a fit across one fold.
#     """
#     X_train, y_train = _safe_split(estimator, X, y, train)
#     X_test, y_test = _safe_split(estimator, X, y, test, train)
#     return rfe._fit(
#         X_train,
#         y_train,
#         lambda estimator, features: _score(
#             estimator, X_test[:, features], y_test, scorer
#         ),
#     ).scores_


def _rfe_single_fit(
    rfe, estimator, X, y, train_idx, val_idx, scorer, sample_weight=None
):
    """
    Return the step scores and the feature ranking for a fit across one fold.
    """
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[val_idx], y[val_idx]

    fit_params = {} if sample_weight is None else {"sample_weight": sample_weight}

    # Fit once and read both attributes: the elimination is deterministic for
    # a given fold, so fitting a second time (once per attribute, as an
    # earlier revision did) would double the work for identical results.
    fitted_rfe = rfe._fit(
        X_train,
        y_train,
        lambda estimator, features: _score(
            estimator, X_test[:, features], y_test, scorer
        ),
        **fit_params,
    )
    return fitted_rfe.scores_, fitted_rfe.ranking_


class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
    """Feature ranking with recursive feature elimination.

    Given an external estimator that assigns weights to features (e.g., the
    coefficients of a linear model), the goal of recursive feature elimination
    (RFE) is to select features by recursively considering smaller and smaller
    sets of features. First, the estimator is trained on the initial set of
    features and the importance of each feature is obtained either through
    any specific attribute or callable.
    Then, the least important features are pruned from the current set of
    features. That procedure is recursively repeated on the pruned set until
    the desired number of features to select is eventually reached.

    Read more in the :ref:`User Guide <rfe>`.

    Parameters
    ----------
    estimator : ``Estimator`` instance
        A supervised learning estimator with a ``fit`` method that provides
        information about feature importance
        (e.g. `coef_`, `feature_importances_`).

    n_features_to_select : int or float, default=None
        The number of features to select. If `None`, half of the features are
        selected. If integer, the parameter is the absolute number of features
        to select. If float between 0 and 1, it is the fraction of features to
        select.

        .. versionchanged:: 0.24
           Added float values for fractions.

    step : int or float, default=1
        If greater than or equal to 1, then ``step`` corresponds to the
        (integer) number of features to remove at each iteration.
        If within (0.0, 1.0), then ``step`` corresponds to the fraction
        (rounded up) of the *remaining* features to remove at each iteration.

    verbose : int, default=0
        Controls verbosity of output.

    importance_getter : str or callable, default='auto'
        If 'auto', uses the feature importance either through a `coef_`
        or `feature_importances_` attribute of the estimator.

        Also accepts a string that specifies an attribute name/path
        for extracting feature importance (implemented with `attrgetter`).
        For example, give `regressor_.coef_` in case of
        :class:`~sklearn.compose.TransformedTargetRegressor` or
        `named_steps.clf.feature_importances_` in case of
        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.

        If `callable`, overrides the default feature importance getter.
        The callable is passed the fitted estimator and it should
        return importance for each feature.

        .. versionadded:: 0.24

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        The class labels. Only available when `estimator` is a classifier.

    estimator_ : ``Estimator`` instance
        The fitted estimator used to select features.

    n_features_ : int
        The number of selected features.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    ranking_ : ndarray of shape (n_features,)
        The feature ranking, such that ``ranking_[i]`` corresponds to the
        ranking position of the i-th feature. Selected (i.e., estimated
        best) features are assigned rank 1.

    support_ : ndarray of shape (n_features,)
        The mask of selected features.

    See Also
    --------
    RFECV : Recursive feature elimination with built-in cross-validated
        selection of the best number of features.
    SelectFromModel : Feature selection based on thresholds of importance
        weights.
    SequentialFeatureSelector : Sequential cross-validation based feature
        selection. Does not rely on importance weights.

    Notes
    -----
    Allows NaN/Inf in the input if the underlying estimator does as well.

    References
    ----------
    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
           for cancer classification using support vector machines",
           Mach. Learn., 46(1-3), 389--422, 2002.

    Examples
    --------
    The following example shows how to retrieve the 5 most informative
    features in the Friedman #1 dataset.

    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.feature_selection import RFE
    >>> from sklearn.svm import SVR
    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
    >>> estimator = SVR(kernel="linear")
    >>> selector = RFE(estimator, n_features_to_select=5, step=1)
    >>> selector = selector.fit(X, y)
    >>> selector.support_
    array([ True,  True,  True,  True,  True, False, False, False, False,
           False])
    >>> selector.ranking_
    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
    """

    def __init__(
        self,
        estimator: SVC,
        *,
        n_features_to_select=None,
        step=1,
        verbose=0,
        importance_getter="auto",
    ) -> None:
        self.estimator = estimator
        self.n_features_to_select = n_features_to_select
        self.step = step
        self.importance_getter = importance_getter
        self.verbose = verbose

    @property
    def _estimator_type(self):
        return self.estimator._estimator_type

    @property
    def classes_(self):
        """Class labels available when `estimator` is a classifier.

        Returns
        -------
        ndarray of shape (n_classes,)
        """
        return self.estimator_.classes_

    def fit(self, X: ndarray, y: ndarray, **fit_params) -> "RFE":
        """Fit the RFE model and then the underlying estimator on the selected features.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The training input samples.

        y : array-like of shape (n_samples,)
            The target values.

        **fit_params : dict
            Additional parameters passed to the `fit` method of the underlying
            estimator.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        return self._fit(X, y, **fit_params)

    def _fit(self, X: ndarray, y: ndarray, step_score=None, **fit_params) -> "RFE":
        # Parameter step_score controls the calculation of self.scores_.
        # step_score is not exposed to users and is used when implementing
        # RFECV; self.scores_ is not calculated when _fit is called via fit.

        tags = self._get_tags()
        X, y = self._validate_data(
            X,
            y,
            accept_sparse="csc",
            ensure_min_features=2,
            force_all_finite=not tags.get("allow_nan", True),
            multi_output=True,
        )
        error_msg = (
            "n_features_to_select must be either None, a "
            "positive integer representing the absolute "
            "number of features or a float in (0.0, 1.0] "
            "representing a percentage of features to "
            f"select. Got {self.n_features_to_select}"
        )

        # Initialization
        n_features = X.shape[1]
        if self.n_features_to_select is None:
            n_features_to_select = n_features // 2
        elif self.n_features_to_select < 0:
            raise ValueError(error_msg)
        elif isinstance(self.n_features_to_select, numbers.Integral):  # int
            n_features_to_select = self.n_features_to_select
        elif self.n_features_to_select > 1.0:  # float > 1
            raise ValueError(error_msg)
        else:  # float
            n_features_to_select = int(n_features * self.n_features_to_select)

        #         if 0.0 < self.step < 1.0:
        #             step = int(max(1, self.step * n_features))
        #         else:
        #             step = int(self.step)
        #         if step <= 0:
        #             raise ValueError("Step must be >0")

        support_ = np.ones(n_features, dtype=bool)
        ranking_ = np.ones(n_features, dtype=int)

        if step_score:
            self.scores_ = []

        # Collect the feature importance scores from each round of elimination
        self.importances_ = []

        # Elimination
        while np.sum(support_) > n_features_to_select:
            # Remaining features
            features = np.arange(n_features)[support_]

            # Rank the remaining features
            estimator = clone(self.estimator)
            if self.verbose > 0:
                print("Fitting estimator with %d features." % np.sum(support_))

            estimator.fit(X[:, features], y, **fit_params)

            # Get importance and rank them
            importances = _get_feature_importances(
                estimator,
                self.importance_getter,
                transform_func="square",
            )
            ranks = np.argsort(importances)

            # for sparse case ranks is matrix
            ranks = np.ravel(ranks)

            # An integer ``step`` removes a fixed number of features per
            # iteration; a float in (0.0, 1.0) removes that fraction of the
            # *remaining* features, so the set shrinks geometrically.
            # (Multiplying an integer step by the remaining count, as an
            # earlier revision did, would eliminate everything in one pass.)
            if 0.0 < self.step < 1.0:
                nstep = self.step * np.sum(support_)
            else:
                nstep = self.step

            # Eliminate the worst features, but never more than needed to
            # reach n_features_to_select
            threshold = min(math.ceil(nstep), np.sum(support_) - n_features_to_select)
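
            # Worked example of the fractional schedule (an illustration, not
            # a doctest): with 100 remaining features and step=0.2, the rounds
            # remove ceil(20.0)=20 -> 80 left, ceil(16.0)=16 -> 64 left,
            # ceil(12.8)=13 -> 51 left, and so on until n_features_to_select
            # is reached.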

            # Compute the step score on the previous selection iteration,
            # because 'estimator' must use features that have not been
            # eliminated yet
            if step_score:
                self.scores_.append(step_score(estimator, features))

            self.importances_.append(importances)

            support_[features[ranks][:threshold]] = False
            ranking_[np.logical_not(support_)] += 1

        # Set final attributes
        features = np.arange(n_features)[support_]
        self.estimator_ = clone(self.estimator)
        self.estimator_.fit(X[:, features], y, **fit_params)

        # Compute the step score when only n_features_to_select features are left
        if step_score:
            self.scores_.append(step_score(self.estimator_, features))
        self.n_features_ = support_.sum()
        self.support_ = support_
        self.ranking_ = ranking_

        return self

    @if_delegate_has_method(delegate="estimator")
    def predict(self, X):
        """Reduce X to the selected features and then predict using the underlying estimator.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        Returns
        -------
        y : array of shape [n_samples]
            The predicted target values.
        """
        check_is_fitted(self)
        return self.estimator_.predict(self.transform(X))

    @if_delegate_has_method(delegate="estimator")
    def score(self, X, y, **fit_params):
        """Reduce X to the selected features and return the score of the underlying estimator.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        y : array of shape [n_samples]
            The target values.

        **fit_params : dict
            Parameters to pass to the `score` method of the underlying
            estimator.

            .. versionadded:: 1.0

        Returns
        -------
        score : float
            Score of the underlying base estimator computed with the selected
            features returned by `rfe.transform(X)` and `y`.
        """
        check_is_fitted(self)
        return self.estimator_.score(self.transform(X), y, **fit_params)

    def _get_support_mask(self):
        check_is_fitted(self)
        return self.support_

    @if_delegate_has_method(delegate="estimator")
    def decision_function(self, X):
        """Compute the decision function of ``X``.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        score : array, shape = [n_samples, n_classes] or [n_samples]
            The decision function of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
            Regression and binary classification produce an array of shape
            [n_samples].
        """
        check_is_fitted(self)
        return self.estimator_.decision_function(self.transform(X))

    @if_delegate_has_method(delegate="estimator")
    def predict_proba(self, X):
        """Predict class probabilities for X.

        Parameters
        ----------
        X : {array-like or sparse matrix} of shape (n_samples, n_features)
            The input samples. Internally, it will be converted to
            ``dtype=np.float32`` and if a sparse matrix is provided
            to a sparse ``csr_matrix``.

        Returns
        -------
        p : array of shape (n_samples, n_classes)
            The class probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        return self.estimator_.predict_proba(self.transform(X))

    @if_delegate_has_method(delegate="estimator")
    def predict_log_proba(self, X):
        """Predict class log-probabilities for X.

        Parameters
        ----------
        X : array of shape [n_samples, n_features]
            The input samples.

        Returns
        -------
        p : array of shape (n_samples, n_classes)
            The class log-probabilities of the input samples. The order of the
            classes corresponds to that in the attribute :term:`classes_`.
        """
        check_is_fitted(self)
        return self.estimator_.predict_log_proba(self.transform(X))

    def _more_tags(self) -> Dict[str, bool]:
        return {
            "poor_score": True,
            "allow_nan": _safe_tags(self.estimator, key="allow_nan"),
            "requires_y": True,
        }
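
# A quick standalone sketch of this RFE variant (assumes a linear SVC so that
# `coef_` exists; not part of the public scPanel API):
#
#     rfe = RFE(SVC(kernel="linear"), n_features_to_select=5, step=0.2)
#     rfe.fit(X, y)
#     rfe.support_          # boolean mask of the 5 surviving features
#     rfe.importances_      # squared coefficients from each elimination round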

class RFECV(RFE):
    """Recursive feature elimination with cross-validation to select the number of features.

    See glossary entry for :term:`cross-validation estimator`.

    Read more in the :ref:`User Guide <rfe>`.

    Parameters
    ----------
    estimator : ``Estimator`` instance
        A supervised learning estimator with a ``fit`` method that provides
        information about feature importance either through a ``coef_``
        attribute or through a ``feature_importances_`` attribute.

    step : int or float, default=1
        If greater than or equal to 1, then ``step`` corresponds to the
        (integer) number of features to remove at each iteration.
        If within (0.0, 1.0), then ``step`` corresponds to the fraction
        (rounded up) of the *remaining* features to remove at each iteration.
        Note that the last iteration may remove fewer than ``step`` features
        in order to reach ``min_features_to_select``.

    min_features_to_select : int, default=1
        The minimum number of features to be selected. This number of features
        will always be scored, even if the difference between the original
        feature count and ``min_features_to_select`` isn't divisible by
        ``step``.

        .. versionadded:: 0.20

    cv : int, cross-validation generator or an iterable, default=None
        Determines the cross-validation splitting strategy.
        Possible inputs for cv are:

        - None, to use the default 5-fold cross-validation,
        - integer, to specify the number of folds,
        - :term:`CV splitter`,
        - An iterable yielding (train, test) splits as arrays of indices.

        For integer/None inputs, if ``y`` is binary or multiclass,
        :class:`~sklearn.model_selection.StratifiedKFold` is used. If the
        estimator is not a classifier or if ``y`` is neither binary nor
        multiclass, :class:`~sklearn.model_selection.KFold` is used.

        Note that in this variant the train/validation splits are passed
        explicitly to :meth:`fit`, so ``cv`` is kept only for API
        compatibility and is currently unused.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. versionchanged:: 0.22
            ``cv`` default value of None changed from 3-fold to 5-fold.

    scoring : str, callable or None, default=None
        A string (see model evaluation documentation) or
        a scorer callable object / function with signature
        ``scorer(estimator, X, y)``.

    verbose : int, default=0
        Controls verbosity of output.

    n_jobs : int or None, default=None
        Number of cores to run in parallel while fitting across folds.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

        .. versionadded:: 0.18

    importance_getter : str or callable, default='auto'
        If 'auto', uses the feature importance either through a `coef_`
        or `feature_importances_` attribute of the estimator.

        Also accepts a string that specifies an attribute name/path
        for extracting feature importance.
        For example, give `regressor_.coef_` in case of
        :class:`~sklearn.compose.TransformedTargetRegressor` or
        `named_steps.clf.feature_importances_` in case of
        :class:`~sklearn.pipeline.Pipeline` with its last step named `clf`.

        If `callable`, overrides the default feature importance getter.
        The callable is passed the fitted estimator and it should
        return importance for each feature.

        .. versionadded:: 0.24

    Attributes
    ----------
    classes_ : ndarray of shape (n_classes,)
        The class labels. Only available when `estimator` is a classifier.

    estimator_ : ``Estimator`` instance
        The fitted estimator used to select features.

    grid_scores_ : ndarray of shape (n_subsets_of_features,)
        The cross-validation scores such that
        ``grid_scores_[i]`` corresponds to
        the CV score of the i-th subset of features.

        .. deprecated:: 1.0
            The `grid_scores_` attribute is deprecated in version 1.0 in favor
            of `cv_results_` and will be removed in version 1.2.

    cv_results_ : dict of ndarrays
        A dict with keys:

        split(k)_test_score : ndarray of shape (n_subsets_of_features,)
            The cross-validation scores across the (k)th fold.

        mean_test_score : ndarray of shape (n_subsets_of_features,)
            Mean of scores over the folds.

        std_test_score : ndarray of shape (n_subsets_of_features,)
            Standard deviation of scores over the folds.

        .. versionadded:: 1.0

    cv_ranking_ : dict of ndarrays
        Per-fold feature rankings, with keys ``mean_feature_ranking``
        (mean rank of each feature over the folds) and
        ``split(k)_test_score`` (fold k's feature ranking, despite the
        key name).

    n_features_ : int
        The number of selected features with cross-validation.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    ranking_ : ndarray of shape (n_splits, n_features)
        Per-fold feature rankings, such that ``ranking_[k, i]`` is the
        ranking position of the i-th feature in fold k. Selected (i.e.,
        estimated best) features are assigned rank 1.

    support_ : ndarray of shape (n_features,)
        The mask of selected features.

    See Also
    --------
    RFE : Recursive feature elimination.

    Notes
    -----
    For an integer ``step``, the size of ``grid_scores_`` is equal to
    ``ceil((n_features - min_features_to_select) / step) + 1``,
    where step is the number of features removed at each iteration.

    Unlike scikit-learn's RFECV, this variant does not re-fit on the full
    data after cross-validation, so ``support_``, ``n_features_`` and
    ``estimator_`` are not set by :meth:`fit`.

    Allows NaN/Inf in the input if the underlying estimator does as well.

    References
    ----------
    .. [1] Guyon, I., Weston, J., Barnhill, S., & Vapnik, V., "Gene selection
           for cancer classification using support vector machines",
           Mach. Learn., 46(1-3), 389--422, 2002.

    Examples
    --------
    The following example shows how to retrieve the 5 informative features
    (not known a priori) in the Friedman #1 dataset.

    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.feature_selection import RFECV
    >>> from sklearn.svm import SVR
    >>> X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)
    >>> estimator = SVR(kernel="linear")
    >>> selector = RFECV(estimator, step=1, cv=5)
    >>> selector = selector.fit(X, y)
    >>> selector.support_
    array([ True,  True,  True,  True,  True, False, False, False, False,
           False])
    >>> selector.ranking_
    array([1, 1, 1, 1, 1, 6, 4, 3, 2, 5])
    """

    def __init__(
        self,
        estimator: SVC,
        *,
        step=1,
        min_features_to_select=1,
        cv=None,
        scoring=None,
        verbose=0,
        n_jobs=None,
        importance_getter="auto",
    ) -> None:
        self.estimator = estimator
        self.step = step
        self.importance_getter = importance_getter
        self.cv = cv
        self.scoring = scoring
        self.verbose = verbose
        self.n_jobs = n_jobs
        self.min_features_to_select = min_features_to_select

    def fit(
        self,
        X: ndarray,
        y: ndarray,
        train_idx_list: List[List[int]],
        val_idx_list: List[List[int]],
        groups=None,
        sample_weight_list: Optional[List[List[float]]] = None,
    ) -> "RFECV":
        """Fit the RFE model and automatically tune the number of selected features.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the total number of features.

        y : array-like of shape (n_samples,)
            Target values (integers for classification, real numbers for
            regression).

        train_idx_list : list of array-like of int
            Training-sample indices for each fold.

        val_idx_list : list of array-like of int
            Validation-sample indices for each fold, aligned with
            `train_idx_list`.

        groups : array-like of shape (n_samples,) or None, default=None
            Group labels for the samples. Kept for API compatibility with
            scikit-learn's RFECV; unused here because the folds are supplied
            explicitly.

        sample_weight_list : list of array-like of float or None, default=None
            Per-fold sample weights for the training samples, passed to the
            underlying estimator's `fit`.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        tags = self._get_tags()
618
        X, y = self._validate_data(
619
            X,
620
            y,
621
            accept_sparse="csr",
622
            ensure_min_features=2,
623
            force_all_finite=not tags.get("allow_nan", True),
624
            multi_output=True,
625
        )
626
627
        # Initialization
628
        #         cv = check_cv(self.cv, y, classifier=is_classifier(self.estimator))
629
        scorer = check_scoring(self.estimator, scoring=self.scoring)
630
        n_features = X.shape[1]
631
632
        #         if 0.0 < self.step < 1.0:
633
        #             step = int(max(1, self.step * n_features))
634
        #         else:
635
        #             step = int(self.step)
636
        #         if step <= 0:
637
        #             raise ValueError("Step must be >0")
638
639
        # Build an RFE object, which will evaluate and score each possible
640
        # feature count, down to self.min_features_to_select
641
        rfe = RFE(
642
            estimator=self.estimator,
643
            n_features_to_select=self.min_features_to_select,
644
            importance_getter=self.importance_getter,
645
            step=self.step,
646
            verbose=self.verbose,
647
        )
648
649
        # Determine the number of subsets of features by fitting across
650
        # the train folds and choosing the "features_to_select" parameter
651
        # that gives the least averaged error across all folds.
652
653
        # Note that joblib raises a non-picklable error for bound methods
654
        # even if n_jobs is set to 1 with the default multiprocessing
655
        # backend.
656
        # This branching is done so that to
657
        # make sure that user code that sets n_jobs to 1
658
        # and provides bound methods as scorers is not broken with the
659
        # addition of n_jobs parameter in version 0.18.
660
661
        if effective_n_jobs(self.n_jobs) == 1:
662
            parallel, func = list, _rfe_single_fit
663
        else:
664
            parallel = Parallel(n_jobs=self.n_jobs)
665
            func = delayed(_rfe_single_fit)
666
667
        res = parallel(
668
            func(rfe, self.estimator, X, y, train_idx, val_idx, scorer, sample_weight)
669
            for train_idx, val_idx, sample_weight in zip(
670
                train_idx_list, val_idx_list, sample_weight_list
671
            )
672
        )

        #        scores = _rfe_single_fit(rfe, self.estimator, X, y, X_ts, y_ts, scorer)

        # Each fold contributes a (scores_, ranking_) pair from _rfe_single_fit.
        scores = []
        ranking_ = []
        for i in res:
            scores.append(i[0])
            ranking_.append(i[1])

        scores = np.array(scores)
        ranking_ = np.array(ranking_)

        #         scores_sum = np.sum(scores, axis=0)
        #         scores_sum_rev = scores_sum[::-1]
        #         argmax_idx = len(scores_sum) - np.argmax(scores_sum_rev) - 1 #indices of the maximum value
        #         n_features_to_select = max(
        #             n_features - (argmax_idx * step), self.min_features_to_select
        #         )

        #          # Re-execute an elimination with best_k over the whole set
        #         rfe = RFE(
        #             estimator=self.estimator,
        #             n_features_to_select=n_features_to_select,
        #             step=self.step,
        #             importance_getter=self.importance_getter,
        #             verbose=self.verbose,
        #         )

        #         rfe.fit(X, y)

        #         # Set final attributes
        #         self.support_ = rfe.support_
        #         self.n_features_ = rfe.n_features_
        #         self.ranking_ = rfe.ranking_
        #         self.estimator_ = clone(self.estimator)
        #         self.estimator_.fit(self.transform(X), y)

        # Reverse the per-fold score curves so that entry 0 corresponds to
        # the smallest feature subset, consistent with scikit-learn's
        # cv_results_ convention.
        scores_rev = scores[:, ::-1]
        self.cv_results_ = {}
        self.cv_ranking_ = {}
        self.cv_results_["mean_test_score"] = np.mean(scores_rev, axis=0)
        self.cv_results_["std_test_score"] = np.std(scores_rev, axis=0)
        self.cv_ranking_["mean_feature_ranking"] = np.mean(ranking_, axis=0)

        self.top_features = {}
        self.top_features["mean_feature_ranking"] = np.mean(ranking_, axis=0)

        for i in range(scores.shape[0]):
            self.cv_results_[f"split{i}_test_score"] = scores_rev[i]
            # Note: despite the key name, this stores fold i's feature ranking.
            self.cv_ranking_[f"split{i}_test_score"] = ranking_[i]

        self.ranking_ = ranking_
        self.scores = scores

        return self

    # TODO: Remove in v1.2 when grid_scores_ is removed
    # mypy error: Decorated property not supported
    @deprecated(  # type: ignore
        "The `grid_scores_` attribute is deprecated in version 1.0 in favor "
        "of `cv_results_` and will be removed in version 1.2."
    )
    @property
    def grid_scores_(self):
        # remove 2 for mean_test_score, std_test_score
        grid_size = len(self.cv_results_) - 2
        return np.asarray(
            [self.cv_results_[f"split{i}_test_score"] for i in range(grid_size)]
        ).T
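

# ---------------------------------------------------------------------------
# Minimal usage sketch (an illustration, not part of the scPanel API): build
# explicit per-fold index lists and run RFECV end to end. The dataset, fold
# count, and step value below are arbitrary assumptions.
if __name__ == "__main__":
    from sklearn.datasets import make_classification
    from sklearn.model_selection import StratifiedKFold

    X_demo, y_demo = make_classification(
        n_samples=60, n_features=12, n_informative=4, random_state=0
    )

    # Unlike scikit-learn's RFECV, this variant takes explicit train/val
    # index lists instead of a `cv` splitter.
    splits = list(
        StratifiedKFold(n_splits=3, shuffle=True, random_state=0).split(
            X_demo, y_demo
        )
    )
    train_idx_list = [tr for tr, _ in splits]
    val_idx_list = [va for _, va in splits]

    selector = RFECV(SVC(kernel="linear"), step=0.2, min_features_to_select=2)
    selector.fit(X_demo, y_demo, train_idx_list, val_idx_list)

    # One mean score per evaluated subset size, smallest subset first.
    print(selector.cv_results_["mean_test_score"])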