@@ -90,6 +90,19 @@ def inner(preds: np.ndarray, dmatrix: DMatrix) -> Tuple[np.ndarray, np.ndarray]:
90
90
return inner
91
91
92
92
93
def _metric_decorator(func: Callable) -> Metric:
    """Decorate a metric function from sklearn.

    Converts a metric function using the typical sklearn metrics signature
    ``func(y_true, y_score)`` so that it is compatible with
    ``xgboost.training.train``.

    Parameters
    ----------
    func :
        An sklearn-style metric, called as ``func(y_true, y_score)``.

    Returns
    -------
    A custom-metric callable for the native training loop: it receives the raw
    predictions and the evaluation ``DMatrix`` and returns a ``(name, value)``
    pair, with the name taken from ``func.__name__``.

    """
    def inner(y_score: np.ndarray, dmatrix: DMatrix) -> Tuple[str, float]:
        # Labels live on the DMatrix; sklearn metrics expect (y_true, y_score).
        y_true = dmatrix.get_label()
        return func.__name__, func(y_true, y_score)

    return inner
93
106
__estimator_doc = '''
94
107
n_estimators : int
95
108
Number of gradient boosted trees. Equivalent to number of boosting
@@ -184,6 +197,46 @@ def inner(preds: np.ndarray, dmatrix: DMatrix) -> Tuple[np.ndarray, np.ndarray]:
184
197
Experimental support for categorical data. Do not set to true unless you are
185
198
interested in development. Only valid when `gpu_hist` and dataframe are used.
186
199
200
+ eval_metric : Optional[Union[str, List[str], Callable]]
201
+ Metric used for monitoring the training result and early stopping. It can be a
202
+ string or list of strings as names of predefined metrics in XGBoost (see
203
+ doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any other
204
+ user defined metric that looks like `sklearn.metrics`.
205
+
206
+ Unlike scikit-learn `scoring` parameter, when a callable object is provided, it's
207
+ assumed to be a cost function and by default XGBoost will minimize the result
208
+ during early stopping.
209
+
210
+ For advanced usage on Early stopping like directly choosing to maximize instead of
211
+ minimize, see :py:obj:`xgboost.callback.EarlyStopping`.
212
+
213
+ .. versionadded:: 1.5.1
214
+
215
+ .. note::
216
+
217
+ This parameter replaces `eval_metric` in
218
+ :py:meth:`fit` method.
219
+
220
+ early_stopping_rounds : Optional[int]
221
+ Activates early stopping. Validation metric needs to improve at least once in
222
+ every **early_stopping_rounds** round(s) to continue training. Requires at least
223
+ one item in **eval_set** in :py:meth:`xgboost.sklearn.XGBModel.fit`.
224
+
225
+ The method returns the model from the last iteration (not the best one). If
226
+ there's more than one item in **eval_set**, the last entry will be used for early
227
+ stopping. If there's more than one metric in **eval_metric**, the last metric
228
+ will be used for early stopping.
229
+
230
+ If early stopping occurs, the model will have three additional fields:
231
+ ``clf.best_score``, ``clf.best_iteration`` and ``clf.best_ntree_limit``.
232
+
233
+ .. versionadded:: 1.5.1
234
+
235
+ .. note::
236
+
237
+ This parameter replaces `early_stopping_rounds` in
238
+ :py:meth:`fit` method.
239
+
187
240
kwargs : dict, optional
188
241
Keyword arguments for XGBoost Booster object. Full documentation of
189
242
parameters can be found here:
@@ -399,6 +452,8 @@ def __init__(
399
452
validate_parameters : Optional [bool ] = None ,
400
453
predictor : Optional [str ] = None ,
401
454
enable_categorical : bool = False ,
455
+ eval_metric = None ,
456
+ early_stopping_rounds = None ,
402
457
** kwargs : Any
403
458
) -> None :
404
459
if not SKLEARN_INSTALLED :
@@ -435,6 +490,8 @@ def __init__(
435
490
self .validate_parameters = validate_parameters
436
491
self .predictor = predictor
437
492
self .enable_categorical = enable_categorical
493
+ self .eval_metric = eval_metric
494
+ self .early_stopping_rounds = early_stopping_rounds
438
495
if kwargs :
439
496
self .kwargs = kwargs
440
497
@@ -545,10 +602,15 @@ def get_xgb_params(self) -> Dict[str, Any]:
545
602
params = self .get_params ()
546
603
# Parameters that should not go into native learner.
547
604
wrapper_specific = {
548
- 'importance_type' , 'kwargs' , 'missing' , 'n_estimators' , 'use_label_encoder' ,
605
+ 'importance_type' ,
606
+ 'kwargs' ,
607
+ 'missing' ,
608
+ 'n_estimators' ,
609
+ 'use_label_encoder' ,
549
610
"enable_categorical"
611
+ "early_stopping_rounds"
550
612
}
551
- filtered = {}
613
+ filtered = dict ()
552
614
for k , v in params .items ():
553
615
if k not in wrapper_specific and not callable (v ):
554
616
filtered [k ] = v
@@ -636,15 +698,32 @@ def _configure_fit(
636
698
booster : Optional [Union [Booster , "XGBModel" , str ]],
637
699
eval_metric : Optional [Union [Callable , str , List [str ]]],
638
700
params : Dict [str , Any ],
639
- ) -> Tuple [Optional [Union [Booster , str ]], Optional [Metric ], Dict [str , Any ]]:
640
- # pylint: disable=protected-access, no-self-use
641
- if isinstance (booster , XGBModel ):
701
+ early_stopping_rounds : Optional [int ],
702
+ ) -> Tuple [Optional [Union [Booster , str , "XGBModel" ]], Optional [Metric ], Dict [str , Any ], Optional [int ]]:
703
+ # pylint: disable=protected-access
704
+ model = booster
705
+ if hasattr (model , "_Booster" ):
642
706
# Handle the case when xgb_model is a sklearn model object
643
- model : Optional [Union [Booster , str ]] = booster ._Booster
644
- else :
645
- model = booster
707
+ model = model ._Booster
708
+
709
+ if eval_metric is not None :
710
+ warnings .warn (
711
+ "eval_metric for `fit` method is deprecated, use `eval_metric` in "
712
+ "constructor or `set_params` instead." ,
713
+ UserWarning ,
714
+ )
646
715
716
+ # configure callable evaluation metric
647
717
feval = eval_metric if callable (eval_metric ) else None
718
+ if self .eval_metric is not None and feval is not None :
719
+ warnings .warn (
720
+ "Overriding `eval_metric` from `fit` with `eval_metric` from parameter" ,
721
+ UserWarning
722
+ )
723
+ if callable (self .eval_metric ):
724
+ feval = _metric_decorator (self .eval_metric )
725
+
726
+ # configure string/list evaluation metric
648
727
if eval_metric is not None :
649
728
if callable (eval_metric ):
650
729
eval_metric = None
@@ -656,7 +735,26 @@ def _configure_fit(
656
735
" current tree method yet."
657
736
)
658
737
659
- return model , feval , params
738
+ # configure early_stopping_rounds
739
+ if early_stopping_rounds is not None :
740
+ warnings .warn (
741
+ "`early_stopping_rounds` is deprecated, use `early_stopping_rounds` "
742
+ "in constructor or `set_params` instead." ,
743
+ UserWarning ,
744
+ )
745
+ if (
746
+ self .early_stopping_rounds is not None
747
+ and self .early_stopping_rounds != early_stopping_rounds
748
+ ):
749
+ raise ValueError ("2 different `early_stopping_rounds` are provided." )
750
+
751
+ early_stopping_rounds = (
752
+ self .early_stopping_rounds
753
+ if self .early_stopping_rounds is not None
754
+ else early_stopping_rounds
755
+ )
756
+
757
+ return model , feval , params , early_stopping_rounds
660
758
661
759
def _set_evaluation_result (self , evals_result : TrainingCallback .EvalsLog ) -> None :
662
760
if evals_result :
@@ -704,31 +802,10 @@ def fit(
704
802
A list of (X, y) tuple pairs to use as validation sets, for which
705
803
metrics will be computed.
706
804
Validation metrics will help us track the performance of the model.
707
- eval_metric :
708
- If a str, should be a built-in evaluation metric to use. See doc/parameter.rst.
709
-
710
- If a list of str, should be the list of multiple built-in evaluation metrics
711
- to use.
712
-
713
- If callable, a custom evaluation metric. The call signature is
714
- ``func(y_predicted, y_true)`` where ``y_true`` will be a DMatrix object such
715
- that you may need to call the ``get_label`` method. It must return a str,
716
- value pair where the str is a name for the evaluation and value is the value
717
- of the evaluation function. The callable custom objective is always minimized.
718
- early_stopping_rounds :
719
- Activates early stopping. Validation metric needs to improve at least once in
720
- every **early_stopping_rounds** round(s) to continue training.
721
- Requires at least one item in **eval_set**.
722
-
723
- The method returns the model from the last iteration (not the best one).
724
- If there's more than one item in **eval_set**, the last entry will be used
725
- for early stopping.
726
-
727
- If there's more than one metric in **eval_metric**, the last metric will be
728
- used for early stopping.
729
-
730
- If early stopping occurs, the model will have three additional fields:
731
- ``clf.best_score``, ``clf.best_iteration``.
805
+ eval_metric : str, list of str, or callable, optional
806
+ Deprecated, use `eval_metric` in constructor or `set_params` instead.
807
+ early_stopping_rounds : int
808
+ Deprecated, use `early_stopping_rounds` in constructor instead.
732
809
verbose :
733
810
If `verbose` and an evaluation set is used, writes the evaluation metric
734
811
measured on the validation set to stderr.
@@ -785,7 +862,9 @@ def fit(
785
862
else :
786
863
obj = None
787
864
788
- model , feval , params = self ._configure_fit (xgb_model , eval_metric , params )
865
+ model , feval , params , early_stopping_rounds = self ._configure_fit (
866
+ xgb_model , eval_metric , params , early_stopping_rounds
867
+ )
789
868
self ._Booster = train (
790
869
params ,
791
870
train_dmatrix ,
@@ -1223,7 +1302,9 @@ def fit(
1223
1302
else :
1224
1303
label_transform = lambda x : x
1225
1304
1226
- model , feval , params = self ._configure_fit (xgb_model , eval_metric , params )
1305
+ model , feval , params , early_stopping_rounds = self ._configure_fit (
1306
+ xgb_model , eval_metric , params , early_stopping_rounds
1307
+ )
1227
1308
train_dmatrix , evals = _wrap_evaluation_matrices (
1228
1309
missing = self .missing ,
1229
1310
X = X ,
@@ -1359,8 +1440,9 @@ def evals_result(self) -> TrainingCallback.EvalsLog:
1359
1440
1360
1441
If **eval_set** is passed to the `fit` function, you can call
1361
1442
``evals_result()`` to get evaluation results for all passed **eval_sets**.
1362
- When **eval_metric** is also passed to the `fit` function, the
1363
- **evals_result** will contain the **eval_metrics** passed to the `fit` function.
1443
+
1444
+ When **eval_metric** is also passed as a parameter, the **evals_result** will
1445
+ contain the **eval_metric** passed to the `fit` function.
1364
1446
1365
1447
Returns
1366
1448
-------
@@ -1371,13 +1453,14 @@ def evals_result(self) -> TrainingCallback.EvalsLog:
1371
1453
1372
1454
.. code-block:: python
1373
1455
1374
- param_dist = {'objective':'binary:logistic', 'n_estimators':2}
1456
+ param_dist = {
1457
+ 'objective': 'binary:logistic', 'n_estimators': 2, 'eval_metric': "logloss"
1458
+ }
1375
1459
1376
1460
clf = xgb.XGBClassifier(**param_dist)
1377
1461
1378
1462
clf.fit(X_train, y_train,
1379
1463
eval_set=[(X_train, y_train), (X_test, y_test)],
1380
- eval_metric='logloss',
1381
1464
verbose=True)
1382
1465
1383
1466
evals_result = clf.evals_result()
@@ -1388,6 +1471,7 @@ def evals_result(self) -> TrainingCallback.EvalsLog:
1388
1471
1389
1472
{'validation_0': {'logloss': ['0.604835', '0.531479']},
1390
1473
'validation_1': {'logloss': ['0.41965', '0.17686']}}
1474
+
1391
1475
"""
1392
1476
if self .evals_result_ :
1393
1477
evals_result = self .evals_result_
@@ -1534,15 +1618,15 @@ def fit(
1534
1618
'Implementation of the Scikit-Learn API for XGBoost Ranking.' ,
1535
1619
['estimators' , 'model' ],
1536
1620
end_note = '''
1537
- Note
1538
- ----
1539
- A custom objective function is currently not supported by XGBRanker.
1540
- Likewise, a custom metric function is not supported either.
1621
+ .. note::
1622
+
1623
+ A custom objective function is currently not supported by XGBRanker.
1624
+ Likewise, a custom metric function is not supported either.
1541
1625
1542
- Note
1543
- ----
1544
- Query group information is required for ranking tasks by either using the `group`
1545
- parameter or `qid` parameter in `fit` method.
1626
+ .. note::
1627
+
1628
+ Query group information is required for ranking tasks by either using the
1629
+ `group` parameter or `qid` parameter in `fit` method.
1546
1630
1547
1631
Before fitting the model, your data need to be sorted by query group. When fitting
1548
1632
the model, you need to provide an additional array that contains the size of each
@@ -1644,22 +1728,10 @@ def fit(
1644
1728
eval_qid :
1645
1729
A list in which ``eval_qid[i]`` is the array containing query ID of ``i``-th
1646
1730
pair in **eval_set**.
1647
- eval_metric :
1648
- If a str, should be a built-in evaluation metric to use. See
1649
- doc/parameter.rst.
1650
- If a list of str, should be the list of multiple built-in evaluation metrics
1651
- to use. The custom evaluation metric is not yet supported for the ranker.
1652
- early_stopping_rounds :
1653
- Activates early stopping. Validation metric needs to improve at least once in
1654
- every **early_stopping_rounds** round(s) to continue training. Requires at
1655
- least one item in **eval_set**.
1656
- The method returns the model from the last iteration (not the best one). If
1657
- there's more than one item in **eval_set**, the last entry will be used for
1658
- early stopping.
1659
- If there's more than one metric in **eval_metric**, the last metric will be
1660
- used for early stopping.
1661
- If early stopping occurs, the model will have three additional fields:
1662
- ``clf.best_score``, ``clf.best_iteration`` and ``clf.best_ntree_limit``.
1731
+ eval_metric : str, list of str, optional
1732
+ The custom evaluation metric is not yet supported for the ranker.
1733
+ early_stopping_rounds : int
1734
+ Deprecated, use `early_stopping_rounds` in constructor instead.
1663
1735
verbose :
1664
1736
If `verbose` and an evaluation set is used, writes the evaluation metric
1665
1737
measured on the validation set to stderr.
@@ -1724,7 +1796,9 @@ def fit(
1724
1796
evals_result : TrainingCallback .EvalsLog = {}
1725
1797
params = self .get_xgb_params ()
1726
1798
1727
- model , feval , params = self ._configure_fit (xgb_model , eval_metric , params )
1799
+ model , feval , params , early_stopping_rounds = self ._configure_fit (
1800
+ xgb_model , eval_metric , params , early_stopping_rounds
1801
+ )
1728
1802
if callable (feval ):
1729
1803
raise ValueError (
1730
1804
'Custom evaluation metric is not yet supported for XGBRanker.'
0 commit comments