
Commit 85e17db

Pass pointer to model parameters.
This PR de-duplicates most of the model parameters, except the one in `tree_model.h`. One difficulty is that `base_score` is a model property but can be changed at runtime by the objective function. Hence, when performing model IO, we need to save the value provided by the user instead of the one transformed by the objective. Here we introduce an immutable `LearnerModelParam` that represents the model parameters' values after configuration.
1 parent 979f74d commit 85e17db

34 files changed: +628 −405 lines
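The overall direction of the change — boosters and other components receive pointers to learner-owned parameter objects rather than copies of individual fields such as `base_margin` — can be illustrated with a small, self-contained sketch (all names below are placeholders, not xgboost code):

// Self-contained illustration with placeholder names (not xgboost code):
// the component keeps a non-owning pointer to configuration owned by its
// creator, so there is a single source of truth instead of copied fields.
#include <iostream>

struct ModelParamSketch {
  float base_score {0.5f};
  unsigned num_output_group {1};
};

class BoosterSketch {
 public:
  explicit BoosterSketch(ModelParamSketch const* param) : param_{param} {}
  float InitPrediction() const { return param_->base_score; }

 private:
  ModelParamSketch const* param_;  // non-owning; lifetime managed by the creator
};

int main() {
  ModelParamSketch param;         // owned by the "learner"
  BoosterSketch booster{&param};  // the booster only stores a pointer
  std::cout << booster.InitPrediction() << "\n";
  return 0;
}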

cmake/Version.cmake

Lines changed: 1 addition & 1 deletion
@@ -5,5 +5,5 @@ function (write_version)
     ${xgboost_SOURCE_DIR}/include/xgboost/version_config.h @ONLY)
   configure_file(
     ${xgboost_SOURCE_DIR}/cmake/Python_version.in
-    ${xgboost_SOURCE_DIR}/python-package/xgboost/VERSION)
+    ${xgboost_SOURCE_DIR}/python-package/xgboost/VERSION @ONLY)
 endfunction (write_version)

include/xgboost/base.h

Lines changed: 1 addition & 1 deletion
@@ -112,7 +112,7 @@ using bst_float = float; // NOLINT
 
 /*! \brief Type for data column (feature) index. */
 using bst_feature_t = uint32_t;  // NOLINT
-/*! \breif Type for data row index.
+/*! \brief Type for data row index.
  *
  * Be careful `std::size_t' is implementation-defined. Meaning that the binary
  * representation of DMatrix might not be portable across platform. Booster model should

include/xgboost/data.h

Lines changed: 0 additions & 2 deletions
@@ -22,8 +22,6 @@
 #include <vector>
 
 namespace xgboost {
-// forward declare learner.
-class LearnerImpl;
 // forward declare dmatrix.
 class DMatrix;
 

include/xgboost/gbm.h

Lines changed: 22 additions & 14 deletions
@@ -11,10 +11,8 @@
 #include <dmlc/registry.h>
 #include <xgboost/base.h>
 #include <xgboost/data.h>
-#include <xgboost/objective.h>
-#include <xgboost/feature_map.h>
-#include <xgboost/generic_parameters.h>
 #include <xgboost/host_device_vector.h>
+#include <xgboost/model.h>
 
 #include <vector>
 #include <utility>
@@ -23,6 +21,14 @@
 #include <memory>
 
 namespace xgboost {
+
+class Json;
+class FeatureMap;
+class ObjFunction;
+
+struct GenericParameter;
+struct LearnerModelParam;
+
 /*!
  * \brief interface of gradient boosting model.
  */
@@ -117,13 +123,14 @@ class GradientBooster {
   * \param condition_feature feature to condition on (i.e. fix) during calculations
   */
  virtual void PredictContribution(DMatrix* dmat,
-                                  std::vector<bst_float>* out_contribs,
-                                  unsigned ntree_limit = 0, bool approximate = false,
-                                  int condition = 0, unsigned condition_feature = 0) = 0;
+                                  std::vector<bst_float>* out_contribs,
+                                  unsigned ntree_limit = 0,
+                                  bool approximate = false, int condition = 0,
+                                  unsigned condition_feature = 0) = 0;
 
  virtual void PredictInteractionContributions(DMatrix* dmat,
-                                              std::vector<bst_float>* out_contribs,
-                                              unsigned ntree_limit, bool approximate) = 0;
+                                              std::vector<bst_float>* out_contribs,
+                                              unsigned ntree_limit, bool approximate) = 0;
 
  /*!
  * \brief dump the model in the requested format
@@ -136,21 +143,22 @@ class GradientBooster {
                         bool with_stats,
                         std::string format) const = 0;
  /*!
- * \brief Whether the current booster use GPU.
+ * \brief Whether the current booster uses GPU.
  */
  virtual bool UseGPU() const = 0;
  /*!
  * \brief create a gradient booster from given name
  * \param name name of gradient booster
+ * \param generic_param Pointer to runtime parameters
+ * \param learner_model_param pointer to global model parameters
  * \param cache_mats The cache data matrix of the Booster.
- * \param base_margin The base margin of prediction.
  * \return The created booster.
  */
  static GradientBooster* Create(
      const std::string& name,
-     GenericParameter const* gparam,
-     const std::vector<std::shared_ptr<DMatrix> >& cache_mats,
-     bst_float base_margin);
+     GenericParameter const* generic_param,
+     LearnerModelParam const* learner_model_param,
+     const std::vector<std::shared_ptr<DMatrix> >& cache_mats);
 
  static void AssertGPUSupport() {
 #ifndef XGBOOST_USE_CUDA
@@ -166,7 +174,7 @@ struct GradientBoosterReg
     : public dmlc::FunctionRegEntryBase<
         GradientBoosterReg,
         std::function<GradientBooster* (const std::vector<std::shared_ptr<DMatrix> > &cached_mats,
-                                        bst_float base_margin)> > {
+                                        LearnerModelParam const* learner_model_param)> > {
 };
 
 /*!
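The forward declarations added above (`Json`, `FeatureMap`, `ObjFunction`, `GenericParameter`, `LearnerModelParam`) are sufficient because this header only refers to those types through pointers and references. A minimal self-contained sketch of the pattern, with placeholder names:

// Placeholder names, not xgboost code: a declaration that only uses a type
// through a pointer compiles against an incomplete (forward-declared) type.
class ObjFunctionSketch;  // forward declaration, no #include needed here

class GradientBoosterSketch {
 public:
  // Pointer parameters are fine with an incomplete type.
  void Configure(ObjFunctionSketch const* obj);
};

// Only the implementation file needs the full definition; a trivial one is
// provided here so the sketch is a complete program.
class ObjFunctionSketch {};

void GradientBoosterSketch::Configure(ObjFunctionSketch const* /*obj*/) {}

int main() {
  GradientBoosterSketch gbm;
  ObjFunctionSketch obj;
  gbm.Configure(&obj);
  return 0;
}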

include/xgboost/generic_parameters.h

Lines changed: 13 additions & 3 deletions
@@ -13,8 +13,9 @@
 namespace xgboost {
 struct GenericParameter : public XGBoostParameter<GenericParameter> {
   // Constant representing the device ID of CPU.
-  static int constexpr kCpuId = -1;
+  static int32_t constexpr kCpuId = -1;
 
+ public:
   // stored random seed
   int seed;
   // whether seed the PRNG each iteration
@@ -26,8 +27,7 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
   int gpu_id;
   // gpu page size in external memory mode, 0 means using the default.
   size_t gpu_page_size;
-
-  void ConfigureGpuId(bool require_gpu);
+  bool enable_experimental_json_serialization {false};
 
   void CheckDeprecated() {
     if (this->n_gpus != 0) {
@@ -36,6 +36,12 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
           << this->__MANAGER__()->Find("n_gpus")->GetFieldInfo().description;
     }
   }
+  /*!
+   * \brief Configure the parameter `gpu_id'.
+   *
+   * \param require_gpu Whether GPU is explicitly required from user.
+   */
+  void ConfigureGpuId(bool require_gpu);
 
   // declare parameters
   DMLC_DECLARE_PARAMETER(GenericParameter) {
@@ -60,6 +66,10 @@ struct GenericParameter : public XGBoostParameter<GenericParameter> {
         .set_default(0)
         .set_lower_bound(0)
         .describe("GPU page size when running in external memory mode.");
+    DMLC_DECLARE_FIELD(enable_experimental_json_serialization)
+        .set_default(false)
+        .describe("Enable using JSON for memory serialization (Python Pickle, "
+                  "rabit checkpoints etc.).");
     DMLC_DECLARE_FIELD(n_gpus)
         .set_default(0)
         .set_range(0, 1)
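If the new flag is exposed through the regular booster parameter interface (an assumption, not confirmed by this diff), it could be toggled from the public C API along these lines:

// Hedged usage sketch: assumes a libxgboost build containing this commit and
// that the flag is settable like any other booster parameter.
#include <xgboost/c_api.h>
#include <cstdio>

int main() {
  BoosterHandle booster = nullptr;
  if (XGBoosterCreate(nullptr, 0, &booster) != 0) {
    std::fprintf(stderr, "%s\n", XGBGetLastError());
    return 1;
  }
  // Opt in to JSON-based in-memory serialization (Python pickling, rabit
  // checkpoints), as described by the parameter's help string above.
  XGBoosterSetParam(booster, "enable_experimental_json_serialization", "true");
  XGBoosterFree(booster);
  return 0;
}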

include/xgboost/learner.h

Lines changed: 28 additions & 8 deletions
@@ -9,13 +9,10 @@
 #define XGBOOST_LEARNER_H_
 
 #include <rabit/rabit.h>
-
 #include <xgboost/base.h>
-#include <xgboost/gbm.h>
-#include <xgboost/metric.h>
-#include <xgboost/objective.h>
 #include <xgboost/feature_map.h>
 #include <xgboost/generic_parameters.h>
+#include <xgboost/host_device_vector.h>
 #include <xgboost/model.h>
 
 #include <utility>
@@ -26,6 +23,12 @@
 
 namespace xgboost {
 
+class Metric;
+class GradientBooster;
+class ObjFunction;
+class DMatrix;
+class Json;
+
 /*!
  * \brief Learner class that does training and prediction.
  * This is the user facing module of xgboost training.
@@ -45,7 +48,7 @@ namespace xgboost {
 class Learner : public Model, public rabit::Serializable {
  public:
  /*! \brief virtual destructor */
- ~Learner() override = default;
+ ~Learner() override;
  /*!
  * \brief Configure Learner based on set parameters.
  */
@@ -180,16 +183,33 @@ class Learner : public Model, public rabit::Serializable {
  virtual const std::map<std::string, std::string>& GetConfigurationArguments() const = 0;
 
  protected:
- /*! \brief internal base score of the model */
- bst_float base_score_;
  /*! \brief objective function */
  std::unique_ptr<ObjFunction> obj_;
  /*! \brief The gradient booster used by the model*/
  std::unique_ptr<GradientBooster> gbm_;
  /*! \brief The evaluation metrics used to evaluate the model. */
  std::vector<std::unique_ptr<Metric> > metrics_;
  /*! \brief Training parameter. */
- GenericParameter generic_param_;
+ GenericParameter generic_parameters_;
+};
+
+struct LearnerModelParamLegacy;
+
+/*
+ * \brief Basic Model Parameters, used to describe the booster.
+ */
+struct LearnerModelParam {
+  /* \brief global bias */
+  bst_float base_score;
+  /* \brief number of features */
+  uint32_t num_feature;
+  /* \brief number of classes, if it is multi-class classification */
+  uint32_t num_output_group;
+
+  LearnerModelParam() : base_score {0.5}, num_feature{0}, num_output_group{0} {}
+  // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
+  // this one as an immutable copy.
+  LearnerModelParam(LearnerModelParamLegacy const& user_param, float base_margin);
 };
 
 } // namespace xgboost
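A minimal sketch of the split introduced above (field names loosely follow the diff; the bodies are assumptions, not the actual implementation): the legacy parameter keeps the user-supplied `base_score` for binary IO, while the immutable copy carries the objective-transformed value used during boosting.

// Illustrative only -- not the code from this commit.
#include <cstdint>

namespace sketch {

struct LearnerModelParamLegacy {  // mutable, user-configured, serialized as-is
  float base_score {0.5f};
  uint32_t num_feature {0};
  uint32_t num_class {0};
};

struct LearnerModelParam {        // immutable snapshot built after configuration
  float base_score;
  uint32_t num_feature;
  uint32_t num_output_group;

  LearnerModelParam(LearnerModelParamLegacy const& user, float transformed_base)
      : base_score{transformed_base},
        num_feature{user.num_feature},
        num_output_group{user.num_class == 0 ? 1u : user.num_class} {}
};

}  // namespace sketch

int main() {
  sketch::LearnerModelParamLegacy user;
  // Pretend the objective mapped base_score through its link function.
  sketch::LearnerModelParam runtime{user, /*transformed_base=*/0.0f};
  return runtime.num_output_group == 1u ? 0 : 1;
}

Relatedly, `~Learner()` is now only declared in this header: with `GradientBooster`, `Metric`, and friends forward-declared and held through `std::unique_ptr`, a defaulted destructor in the header would require complete types, so the definition presumably moves to the implementation file.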

include/xgboost/objective.h

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2014 by Contributors
+ * Copyright 2014-2019 by Contributors
  * \file objective.h
  * \brief interface of objective function used by xgboost.
  * \author Tianqi Chen, Kailong Chen

include/xgboost/tree_model.h

Lines changed: 5 additions & 2 deletions
@@ -27,6 +27,9 @@ namespace xgboost {
 
 struct PathElement; // forward declaration
 
+class Json;
+// FIXME(trivialfis): Once binary IO is gone, make this parameter internal as it should
+// not be configured by users.
 /*! \brief meta parameters of the tree */
 struct TreeParam : public dmlc::Parameter<TreeParam> {
   /*! \brief (Deprecated) number of start root */
@@ -36,7 +39,7 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
   /*!\brief number of deleted nodes */
   int num_deleted;
   /*! \brief maximum depth, this is a statistics of the tree */
-  int max_depth;
+  int deprecated_max_depth;
   /*! \brief number of features used for tree construction */
   int num_feature;
   /*!
@@ -67,7 +70,7 @@ struct TreeParam : public dmlc::Parameter<TreeParam> {
 
   bool operator==(const TreeParam& b) const {
     return num_nodes == b.num_nodes &&
-           num_deleted == b.num_deleted && max_depth == b.max_depth &&
+           num_deleted == b.num_deleted &&
            num_feature == b.num_feature &&
            size_leaf_vector == b.size_leaf_vector;
   }

include/xgboost/tree_updater.h

Lines changed: 3 additions & 0 deletions
@@ -22,6 +22,9 @@
 #include <string>
 
 namespace xgboost {
+
+class Json;
+
 /*!
  * \brief interface of tree update module, that performs update of a tree.
  */

src/common/hist_util.cc

Lines changed: 6 additions & 2 deletions
@@ -44,11 +44,15 @@ void HistogramCuts::Build(DMatrix* dmat, uint32_t const max_num_bins) {
   float constexpr kSparsityThreshold = 0.0005;
   // FIXME(trivialfis): Distributed environment is not supported.
   if (sparsity < kSparsityThreshold && (!rabit::IsDistributed())) {
-    LOG(INFO) << "Building quantile cut on a sparse dataset.";
     SparseCuts cuts(this);
     cuts.Build(dmat, max_num_bins);
   } else {
-    LOG(INFO) << "Building quantile cut on a dense dataset or distributed environment.";
+    if (rabit::IsDistributed() && sparsity < kSparsityThreshold) {
+      LOG(WARNING) << "Building quantile cuts with a sparse dataset on distributed "
+                   << "environment, which may incur higher memory usage and longer "
+                   << "build time.";
+    }
+
     DenseCuts cuts(this);
     cuts.Build(dmat, max_num_bins);
   }
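Restated as a predicate (the threshold value is taken from the diff; the helper name is a hypothetical placeholder, not xgboost code): sparse-column cuts are used only for very sparse data and never in the distributed setting, which now merely logs a warning.

// Hypothetical helper summarizing the branch above.
bool UseSparseCuts(float sparsity, bool distributed) {
  constexpr float kSparsityThreshold = 0.0005f;  // value from the diff
  return sparsity < kSparsityThreshold && !distributed;
}

int main() {
  return UseSparseCuts(0.0001f, /*distributed=*/false) ? 0 : 1;
}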

src/common/io.cc

Lines changed: 1 addition & 1 deletion
@@ -136,9 +136,9 @@ std::string LoadSequentialFile(std::string fname) {
 
   buffer.resize(fsize + 1);
   fread(&buffer[0], 1, fsize, f);
-  buffer.back() = '\0';
   fclose(f);
 #endif // defined(__unix__)
+  buffer.back() = '\0';
   return buffer;
 }
 
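The moved assignment makes the trailing '\0' unconditional rather than part of a single platform branch; a self-contained sketch of the resulting contract (placeholder code, not the xgboost implementation):

// Illustrative sketch: read a whole file into a string and always append a
// terminating '\0', independent of how the platform-specific branch read it.
#include <cstddef>
#include <cstdio>
#include <string>

std::string LoadWholeFile(const char* fname) {
  std::string buffer;
  std::FILE* f = std::fopen(fname, "rb");
  if (f == nullptr) { return buffer; }
  std::fseek(f, 0, SEEK_END);
  long fsize = std::ftell(f);
  std::fseek(f, 0, SEEK_SET);
  if (fsize < 0) { std::fclose(f); return buffer; }
  buffer.resize(static_cast<std::size_t>(fsize) + 1);
  std::fread(&buffer[0], 1, static_cast<std::size_t>(fsize), f);
  std::fclose(f);
  buffer.back() = '\0';  // platform-independent, hence written outside any #ifdef
  return buffer;
}

int main() {
  std::string content = LoadWholeFile(__FILE__);
  return content.empty() ? 1 : 0;
}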
