Skip to content

Commit edba724

Browse files
committed
Add test.
1 parent c17de65 commit edba724

File tree

9 files changed

+97
-20
lines changed

9 files changed

+97
-20
lines changed

src/common/error_msg.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@
66
#ifndef XGBOOST_COMMON_ERROR_MSG_H_
77
#define XGBOOST_COMMON_ERROR_MSG_H_
88

9+
#include <cinttypes> // for uint64_t
10+
#include <limits> // for numeric_limits
11+
12+
#include "xgboost/base.h" // for bst_feature_t
13+
#include "xgboost/logging.h"
914
#include "xgboost/string_view.h" // for StringView
1015

1116
namespace xgboost::error {
@@ -35,5 +40,12 @@ constexpr StringView InconsistentMaxBin() {
3540
}
3641

3742
// Message text used to report an unrecognised device type.
constexpr StringView UnknownDevice() { return "Unknown device type."; }
43+
44+
// Check that `n_features` is representable as `bst_feature_t`.
//
// The check passes when n_features <= std::numeric_limits<bst_feature_t>::max();
// otherwise CHECK_LE fails and the message streamed below is attached to the
// failure (how the failure is raised is defined by the logging header).
//
// NOTE(review): the comparison accepts n_features == max, whereas the message
// states that "max features or greater" is unsupported -- confirm which bound
// is the intended one.
inline void MaxFeatureSize(std::uint64_t n_features) {
45+
auto max_n_features = std::numeric_limits<bst_feature_t>::max();
46+
CHECK_LE(n_features, max_n_features)
47+
<< "Unfortunately, XGBoost does not support data matrices with "
48+
<< std::numeric_limits<bst_feature_t>::max() << " features or greater";
49+
}
3850
} // namespace xgboost::error
3951
#endif // XGBOOST_COMMON_ERROR_MSG_H_

src/data/adapter.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <dmlc/data.h>
88

99
#include <algorithm>
10-
#include <cstddef> // std::size_t
10+
#include <cstddef> // for size_t
1111
#include <functional>
1212
#include <limits>
1313
#include <map>
@@ -17,6 +17,7 @@
1717
#include <vector>
1818

1919
#include "../c_api/c_api_error.h"
20+
#include "../common/error_msg.h" // for MaxFeatureSize
2021
#include "../common/math.h"
2122
#include "array_interface.h"
2223
#include "arrow-cdi.h"
@@ -299,10 +300,11 @@ class ArrayAdapter : public detail::SingleBatchDataIter<ArrayAdapterBatch> {
299300
auto j = Json::Load(array_interface);
300301
array_interface_ = ArrayInterface<2>(get<Object const>(j));
301302
batch_ = ArrayAdapterBatch{array_interface_};
303+
error::MaxFeatureSize(this->NumColumns());
302304
}
303-
ArrayAdapterBatch const& Value() const override { return batch_; }
304-
size_t NumRows() const { return array_interface_.Shape(0); }
305-
size_t NumColumns() const { return array_interface_.Shape(1); }
305+
// Returns a reference to the batch held by this adapter (`batch_`).
[[nodiscard]] ArrayAdapterBatch const& Value() const override { return batch_; }
306+
// Row count, taken from index 0 of the array interface's shape.
[[nodiscard]] std::size_t NumRows() const { return array_interface_.Shape(0); }
307+
// Column count, taken from index 1 of the array interface's shape.
[[nodiscard]] std::size_t NumColumns() const { return array_interface_.Shape(1); }
306308

307309
private:
308310
ArrayAdapterBatch batch_;

src/data/proxy_dmatrix.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@
66
#include "proxy_dmatrix.h"
77

88
namespace xgboost::data {
9-
void DMatrixProxy::SetArrayData(char const *c_interface) {
10-
std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter(StringView{c_interface})};
9+
void DMatrixProxy::SetArrayData(StringView interface_str) {
10+
std::shared_ptr<ArrayAdapter> adapter{new ArrayAdapter{interface_str}};
1111
this->batch_ = adapter;
1212
this->Info().num_col_ = adapter->NumColumns();
1313
this->Info().num_row_ = adapter->NumRows();

src/data/proxy_dmatrix.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ class DMatrixProxy : public DMatrix {
6262
#endif // defined(XGBOOST_USE_CUDA)
6363
}
6464

65-
void SetArrayData(char const* c_interface);
65+
void SetArrayData(StringView interface_str);
6666
void SetCSRData(char const* c_indptr, char const* c_indices, char const* c_values,
6767
bst_feature_t n_features, bool on_host);
6868

src/gbm/gbtree.cc

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -616,10 +616,6 @@ void GBTree::InplacePredict(std::shared_ptr<DMatrix> p_m, float missing,
616616
CHECK(configured_);
617617
auto [tree_begin, tree_end] = detail::LayerToTree(model_, layer_begin, layer_end);
618618
CHECK_LE(tree_end, model_.trees.size()) << "Invalid number of trees.";
619-
// fixme: create an option to allow copying data.
620-
// fixme: should we cache the result?
621-
// - We cache the result if the input is DMatrix, otherwise no.
622-
// - scikit-learn needs cache too.
623619
if (p_m->Ctx()->Device() != this->ctx_->Device()) {
624620
LOG(WARNING) << "Falling back to prediction using DMatrix due to mismatched devices. XGBoost "
625621
<< "is running on: " << this->ctx_->DeviceName()

src/learner.cc

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "common/api_entry.h" // for XGBAPIThreadLocalEntry
4141
#include "common/charconv.h" // for to_chars, to_chars_result, NumericLimits, from_...
4242
#include "common/common.h" // for ToString, Split
43+
#include "common/error_msg.h" // for MaxFeatureSize
4344
#include "common/io.h" // for PeekableInStream, ReadAll, FixedSizeStream, Mem...
4445
#include "common/observer.h" // for TrainingObserver
4546
#include "common/random.h" // for GlobalRandom
@@ -763,9 +764,7 @@ class LearnerConfiguration : public Learner {
763764
CHECK(matrix.first.ptr);
764765
CHECK(!matrix.second.ref.expired());
765766
const uint64_t num_col = matrix.first.ptr->Info().num_col_;
766-
CHECK_LE(num_col, static_cast<uint64_t>(std::numeric_limits<unsigned>::max()))
767-
<< "Unfortunately, XGBoost does not support data matrices with "
768-
<< std::numeric_limits<unsigned>::max() << " features or greater";
767+
error::MaxFeatureSize(num_col);
769768
num_feature = std::max(num_feature, static_cast<uint32_t>(num_col));
770769
}
771770

tests/cpp/gbm/test_gbtree.cc

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,20 @@
1-
/*!
2-
* Copyright 2019-2022 XGBoost contributors
1+
/**
2+
* Copyright 2019-2023, XGBoost contributors
33
*/
44
#include <gtest/gtest.h>
55
#include <xgboost/context.h>
6+
#include <xgboost/host_device_vector.h> // for HostDeviceVector
7+
#include <xgboost/learner.h> // for Learner
68

7-
#include "../../../src/data/adapter.h"
8-
#include "../../../src/data/proxy_dmatrix.h"
9+
#include <limits> // for numeric_limits
10+
#include <memory> // for shared_ptr
11+
#include <string> // for string
12+
13+
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
914
#include "../../../src/gbm/gbtree.h"
1015
#include "../filesystem.h" // dmlc::TemporaryDirectory
1116
#include "../helpers.h"
1217
#include "xgboost/base.h"
13-
#include "xgboost/host_device_vector.h"
14-
#include "xgboost/learner.h"
1518
#include "xgboost/predictor.h"
1619

1720
namespace xgboost {

tests/cpp/gbm/test_gbtree.cu

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
#include <xgboost/context.h> // for Context
2+
#include <xgboost/learner.h> // for Learner
3+
#include <xgboost/string_view.h> // for StringView
4+
5+
#include <limits> // for numeric_limits
6+
#include <memory> // for shared_ptr
7+
#include <string> // for string
8+
9+
#include "../../../src/data/adapter.h" // for ArrayAdapter
10+
#include "../../../src/data/proxy_dmatrix.h" // for DMatrixProxy
11+
#include "../helpers.h" // for RandomDataGenerator
12+
13+
namespace xgboost {
14+
// Test helper: train a small model with `tree_method`, then run in-place
// prediction twice on a DMatrixProxy that wraps the training features.
//
//  1. With the learner as trained, the prediction must write a message
//     containing "Falling back" to stderr.
//  2. After switching the learner to tree_method="hist" and gpu_id="-1" and
//     re-configuring, the prediction must write nothing to stderr.
//
// Finally the two prediction vectors must compare equal.
void TestInplaceFallback(std::string tree_method) {
15+
bst_row_t n_samples{1024};
16+
bst_feature_t n_features{32};
17+
HostDeviceVector<float> X_storage;
18+
// X and y are array-interface descriptions of the generated data; the data
// generator is given X_storage / y_storage by pointer.
auto X = RandomDataGenerator{n_samples, n_features, 0.0}.GenerateArrayInterface(&X_storage);
19+
HostDeviceVector<float> y_storage;
20+
auto y = RandomDataGenerator{n_samples, 1u, 0.0}.GenerateArrayInterface(&y_storage);
21+
22+
auto X_adapter = data::ArrayAdapter{StringView{X}};
23+
24+
Context ctx;
25+
// Build the training DMatrix from the adapter and attach y as "label".
// NOTE(review): the NaN argument is presumably the missing-value marker -- confirm.
std::shared_ptr<DMatrix> Xy{
26+
DMatrix::Create(&X_adapter, std::numeric_limits<float>::quiet_NaN(), ctx.Threads())};
27+
Xy->SetInfo("label", y);
28+
29+
std::unique_ptr<Learner> learner{Learner::Create({Xy})};
30+
learner->SetParam("tree_method", tree_method);
31+
// Three boosting iterations on the training matrix.
for (std::int32_t i = 0; i < 3; ++i) {
32+
learner->UpdateOneIter(i, Xy);
33+
}
34+
35+
// Proxy DMatrix fed with the same feature array that was used for training.
std::shared_ptr<DMatrix> p_m{new data::DMatrixProxy};
36+
auto proxy = std::dynamic_pointer_cast<data::DMatrixProxy>(p_m);
37+
proxy->SetArrayData(StringView{X});
38+
39+
HostDeviceVector<float>* out_predt{nullptr};
40+
41+
// First prediction: capture stderr and require the "Falling back" text.
// NOTE(review): expected to be the device-mismatch warning emitted by
// GBTree::InplacePredict -- confirm.
::testing::internal::CaptureStderr();
42+
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
43+
&out_predt, 0, 0);
44+
auto output = testing::internal::GetCapturedStderr();
45+
ASSERT_NE(output.find("Falling back"), std::string::npos);
46+
47+
// Reconfigure the learner before the second prediction.
// NOTE(review): gpu_id = -1 presumably selects the CPU -- confirm.
learner->SetParam("tree_method", "hist");
48+
learner->SetParam("gpu_id", "-1");
49+
learner->Configure();
50+
HostDeviceVector<float>* out_predt_1{nullptr};
51+
52+
// Second prediction: nothing may be written to stderr this time.
::testing::internal::CaptureStderr();
53+
learner->InplacePredict(p_m, PredictionType::kValue, std::numeric_limits<float>::quiet_NaN(),
54+
&out_predt_1, 0, 0);
55+
output = testing::internal::GetCapturedStderr();
56+
57+
ASSERT_TRUE(output.empty());
58+
59+
// Both predictions must yield identical host vectors.
ASSERT_EQ(out_predt->ConstHostVector(), out_predt_1->ConstHostVector());
60+
}
61+
62+
// Runs the in-place prediction fallback check with a model trained using tree_method "gpu_hist".
TEST(GBTree, InplacePredictFallback) { TestInplaceFallback("gpu_hist"); }
63+
} // namespace xgboost

tests/cpp/predictor/test_predictor.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,11 @@
88
#include <xgboost/data.h> // for DMatrix, BatchIterator, BatchSet, MetaInfo
99
#include <xgboost/host_device_vector.h> // for HostDeviceVector
1010
#include <xgboost/predictor.h> // for PredictionCacheEntry, Predictor, Predic...
11+
#include <xgboost/string_view.h> // for StringView
1112

1213
#include <algorithm> // for max
1314
#include <limits> // for numeric_limits
15+
#include <memory> // for shared_ptr
1416
#include <unordered_map> // for unordered_map
1517

1618
#include "../../../src/common/bitfield.h" // for LBitField32

0 commit comments

Comments
 (0)