
Commit 4212aba

Implement transform to reduce CPU/GPU code duplication.
* Implement Transform class.
* Add tests for softmax.
* Use Transform in regression, softmax and hinge objectives, except for Cox.
* Mark old gpu objective functions deprecated.
* static_assert for softmax.
* Split up multi-gpu tests.
1 parent baef574 commit 4212aba

31 files changed (+1513, -997 lines)
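The Transform class itself lives in src/common/transform.h, which is not among the hunks shown below. The sketch that follows is a hypothetical, simplified illustration of the idea named in the commit message: write the per-element objective math once as a host/device functor and let a small dispatcher run it either as a plain CPU loop or as a CUDA kernel. The names RunOnCpu, RunKernel and SquareOp are made up for illustration and are not the commit's actual Transform API.

```cpp
// Hypothetical sketch of the "one functor, two backends" idea;
// not the commit's actual Transform class.
#include <cstddef>
#include <vector>

#if defined(__CUDACC__)
#define HOST_DEV __host__ __device__
#else
#define HOST_DEV
#endif

// The per-element operation is written once and marked callable from both
// host and device, the same way the refactored objectives share their math.
struct SquareOp {
  float* data;
  HOST_DEV void operator()(std::size_t i) const { data[i] *= data[i]; }
};

// CPU backend: an ordinary loop over the same functor.
template <typename Functor>
void RunOnCpu(std::size_t n, Functor f) {
  for (std::size_t i = 0; i < n; ++i) { f(i); }
}

#if defined(__CUDACC__)
// GPU backend: the same functor launched as a CUDA kernel.
template <typename Functor>
__global__ void RunKernel(std::size_t n, Functor f) {
  std::size_t i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) { f(i); }
}
#endif

int main() {
  std::vector<float> values(16, 3.0f);
  RunOnCpu(values.size(), SquareOp{values.data()});  // every element becomes 9
  // Under NVCC, with device memory, the same SquareOp could instead be passed
  // to RunKernel<<<blocks, threads>>>(n, op).
}
```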

src/common/common.cc

Lines changed: 3 additions & 1 deletion
@@ -1,9 +1,11 @@
 /*!
- * Copyright 2015 by Contributors
+ * Copyright 2015-2018 by Contributors
  * \file common.cc
  * \brief Enable all kinds of global variables in common.
  */
 #include <dmlc/thread_local.h>
+
+#include "common.h"
 #include "./random.h"

 namespace xgboost {

src/common/common.cu

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@ int AllVisibleImpl::AllVisible() {
     // When compiled with CUDA but running on CPU only device,
     // cudaGetDeviceCount will fail.
     dh::safe_cuda(cudaGetDeviceCount(&n_visgpus));
-  } catch(const std::exception& e) {
+  } catch(const thrust::system::system_error& err) {
     return 0;
   }
   return n_visgpus;

src/common/common.h

Lines changed: 21 additions & 15 deletions
@@ -1,5 +1,5 @@
 /*!
- * Copyright 2015 by Contributors
+ * Copyright 2015-2018 by Contributors
  * \file common.h
  * \brief Common utilities
  */
@@ -19,6 +19,13 @@
 #if defined(__CUDACC__)
 #include <thrust/system/cuda/error.h>
 #include <thrust/system_error.h>
+
+#define WITH_CUDA() true
+
+#else
+
+#define WITH_CUDA() false
+
 #endif

 namespace dh {
@@ -29,11 +36,11 @@ namespace dh {
 #define safe_cuda(ans) ThrowOnCudaError((ans), __FILE__, __LINE__)

 inline cudaError_t ThrowOnCudaError(cudaError_t code, const char *file,
-                                    int line) {
+                                    int line) {
   if (code != cudaSuccess) {
-    throw thrust::system_error(code, thrust::cuda_category(),
-                               std::string{file} + "(" +  // NOLINT
-                               std::to_string(line) + ")");
+    LOG(FATAL) << thrust::system_error(code, thrust::cuda_category(),
+                                       std::string{file} + ": " +  // NOLINT
+                                       std::to_string(line)).what();
   }
   return code;
 }
@@ -70,13 +77,13 @@ inline std::string ToString(const T& data) {
  */
 class Range {
  public:
+  using DifferenceType = int64_t;
+
   class Iterator {
     friend class Range;

    public:
-    using DifferenceType = int64_t;
-
-    XGBOOST_DEVICE int64_t operator*() const { return i_; }
+    XGBOOST_DEVICE DifferenceType operator*() const { return i_; }
     XGBOOST_DEVICE const Iterator &operator++() {
       i_ += step_;
       return *this;
@@ -97,8 +104,8 @@ class Range {
     XGBOOST_DEVICE void Step(DifferenceType s) { step_ = s; }

    protected:
-    XGBOOST_DEVICE explicit Iterator(int64_t start) : i_(start) {}
-    XGBOOST_DEVICE explicit Iterator(int64_t start, int step) :
+    XGBOOST_DEVICE explicit Iterator(DifferenceType start) : i_(start) {}
+    XGBOOST_DEVICE explicit Iterator(DifferenceType start, DifferenceType step) :
        i_{start}, step_{step} {}

    public:
@@ -109,9 +116,10 @@ class Range {
   XGBOOST_DEVICE Iterator begin() const { return begin_; }  // NOLINT
   XGBOOST_DEVICE Iterator end() const { return end_; }  // NOLINT

-  XGBOOST_DEVICE Range(int64_t begin, int64_t end)
+  XGBOOST_DEVICE Range(DifferenceType begin, DifferenceType end)
       : begin_(begin), end_(end) {}
-  XGBOOST_DEVICE Range(int64_t begin, int64_t end, Iterator::DifferenceType step)
+  XGBOOST_DEVICE Range(DifferenceType begin, DifferenceType end,
+                       DifferenceType step)
       : begin_(begin, step), end_(end) {}

   XGBOOST_DEVICE bool operator==(const Range& other) const {
@@ -121,9 +129,7 @@ class Range {
     return !(*this == other);
   }

-  XGBOOST_DEVICE void Step(Iterator::DifferenceType s) { begin_.Step(s); }
-
-  XGBOOST_DEVICE Iterator::DifferenceType GetStep() const { return begin_.step_; }
+  XGBOOST_DEVICE void Step(DifferenceType s) { begin_.Step(s); }

  private:
   Iterator begin_;
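A small host-side usage sketch of the refactored Range, assuming the in-tree include path and the xgboost::common namespace used elsewhere in this commit: DifferenceType is now declared once on Range itself rather than on the nested Iterator, and all constructor parameters use it. The WITH_CUDA() macro added in the same file is also shown.

```cpp
// Minimal sketch; assumes common.h is included from the xgboost source tree.
#include <iostream>

#include "src/common/common.h"  // xgboost::common::Range (path assumed)

int main() {
  using xgboost::common::Range;

  // begin, end and step are all Range::DifferenceType (int64_t) now.
  Range stepped{0, 10, /*step=*/2};
  for (Range::DifferenceType i : stepped) {
    std::cout << i << " ";  // prints: 0 2 4 6 8
  }
  std::cout << "\n";

  // WITH_CUDA() expands to true in CUDA translation units, false otherwise.
  std::cout << "built with CUDA: " << WITH_CUDA() << "\n";
}
```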

src/common/device_helpers.cuh

Lines changed: 20 additions & 1 deletion
@@ -9,6 +9,7 @@
 #include <xgboost/logging.h>

 #include "common.h"
+#include "span.h"

 #include <algorithm>
 #include <chrono>
@@ -955,7 +956,7 @@ class SaveCudaContext {
   // cudaGetDevice will fail.
   try {
     safe_cuda(cudaGetDevice(&saved_device_));
-  } catch (thrust::system::system_error & err) {
+  } catch (const thrust::system::system_error & err) {
     saved_device_ = -1;
   }
   func();
@@ -1035,4 +1036,22 @@ ReduceT ReduceShards(std::vector<ShardT> *shards, FunctionT f) {
   };
   return std::accumulate(sums.begin(), sums.end(), ReduceT());
 }
+
+template <typename T,
+          typename IndexT = typename xgboost::common::Span<T>::index_type>
+xgboost::common::Span<T> ToSpan(
+    thrust::device_vector<T>& vec,
+    IndexT offset = 0,
+    IndexT size = -1) {
+  size = size == -1 ? vec.size() : size;
+  CHECK_LE(offset + size, vec.size());
+  return {vec.data().get() + offset, static_cast<IndexT>(size)};
+}
+
+template <typename T>
+xgboost::common::Span<T> ToSpan(thrust::device_vector<T>& vec,
+                                size_t offset, size_t size) {
+  using IndexT = typename xgboost::common::Span<T>::index_type;
+  return ToSpan(vec, static_cast<IndexT>(offset), static_cast<IndexT>(size));
+}
 }  // namespace dh
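A hedged usage sketch for the new ToSpan helpers (compiled as a .cu translation unit; the include path is assumed, dh and xgboost::common are the namespaces shown in the hunk): the defaulted overload treats size = -1 as "the whole vector", and CHECK_LE guards against windows that run past the end.

```cpp
// Sketch only; assumes the xgboost source tree and an NVCC build.
#include <thrust/device_vector.h>

#include "src/common/device_helpers.cuh"  // declares dh::ToSpan (path assumed)

void ToSpanExamples() {
  thrust::device_vector<float> data(100, 1.0f);

  // Whole vector: offset and size take their defaults.
  xgboost::common::Span<float> whole = dh::ToSpan(data);

  // 32-element window starting at element 10, via the size_t overload.
  xgboost::common::Span<float> window = dh::ToSpan(data, size_t{10}, size_t{32});

  // Spans are cheap views; the device_vector still owns the memory.
  static_cast<void>(whole);
  static_cast<void>(window);
}
```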

src/common/host_device_vector.cu

Lines changed: 14 additions & 8 deletions
@@ -116,6 +116,7 @@ struct HostDeviceVectorImpl {
     int ndevices = vec_->distribution_.devices_.Size();
     start_ = vec_->distribution_.ShardStart(new_size, index_);
     proper_size_ = vec_->distribution_.ShardProperSize(new_size, index_);
+    // The size on this device.
     size_t size_d = vec_->distribution_.ShardSize(new_size, index_);
     SetDevice();
     data_.resize(size_d);
@@ -230,15 +231,15 @@ struct HostDeviceVectorImpl {
     CHECK(devices.Contains(device));
     LazySyncDevice(device, GPUAccess::kWrite);
     return {shards_[devices.Index(device)].data_.data().get(),
-        static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
+            static_cast<typename common::Span<T>::index_type>(DeviceSize(device))};
   }

   common::Span<const T> ConstDeviceSpan(int device) {
     GPUSet devices = distribution_.devices_;
     CHECK(devices.Contains(device));
     LazySyncDevice(device, GPUAccess::kRead);
     return {shards_[devices.Index(device)].data_.data().get(),
-        static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
+            static_cast<typename common::Span<const T>::index_type>(DeviceSize(device))};
   }

   size_t DeviceSize(int device) {
@@ -289,7 +290,6 @@ struct HostDeviceVectorImpl {
                               data_h_.size() * sizeof(T),
                               cudaMemcpyHostToDevice));
     } else {
-      //
       dh::ExecuteShards(&shards_, [&](DeviceShard& shard) { shard.GatherTo(begin); });
     }
   }
@@ -304,14 +304,20 @@ struct HostDeviceVectorImpl {

   void Copy(HostDeviceVectorImpl<T>* other) {
     CHECK_EQ(Size(), other->Size());
+    // Data is on host.
     if (perm_h_.CanWrite() && other->perm_h_.CanWrite()) {
       std::copy(other->data_h_.begin(), other->data_h_.end(), data_h_.begin());
-    } else {
-      CHECK(distribution_ == other->distribution_);
-      dh::ExecuteIndexShards(&shards_, [&](int i, DeviceShard& shard) {
-        shard.Copy(&other->shards_[i]);
-      });
+      return;
     }
+    // Data is on device;
+    if (distribution_ != other->distribution_) {
+      distribution_ = GPUDistribution();
+      Reshard(other->Distribution());
+      size_d_ = other->size_d_;
+    }
+    dh::ExecuteIndexShards(&shards_, [&](int i, DeviceShard& shard) {
+      shard.Copy(&other->shards_[i]);
+    });
   }

   void Copy(const std::vector<T>& other) {
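The control flow of the rewritten Copy is easy to lose in the diff, so here is a simplified, self-contained restatement of the policy with hypothetical types (the real logic is the HostDeviceVectorImpl::Copy shown above): copy on the host when both sides hold writable host data, otherwise reshard the destination to the source's distribution and then copy shard by shard.

```cpp
// Simplified sketch of the new copy policy; not the real HostDeviceVectorImpl.
#include <algorithm>
#include <vector>

struct Distribution {
  int granularity = 1;
  bool operator!=(const Distribution& other) const {
    return granularity != other.granularity;
  }
};

struct VecSketch {
  std::vector<float> host_data;  // stands in for data_h_
  bool host_writable = true;     // stands in for perm_h_.CanWrite()
  Distribution distribution;     // stands in for distribution_

  void Reshard(const Distribution& d) {
    distribution = d;  // the real code re-partitions the device shards here
  }

  void Copy(VecSketch* other) {
    // Sizes are assumed equal, mirroring CHECK_EQ(Size(), other->Size()).
    // 1. Both sides have fresh host data: copy on the host and return early.
    if (host_writable && other->host_writable) {
      std::copy(other->host_data.begin(), other->host_data.end(),
                host_data.begin());
      return;
    }
    // 2. Data lives on device. Instead of requiring identical distributions
    //    (the old CHECK), adopt the source's distribution first...
    if (distribution != other->distribution) {
      Reshard(other->distribution);
    }
    // 3. ...then copy shard by shard (ExecuteIndexShards in the real code).
  }
};
```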

src/common/host_device_vector.h

Lines changed: 5 additions & 2 deletions
@@ -111,8 +111,11 @@ class GPUDistribution {
   }

   friend bool operator==(const GPUDistribution& a, const GPUDistribution& b) {
-    return a.devices_ == b.devices_ && a.granularity_ == b.granularity_ &&
-           a.overlap_ == b.overlap_ && a.offsets_ == b.offsets_;
+    bool const res = a.devices_ == b.devices_ &&
+                     a.granularity_ == b.granularity_ &&
+                     a.overlap_ == b.overlap_ &&
+                     a.offsets_ == b.offsets_;
+    return res;
   }

   friend bool operator!=(const GPUDistribution& a, const GPUDistribution& b) {

src/common/math.h

Lines changed: 23 additions & 13 deletions
@@ -11,6 +11,7 @@
 #include <vector>
 #include <cmath>
 #include <algorithm>
+#include <utility>
 #include "avx_helpers.h"

 namespace xgboost {
@@ -29,22 +30,31 @@ inline avx::Float8 Sigmoid(avx::Float8 x) {
 }

 /*!
- * \brief do inplace softmax transformaton on p_rec
- * \param p_rec the input/output vector of the values.
+ * \brief Do inplace softmax transformaton on start to end
+ *
+ * \tparam Iterator Input iterator type
+ *
+ * \param start Start iterator of input
+ * \param end end iterator of input
  */
-inline void Softmax(std::vector<float>* p_rec) {
-  std::vector<float> &rec = *p_rec;
-  float wmax = rec[0];
-  for (size_t i = 1; i < rec.size(); ++i) {
-    wmax = std::max(rec[i], wmax);
+template <typename Iterator>
+XGBOOST_DEVICE inline void Softmax(Iterator start, Iterator end) {
+  static_assert(std::is_same<bst_float,
+                typename std::remove_reference<
+                  decltype(std::declval<Iterator>().operator*())>::type
+                >::value,
+                "Values should be of type bst_float");
+  bst_float wmax = *start;
+  for (Iterator i = start+1; i != end; ++i) {
+    wmax = fmaxf(*i, wmax);
   }
   double wsum = 0.0f;
-  for (float & elem : rec) {
-    elem = std::exp(elem - wmax);
-    wsum += elem;
+  for (Iterator i = start; i != end; ++i) {
+    *i = expf(*i - wmax);
+    wsum += *i;
   }
-  for (float & elem : rec) {
-    elem /= static_cast<float>(wsum);
+  for (Iterator i = start; i != end; ++i) {
+    *i /= static_cast<float>(wsum);
   }
 }

@@ -56,7 +66,7 @@ inline void Softmax(std::vector<float>* p_rec) {
  * \tparam Iterator The type of the iterator.
  */
 template<typename Iterator>
-inline Iterator FindMaxIndex(Iterator begin, Iterator end) {
+XGBOOST_DEVICE inline Iterator FindMaxIndex(Iterator begin, Iterator end) {
   Iterator maxit = begin;
   for (Iterator it = begin; it != end; ++it) {
     if (*it > *maxit) maxit = it;
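A hedged host-side usage sketch of the now iterator-based Softmax (include path and the xgboost::common namespace assumed): the static_assert requires the iterator's value type to be bst_float, and thanks to XGBOOST_DEVICE the same template can also run inside device code.

```cpp
// Sketch only; assumes the xgboost source tree is on the include path.
#include <iostream>
#include <vector>

#include "src/common/math.h"  // xgboost::common::Softmax (path assumed)

int main() {
  // bst_float is XGBoost's float typedef; other value types trip the
  // static_assert added in this commit.
  std::vector<xgboost::bst_float> margin = {1.0f, 2.0f, 3.0f};

  // In-place softmax over an iterator range instead of a std::vector*.
  xgboost::common::Softmax(margin.begin(), margin.end());

  for (auto p : margin) {
    std::cout << p << " ";  // roughly 0.090 0.245 0.665
  }
  std::cout << "\n";
}
```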

src/common/span.h

Lines changed: 5 additions & 5 deletions
@@ -49,7 +49,7 @@
  *
  * https://github.com/Microsoft/GSL/pull/664
  *
- * FIXME: Group these MSVC workarounds into a manageable place.
+ * TODO(trivialfis): Group these MSVC workarounds into a manageable place.
  */
 #if defined(_MSC_VER) && _MSC_VER < 1910

@@ -68,7 +68,7 @@ namespace xgboost {
 namespace common {

 // Usual logging facility is not available inside device code.
-// FIXME: Make dmlc check more generic.
+// TODO(trivialfis): Make dmlc check more generic.
 #define KERNEL_CHECK(cond) \
   do { \
     if (!(cond)) { \
@@ -104,11 +104,11 @@ constexpr detail::ptrdiff_t dynamic_extent = -1;  // NOLINT

 enum class byte : unsigned char {};  // NOLINT

-namespace detail {
-
-template <class ElementType, detail::ptrdiff_t Extent = dynamic_extent>
+template <class ElementType, detail::ptrdiff_t Extent>
 class Span;

+namespace detail {
+
 template <typename SpanType, bool IsConst>
 class SpanIterator {
   using ElementType = typename SpanType::element_type;
