@@ -85,7 +85,7 @@ class ColumnMatrix {
85
85
for (int32_t fid = 0 ; fid < nfeature; ++fid) {
86
86
CHECK_LE (gmat.cut .Ptrs ()[fid + 1 ] - gmat.cut .Ptrs ()[fid], max_val);
87
87
}
88
- bool all_dense = true ;
88
+ bool all_dense = gmat. IsDense () ;
89
89
gmat.GetFeatureCounts (&feature_counts_[0 ]);
90
90
// classify features
91
91
for (int32_t fid = 0 ; fid < nfeature; ++fid) {
@@ -120,8 +120,9 @@ class ColumnMatrix {
120
120
index_.resize (boundary_[nfeature - 1 ].index_end );
121
121
type_size_ = 1 << gmat.index .getBinBound ();
122
122
index_.resize (boundary_[nfeature - 1 ].index_end * type_size_);
123
- if (!all_dense)
123
+ if (!all_dense) {
124
124
row_ind_.resize (boundary_[nfeature - 1 ].row_ind_end );
125
+ }
125
126
126
127
// store least bin id for each feature
127
128
index_base_.resize (nfeature);
@@ -148,10 +149,8 @@ class ColumnMatrix {
148
149
num_nonzeros.resize (nfeature);
149
150
std::fill (num_nonzeros.begin (), num_nonzeros.end (), 0 );
150
151
151
- if (all_dense)
152
- {
153
- switch (gmat.index .getBinBound ())
154
- {
152
+ if (all_dense) {
153
+ switch (gmat.index .getBinBound ()) {
155
154
case POWER_OF_TWO_8:
156
155
SetIndexAllDense (gmat.index .data <uint8_t >(), gmat, nrow);
157
156
break ;
@@ -162,42 +161,36 @@ class ColumnMatrix {
162
161
SetIndexAllDense (gmat.index .data <uint32_t >(), gmat, nrow);
163
162
break ;
164
163
}
165
- }
166
-
167
- else
168
- {
169
- switch (gmat.index .getBinBound ())
170
- {
164
+ } else {
165
+ switch (gmat.index .getBinBound ()) {
171
166
case POWER_OF_TWO_8:
172
- SetIndex (gmat.index .data <uint8_t >(), gmat, nrow, nfeature);
167
+ SetIndex (gmat.index .data <uint8_t >(), gmat. index . disp (), gmat , nrow, nfeature);
173
168
break ;
174
169
case POWER_OF_TWO_16:
175
- SetIndex (gmat.index .data <uint16_t >(), gmat, nrow, nfeature);
170
+ SetIndex (gmat.index .data <uint16_t >(), gmat. index . disp (), gmat , nrow, nfeature);
176
171
break ;
177
172
case POWER_OF_TWO_32:
178
- SetIndex (gmat.index .data <uint32_t >(), gmat, nrow, nfeature);
173
+ SetIndex (gmat.index .data <uint32_t >(), gmat. index . disp (), gmat , nrow, nfeature);
179
174
break ;
180
175
}
181
176
}
182
-
183
-
184
177
}
185
178
186
179
/* Fetch an individual column. This code should be used with XGBOOST_TYPE_SWITCH
187
180
to determine type of bin id's */
188
181
template <typename T>
189
182
inline Column<T> GetColumn (unsigned fid) const {
190
- Column<T> c (type_[fid], (T*)(&index_[boundary_[fid].index_begin * type_size_]), index_base_[fid],
191
- (type_[fid] == ColumnType::kSparseColumn ?
192
- &row_ind_[boundary_[fid].row_ind_begin ] : nullptr ),
193
- boundary_[fid].index_end - boundary_[fid].index_begin );
183
+ Column<T> c (type_[fid],
184
+ reinterpret_cast <const T*>(&index_[boundary_[fid].index_begin * type_size_]),
185
+ index_base_[fid], (type_[fid] == ColumnType::kSparseColumn ?
186
+ &row_ind_[boundary_[fid].row_ind_begin ] : nullptr ),
187
+ boundary_[fid].index_end - boundary_[fid].index_begin );
194
188
return c;
195
189
}
196
190
197
191
template <typename T>
198
- inline void SetIndexAllDense (T* index, const GHistIndexMatrix& gmat, const size_t nrow)
199
- {
200
- T* local_index = (T*)(&index_[0 ]);
192
+ inline void SetIndexAllDense (T* index, const GHistIndexMatrix& gmat, const size_t nrow) {
193
+ T* local_index = reinterpret_cast <T*>(&index_[0 ]);
201
194
for (size_t rid = 0 ; rid < nrow; ++rid) {
202
195
const size_t ibegin = gmat.row_ptr [rid];
203
196
const size_t iend = gmat.row_ptr [rid + 1 ];
@@ -209,43 +202,91 @@ class ColumnMatrix {
209
202
}
210
203
}
211
204
}
205
+
206
+ inline void SetIndexAllDense (uint32_t * index, const GHistIndexMatrix& gmat, const size_t nrow) {
207
+ uint32_t * local_index = reinterpret_cast <uint32_t *>(&index_[0 ]);
208
+ for (size_t rid = 0 ; rid < nrow; ++rid) {
209
+ const size_t ibegin = gmat.row_ptr [rid];
210
+ const size_t iend = gmat.row_ptr [rid + 1 ];
211
+ size_t fid = 0 ;
212
+ size_t jp = 0 ;
213
+ for (size_t i = ibegin; i < iend; ++i, ++jp) {
214
+ uint32_t * begin = &local_index[boundary_[jp].index_begin ];
215
+ begin[rid] = index[i] - index_base_[jp];
216
+ }
217
+ }
218
+ }
219
+
212
220
template <typename T>
213
- inline void SetIndex (T* index, const GHistIndexMatrix& gmat, const size_t nrow, const size_t nfeature)
214
- {
221
+ inline void SetIndex (T* index, uint32_t * disp, const GHistIndexMatrix& gmat,
222
+ const size_t nrow, const size_t nfeature) {
215
223
std::vector<size_t > num_nonzeros;
216
224
num_nonzeros.resize (nfeature);
217
225
std::fill (num_nonzeros.begin (), num_nonzeros.end (), 0 );
218
226
219
- T* local_index = (T*)(&index_[0 ]);
227
+ T* local_index = reinterpret_cast <T*>(&index_[0 ]);
228
+ // std::cout << "\n++local_index before set: \n";
229
+ // for(size_t i = 0; i < index_.size()/type_size_; ++i)
230
+ // std::cout << local_index[i] << " ";
231
+ //
232
+ // std::cout << "\n++index_base_:\n";
233
+ // for (int32_t fid = 0; fid < index_base_.size(); ++fid) {
234
+ // std::cout << index_base_[fid] << " ";
235
+ // }
236
+
237
+ // std::cout << "\n++bin_id: \n";
238
+
220
239
for (size_t rid = 0 ; rid < nrow; ++rid) {
221
240
const size_t ibegin = gmat.row_ptr [rid];
222
241
const size_t iend = gmat.row_ptr [rid + 1 ];
223
242
size_t fid = 0 ;
224
243
size_t jp = 0 ;
225
244
for (size_t i = ibegin; i < iend; ++i) {
226
- const uint32_t bin_id = index[i] + index_base_[jp];
245
+ const uint32_t bin_id = index[i] + disp[jp];
246
+ // std::cout << bin_id << " ";
227
247
auto iter = std::upper_bound (gmat.cut .Ptrs ().cbegin () + fid,
228
248
gmat.cut .Ptrs ().cend (), bin_id);
229
249
fid = std::distance (gmat.cut .Ptrs ().cbegin (), iter) - 1 ;
230
250
if (type_[fid] == kDenseColumn ) {
231
- T* begin = &local_index[boundary_[jp].index_begin ];
232
- begin[rid] = index[i];
233
- ++jp;
251
+ T* begin = &local_index[boundary_[fid].index_begin ];
252
+ begin[rid] = bin_id - index_base_[fid];
234
253
} else {
235
254
T* begin = &local_index[boundary_[fid].index_begin ];
236
- begin[num_nonzeros[fid]] = index[i]; // bin_id - index_base_[fid];
255
+ begin[num_nonzeros[fid]] = bin_id - index_base_[fid];
237
256
row_ind_[boundary_[fid].row_ind_begin + num_nonzeros[fid]] = rid;
238
257
++num_nonzeros[fid];
239
258
}
259
+ ++jp;
260
+ }
261
+ }
262
+ /*
263
+ for (size_t rid = 0; rid < nrow; ++rid) {
264
+ const size_t ibegin = gmat.row_ptr[rid];
265
+ const size_t iend = gmat.row_ptr[rid + 1];
266
+ size_t fid = 0;
267
+ for (size_t i = ibegin; i < iend; ++i) {
268
+ const uint32_t bin_id = gmat.index[i];
269
+ auto iter = std::upper_bound(gmat.cut.Ptrs().cbegin() + fid,
270
+ gmat.cut.Ptrs().cend(), bin_id);
271
+ fid = std::distance(gmat.cut.Ptrs().cbegin(), iter) - 1;
272
+ if (type_[fid] == kDenseColumn) {
273
+ uint32_t* begin = &index_[boundary_[fid].index_begin];
274
+ begin[rid] = bin_id - index_base_[fid];
275
+ } else {
276
+ uint32_t* begin = &index_[boundary_[fid].index_begin];
277
+ begin[num_nonzeros[fid]] = bin_id - index_base_[fid];
278
+ row_ind_[boundary_[fid].row_ind_begin + num_nonzeros[fid]] = rid;
279
+ ++num_nonzeros[fid];
240
280
}
241
281
}
282
+ }*/
242
283
}
243
- const size_t GetTypeSize () const
244
- {
284
+ const size_t GetTypeSize () const {
245
285
return type_size_;
246
286
}
247
287
248
288
private:
289
+ std::vector<uint8_t > index_; // index_: may store smaller integers; needs padding
249
290
struct ColumnBoundary {
250
291
// indicate where each column's index and row_ind is stored.
251
292
// index_begin and index_end are logical offsets, so they should be converted to
@@ -258,7 +299,6 @@ class ColumnMatrix {
258
299
259
300
std::vector<size_t > feature_counts_;
260
301
std::vector<ColumnType> type_;
261
- std::vector<uint8_t > index_; // index_: may store smaller integers; needs padding
262
302
std::vector<size_t > row_ind_;
263
303
std::vector<ColumnBoundary> boundary_;
264
304
0 commit comments