Skip to content

Commit b7a322d

Browse files
committed
[CI] Add RMM as an optional dependency
1 parent 0d411b0 commit b7a322d

File tree

6 files changed

+147
-6
lines changed

6 files changed

+147
-6
lines changed

CMakeLists.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ option(USE_NCCL "Build with NCCL to enable distributed GPU support." OFF)
4444
option(BUILD_WITH_SHARED_NCCL "Build with shared NCCL library." OFF)
4545
set(GPU_COMPUTE_VER "" CACHE STRING
4646
"Semicolon separated list of compute versions to be built against, e.g. '35;61'")
47+
option(USE_RMM "Build with RAPIDS Memory Manager (RMM)" OFF)
4748
## Copied From dmlc
4849
option(USE_HDFS "Build with HDFS support" OFF)
4950
option(USE_AZURE "Build with AZURE support" OFF)
@@ -79,6 +80,9 @@ endif (R_LIB AND GOOGLE_TEST)
7980
if (USE_AVX)
8081
message(SEND_ERROR "The option 'USE_AVX' is deprecated as experimental AVX features have been removed from XGBoost.")
8182
endif (USE_AVX)
83+
# USE_RMM is only valid together with USE_CUDA; report a configuration error otherwise.
if (USE_RMM)
  if (NOT USE_CUDA)
    message(SEND_ERROR "`USE_RMM` must be enabled with `USE_CUDA` flag.")
  endif (NOT USE_CUDA)
endif (USE_RMM)
8286

8387
#-- Sanitizer
8488
if (USE_SANITIZER)
@@ -170,6 +174,9 @@ endif (R_LIB)
170174
# Plugin
171175
add_subdirectory(${xgboost_SOURCE_DIR}/plugin)
172176

177+
# 3rd-party libs
178+
include(cmake/ExternalLibs.cmake)
179+
173180
#-- library
174181
if (BUILD_STATIC_LIB)
175182
add_library(xgboost STATIC)

Jenkinsfile

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ pipeline {
6666
'build-cpu-non-omp': { BuildCPUNonOmp() },
6767
'build-gpu-cuda10.0': { BuildCUDA(cuda_version: '10.0') },
6868
'build-gpu-cuda10.1': { BuildCUDA(cuda_version: '10.1') },
69+
'build-gpu-rmm-cuda10.2': { BuildCUDAWithRMM(cuda_version: '10.2') },
6970
'build-jvm-packages': { BuildJVMPackages(spark_version: '2.4.3') },
7071
'build-jvm-doc': { BuildJVMDoc() }
7172
])
@@ -84,6 +85,7 @@ pipeline {
8485
'test-python-mgpu-cuda10.1': { TestPythonGPU(cuda_version: '10.1', multi_gpu: true) },
8586
'test-cpp-gpu': { TestCppGPU(cuda_version: '10.1') },
8687
'test-cpp-mgpu': { TestCppGPU(cuda_version: '10.1', multi_gpu: true) },
88+
'test-rmm-cpp-gpu': { TestCppGPUWithRMM(cuda_version: '10.2') },
8789
'test-jvm-jdk8': { CrossTestJVMwithJDK(jdk_version: '8', spark_version: '2.4.3') },
8890
'test-jvm-jdk11': { CrossTestJVMwithJDK(jdk_version: '11') },
8991
'test-jvm-jdk12': { CrossTestJVMwithJDK(jdk_version: '12') },
@@ -262,6 +264,22 @@ def BuildCUDA(args) {
262264
}
263265
}
264266

267+
/**
 * Build XGBoost with CUDA and RMM enabled inside the "rmm" CI container, then
 * stash the C++ test executable (build/testxgboost) as 'xgboost_rmm_cpp_tests'.
 *
 * @param args map of options; args.cuda_version is passed to the container
 *             build as the CUDA_VERSION build argument.
 */
def BuildCUDAWithRMM(args) {
  node('linux && cpu_build') {
    unstash name: 'srcs'
    echo "Build with CUDA ${args.cuda_version} and RMM"
    // Container image flavour, docker front-end and image build arguments
    def image = 'rmm'
    def runner = 'docker'
    def buildArgs = "--build-arg CUDA_VERSION=${args.cuda_version}"
    // The build script activates the rmm_test Conda environment before invoking CMake
    sh """
    ${dockerRun} ${image} ${runner} ${buildArgs} tests/ci_build/build_via_cmake.sh --conda-env=rmm_test -DUSE_CUDA=ON -DUSE_RMM=ON
    """
    echo 'Stashing C++ test executable (testxgboost)...'
    stash name: 'xgboost_rmm_cpp_tests', includes: 'build/testxgboost'
    deleteDir()
  }
}
282+
265283
def BuildJVMPackages(args) {
266284
node('linux && cpu') {
267285
unstash name: 'srcs'
@@ -368,6 +386,22 @@ def TestCppGPU(args) {
368386
}
369387
}
370388

389+
/**
 * Run the RMM-enabled C++ test executable on a GPU worker inside the "rmm" CI
 * container, excluding tests whose names match *.MGPU_*.
 *
 * @param args map of options; args.cuda_version is passed to the container
 *             build as the CUDA_VERSION build argument.
 */
def TestCppGPUWithRMM(args) {
  node('linux && gpu') {
    unstash name: 'xgboost_rmm_cpp_tests'
    unstash name: 'srcs'
    echo "Test C++, CUDA ${args.cuda_version} with RMM"
    // Container image flavour, docker front-end and image build arguments
    def image = 'rmm'
    def runner = 'nvidia-docker'
    def buildArgs = "--build-arg CUDA_VERSION=${args.cuda_version}"
    echo "Using a single GPU"
    // Activate the rmm_test Conda environment inside the container before running the tests
    sh """
    ${dockerRun} ${image} ${runner} ${buildArgs} bash -c "source activate rmm_test && build/testxgboost --gtest_filter=-*.MGPU_*"
    """
    deleteDir()
  }
}
404+
371405
def CrossTestJVMwithJDK(args) {
372406
node('linux && cpu') {
373407
unstash name: 'xgboost4j_jar'

cmake/ExternalLibs.cmake

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# RMM (RAPIDS Memory Manager)
#
# When USE_RMM is ON, locate the RMM headers and library and attach them to the
# `objxgboost` target together with the XGBOOST_USE_RMM=1 compile definition.
#
# Extra search locations, in addition to CMake's default ones:
#   * $ENV{CONDA_PREFIX}, prepended to CMAKE_PREFIX_PATH when it is set;
#   * $ENV{RMM_ROOT}/include (headers) and $ENV{RMM_ROOT}/lib, $ENV{RMM_ROOT}/build
#     (library) when RMM_ROOT is set.
if (USE_RMM)
  # Use Conda env if available.
  # Compare against the empty string instead of `if(DEFINED ENV{...})`: the latter
  # is only understood by CMake >= 3.14 and silently evaluates to false before that.
  if (NOT "$ENV{CONDA_PREFIX}" STREQUAL "")
    set(CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX};${CMAKE_PREFIX_PATH}")
    message(STATUS "Detected Conda environment, CMAKE_PREFIX_PATH set to: ${CMAKE_PREFIX_PATH}")
  else ()
    message(STATUS "No Conda environment detected")
  endif ()

  # Derive hints from RMM_ROOT only when it is set; an empty RMM_ROOT would
  # otherwise turn into the filesystem-root hints /include, /lib and /build.
  set(xgboost_rmm_include_hints "")
  set(xgboost_rmm_library_hints "")
  if (NOT "$ENV{RMM_ROOT}" STREQUAL "")
    list(APPEND xgboost_rmm_include_hints "$ENV{RMM_ROOT}/include")
    list(APPEND xgboost_rmm_library_hints "$ENV{RMM_ROOT}/lib" "$ENV{RMM_ROOT}/build")
  endif ()

  # RMM_INCLUDE is the directory that contains the `rmm` entry (the header directory).
  find_path(RMM_INCLUDE "rmm"
    HINTS ${xgboost_rmm_include_hints})

  find_library(RMM_LIBRARY "rmm"
    HINTS ${xgboost_rmm_library_hints})

  if ((NOT RMM_LIBRARY) OR (NOT RMM_INCLUDE))
    # Print both results so it is clear which one is <VAR>-NOTFOUND.
    message(FATAL_ERROR
      "Could not locate RMM library (RMM_LIBRARY=${RMM_LIBRARY}, RMM_INCLUDE=${RMM_INCLUDE})")
  endif ()

  message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}")
  message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")

  target_include_directories(objxgboost PUBLIC "${RMM_INCLUDE}")
  target_link_libraries(objxgboost PUBLIC "${RMM_LIBRARY}" cuda)
  # No -D prefix: target_compile_definitions() takes bare NAME=VALUE items.
  target_compile_definitions(objxgboost PUBLIC XGBOOST_USE_RMM=1)
endif ()

src/common/device_helpers.cuh

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,15 @@
3636

3737
#ifdef XGBOOST_USE_NCCL
3838
#include "nccl.h"
39-
#endif
39+
#endif // XGBOOST_USE_NCCL
40+
41+
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
42+
#include "rmm/mr/device/cuda_memory_resource.hpp"
43+
#include "rmm/mr/device/default_memory_resource.hpp"
44+
#include "rmm/mr/device/device_memory_resource.hpp"
45+
#include "rmm/mr/device/pool_memory_resource.hpp"
46+
#include "rmm/mr/device/thrust_allocator_adaptor.hpp"
47+
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
4048

4149
#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 || defined(__clang__)
4250

@@ -370,12 +378,21 @@ inline void DebugSyncDevice(std::string file="", int32_t line = -1) {
370378
}
371379

372380
namespace detail {
381+
382+
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
383+
template <typename T>
384+
using XGBBaseDeviceAllocator = rmm::mr::thrust_allocator<T>;
385+
#else // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
386+
template <typename T>
387+
using XGBBaseDeviceAllocator = thrust::device_malloc_allocator<T>;
388+
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
389+
373390
/**
374391
* \brief Default memory allocator, uses cudaMalloc/Free and logs allocations if verbose.
375392
*/
376393
template <class T>
377-
struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
378-
using SuperT = thrust::device_malloc_allocator<T>;
394+
struct XGBDefaultDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
395+
using SuperT = XGBBaseDeviceAllocator<T>;
379396
using pointer = thrust::device_ptr<T>; // NOLINT
380397
template<typename U>
381398
struct rebind // NOLINT
@@ -391,13 +408,19 @@ struct XGBDefaultDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
391408
GlobalMemoryLogger().RegisterDeallocation(ptr.get(), n * sizeof(T));
392409
return SuperT::deallocate(ptr, n);
393410
}
411+
#if defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
412+
using cuda_mr = rmm::mr::cuda_memory_resource;
413+
using pool_mr = rmm::mr::pool_memory_resource<cuda_mr>;
414+
XGBDefaultDeviceAllocatorImpl() : SuperT(new pool_mr(new cuda_mr), cudaStream_t{0}) {}
415+
#endif // defined(XGBOOST_USE_RMM) && XGBOOST_USE_RMM == 1
394416
};
395417

396418
/**
397-
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs allocations if verbose. Does not initialise memory on construction.
419+
* \brief Caching memory allocator, uses cub::CachingDeviceAllocator as a back-end and logs
420+
* allocations if verbose. Does not initialise memory on construction.
398421
*/
399422
template <class T>
400-
struct XGBCachingDeviceAllocatorImpl : thrust::device_malloc_allocator<T> {
423+
struct XGBCachingDeviceAllocatorImpl : XGBBaseDeviceAllocator<T> {
401424
using pointer = thrust::device_ptr<T>; // NOLINT
402425
template<typename U>
403426
struct rebind // NOLINT

tests/ci_build/Dockerfile.rmm

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# CI image for building and testing XGBoost with RMM.
# Build argument: CUDA_VERSION selects the nvidia/cuda base image tag and the
# cudatoolkit version installed into the rmm_test Conda environment.
ARG CUDA_VERSION
FROM nvidia/cuda:$CUDA_VERSION-devel-ubuntu18.04
# An ARG declared before FROM is only visible to FROM itself. Redeclare it
# (without a value) so instructions in this build stage can read it; otherwise
# `cudatoolkit=$CUDA_VERSION` below expands to `cudatoolkit=`.
ARG CUDA_VERSION

# Environment
ENV DEBIAN_FRONTEND noninteractive
# Use Bash as shell
SHELL ["/bin/bash", "-c"]

# Install all basic requirements
RUN \
    apt-get update && \
    apt-get install -y wget unzip bzip2 libgomp1 build-essential ninja-build git && \
    # Python
    wget -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
    bash Miniconda3.sh -b -p /opt/python && \
    # CMake
    # NOTE(review): --no-check-certificate disables TLS verification for this download; confirm it is still needed.
    wget -nv -nc https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.sh --no-check-certificate && \
    bash cmake-3.13.0-Linux-x86_64.sh --skip-license --prefix=/usr

ENV PATH=/opt/python/bin:$PATH

# Create new Conda environment with RMM
# -y: never wait for interactive confirmation during the image build
RUN \
    conda create -y -n rmm_test -c nvidia -c rapidsai -c conda-forge -c defaults \
        python=3.7 rmm=0.14 cudatoolkit=$CUDA_VERSION

ENV GOSU_VERSION 1.10

# Install lightweight sudo (not bound to TTY)
RUN set -ex; \
    wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-amd64" && \
    chmod +x /usr/local/bin/gosu && \
    gosu nobody true

# Default entry-point to use if running locally
# It will preserve attributes of created files
COPY entrypoint.sh /scripts/

WORKDIR /workspace
ENTRYPOINT ["/scripts/entrypoint.sh"]

tests/ci_build/build_via_cmake.sh

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,21 @@
11
#!/usr/bin/env bash
# Configure and build XGBoost (with Google Test enabled) via CMake in a fresh
# ./build directory.
#
# Usage: build_via_cmake.sh [--conda-env=<name>] [cmake arguments...]
#
# If the first argument is --conda-env=<name>, that Conda environment is
# activated before building. All remaining arguments are passed to cmake.
set -e

if [[ "$1" == --conda-env=* ]]
then
  # Strip the option prefix to obtain the environment name.
  conda_env="${1#--conda-env=}"
  echo "Activating Conda environment ${conda_env}"
  shift 1
fi

# Keep the cmake arguments in an array (captured before activation) so every
# argument reaches cmake as one unmodified word. Flattening "$@" into a string
# and expanding it unquoted would re-split on whitespace and expand globs.
cmake_args=("$@")

if [[ -n "${conda_env:-}" ]]
then
  source activate "${conda_env}"
fi

rm -rf build
mkdir build
cd build
cmake .. "${cmake_args[@]}" -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_VERBOSE_MAKEFILE=ON
make clean
make -j$(nproc)
cd ..

0 commit comments

Comments
 (0)