Skip to content

Commit 87d05de

Browse files
opal/cuda: Handle VMM pointers in cuda_check_addr
Signed-off-by: Akshay Venkatesh <[email protected]>
1 parent 7d20b86 commit 87d05de

File tree

2 files changed

+120
-13
lines changed

2 files changed

+120
-13
lines changed

config/opal_check_cuda.m4

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
dnl -*- autoconf -*-
22
dnl
3+
dnl Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
34
dnl Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
45
dnl University Research and Technology
56
dnl Corporation. All rights reserved.
@@ -118,6 +119,12 @@ AS_IF([test "$opal_check_cuda_happy" = "yes"],
118119
[#include <$opal_cuda_incdir/cuda.h>])],
119120
[])
120121
122+
# If we have CUDA support, check to see if we have support for cuMemCreate memory on host NUMA.
123+
# The spaces around the = operator are required. Without them the shell
# test builtin receives a single non-empty word and always succeeds, so
# the probe would run even when CUDA was not found. When CUDA is not
# available, default CUDA_VMM_SUPPORT to 0 so that OPAL_CUDA_VMM_SUPPORT
# is always defined to either 0 or 1.
AS_IF([test "$opal_check_cuda_happy" = "yes"],
    [AC_CHECK_DECL([CU_MEM_LOCATION_TYPE_HOST_NUMA], [CUDA_VMM_SUPPORT=1], [CUDA_VMM_SUPPORT=0],
        [#include <$opal_cuda_incdir/cuda.h>])],
    [CUDA_VMM_SUPPORT=0])
127+
121128
# If we have CUDA support, check to see if we have support for SYNC_MEMOPS
122129
# which was first introduced in CUDA 6.0.
123130
AS_IF([test "$opal_check_cuda_happy" = "yes"],
@@ -160,6 +167,10 @@ AM_CONDITIONAL([OPAL_cuda_support], [test "x$CUDA_SUPPORT" = "x1"])
160167
AC_DEFINE_UNQUOTED([OPAL_CUDA_SUPPORT],$CUDA_SUPPORT,
161168
[Whether we want cuda device pointer support])
162169
170+
AM_CONDITIONAL([OPAL_cuda_vmm_support], [test "x$CUDA_VMM_SUPPORT" = "x1"])
171+
AC_DEFINE_UNQUOTED([OPAL_CUDA_VMM_SUPPORT],$CUDA_VMM_SUPPORT,
172+
[Whether we have CU_MEM_LOCATION_TYPE_HOST_NUMA support available])
173+
163174
AM_CONDITIONAL([OPAL_cuda_sync_memops], [test "x$CUDA_SYNC_MEMOPS" = "x1"])
164175
AC_DEFINE_UNQUOTED([OPAL_CUDA_SYNC_MEMOPS],$CUDA_SYNC_MEMOPS,
165176
[Whether we have CUDA CU_POINTER_ATTRIBUTE_SYNC_MEMOPS support available])

opal/mca/accelerator/cuda/accelerator_cuda.c

Lines changed: 109 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
/*
2+
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
23
* Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
34
* Copyright (c) 2014 Research Organization for Information Science
45
* and Technology (RIST). All rights reserved.
56
* Copyright (c) 2014 Mellanox Technologies, Inc.
67
* All rights reserved.
78
* Copyright (c) Amazon.com, Inc. or its affiliates.
89
* All Rights reserved.
9-
* Copyright (c) 2024 NVIDIA Corporation. All rights reserved.
1010
* Copyright (c) 2024 The University of Tennessee and The University
1111
* of Tennessee Research Foundation. All rights
1212
* reserved.
@@ -154,9 +154,75 @@ static int accelerator_cuda_get_device_id(CUcontext mem_ctx) {
154154
return dev_id;
155155
}
156156

157+
/**
 * Classify an address by looking up its CUDA allocation handle.
 *
 * When OPAL_CUDA_VMM_SUPPORT is enabled, the function tries to retain the
 * allocation handle backing dbuf and read the allocation properties from
 * it. The location recorded in those properties decides the result:
 *
 *  - CU_MEM_LOCATION_TYPE_DEVICE: reported as device memory owned by the
 *    device id stored in the properties.
 *  - CU_MEM_LOCATION_TYPE_HOST_NUMA: every device index below the cached
 *    device count is probed with cuMemGetAccess(); the first one that
 *    reports read/write access makes the buffer count as device memory
 *    for that device.
 *  - anything else, or no device with read/write access: reported as host
 *    memory with MCA_ACCELERATOR_NO_DEVICE_ID.
 *
 * @param[in]  dbuf      Address to inspect.
 * @param[out] mem_type  Set to CU_MEMORYTYPE_DEVICE or CU_MEMORYTYPE_HOST;
 *                       written only when 1 is returned.
 * @param[out] dev_id    Set to the matching device id or
 *                       MCA_ACCELERATOR_NO_DEVICE_ID; written only when 1
 *                       is returned.
 *
 * @return 1 if a handle was retained and its properties were read,
 *         0 if any of the CUDA queries failed or if OPAL_CUDA_VMM_SUPPORT
 *         is disabled.
 */
static int accelerator_cuda_check_vmm(CUdeviceptr dbuf, CUmemorytype *mem_type,
                                      int *dev_id)
{
#if OPAL_CUDA_VMM_SUPPORT
    /* Number of devices, queried on first use and kept across calls. */
    static int device_count = -1;
    CUmemGenericAllocationHandle handle;
    CUmemAllocationProp alloc_prop;
    CUmemLocation probe;
    unsigned long long access;
    /* Host is the answer unless a device is identified below. */
    CUmemorytype kind = CU_MEMORYTYPE_HOST;
    int owner = MCA_ACCELERATOR_NO_DEVICE_ID;

    if (-1 == device_count) {
        if (CUDA_SUCCESS != cuDeviceGetCount(&device_count)) {
            return 0;
        }
    }

    if (CUDA_SUCCESS != cuMemRetainAllocationHandle(&handle, (void *) dbuf)) {
        return 0;
    }

    if (CUDA_SUCCESS != cuMemGetAllocationPropertiesFromHandle(&alloc_prop, handle)) {
        /* The handle was retained above, so it must be released here. */
        cuMemRelease(handle);
        return 0;
    }

    if (CU_MEM_LOCATION_TYPE_DEVICE == alloc_prop.location.type) {
        kind = CU_MEMORYTYPE_DEVICE;
        owner = alloc_prop.location.id;
    } else if (CU_MEM_LOCATION_TYPE_HOST_NUMA == alloc_prop.location.type) {
        /* Look for the first device with read/write access. */
        probe.type = CU_MEM_LOCATION_TYPE_DEVICE;
        for (int dev = 0; dev < device_count; dev++) {
            probe.id = dev;
            if (CUDA_SUCCESS != cuMemGetAccess(&access, &probe, dbuf)) {
                continue;
            }
            if (CU_MEM_ACCESS_FLAGS_PROT_READWRITE == access) {
                kind = CU_MEMORYTYPE_DEVICE;
                owner = dev;
                break;
            }
        }
    }

    /* Publish the classification, then drop the retained handle. */
    *mem_type = kind;
    *dev_id = owner;
    cuMemRelease(handle);
    return 1;
#else
    return 0;
#endif
}
219+
157220
static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *flags)
158221
{
159222
CUresult result;
223+
int is_vmm = 0;
224+
int vmm_dev_id = MCA_ACCELERATOR_NO_DEVICE_ID;
225+
CUmemorytype vmm_mem_type = 0;
160226
CUmemorytype mem_type = 0;
161227
CUdeviceptr dbuf = (CUdeviceptr) addr;
162228
CUcontext ctx = NULL, mem_ctx = NULL;
@@ -168,6 +234,8 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
168234

169235
*flags = 0;
170236

237+
is_vmm = accelerator_cuda_check_vmm(dbuf, &vmm_mem_type, &vmm_dev_id);
238+
171239
#if OPAL_CUDA_GET_ATTRIBUTES
172240
uint32_t is_managed = 0;
173241
/* With CUDA 7.0, we can get multiple attributes with a single call */
@@ -197,17 +265,24 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
197265
return OPAL_ERROR;
198266
}
199267
} else if (CU_MEMORYTYPE_HOST == mem_type) {
200-
/* Host memory, nothing to do here */
201-
return 0;
268+
if (is_vmm && (vmm_mem_type == CU_MEMORYTYPE_DEVICE)) {
269+
mem_type = CU_MEMORYTYPE_DEVICE;
270+
*dev_id = vmm_dev_id;
271+
} else {
272+
/* Host memory, nothing to do here */
273+
return 0;
274+
}
202275
} else if (0 == mem_type) {
203276
/* This can happen when CUDA is initialized but dbuf is not valid CUDA pointer */
204277
return 0;
205278
} else {
206-
/* query the device from the context */
207-
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
279+
if (is_vmm) {
280+
*dev_id = vmm_dev_id;
281+
} else {
282+
/* query the device from the context */
283+
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
284+
}
208285
}
209-
/* Must be a device pointer */
210-
assert(CU_MEMORYTYPE_DEVICE == mem_type);
211286
#else /* OPAL_CUDA_GET_ATTRIBUTES */
212287
result = cuPointerGetAttribute(&mem_type, CU_POINTER_ATTRIBUTE_MEMORY_TYPE, dbuf);
213288
if (CUDA_SUCCESS != result) {
@@ -218,16 +293,27 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
218293
return OPAL_ERROR;
219294
}
220295
} else if (CU_MEMORYTYPE_HOST == mem_type) {
221-
/* Host memory, nothing to do here */
222-
return 0;
296+
if (is_vmm && (vmm_mem_type == CU_MEMORYTYPE_DEVICE)) {
297+
mem_type = CU_MEMORYTYPE_DEVICE;
298+
*dev_id = vmm_dev_id;
299+
} else {
300+
/* Host memory, nothing to do here */
301+
return 0;
302+
}
223303
} else {
224-
result = cuPointerGetAttribute(&mem_ctx, CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
225-
/* query the device from the context */
226-
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
304+
if (is_vmm) {
305+
*dev_id = vmm_dev_id;
306+
} else {
307+
result = cuPointerGetAttribute(&mem_ctx,
308+
CU_POINTER_ATTRIBUTE_CONTEXT, dbuf);
309+
/* query the device from the context */
310+
*dev_id = accelerator_cuda_get_device_id(mem_ctx);
311+
}
227312
}
313+
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
314+
228315
/* Must be a device pointer */
229316
assert(CU_MEMORYTYPE_DEVICE == mem_type);
230-
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
231317

232318
/* This piece of code was added in to handle in a case involving
233319
* OMP threads. The user had initialized CUDA and then spawned
@@ -250,6 +336,16 @@ static int accelerator_cuda_check_addr(const void *addr, int *dev_id, uint64_t *
250336
return OPAL_ERROR;
251337
}
252338
#endif /* OPAL_CUDA_GET_ATTRIBUTES */
339+
if (is_vmm) {
340+
/* This function is expected to set context if pointer is device
341+
* accessible but VMM allocations have NULL context associated
342+
* which cannot be set against the calling thread */
343+
opal_output(0,
344+
"CUDA: unable to set context with the given pointer"
345+
"ptr=%p aborting...", addr);
346+
return OPAL_ERROR;
347+
}
348+
253349
result = cuCtxSetCurrent(mem_ctx);
254350
if (OPAL_UNLIKELY(CUDA_SUCCESS != result)) {
255351
opal_output(0,

0 commit comments

Comments
 (0)