Skip to content

Commit ed9fc21

Browse files
committed
Add a minimal failing example
1 parent 5c5d4b2 commit ed9fc21

File tree

1 file changed

+131
-0
lines changed

1 file changed

+131
-0
lines changed

c/parallel/test/test_scan_minimal.cpp

+131
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,131 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of CUDA Experimental in CUDA C++ Core Libraries,
4+
// under the Apache License v2.0 with LLVM Exceptions.
5+
// See https://llvm.org/LICENSE.txt for license information.
6+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7+
// SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES.
8+
//
9+
//===----------------------------------------------------------------------===//
10+
11+
#include <cuda_runtime.h>
12+
13+
#include <cstdint>
14+
15+
#include "test_util.h"
16+
17+
// Builds a device scan for the given iterators/operator, checks the generated
// SASS, runs the scan once, and releases the build result.
//
// Parameters:
//   input / output   - iterators forwarded unchanged to the build and run calls
//   num_items        - number of elements forwarded to the scan call
//   op               - binary operator forwarded to the build and run calls
//   init             - initial value forwarded to the build and run calls
//   force_inclusive  - when true the inclusive entry point is invoked,
//                      otherwise the exclusive one; also forwarded to the build
//
// Every API call is wrapped in REQUIRE, so any failure aborts the calling test.
void scan(cccl_iterator_t input,
          cccl_iterator_t output,
          uint64_t num_items,
          cccl_op_t op,
          cccl_value_t init,
          bool force_inclusive)
{
  // Query device 0. The status is checked so that a failed query is reported
  // here instead of feeding an uninitialised compute capability to the build.
  cudaDeviceProp deviceProp;
  REQUIRE(cudaSuccess == cudaGetDeviceProperties(&deviceProp, 0));

  const int cc_major = deviceProp.major;
  const int cc_minor = deviceProp.minor;

  // Include/toolkit locations come from build-system supplied macros.
  const char* cub_path        = TEST_CUB_PATH;
  const char* thrust_path     = TEST_THRUST_PATH;
  const char* libcudacxx_path = TEST_LIBCUDACXX_PATH;
  const char* ctk_path        = TEST_CTK_PATH;

  cccl_device_scan_build_result_t build;
  REQUIRE(
    CUDA_SUCCESS
    == cccl_device_scan_build(
      &build,
      input,
      output,
      op,
      init,
      force_inclusive,
      cc_major,
      cc_minor,
      cub_path,
      thrust_path,
      libcudacxx_path,
      ctk_path));

  const std::string sass = inspect_sass(build.cubin, build.cubin_size);

  // The generated code must not contain the substrings "LDL" or "STL".
  // NOTE(review): presumably these are the local-memory load/store SASS
  // mnemonics, i.e. this guards against local-memory traffic - confirm.
  REQUIRE(sass.find("LDL") == std::string::npos);
  REQUIRE(sass.find("STL") == std::string::npos);

  auto scan_function = force_inclusive ? cccl_device_inclusive_scan : cccl_device_exclusive_scan;

  // First call: null temporary storage; temp_storage_bytes is then used to
  // size the allocation below (looks like the usual size-query convention).
  size_t temp_storage_bytes = 0;
  REQUIRE(CUDA_SUCCESS == scan_function(build, nullptr, &temp_storage_bytes, input, output, num_items, op, init, 0));

  pointer_t<uint8_t> temp_storage(temp_storage_bytes);

  // Second call: same arguments, now with the allocated temporary storage.
  REQUIRE(
    CUDA_SUCCESS == scan_function(build, temp_storage.ptr, &temp_storage_bytes, input, output, num_items, op, init, 0));
  REQUIRE(CUDA_SUCCESS == cccl_device_scan_cleanup(&build));
}
68+
69+
struct XY
70+
{
71+
int x;
72+
int y;
73+
74+
bool operator==(const XY& other) const
75+
{
76+
return x == other.x && y == other.y;
77+
}
78+
};
79+
80+
// Runs the exclusive and then the inclusive device scan over a vector of XY
// values and compares each result against the matching std:: algorithm on the
// host. Sizes covered: 0, 42, and four generated values from
// random(1 << 12, 1 << 16).
C2H_TEST("Scan works with struct type", "[scan]")
{
  const std::size_t num_items = GENERATE(0, 42, take(4, random(1 << 12, 1 << 16)));

  // Device-side operator passed as source text: member-wise sum of two XY values.
  operation_t op = make_operation(
    "op",
    "struct XY { int x; int y; };\n"
    "extern \"C\" __device__ void op(XY* lhs, XY* rhs, XY* out) {\n"
    " *out = XY{ lhs->x + rhs->x, lhs->y + rhs->y };\n"
    "}");

  // Host-side counterpart of the device operator, shared by both references.
  const auto add_xy = [](const XY& lhs, const XY& rhs) {
    return XY{lhs.x + rhs.x, lhs.y + rhs.y};
  };

  // Pair up two generated int sequences into the XY input.
  const std::vector<int> xs = generate<int>(num_items);
  const std::vector<int> ys = generate<int>(num_items);
  std::vector<XY> input;
  input.reserve(num_items);
  for (std::size_t idx = 0; idx != num_items; ++idx)
  {
    input.push_back(XY{xs[idx], ys[idx]});
  }

  std::vector<XY> output(num_items);
  pointer_t<XY> input_ptr(input);
  pointer_t<XY> output_ptr(output);
  value_t<XY> init{XY{0, 0}};

  // Host references for both scan flavours; they depend only on the input.
  std::vector<XY> expected(num_items);
  std::exclusive_scan(input.begin(), input.end(), expected.begin(), init.value, add_xy);

  std::vector<XY> expected_inclusive(num_items);
  std::inclusive_scan(input.begin(), input.end(), expected_inclusive.begin(), add_xy);

  // Exclusive scan; the comparison is skipped for the empty case.
  scan(input_ptr, output_ptr, num_items, op, init, false);
  if (num_items != 0)
  {
    REQUIRE(expected == std::vector<XY>(output_ptr));
  }

  // Inclusive scan into the same output buffer, again compared only when non-empty.
  scan(input_ptr, output_ptr, num_items, op, init, true);
  if (num_items != 0)
  {
    REQUIRE(expected_inclusive == std::vector<XY>(output_ptr));
  }
}

0 commit comments

Comments
 (0)