Skip to content

Commit 851398d

Browse files
committed
Fix auto merge conflict for branch-25.08
2 parents 678c968 + 5b9425b commit 851398d

File tree

31 files changed

+1234
-311
lines changed

31 files changed

+1234
-311
lines changed

ci/cudf_pandas_scripts/pandas-tests/run.sh

+6
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,10 @@
55

66
set -euo pipefail
77

8+
EXITCODE=0
9+
trap "EXITCODE=1" ERR
10+
set +e
11+
812
PANDAS_TESTS_BRANCH=${1}
913
RAPIDS_FULL_VERSION=$(<./VERSION)
1014
rapids-logger "Running Pandas tests using $PANDAS_TESTS_BRANCH branch and rapids-version $RAPIDS_FULL_VERSION"
@@ -43,3 +47,5 @@ RAPIDS_ARTIFACTS_DIR=${RAPIDS_ARTIFACTS_DIR:-"${PWD}/artifacts"}
4347
mkdir -p "${RAPIDS_ARTIFACTS_DIR}"
4448
mv pandas-testing/"${SUMMARY_FILE_NAME}" "${RAPIDS_ARTIFACTS_DIR}"/
4549
rapids-upload-to-s3 "${RAPIDS_ARTIFACTS_DIR}"/"${SUMMARY_FILE_NAME}" "${RAPIDS_ARTIFACTS_DIR}"
50+
rapids-logger "Test script exiting with value: $EXITCODE"
51+
exit ${EXITCODE}

cpp/src/column/column_view.cpp

+2 -2
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
#include <cudf/column/column_view.hpp>
1818
#include <cudf/detail/null_mask.hpp>
1919
#include <cudf/hashing/detail/hashing.hpp>
20+
#include <cudf/logger_macros.hpp>
2021
#include <cudf/strings/strings_column_view.hpp>
2122
#include <cudf/types.hpp>
2223
#include <cudf/utilities/default_stream.hpp>
@@ -55,8 +56,7 @@ void prefetch_col_data(ColumnView& col, void const* data_ptr, std::string_view k
5556
scv.chars_size(cudf::get_default_stream()) * sizeof(char),
5657
cudf::get_default_stream());
5758
} else {
58-
std::cout << key << ": Unsupported type: " << static_cast<int32_t>(col.type().id())
59-
<< std::endl;
59+
CUDF_LOG_DEBUG("Unsupported type: %d", static_cast<int32_t>(col.type().id()));
6060
}
6161
}
6262
}

cpp/src/io/parquet/page_enc.cu

+1
Original file line number | Diff line number | Diff line change
@@ -689,6 +689,7 @@ CUDF_KERNEL void __launch_bounds__(128)
689689
util::round_up_unsafe(page_g.max_hdr_size + page_g.max_data_size, page_align);
690690
if (not comp_page_sizes.empty()) {
691691
comp_page_offset += page_g.max_hdr_size + comp_page_sizes[ck_g.first_page];
692+
page_g.comp_data_size = comp_page_sizes[ck_g.first_page + num_pages];
692693
}
693694
page_headers_size += page_g.max_hdr_size;
694695
max_page_data_size = max(max_page_data_size, page_g.max_data_size);

cpp/src/strings/split/split.cu

+23 -22
Original file line number | Diff line number | Diff line change
@@ -105,14 +105,19 @@ namespace {
105105
* 6 '' aa b__ccc__
106106
* ```
107107
*
108-
* @tparam Tokenizer provides unique functions for split/rsplit.
109-
* @param strings_column The strings to split
108+
* @tparam Tokenizer provides unique functions for split/rsplit
109+
* @tparam DelimiterFn Functor for locating delimiters
110+
* @param input The strings to split
110111
* @param tokenizer Tokenizer for counting and producing tokens
112+
* @param delimiter_fn Functor called on each byte to check for delimiters
113+
* @param stream CUDA stream used for device memory operations and kernel launches
114+
* @param mr Device memory resource used to allocate the returned objects' device memory
111115
* @return table of columns for the output of the split
112116
*/
113-
template <typename Tokenizer>
117+
template <typename Tokenizer, typename DelimiterFn>
114118
std::unique_ptr<table> split_fn(strings_column_view const& input,
115119
Tokenizer tokenizer,
120+
DelimiterFn delimiter_fn,
116121
rmm::cuda_stream_view stream,
117122
rmm::device_async_resource_ref mr)
118123
{
@@ -123,7 +128,7 @@ std::unique_ptr<table> split_fn(strings_column_view const& input,
123128
}
124129

125130
// builds the offsets and the vector of all tokens
126-
auto [offsets, tokens] = split_helper(input, tokenizer, stream, mr);
131+
auto [offsets, tokens] = split_helper(input, tokenizer, delimiter_fn, stream, mr);
127132
auto const d_offsets = cudf::detail::offsetalator_factory::make_input_iterator(offsets->view());
128133
auto const d_tokens = tokens.data();
129134

@@ -386,7 +391,7 @@ std::unique_ptr<table> whitespace_split_fn(size_type strings_count,
386391

387392
} // namespace
388393

389-
std::unique_ptr<table> split(strings_column_view const& strings_column,
394+
std::unique_ptr<table> split(strings_column_view const& input,
390395
string_scalar const& delimiter,
391396
size_type maxsplit,
392397
rmm::cuda_stream_view stream,
@@ -396,20 +401,18 @@ std::unique_ptr<table> split(strings_column_view const& strings_column,
396401

397402
size_type max_tokens = maxsplit > 0 ? maxsplit + 1 : std::numeric_limits<size_type>::max();
398403

399-
auto strings_device_view = column_device_view::create(strings_column.parent(), stream);
404+
auto d_strings = column_device_view::create(input.parent(), stream);
400405
if (delimiter.size() == 0) {
401-
return whitespace_split_fn(strings_column.size(),
402-
whitespace_split_tokenizer_fn{*strings_device_view, max_tokens},
403-
stream,
404-
mr);
406+
return whitespace_split_fn(
407+
input.size(), whitespace_split_tokenizer_fn{*d_strings, max_tokens}, stream, mr);
405408
}
406409

407-
string_view d_delimiter(delimiter.data(), delimiter.size());
408-
return split_fn(
409-
strings_column, split_tokenizer_fn{*strings_device_view, d_delimiter, max_tokens}, stream, mr);
410+
auto tokenizer = split_tokenizer_fn{*d_strings, delimiter.size(), max_tokens};
411+
auto delimiter_fn = string_delimiter_fn{delimiter.value(stream)};
412+
return split_fn(input, tokenizer, delimiter_fn, stream, mr);
410413
}
411414

412-
std::unique_ptr<table> rsplit(strings_column_view const& strings_column,
415+
std::unique_ptr<table> rsplit(strings_column_view const& input,
413416
string_scalar const& delimiter,
414417
size_type maxsplit,
415418
rmm::cuda_stream_view stream,
@@ -419,17 +422,15 @@ std::unique_ptr<table> rsplit(strings_column_view const& strings_column,
419422

420423
size_type max_tokens = maxsplit > 0 ? maxsplit + 1 : std::numeric_limits<size_type>::max();
421424

422-
auto strings_device_view = column_device_view::create(strings_column.parent(), stream);
425+
auto d_strings = column_device_view::create(input.parent(), stream);
423426
if (delimiter.size() == 0) {
424-
return whitespace_split_fn(strings_column.size(),
425-
whitespace_rsplit_tokenizer_fn{*strings_device_view, max_tokens},
426-
stream,
427-
mr);
427+
return whitespace_split_fn(
428+
input.size(), whitespace_rsplit_tokenizer_fn{*d_strings, max_tokens}, stream, mr);
428429
}
429430

430-
string_view d_delimiter(delimiter.data(), delimiter.size());
431-
return split_fn(
432-
strings_column, rsplit_tokenizer_fn{*strings_device_view, d_delimiter, max_tokens}, stream, mr);
431+
auto tokenizer = rsplit_tokenizer_fn{*d_strings, delimiter.size(), max_tokens};
432+
auto delimiter_fn = string_delimiter_fn{delimiter.value(stream)};
433+
return split_fn(input, tokenizer, delimiter_fn, stream, mr);
433434
}
434435

435436
} // namespace detail

0 commit comments

Comments
 (0)