|
13 | 13 |
|
14 | 14 | from rapidsmpf.buffer.resource import BufferResource
|
15 | 15 | from rapidsmpf.progress_thread import ProgressThread
|
16 |
| -from rapidsmpf.shuffler import Shuffler, partition_and_pack, unpack_and_concat |
| 16 | +from rapidsmpf.shuffler import ( |
| 17 | + Shuffler, |
| 18 | + partition_and_pack, |
| 19 | + split_and_pack, |
| 20 | + unpack_and_concat, |
| 21 | +) |
17 | 22 | from rapidsmpf.testing import assert_eq
|
18 | 23 | from rapidsmpf.utils.cudf import (
|
19 | 24 | cudf_to_pylibcudf_table,
|
@@ -50,6 +55,52 @@ def test_partition_and_pack_unpack(
|
50 | 55 | assert_eq(expect, got, sort_rows="0")
|
51 | 56 |
|
52 | 57 |
|
@pytest.mark.parametrize(
    "df",
    [
        {"0": [1, 2, 3], "1": [2, 2, 1]},
        {"0": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], "1": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]},
        {"0": [], "1": []},
    ],
)
@pytest.mark.parametrize("num_partitions", [1, 2, 3, 10])
def test_split_and_pack_unpack(
    device_mr: rmm.mr.CudaMemoryResource, df: dict[str, list[int]], num_partitions: int
) -> None:
    """Round-trip a table through ``split_and_pack`` and ``unpack_and_concat``.

    The input frame is cut at ``num_partitions - 1`` row offsets, packed,
    then unpacked and concatenated in partition-id order; the result must
    equal the input, including row order.
    """
    expect = cudf.DataFrame(df)

    # ``num_partitions`` evenly spaced offsets over [0, len(expect)); the
    # leading 0 is dropped so only the interior cut points remain.
    offsets = np.linspace(0, len(expect), num_partitions, endpoint=False)
    split_points = offsets[1:].astype(int)

    packed = split_and_pack(
        cudf_to_pylibcudf_table(expect),
        splits=split_points,
        stream=DEFAULT_STREAM,
        device_mr=device_mr,
    )

    # Gather the packed chunks by partition id 0..num_partitions-1 so the
    # concatenation reproduces the original row order.
    ordered_chunks = tuple(packed[pid] for pid in range(num_partitions))
    roundtrip_table = unpack_and_concat(
        ordered_chunks,
        stream=DEFAULT_STREAM,
        device_mr=device_mr,
    )
    got = pylibcudf_to_cudf_dataframe(roundtrip_table)

    assert_eq(expect, got)
| 87 | + |
| 88 | + |
@pytest.mark.parametrize("df", [{"0": [1, 2, 3], "1": [2, 2, 1]}, {"0": [], "1": []}])
def test_split_and_pack_unpack_out_of_range(
    device_mr: rmm.mr.CudaMemoryResource, df: dict[str, list[int]]
) -> None:
    """Check that ``split_and_pack`` rejects a split offset past the table end.

    Runs once per parametrized ``df`` (a 3-row table and an empty table).
    The split offset 100 exceeds the row count of both, so the call is
    expected to raise ``IndexError``.
    """
    # Build the table from the parametrized input so the non-empty case is
    # actually exercised, not just the empty one.
    table = cudf.DataFrame(df)
    with pytest.raises(IndexError):
        split_and_pack(
            cudf_to_pylibcudf_table(table),
            splits=[100],
            stream=DEFAULT_STREAM,
            device_mr=device_mr,
        )
| 102 | + |
| 103 | + |
53 | 104 | @pytest.mark.parametrize("wait_on", [False, True])
|
54 | 105 | @pytest.mark.parametrize("total_num_partitions", [1, 2, 3, 10])
|
55 | 106 | def test_shuffler_single_nonempty_partition(
|
|
0 commit comments