Skip to content

Commit 5e140ff

Browse files
authored
fix: replace deprecated np.array_split (#2735)
1 parent 451939f commit 5e140ff

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

awswrangler/_utils.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import boto3
3030
import botocore.credentials
3131
import numpy as np
32+
import numpy.core.numeric as _nx
3233
import pyarrow as pa
3334
from botocore.config import Config
3435
from packaging import version
@@ -851,7 +852,18 @@ def check_schema_changes(columns_types: dict[str, str], table_input: dict[str, A
851852
@engine.dispatch_on_engine
852853
def split_pandas_frame(df: pd.DataFrame, splits: int) -> list[pd.DataFrame]:
    """Split a DataFrame into at most ``splits`` contiguous row chunks.

    The rows are partitioned the same way ``np.array_split`` partitions an
    array: the first ``len(df) % splits`` chunks receive one extra row, so
    chunk lengths differ by at most one. Chunks for which ``DataFrame.empty``
    is true are dropped, which means fewer than ``splits`` frames are returned
    when ``len(df) < splits``.

    Parameters
    ----------
    df
        The DataFrame to split. Rows are sliced positionally with ``iloc``.
    splits
        The number of sections to divide the rows into. Must be positive.

    Returns
    -------
    list[pd.DataFrame]
        The non-empty chunks, in original row order.

    Raises
    ------
    ValueError
        If ``splits`` is not greater than zero (same error ``np.array_split``
        raises for an invalid section count).
    """
    # Guard explicitly: divmod would raise ZeroDivisionError for 0, and a
    # negative count would silently produce no chunks at all.
    if splits <= 0:
        raise ValueError("number sections must be larger than 0.")

    base_size, extras = divmod(len(df), splits)

    sub_dfs: list[pd.DataFrame] = []
    start = 0
    for i in range(splits):
        # The first `extras` sections absorb the remainder, one row each.
        stop = start + base_size + (1 if i < extras else 0)
        sub_df = df.iloc[start:stop]
        if not sub_df.empty:
            sub_dfs.append(sub_df)
        start = stop
    return sub_dfs
855867

856868

857869
@engine.dispatch_on_engine

0 commit comments

Comments
 (0)