Skip to content

Commit 087529c

Browse files
committed
Less memory intensive rename
1 parent 12e13d6 commit 087529c

File tree

1 file changed

+6
-5
lines changed

1 file changed

+6
-5
lines changed

awswrangler/catalog/_utils.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -134,14 +134,15 @@ def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
134134
>>> df_rename = wr.catalog.rename_duplicated_columns(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4], 'a_1': [4, 6]}))
135135
"""
136136
names = df.columns
137-
name_df = pd.DataFrame(names, columns=["name"])
138-
name_df["col_count"] = name_df.groupby("name").cumcount().astype(str)
139-
name_df["new_names"] = name_df["name"]
140-
name_df.loc[name_df.col_count > "0", "new_names"] += "_" + name_df.col_count
141-
df.columns = name_df.new_names.values
137+
set_names = set(names)
138+
if len(names) == len(set_names):
139+
return df
140+
d = {key: [name + f"_{i}" if i > 0 else name for i, name in enumerate(names[names==key])] for key in set_names}
141+
df.rename(columns=lambda c: d[c].pop(0), inplace=True)
142142
while df.columns.duplicated().any():
143143
# Catches edge cases where the rename itself yields a new duplicate (a suffixed name colliding with an existing column), e.g. pd.DataFrame({"A": [1, 2], "a": [3, 4], "a_1": [5, 6]})
144144
df = rename_duplicated_columns(df)
145+
145146
return df
146147

147148

0 commit comments

Comments
 (0)