@@ -130,12 +130,11 @@ def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
130
130
131
131
Note
132
132
----
133
- This transformation will run `inplace` and will make changes in the original DataFrame.
133
+ This transformation will run `inplace` and will make changes to the original DataFrame.
134
134
135
135
Note
136
136
----
137
- Also handles potential new duplicated conflicts by appending another `_n`
138
- to the end of the column name if it conflicts.
137
+ Also handles any new duplicate column names introduced by the renaming by appending an additional `_n`.
139
138
140
139
Parameters
141
140
----------
@@ -165,7 +164,6 @@ def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
165
164
while df .columns .duplicated ().any ():
166
165
# Catches edge cases such as pd.DataFrame({"A": [1, 2], "a": [3, 4], "a_1": [5, 6]}), where a renamed column collides with an existing name
167
166
df = rename_duplicated_columns (df )
168
-
169
167
return df
170
168
171
169
@@ -179,7 +177,6 @@ def sanitize_dataframe_columns_names(
179
177
Possible transformations:
180
178
- Strip accents
181
179
- Remove non alphanumeric characters
182
- - Convert CamelCase to snake_case
183
180
184
181
Note
185
182
----
@@ -192,8 +189,9 @@ def sanitize_dataframe_columns_names(
192
189
Original Pandas DataFrame.
193
190
handle_duplicate_columns : str, optional
194
191
How to handle duplicate columns. Can be "warn", "drop", or "rename".
195
- The default is "warn". "drop" will drop all but the first duplicated column.
192
+ "drop" will drop all but the first duplicated column.
196
193
"rename" will rename all duplicated columns with an incremental number.
194
+ Defaults to "warn".
197
195
198
196
Returns
199
197
-------
@@ -214,23 +212,18 @@ def sanitize_dataframe_columns_names(
214
212
"""
215
213
df .columns = [sanitize_column_name (x ) for x in df .columns ]
216
214
df .index .names = [None if x is None else sanitize_column_name (x ) for x in df .index .names ]
217
- # Ignore mypy error from pandas.DataFrame.columns.duplicated().any()
218
- if df .columns .duplicated .any (): # type:ignore
215
+ if df .columns .duplicated ().any (): # type: ignore
219
216
if handle_duplicate_columns == "warn" :
220
217
warnings .warn (
221
- "Some columns names are duplicated , consider using `handle_duplicate_columns='[drop|rename]'`" ,
218
+ "Duplicate columns were detected , consider using `handle_duplicate_columns='[drop|rename]'`" ,
222
219
UserWarning ,
223
220
)
224
-
225
221
elif handle_duplicate_columns == "drop" :
226
222
df = drop_duplicated_columns (df )
227
-
228
223
elif handle_duplicate_columns == "rename" :
229
224
df = rename_duplicated_columns (df )
230
-
231
225
else :
232
226
raise ValueError ("handle_duplicate_columns must be one of ['warn', 'drop', 'rename']" )
233
-
234
227
return df
235
228
236
229
0 commit comments