@@ -146,7 +146,7 @@ def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
146
146
return df
147
147
148
148
149
- def sanitize_dataframe_columns_names (df : pd .DataFrame , handle_dup_cols : Optional [str ] = "warn" ) -> pd .DataFrame :
149
+ def sanitize_dataframe_columns_names (df : pd .DataFrame , handle_duplicate_columns : Optional [str ] = "warn" ) -> pd .DataFrame :
150
150
"""Normalize all columns names to be compatible with Amazon Athena.
151
151
152
152
https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html
@@ -165,7 +165,7 @@ def sanitize_dataframe_columns_names(df: pd.DataFrame, handle_dup_cols: Optional
165
165
----------
166
166
df : pandas.DataFrame
167
167
Original Pandas DataFrame.
168
- handle_dup_cols : str, optional
168
+ handle_duplicate_columns : str, optional
169
169
How to handle duplicate columns. Can be "warn" or "drop" or "rename".
170
170
The default is "warn". "drop" will drop all but the first duplicated column.
171
171
"rename" will rename all duplicated columns with an incremental number.
@@ -178,24 +178,24 @@ def sanitize_dataframe_columns_names(df: pd.DataFrame, handle_dup_cols: Optional
178
178
--------
179
179
>>> import awswrangler as wr
180
180
>>> df_normalized = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2]}))
181
- >>> df_normalized_drop = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4]}), handle_dup_cols ="drop")
182
- >>> df_normalized_rename = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4], 'a_1': [4, 6]}), handle_dup_cols ="rename")
181
+ >>> df_normalized_drop = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4]}), handle_duplicate_columns ="drop")
182
+ >>> df_normalized_rename = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4], 'a_1': [4, 6]}), handle_duplicate_columns ="rename")
183
183
"""
184
184
df .columns = [sanitize_column_name (x ) for x in df .columns ]
185
185
df .index .names = [None if x is None else sanitize_column_name (x ) for x in df .index .names ]
186
186
if df .columns .duplicated ().any ():
187
- if handle_dup_cols == "warn" :
187
+ if handle_duplicate_columns == "warn" :
188
188
warnings .warn ("Some columns names are duplicated, consider using " +
189
- "`handle_dup_cols ='[drop|rename]'`" )
189
+ "`handle_duplicate_columns ='[drop|rename]'`" )
190
190
191
- elif handle_dup_cols == "drop" :
191
+ elif handle_duplicate_columns == "drop" :
192
192
df = drop_duplicated_columns (df )
193
193
194
- elif handle_dup_cols == "rename" :
194
+ elif handle_duplicate_columns == "rename" :
195
195
df = rename_duplicated_columns (df )
196
196
197
197
else :
198
- raise ValueError ("handle_dup_cols must be one of ['warn', 'drop', 'rename']" )
198
+ raise ValueError ("handle_duplicate_columns must be one of ['warn', 'drop', 'rename']" )
199
199
200
200
return df
201
201
0 commit comments