Skip to content

Commit e7ae9fe

Browse files
committed
handle_dup_cols => handle_duplicate_columns
1 parent 087529c commit e7ae9fe

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

awswrangler/catalog/_utils.py

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -146,7 +146,7 @@ def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
146146
return df
147147

148148

149-
def sanitize_dataframe_columns_names(df: pd.DataFrame, handle_dup_cols: Optional[str] = "warn") -> pd.DataFrame:
149+
def sanitize_dataframe_columns_names(df: pd.DataFrame, handle_duplicate_columns: Optional[str] = "warn") -> pd.DataFrame:
150150
"""Normalize all columns names to be compatible with Amazon Athena.
151151
152152
https://docs.aws.amazon.com/athena/latest/ug/tables-databases-columns-names.html
@@ -165,7 +165,7 @@ def sanitize_dataframe_columns_names(df: pd.DataFrame, handle_dup_cols: Optional
165165
----------
166166
df : pandas.DataFrame
167167
Original Pandas DataFrame.
168-
handle_dup_cols : str, optional
168+
handle_duplicate_columns : str, optional
169169
How to handle duplicate columns. Can be "warn" or "drop" or "rename".
170170
The default is "warn". "drop" will drop all but the first duplicated column.
171171
"rename" will rename all duplicated columns with an incremental number.
@@ -178,24 +178,24 @@ def sanitize_dataframe_columns_names(df: pd.DataFrame, handle_dup_cols: Optional
178178
--------
179179
>>> import awswrangler as wr
180180
>>> df_normalized = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2]}))
181-
>>> df_normalized_drop = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4]}), handle_dup_cols="drop")
182-
>>> df_normalized_rename = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4], 'a_1': [4, 6]}), handle_dup_cols="rename")
181+
>>> df_normalized_drop = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4]}), handle_duplicate_columns="drop")
182+
>>> df_normalized_rename = wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4], 'a_1': [4, 6]}), handle_duplicate_columns="rename")
183183
"""
184184
df.columns = [sanitize_column_name(x) for x in df.columns]
185185
df.index.names = [None if x is None else sanitize_column_name(x) for x in df.index.names]
186186
if df.columns.duplicated().any():
187-
if handle_dup_cols == "warn":
187+
if handle_duplicate_columns == "warn":
188188
warnings.warn("Some columns names are duplicated, consider using "+
189-
"`handle_dup_cols='[drop|rename]'`")
189+
"`handle_duplicate_columns='[drop|rename]'`")
190190

191-
elif handle_dup_cols == "drop":
191+
elif handle_duplicate_columns == "drop":
192192
df = drop_duplicated_columns(df)
193193

194-
elif handle_dup_cols == "rename":
194+
elif handle_duplicate_columns == "rename":
195195
df = rename_duplicated_columns(df)
196196

197197
else:
198-
raise ValueError("handle_dup_cols must be one of ['warn', 'drop', 'rename']")
198+
raise ValueError("handle_duplicate_columns must be one of ['warn', 'drop', 'rename']")
199199

200200
return df
201201

tests/test_athena.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -248,8 +248,8 @@ def test_athena_read_list(glue_database):
248248

249249
def test_sanitize_dataframe_column_names():
250250
assert wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2]})).equals(pd.DataFrame({'a': [1, 2]})) # Unsure how to test for warnings
251-
assert wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4]}), handle_dup_cols="drop").equals(pd.DataFrame({'a': [1, 2]}))
252-
assert wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4], 'a_1': [5, 6]}), handle_dup_cols="rename").equals(pd.DataFrame({'a': [1, 2], 'a_1': [3, 4], 'a_1_1': [5, 6]}))
251+
assert wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4]}), handle_duplicate_columns="drop").equals(pd.DataFrame({'a': [1, 2]}))
252+
assert wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({'A': [1, 2], 'a': [3, 4], 'a_1': [5, 6]}), handle_duplicate_columns="rename").equals(pd.DataFrame({'a': [1, 2], 'a_1': [3, 4], 'a_1_1': [5, 6]}))
253253

254254

255255
def test_sanitize_names():

0 commit comments

Comments
 (0)