Skip to content

Commit 97396ce

Browse files
committed
Minor - Fixes
1 parent 656e908 commit 97396ce

File tree

2 files changed

+7
-14
lines changed

2 files changed

+7
-14
lines changed

awswrangler/catalog/_utils.py

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -130,12 +130,11 @@ def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
130130
131131
Note
132132
----
133-
This transformation will run `inplace` and will make changes in the original DataFrame.
133+
This transformation will run `inplace` and will make changes to the original DataFrame.
134134
135135
Note
136136
----
137-
Also handles potential new duplicated conflicts by appending another `_n`
138-
to the end of the column name if it conflicts.
137+
Also handles new duplicate column names introduced by the renaming itself by appending an additional `_n` to the conflicting name.
139138
140139
Parameters
141140
----------
@@ -165,7 +164,6 @@ def rename_duplicated_columns(df: pd.DataFrame) -> pd.DataFrame:
165164
while df.columns.duplicated().any():
166165
# Catches edge cases such as pd.DataFrame({"A": [1, 2], "a": [3, 4], "a_1": [5, 6]}), where renaming a duplicate can itself create a new duplicate
167166
df = rename_duplicated_columns(df)
168-
169167
return df
170168

171169

@@ -179,7 +177,6 @@ def sanitize_dataframe_columns_names(
179177
Possible transformations:
180178
- Strip accents
181179
- Remove non alphanumeric characters
182-
- Convert CamelCase to snake_case
183180
184181
Note
185182
----
@@ -192,8 +189,9 @@ def sanitize_dataframe_columns_names(
192189
Original Pandas DataFrame.
193190
handle_duplicate_columns : str, optional
194191
How to handle duplicate columns. Can be "warn", "drop", or "rename".
195-
The default is "warn". "drop" will drop all but the first duplicated column.
192+
"drop" will drop all but the first duplicated column.
196193
"rename" will rename all duplicated columns with an incremental number.
194+
Defaults to "warn".
197195
198196
Returns
199197
-------
@@ -214,23 +212,18 @@ def sanitize_dataframe_columns_names(
214212
"""
215213
df.columns = [sanitize_column_name(x) for x in df.columns]
216214
df.index.names = [None if x is None else sanitize_column_name(x) for x in df.index.names]
217-
# Ignore mypy error from pandas.DataFrame.columns.duplicated().any()
218-
if df.columns.duplicated.any(): # type:ignore
215+
if df.columns.duplicated().any(): # type: ignore
219216
if handle_duplicate_columns == "warn":
220217
warnings.warn(
221-
"Some columns names are duplicated, consider using `handle_duplicate_columns='[drop|rename]'`",
218+
"Duplicate columns were detected, consider using `handle_duplicate_columns='[drop|rename]'`",
222219
UserWarning,
223220
)
224-
225221
elif handle_duplicate_columns == "drop":
226222
df = drop_duplicated_columns(df)
227-
228223
elif handle_duplicate_columns == "rename":
229224
df = rename_duplicated_columns(df)
230-
231225
else:
232226
raise ValueError("handle_duplicate_columns must be one of ['warn', 'drop', 'rename']")
233-
234227
return df
235228

236229

tests/test_athena.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ def test_athena_read_list(glue_database):
247247

248248

249249
def test_sanitize_dataframe_column_names():
250-
with pytest.warns(UserWarning, match=r"Some*"):
250+
with pytest.warns(UserWarning, match=r"Duplicate*"):
251251
test_df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
252252
test_df.columns = ["a", "a"]
253253
assert wr.catalog.sanitize_dataframe_columns_names(df=pd.DataFrame({"A": [1, 2], "a": [3, 4]})).equals(test_df)

0 commit comments

Comments
 (0)