Skip to content

Commit 2bf79c8

Browse files
authored
chore: add redshift COPY with SERIALIZETOJSON test case (#3104)
* test: add COPY with SERIALIZETOJSON test case * docs: clarify docs
1 parent 710f640 commit 2bf79c8

File tree

2 files changed

+41
-0
lines changed

2 files changed

+41
-0
lines changed

awswrangler/redshift/_write.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -648,6 +648,10 @@ def copy( # noqa: PLR0913
648648
The size that will be set for all VARCHAR columns not specified with varchar_lengths.
649649
varchar_lengths
650650
Dict of VARCHAR length by columns. (e.g. {"col1": 10, "col5": 200}).
651+
serialize_to_json
652+
Should awswrangler add the SERIALIZETOJSON parameter to the COPY command?
653+
SERIALIZETOJSON is necessary to load nested data. See:
654+
https://docs.aws.amazon.com/redshift/latest/dg/ingest-super.html#copy_json
651655
keep_files
652656
Should keep stage files?
653657
use_threads

tests/unit/test_redshift.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1595,3 +1595,40 @@ def test_add_new_columns_case_sensitive(
15951595
df2 = wr.redshift.read_sql_query(sql=f"SELECT * FROM {schema}.{redshift_table}", con=redshift_con)
15961596
expected_columns = list(sorted(df.columns.tolist() + ["boo"]))
15971597
assert expected_columns == list(sorted(df2.columns.tolist()))
1598+
1599+
1600+
def test_copy_serialize_to_json_super(
    path: str, redshift_table: str, redshift_con: redshift_connector.Connection, databases_parameters: dict[str, Any]
) -> None:
    """Check that ``wr.redshift.copy`` with ``serialize_to_json=True`` loads into a SUPER column.

    The test creates a table whose ``text`` column is of type SUPER, copies a
    DataFrame whose ``text`` values are JSON-encoded strings into it, reads the
    rows back ordered by ``id``, decodes the returned ``text`` values with
    ``json.loads`` and asserts that the result equals the DataFrame that was
    written.

    Parameters
    ----------
    path
        Staging location passed through to ``wr.redshift.copy``.
    redshift_table
        Name of the table created and queried by this test.
    redshift_con
        Open Redshift connection used for the DDL, the COPY and the read-back.
    databases_parameters
        Test configuration; ``["redshift"]["role"]`` supplies the IAM role for COPY.
    """
    schema = "public"

    with redshift_con.cursor() as cursor:
        cursor.execute(f"CREATE TABLE {schema}.{redshift_table} (id BIGINT, text SUPER ENCODE RAW) DISTSTYLE AUTO;")
        redshift_con.commit()

    df = pd.DataFrame({"id": [1, 2, 3], "text": [{"text": "test1"}, {"text": "test2"}, {"text": "test3"}]})

    df["id"] = df["id"].astype("Int64")
    # Serialize JSON: the column is staged as JSON-encoded strings, not as Python dicts.
    df["text"] = df["text"].apply(json.dumps)

    wr.redshift.copy(
        df=df,
        path=path,
        con=redshift_con,
        schema=schema,
        table=redshift_table,
        mode="append",
        serialize_to_json=True,  # Add SERIALIZETOJSON to COPY to be able to load into the SUPER column
        iam_role=databases_parameters["redshift"]["role"],
    )

    # Query through the same ``schema`` variable used for CREATE TABLE and COPY so
    # the three statements cannot drift apart.
    df_res = wr.redshift.read_sql_query(
        sql=f"SELECT * FROM {schema}.{redshift_table} ORDER BY id",
        con=redshift_con,
    )

    # Deserialize JSON: decode the values returned for the SUPER column before comparing.
    df_res["text"] = df_res["text"].apply(json.loads)

    assert_pandas_equals(df, df_res)

0 commit comments

Comments
 (0)