Skip to content

Commit b80bfcd

Browse files
committed
Fix validate schema
1 parent 0157c10 commit b80bfcd

File tree

1 file changed

+5
-4
lines changed

1 file changed

+5
-4
lines changed

awswrangler/s3/_read_parquet.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,10 @@ def _validate_schemas(schemas: List[pa.schema], validate_schema: bool) -> pa.sch
116116
first: pa.schema = schemas[0]
117117
if len(schemas) == 1:
118118
return first
119-
for schema in schemas[1:]:
120-
if not first.equals(schema, check_metadata=False):
121-
if validate_schema:
119+
first_dict = {s.name: s.type for s in first}
120+
if validate_schema:
121+
for schema in schemas[1:]:
122+
if first_dict != {s.name: s.type for s in schema}:
122123
raise exceptions.InvalidSchemaConvergence(
123124
f"At least 2 different schemas were detected:\n 1 - {first}\n 2 - {schema}."
124125
)
@@ -187,7 +188,7 @@ def _read_parquet_metadata(
187188
version_ids=version_ids,
188189
coerce_int96_timestamp_unit=coerce_int96_timestamp_unit,
189190
)
190-
merged_schemas = _validate_schemas(schemas=schemas, validate_schema=True)
191+
merged_schemas = _validate_schemas(schemas=schemas, validate_schema=False)
191192

192193
columns_types: Dict[str, str] = _data_types.athena_types_from_pyarrow_schema(
193194
schema=merged_schemas, partitions=None, ignore_null=ignore_null

0 commit comments

Comments
 (0)