
fix: edge case fix on iceberg target table being dropped instead of being renamed first #667


Merged: 12 commits, Jun 7, 2024
@@ -133,20 +133,27 @@
{%- endif -%}

{%- set old_relation_table_type = adapter.get_glue_table_type(old_relation) -%}
-- we cannot use old_bkp_relation, because it returns None if the relation doesn't exist
-- we need to create a Python object via make_temp_relation instead
{%- set old_relation_bkp = make_temp_relation(old_relation, '__bkp') -%}

{%- if old_relation_table_type == 'iceberg' -%}
{{ rename_relation(old_relation, old_bkp_relation) }}
{%- if old_relation_table_type.value == 'iceberg_table' -%}
{{ rename_relation(old_relation, old_relation_bkp) }}
{%- else -%}
{%- do drop_relation_glue(old_relation) -%}
{%- endif -%}

{{ rename_relation(tmp_relation, target_relation) }}

-- old_bkp_relation might not exist in case we have a switch from hive to iceberg
-- we avoid dropping something that doesn't exist, even though drop_relation can deal with non-existing tables
{%- if old_bkp_relation is not none -%}
{%- do drop_relation(old_bkp_relation) -%}
-- old_relation_bkp might not exist in case we have a switch from hive to iceberg
-- we cannot use old_bkp_relation because its result could be cached by dbt; we use the Glue APIs instead
-- get_glue_table returns none on EntityNotFoundException, so this also works in restrictive environments
-- where Lake Formation is used and only operations on existing objects are allowed
{%- set old_relation_bkp_exists = adapter.get_glue_table(old_relation_bkp) -%}
{%- if old_relation_bkp_exists is not none -%}
{%- do drop_relation(old_relation_bkp) -%}
{%- endif -%}

{%- endif -%}
{%- endif -%}
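
The guard above only works because `adapter.get_glue_table` swallows the missing-table case instead of raising. Below is a minimal sketch of that behaviour, assuming a plain boto3 Glue client and illustrative database/table names; it is an approximation of what the macro relies on, not the adapter's actual implementation.

```python
# Minimal sketch, assuming a plain boto3 Glue client and illustrative names;
# dbt-athena's adapter.get_glue_table is assumed to behave roughly like this.
import boto3


def get_glue_table(database: str, table: str):
    """Return the Glue table definition, or None if the table does not exist."""
    glue = boto3.client("glue")
    try:
        return glue.get_table(DatabaseName=database, Name=table)["Table"]
    except glue.exceptions.EntityNotFoundException:
        # A missing table is reported as None instead of raising, which is what
        # lets the macro skip the drop in restrictive Lake Formation setups.
        return None


# Usage mirroring the macro: only drop the __bkp relation if Glue can see it.
if get_glue_table("my_schema", "my_table__bkp") is not None:
    print("backup relation exists, safe to drop it")
```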

28 changes: 22 additions & 6 deletions tests/functional/adapter/test_ha_iceberg.py
@@ -43,17 +43,33 @@ class TestTableIcebergTableUnique:
def models(self):
return {"table_iceberg_table_unique.sql": models__table_iceberg_naming_table_unique}

def test__table_creation(self, project):
def test__table_creation(self, project, capsys):
relation_name = "table_iceberg_table_unique"
model_run_result_row_count_query = f"select count(*) as records from {project.test_schema}.{relation_name}"

model_run = run_dbt(["run", "--select", relation_name])
model_run_result = model_run.results[0]
assert model_run_result.status == RunStatus.Success
first_model_run = run_dbt(["run", "--select", relation_name])
first_model_run_result = first_model_run.results[0]
assert first_model_run_result.status == RunStatus.Success

first_models_records_count = project.run_sql(model_run_result_row_count_query, fetch="all")[0][0]

assert first_models_records_count == 2

second_model_run = run_dbt(["run", "-d", "--select", relation_name])
second_model_run_result = second_model_run.results[0]
assert second_model_run_result.status == RunStatus.Success

out, _ = capsys.readouterr()
# on the second run we expect the target table to be renamed to __bkp first
alter_statement = (
f"alter table `awsdatacatalog`.`{project.test_schema}`.`{relation_name}` "
f"rename to `{project.test_schema}`.`{relation_name}__bkp`"
)
assert alter_statement in out

models_records_count = project.run_sql(model_run_result_row_count_query, fetch="all")[0][0]
second_models_records_count = project.run_sql(model_run_result_row_count_query, fetch="all")[0][0]

assert models_records_count == 2
assert second_models_records_count == 2
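
For readers skimming the test, here is a rough, runnable Python restatement of the swap sequence the macro performs and that the second run asserts on; every helper and the fake adapter below are placeholders, not real dbt-athena APIs.

```python
# Illustrative restatement of the macro's swap logic with stubbed helpers;
# none of these names are real dbt-athena APIs.
def rename_relation(src: str, dst: str) -> None:
    print(f"alter table {src} rename to {dst}")


def drop_relation(rel: str) -> None:
    print(f"drop table if exists {rel}")


class FakeAdapter:
    """Stub that pretends the existing target is an Iceberg table."""

    def get_glue_table_type(self, rel: str) -> str:
        return "iceberg_table"

    def get_glue_table(self, rel: str):
        return {"Name": rel}  # pretend Glue can still see the backup table


def swap_target(adapter, old, tmp, target, old_bkp):
    if adapter.get_glue_table_type(old) == "iceberg_table":
        # Iceberg targets are renamed to __bkp first, never dropped outright,
        # so the data survives if the swap fails halfway.
        rename_relation(old, old_bkp)
    else:
        drop_relation(old)
    # Promote the freshly built relation to the target name.
    rename_relation(tmp, target)
    # Drop the backup only if Glue still sees it; it may not exist at all
    # when switching from hive to iceberg.
    if adapter.get_glue_table(old_bkp) is not None:
        drop_relation(old_bkp)


swap_target(FakeAdapter(), "t", "t__tmp", "t", "t__bkp")
```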


# when s3_data_naming=table is used for iceberg, a compile error must be raised