Skip to content

Commit 6ef6c97

Browse files
svdimchenkoSerhii Dimchenko
and
Serhii Dimchenko
authored
feat: Implement non-partitioned tmp table (#405)
Co-authored-by: Serhii Dimchenko <[email protected]>
1 parent 905746f commit 6ef6c97

File tree

4 files changed

+62
-26
lines changed

4 files changed

+62
-26
lines changed

.gitignore

+3
Original file line numberDiff line numberDiff line change
@@ -143,3 +143,6 @@ cython_debug/
143143

144144
# Project specific
145145
test.py
146+
147+
# OS
148+
.DS_Store

dbt/include/athena/macros/materializations/models/helpers/get_partition_batches.sql

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,15 @@
1-
{% macro get_partition_batches(sql) -%}
1+
{% macro get_partition_batches(sql, as_subquery=True) -%}
22
{%- set partitioned_by = config.get('partitioned_by') -%}
33
{%- set athena_partitions_limit = config.get('partitions_limit', 100) | int -%}
44
{%- set partitioned_keys = adapter.format_partition_keys(partitioned_by) -%}
55
{% do log('PARTITIONED KEYS: ' ~ partitioned_keys) %}
66

77
{% call statement('get_partitions', fetch_result=True) %}
8-
select distinct {{ partitioned_keys }} from ({{ sql }}) order by {{ partitioned_keys }};
8+
{%- if as_subquery -%}
9+
select distinct {{ partitioned_keys }} from ({{ sql }}) order by {{ partitioned_keys }};
10+
{%- else -%}
11+
select distinct {{ partitioned_keys }} from {{ sql }} order by {{ partitioned_keys }};
12+
{%- endif -%}
913
{% endcall %}
1014

1115
{%- set table = load_result('get_partitions').table -%}

dbt/include/athena/macros/materializations/models/incremental/incremental.sql

+2-2
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,11 @@
2525
{% set to_drop = [] %}
2626
{% if existing_relation is none %}
2727
{% set query_result = safe_create_table_as(False, target_relation, sql) -%}
28-
{% set build_sql = "select '{{ query_result }}'" -%}
28+
{% set build_sql = "select '" ~ query_result ~ "'" -%}
2929
{% elif existing_relation.is_view or should_full_refresh() %}
3030
{% do drop_relation(existing_relation) %}
3131
{% set query_result = safe_create_table_as(False, target_relation, sql) -%}
32-
{% set build_sql = "select '{{ query_result }}'" -%}
32+
{% set build_sql = "select '" ~ query_result ~ "'" -%}
3333
{% elif partitioned_by is not none and strategy == 'insert_overwrite' %}
3434
{% set tmp_relation = make_temp_relation(target_relation) %}
3535
{% if tmp_relation is not none %}

dbt/include/athena/macros/materializations/models/table/create_table_as.sql

+51-22
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
{% macro athena__create_table_as(temporary, relation, sql) -%}
1+
{% macro athena__create_table_as(temporary, relation, sql, skip_partitioning=False) -%}
22
{%- set materialized = config.get('materialized', default='table') -%}
33
{%- set external_location = config.get('external_location', default=none) -%}
4-
{%- set partitioned_by = config.get('partitioned_by', default=none) -%}
4+
{%- set partitioned_by = config.get('partitioned_by', default=none) if not skip_partitioning else none -%}
55
{%- set bucketed_by = config.get('bucketed_by', default=none) -%}
66
{%- set bucket_count = config.get('bucket_count', default=none) -%}
77
{%- set field_delimiter = config.get('field_delimiter', default=none) -%}
@@ -90,41 +90,70 @@
9090

9191
{% macro create_table_as_with_partitions(temporary, relation, sql) -%}
9292

93-
{% set partitions_batches = get_partition_batches(sql) %}
93+
{%- set tmp_relation = api.Relation.create(
94+
identifier=relation.identifier ~ '__tmp_not_partitioned',
95+
schema=relation.schema,
96+
database=relation.database,
97+
s3_path_table_part=relation.identifier ~ '__tmp_not_partitioned' ,
98+
type='table'
99+
)
100+
-%}
101+
102+
{%- if tmp_relation is not none -%}
103+
{%- do drop_relation(tmp_relation) -%}
104+
{%- endif -%}
105+
106+
{%- do log('CREATE NON-PARTIONED STAGING TABLE: ' ~ tmp_relation) -%}
107+
{%- do run_query(create_table_as(temporary, tmp_relation, sql, True)) -%}
108+
109+
{% set partitions_batches = get_partition_batches(sql=tmp_relation, as_subquery=False) %}
94110
{% do log('BATCHES TO PROCESS: ' ~ partitions_batches | length) %}
95111

96-
{%- do log('CREATE EMPTY TABLE: ' ~ relation) -%}
97-
{%- set create_empty_table_query -%}
98-
{{ create_table_as(temporary, relation, sql) }}
99-
limit 0
100-
{%- endset -%}
101-
{%- do run_query(create_empty_table_query) -%}
102-
{%- set dest_columns = adapter.get_columns_in_relation(relation) -%}
112+
{%- set dest_columns = adapter.get_columns_in_relation(tmp_relation) -%}
103113
{%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%}
104114

105115
{%- for batch in partitions_batches -%}
106116
{%- do log('BATCH PROCESSING: ' ~ loop.index ~ ' OF ' ~ partitions_batches | length) -%}
107117

108-
{%- set insert_batch_partitions -%}
109-
insert into {{ relation }} ({{ dest_cols_csv }})
110-
select {{ dest_cols_csv }}
111-
from ({{ sql }})
112-
where {{ batch }}
113-
{%- endset -%}
118+
{%- if loop.index == 1 -%}
119+
{%- set create_target_relation_sql -%}
120+
select {{ dest_cols_csv }}
121+
from {{ tmp_relation }}
122+
where {{ batch }}
123+
{%- endset -%}
124+
{%- do run_query(create_table_as(temporary, relation, create_target_relation_sql)) -%}
125+
{%- else -%}
126+
{%- set insert_batch_partitions_sql -%}
127+
insert into {{ relation }} ({{ dest_cols_csv }})
128+
select {{ dest_cols_csv }}
129+
from {{ tmp_relation }}
130+
where {{ batch }}
131+
{%- endset -%}
132+
133+
{%- do run_query(insert_batch_partitions_sql) -%}
134+
{%- endif -%}
135+
114136

115-
{%- do run_query(insert_batch_partitions) -%}
116137
{%- endfor -%}
117138

139+
{%- do drop_relation(tmp_relation) -%}
140+
118141
select 'SUCCESSFULLY CREATED TABLE {{ relation }}'
119142

120143
{%- endmacro %}
121144

122145
{% macro safe_create_table_as(temporary, relation, sql) -%}
123-
{%- set query_result = adapter.run_query_with_partitions_limit_catching(create_table_as(temporary, relation, sql)) -%}
124-
{%- do log('QUERY RESULT: ' ~ query_result) -%}
125-
{%- if query_result == 'TOO_MANY_OPEN_PARTITIONS' -%}
126-
{%- do create_table_as_with_partitions(temporary, relation, sql) -%}
127-
{%- set query_result = relation ~ ' with many partitions created' -%}
146+
{%- if temporary -%}
147+
{%- do run_query(create_table_as(temporary, relation, sql, True)) -%}
148+
{%- set query_result = relation ~ ' as temporary relation without partitioning created' -%}
149+
{%- else -%}
150+
{%- set query_result = adapter.run_query_with_partitions_limit_catching(create_table_as(temporary, relation, sql)) -%}
151+
{%- do log('QUERY RESULT: ' ~ query_result) -%}
152+
{%- if query_result == 'TOO_MANY_OPEN_PARTITIONS' -%}
153+
{%- do create_table_as_with_partitions(temporary, relation, sql) -%}
154+
{%- set query_result = relation ~ ' with many partitions created' -%}
155+
{%- endif -%}
128156
{%- endif -%}
157+
129158
{{ return(query_result) }}
130159
{%- endmacro %}

0 commit comments

Comments
 (0)