
Commit bb4095e

only quantize gemma2 VSWA
Signed-off-by: Netanel Haber <[email protected]>

remove misleading comment

Signed-off-by: Netanel Haber <[email protected]>

fix test_gemma

Signed-off-by: Netanel Haber <[email protected]>
1 parent 65be79e commit bb4095e

File tree

1 file changed: +4 -4 lines changed


tests/integration/defs/examples/test_gemma.py (+4 -4)
@@ -87,14 +87,13 @@ def get_ckpt_type(model_path):
 VSWA_MODELS = VSWA_ATTENTION.keys()
 
 GEMMA2_MODELS = {GEMMA_2_9B_IT, GEMMA_2_27B_IT}
-"For plain, non VSWA testing"
 
 
 @skip_pre_hopper
 @pytest.mark.parametrize("batch_size", [8])
 @pytest.mark.parametrize("data_type", ['bfloat16'])
 @pytest.mark.parametrize("qformat", ['fp8'])
-@pytest.mark.parametrize("gemma_model_root", VSWA_MODELS, indirect=True)
+@pytest.mark.parametrize("gemma_model_root", GEMMA2_MODELS, indirect=True)
 def test_llm_hf_gemma_quantization_1gpu_vswa(batch_size, data_type,
                                              gemma_model_root, llm_venv,
                                              cmodel_dir, engine_dir,
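The functional change in this hunk is the parametrization: the FP8 quantization VSWA test now runs only over GEMMA2_MODELS instead of every entry in VSWA_MODELS, matching the commit title, and the stray string literal under GEMMA2_MODELS (presumably the "misleading comment" from the commit message) is dropped. For context, a minimal sketch of how indirect=True routes each parametrized value through a gemma_model_root fixture before the test body sees it; the fixture body and the /models path below are assumptions for illustration, not the repository's conftest:

import pytest

GEMMA_2_9B_IT = "gemma-2-9b-it"
GEMMA_2_27B_IT = "gemma-2-27b-it"
GEMMA2_MODELS = {GEMMA_2_9B_IT, GEMMA_2_27B_IT}


@pytest.fixture
def gemma_model_root(request):
    # With indirect=True, each parametrized value arrives as request.param;
    # a real fixture would resolve it to a checkpoint directory
    # (hypothetical base path below).
    return f"/models/{request.param}"


@pytest.mark.parametrize("gemma_model_root", GEMMA2_MODELS, indirect=True)
def test_model_root_resolves(gemma_model_root):
    assert gemma_model_root.startswith("/models/gemma-2-")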
@@ -175,7 +174,8 @@ def hf_gemma_quantization_1gpu(batch_size,
     threshold_score = 18
 
     window = [
-        f"--max_attention_window={max_attention_window}",
+        "--max_attention_window_size",
+        ','.join((str(w) for w in max_attention_window)),
     ] if max_attention_window is not None else []
 
     summary_cmd = [
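This hunk replaces the single f-string flag with the two-token form: the flag name --max_attention_window_size followed by the per-layer window sizes joined into one comma-separated value. A minimal sketch of the resulting argv fragment, using hypothetical window sizes (the real values come from the test's max_attention_window parameter):

# Hypothetical per-layer windows, e.g. alternating local/global layers.
max_attention_window = [512, 4096]

window = [
    "--max_attention_window_size",
    ','.join(str(w) for w in max_attention_window),
] if max_attention_window is not None else []

print(window)  # ['--max_attention_window_size', '512,4096']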
@@ -306,7 +306,7 @@ def gemma_1gpu_summary(batch_size,
     check_call(" ".join(build_cmd), shell=True, env=llm_venv._new_env)
 
     window = {
-        'max_attention_window': max_attention_window
+        'max_attention_window_size': max_attention_window
     } if max_attention_window is not None else {}
 
     print("Run summarize...")
