Skip to content

Commit 793d010

Browse files
ruodil and LarryXFly authored
waive failed case in perf test, change default max_batch_size to 512 and write config.json to output log (#3656)
Signed-off-by: Ruodi <[email protected]> Signed-off-by: Larry <[email protected]> Co-authored-by: Larry <[email protected]>
1 parent 52e6702 commit 793d010

File tree

5 files changed

+45
-34
lines changed

5 files changed

+45
-34
lines changed

tests/integration/defs/perf/test_perf.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,8 @@ def __init__(
283283
backend: str = "",
284284
mode: str = "plugin",
285285
data_type: str = "float16",
286-
max_batch_size: int = 2048,
287-
max_num_tokens: int = 8192,
286+
max_batch_size: int = 512,
287+
max_num_tokens: int = 2048,
288288
gpu_weights_percent: float = -1,
289289
batch_sizes: List[int] = [0],
290290
input_lens: List[int] = [8],
@@ -601,7 +601,7 @@ def validate(self):
601601
if self.model_name in MODEL_PATH_DICT.keys():
602602
VALID_QUANTS = [
603603
"", "nvfp4", "fp8", "int8_sq", "int4_awq", "w4a8_awq",
604-
"int8_wo", "int4_wo", "full_prec"
604+
"w4a16_awq", "int8_wo", "int4_wo", "full_prec"
605605
]
606606
else:
607607
VALID_QUANTS = [

tests/integration/defs/perf/utils.py

+14
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,20 @@ def run_cmd(self, cmd_idx: int, venv) -> str:
265265
benchmark_cmd = mpi_cmd + command
266266
output += subprocess.check_output(benchmark_cmd,
267267
env=envs).decode()
268+
# write config.json to output log
269+
match = re.search(r'--engine_dir=([^\s]+)', current_cmd_str)
270+
if match:
271+
engine_dir = match.group(1)
272+
print_info(
273+
f'writing config.json in {engine_dir} to output log')
274+
with open(os.path.join(engine_dir, "config.json"),
275+
"r") as f:
276+
config_content = f.read()
277+
output += "\n" + "=" * 50 + "\n"
278+
output += "ENGINE CONFIG:\n"
279+
output += "=" * 50 + "\n"
280+
output += config_content
281+
output += "\n" + "=" * 50 + "\n"
268282
return output
269283

270284
def get_cmd_str(self, cmd_idx) -> List[str]:

tests/integration/test_lists/qa/trt_llm_release_perf_cluster_test.yml

+5-6
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,15 @@ trt_llm_release_perf_cluster_test:
4040

4141
# GB chip specific tests with high memory
4242
- condition:
43-
ranges:
44-
system_gpu_count:
45-
gte: 8
46-
gpu_memory:
47-
gt: 100000
4843
wildcards:
49-
chip: 'gb*'
44+
gpu:
45+
- '*b100*'
5046
linux_distribution_name: '*'
5147
tests:
5248
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:512-input_output_len:128,128-quant:fp8-ep:8-tp:8-gpus:8]
49+
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:1-input_output_len:1000,2000-quant:fp8-reqs:10-ep:4-tp:8-gpus:8] #min latency test
50+
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:384-maxnt:1536-input_output_len:1000,2000-quant:fp8-reqs:49152-con:3072-ep:8-tp:8-gpus:8] #max throughput test
5351
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float16-maxbs:512-input_output_len:128,128-quant:nvfp4-ep:8-tp:8-gpus:8]
52+
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-streaming-float16-maxbs:512-input_output_len:1000,1000-quant:nvfp4-con:4096-ep:8-tp:8-gpus:8]
5453
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float16-maxbs:1-input_output_len:1000,2000-quant:nvfp4-reqs:10-ep:4-tp:8-gpus:8] #min latency test
5554
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float16-maxbs:384-maxnt:1536-input_output_len:1000,2000-quant:nvfp4-reqs:49152-con:3072-ep:8-tp:8-gpus:8] #max throughput test

tests/integration/test_lists/qa/trt_llm_release_perf_test.yml

+6-25
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@ trt_llm_release_perf_test:
7474
tests:
7575
- perf/test_perf.py::test_perf[llama_70b_sq_per_tensor-cppmanager-exe-plugin_ifb-float16-input_output_len:128,128+512,32-gpus:2]
7676
- perf/test_perf.py::test_perf[mixtral_8x7b-cppmanager-exe-plugin_ifb-float16-mp-input_output_len:128,128+512,32-gpus:2]
77-
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-float16-maxbs:256-input_output_len:128,128+512,32-gpus:2]
7877
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-float16-input_output_len:128,128+512,32-gpus:2]
7978
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-streaming-float16-input_output_len:128,128-gpus:2]
8079

@@ -123,8 +122,8 @@ trt_llm_release_perf_test:
123122
tests:
124123
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:32-input_output_len:128,128-quant:fp8-ep:8-tp:8-gpus:8]
125124
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-streaming-float16-maxbs:32-input_output_len:128,128-quant:fp8-ep:8-tp:8-gpus:8]
126-
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:1-input_output_len:1000,2000-reqs:10-quant:fp8-con:1-ep:4-tp:8-gpus:8] #min latency test
127-
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:128-maxnt:1127-input_output_len:1000,2000-reqs:5120-quant:fp8-con:1024-ep:8-tp:8-gpus:8] #max throughput test
125+
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:1-input_output_len:1000,2000-quant:fp8-reqs:10-con:1-ep:4-tp:8-gpus:8] #min latency test
126+
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:128-maxnt:1127-input_output_len:1000,2000-quant:fp8-reqs:5120-con:1024-ep:8-tp:8-gpus:8] #max throughput test
128127

129128
# FP8 specific tests
130129
- condition:
@@ -134,7 +133,7 @@ trt_llm_release_perf_test:
134133
- perf/test_perf.py::test_perf[llama_v3.1_8b-cppmanager-exe-plugin_ifb-float16-maxbs:256-input_output_len:128,128-beams:4-quant:fp8]
135134

136135
- perf/test_perf.py::test_perf[llama_v3.1_8b-bench-float16-input_output_len:128,128-quant:fp8]
137-
- perf/test_perf.py::test_perf[llama_v3.1_8b-bench-float16-input_output_len:128,128-quant:int4_awq]
136+
- perf/test_perf.py::test_perf[llama_v3.1_8b-bench-float16-input_output_len:128,128-quant:w4a16_awq]
138137
- perf/test_perf.py::test_perf[llama_v3.1_8b-bench-float16-input_output_len:128,128-quant:w4a8_awq]
139138
- perf/test_perf.py::test_perf[llama_v3.1_8b-bench-float16-maxbs:256-input_output_len:128,128-quant:fp8]
140139
- perf/test_perf.py::test_perf[llama_v3.1_8b-bench-float16-maxbs:256-input_output_len:512,32-quant:fp8]
@@ -176,7 +175,6 @@ trt_llm_release_perf_test:
176175
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-float16-input_output_len:128,128-quant:fp8-gpus:2]
177176
- perf/test_perf.py::test_perf[mixtral_8x7b_v0.1-bench-float16-input_output_len:512,32-quant:fp8-gpus:2]
178177
- perf/test_perf.py::test_perf[llama_v3.2_11b-bench-float16-input_output_len:512,200-quant:fp8-gpus:2]
179-
- perf/test_perf.py::test_perf[llama_v3.1_70b-bench-float16-input_output_len:128,128-quant:fp8-tp:2]
180178

181179
- condition:
182180
terms:
@@ -203,28 +201,11 @@ trt_llm_release_perf_test:
203201
# GB chip specific tests
204202
- condition:
205203
wildcards:
206-
chip: 'gb*'
204+
gpu:
205+
- '*b100*'
206+
- '*b40*'
207207
linux_distribution_name: '*'
208208
tests:
209209
- perf/test_perf.py::test_perf[llama_v3.1_8b-bench-float16-input_output_len:128,128-quant:nvfp4]
210210
- perf/test_perf.py::test_perf[deepseek_v3_lite_nvfp4-bench-pytorch-float16-input_output_len:128,128-quant:nvfp4]
211211
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float16-input_output_len:128,128-quant:fp8]
212-
213-
# GB200 chip specific tests
214-
- condition:
215-
ranges:
216-
system_gpu_count:
217-
gte: 8
218-
gpu_memory:
219-
gt: 100000
220-
wildcards:
221-
chip: 'gb*'
222-
linux_distribution_name: '*'
223-
tests:
224-
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:512-input_output_len:128,128-quant:fp8-ep:8-tp:8-gpus:8]
225-
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:1-input_output_len:1000,2000-quant:fp8-reqs:10-ep:4-tp:8-gpus:8] #min latency test
226-
- perf/test_perf.py::test_perf[deepseek_r1-bench-pytorch-float16-maxbs:384-maxnt:1536-input_output_len:1000,2000-quant:fp8-reqs:49152-con:3072-ep:8-tp:8-gpus:8] #max throughput test
227-
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-float16-maxbs:512-input_output_len:128,128-quant:nvfp4-ep:8-tp:8-gpus:8]
228-
- perf/test_perf.py::test_perf[deepseek_r1_nvfp4-bench-pytorch-streaming-float16-maxbs:512-input_output_len:1000,1000-quant:nvfp4-con:4096-ep:8-tp:8-gpus:8]
229-
- perf/test_perf.py::test_perf[deepseek_r1-nvfp4-bench-pytorch-float16-maxbs:1-input_output_len:1000,2000-quant:nvfp4-reqs:10-ep:4-tp:8-gpus:8] #min latency test
230-
- perf/test_perf.py::test_perf[deepseek_r1-nvfp4-bench-pytorch-float16-maxbs:384-maxnt:1536-input_output_len:1000,2000-quant:nvfp4-reqs:49152-con:3072-ep:8-tp:8-gpus:8] #max throughput test

tests/integration/test_lists/waives.txt

+17
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,23 @@ examples/test_medusa.py::test_llama_medusa_1gpu[llama-v2-7b-hf] SKIP (https://nv
443443
examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.2-1b] SKIP (https://nvbugs/5219534)
444444
examples/test_medusa.py::test_llama_medusa_1gpu[llama-3.1-8b] SKIP (https://nvbugs/5219535)
445445
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16] SKIP (https://nvbugspro.nvidia.com/bug/5226339)
446+
examples/test_multimodal.py::test_llm_multimodal_general[neva-22b-pp:1-tp:1-bfloat16-bs:1-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5227342)
447+
disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5201168)
446448
perf/test_perf.py::test_perf[gpt_350m-cppmanager-plugin-float16-bs:32-input_output_len:60,20] SKIP (https://nvbugs/5228840)
447449
perf/test_perf.py::test_perf[gpt_350m-cppmanager-static_batching-plugin-float16-bs:32-input_output_len:60,20] SKIP (https://nvbugs/5228840)
450+
perf/test_perf.py::test_perf[t5-bench-float16-input_output_len:128,20] SKIP # https://nvbugspro.nvidia.com/bug/5207477
451+
perf/test_perf.py::test_perf[flan_t5_base-bench-float16-input_output_len:128,20] SKIP
452+
perf/test_perf.py::test_perf[flan_t5_large-bench-float16-input_output_len:128,20] SKIP
453+
perf/test_perf.py::test_perf[flan_t5_large-bench-float16-maxbs:1-input_output_len:128,20-gpus:2] SKIP
454+
perf/test_perf.py::test_perf[whisper_large_v3-bench-float16-input_output_len:128,20] SKIP
455+
perf/test_perf.py::test_perf[mamba_370m-bench-float16-input_output_len:128,128] SKIP
456+
perf/test_perf.py::test_perf[mamba_370m-bench-float16-input_output_len:512,32] SKIP
457+
perf/test_perf.py::test_perf[mamba_2.8b-bench-float16-input_output_len:128,128] SKIP
458+
perf/test_perf.py::test_perf[mamba_2.8b-bench-float16-input_output_len:512,32] SKIP
459+
perf/test_perf.py::test_perf[t5-bench-float16-input_output_len:128,20-gpus:2] SKIP
460+
perf/test_perf.py::test_perf[t5-bench-float16-maxbs:1-input_output_len:128,20-gpus:2] SKIP
461+
perf/test_perf.py::test_perf[gpt_20b-bench-float16-maxbs:8-input_output_len:128,128-reqs:80-gpus:8] SKIP
462+
perf/test_perf.py::test_perf[gpt_20b-bench-float16-maxbs:8-input_output_len:512,32-reqs:80-gpus:8] SKIP
463+
full:B40/perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float16-input_output_len:128,128-quant:fp8] SKIP #https://nvbugspro.nvidia.com/bug/5150255
464+
full:B200/perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-bench-pytorch-float16-input_output_len:128,128-quant:fp8] SKIP #https://nvbugspro.nvidia.com/bug/5150255
448465
accuracy/test_cli_flow.py::TestGpt2Medium::test_fp8_lm_head SKIP (https://nvbugs/5231769)

0 commit comments

Comments (0)