Skip to content

Commit 16447e5

Browse files
committed
fix
1 parent 2f49454 commit 16447e5

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

vllm/engine/arg_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class EngineArgs:
3535
quantization: Optional[str] = None
3636
enforce_eager: bool = False
3737
max_context_len_to_capture: int = 8192
38-
disable_fast_allreduce = False
38+
disable_fast_allreduce: bool = False
3939

4040
def __post_init__(self):
4141
if self.tokenizer is None:

vllm/worker/model_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -408,7 +408,7 @@ def capture_model(self, kv_caches: List[KVCache]) -> None:
408408
context_lens = torch.ones(max_batch_size, dtype=torch.int32).cuda()
409409
block_tables = torch.from_numpy(self.graph_block_tables).cuda()
410410

411-
if not self.model_config.disable_fast_allreduce:
411+
if not self.parallel_config.disable_fast_allreduce:
412412
comm_op.init_fast_ar()
413413
comm_op.begin_capture()
414414
# NOTE: Capturing the largest batch size first may help reduce the

0 commit comments

Comments (0)