We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c8cea30, commit 83eda07 (Copy full SHA for 83eda07)
tensorrt_llm/_torch/models/modeling_deepseekv3.py
@@ -491,7 +491,7 @@ def forward(
491
492
min_latency_mode = True if hidden_states.size(
493
0
494
- ) <= 128 and self.fusion_config.POST_MOE_FUSION and self.is_nvfp4 else False
+ ) <= 128 and self.fusion_config.POST_MOE_FUSION and self.is_nvfp4 and not using_prev_fusion else False
495
496
if residual is None:
497
residual = hidden_states
@@ -510,6 +510,7 @@ def forward(
510
)
511
512
if self.fusion_config.PRE_MOE_FUSION:
513
+ hidden_states_fp4 = None
514
# Custom AR Fusion for DeepseekV3
515
if using_prev_fusion:
516
0 commit comments