Skip to content

Commit f1935c5

Browse files
authored
Fix vLLM generation with sampling params (#578)
* Fix vLLM generation with sampling params
* Fix typo
* Add Qwen coder example
* Clean
* Remove debug
* Remove debug
1 parent 4f4baed commit f1935c5

File tree

5 files changed

+14
-8
lines changed

5 files changed

+14
-8
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ tensorboardX = ["tensorboardX"]
9595
vllm = ["vllm>=0.7.0", "ray", "more_itertools"]
9696
quality = ["ruff==v0.2.2","pre-commit"]
9797
tests = ["pytest==7.4.0"]
98-
dev = ["lighteval[accelerate,quality,tests,multilingual,math]"]
98+
dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks]"]
9999
docs = ["hf-doc-builder", "watchdog"]
100100
extended_tasks = [
101101
"langdetect", # ifeval

src/lighteval/main_vllm.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2121
# SOFTWARE.
2222
import os
23+
import re
2324
from typing import Optional
2425

2526
from typer import Argument, Option
@@ -138,6 +139,8 @@ def vllm(
138139
generation_parameters = GenerationParameters.from_dict(config)
139140
else:
140141
generation_parameters = GenerationParameters.from_model_args(model_args)
142+
# We slice out generation_parameters from model_args to avoid double-counting in the VLLMModelConfig
143+
model_args = re.sub(r"generation_parameters=\{.*?\},?", "", model_args).strip(",")
141144
metric_options = {}
142145

143146
model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}

src/lighteval/models/sglang/sglang_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,14 +216,14 @@ def greedy_until(
216216
if max_new_tokens is not None:
217217
if context_size + max_new_tokens > self.max_length:
218218
logger.warning(
219-
f"{context_size + max_new_tokens=} which is greather than {self.max_length=}. Truncating context to {self.max_length - max_new_tokens} tokens."
219+
f"{context_size + max_new_tokens=} which is greater than {self.max_length=}. Truncating context to {self.max_length - max_new_tokens} tokens."
220220
)
221221
context_size = self.max_length - max_new_tokens
222222
inputs = [input[-context_size:] for input in inputs]
223223
else:
224224
if context_size > self.max_length:
225225
logger.warning(
226-
f"{context_size=} which is greather than {self.max_length=}. Truncating context to {self.max_length} tokens."
226+
f"{context_size=} which is greater than {self.max_length=}. Truncating context to {self.max_length} tokens."
227227
)
228228
context_size = self.max_length
229229
inputs = [input[-context_size:] for input in inputs]

src/lighteval/models/vllm/vllm_model.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ def greedy_until(
266266
if max_new_tokens is not None:
267267
if context_size + max_new_tokens > self.max_length:
268268
logger.warning(
269-
f"{context_size + max_new_tokens=} which is greather than {self.max_length=}. Truncating context to {self.max_length - max_new_tokens} tokens."
269+
f"{context_size + max_new_tokens=} which is greater than {self.max_length=}. Truncating context to {self.max_length - max_new_tokens} tokens."
270270
)
271271
context_size = self.max_length - max_new_tokens
272272
if context_size < 0:
@@ -278,7 +278,7 @@ def greedy_until(
278278
else:
279279
if context_size > self.max_length:
280280
logger.warning(
281-
f"{context_size=} which is greather than {self.max_length=}. Truncating context to {self.max_length} tokens."
281+
f"{context_size=} which is greater than {self.max_length=}. Truncating context to {self.max_length} tokens."
282282
)
283283
context_size = self.max_length
284284
inputs = [input[-context_size:] for input in inputs]

src/lighteval/tasks/extended/lcb/main.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,12 @@
2121
# SOFTWARE.
2222
"""Usage:
2323
lighteval vllm \
24-
"pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B,dtype=float16,tensor_parallel_size=4,max_model_length=32768,gpu_memory_utilisation=0.8" \
25-
"extended|lcb:codegeneration|0|0" \
26-
--custom-tasks src/lighteval/tasks/extended/lcb/main.py
24+
"pretrained=deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B,dtype=bfloat16,data_parallel_size=8,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={temperature:0.6,top_p:0.95}" \
25+
"extended|lcb:codegeneration|0|0"
26+
27+
lighteval vllm \
28+
"pretrained=Qwen/Qwen2.5-Coder-3B-Instruct,dtype=bfloat16,data_parallel_size=8,max_model_length=32768,gpu_memory_utilization=0.8,generation_parameters={temperature:0.2,top_p:0.95}" \
29+
"extended|lcb:codegeneration|0|0"
2730
"""
2831

2932
import json

0 commit comments

Comments (0)