Skip to content

Commit b2a0803

Browse files
committed
add new models for evals
1 parent 6b95248 commit b2a0803

File tree

2 files changed

+13
-9
lines changed

2 files changed

+13
-9
lines changed

evals/index.eval.ts

+7 -8
Original file line number | Diff line number | Diff line change
@@ -39,6 +39,7 @@ import { google } from "@ai-sdk/google";
3939
import { anthropic } from "@ai-sdk/anthropic";
4040
import { groq } from "@ai-sdk/groq";
4141
import { cerebras } from "@ai-sdk/cerebras";
42+
import { openai } from "@ai-sdk/openai";
4243
dotenv.config();
4344

4445
/**
@@ -274,14 +275,12 @@ const generateFilteredTestcases = (): Testcase[] => {
274275

275276
// Execute the task
276277
let llmClient: LLMClient;
277-
if (input.modelName.startsWith("gpt")) {
278-
llmClient = new CustomOpenAIClient({
279-
modelName: input.modelName as AvailableModel,
280-
client: wrapOpenAI(
281-
new OpenAI({
282-
apiKey: process.env.OPENAI_API_KEY,
283-
}),
284-
),
278+
if (
279+
input.modelName.startsWith("gpt") ||
280+
input.modelName.startsWith("o")
281+
) {
282+
llmClient = new AISdkClient({
283+
model: wrapAISDKModel(openai(input.modelName)),
285284
});
286285
} else if (input.modelName.startsWith("gemini")) {
287286
llmClient = new AISdkClient({

evals/taskConfig.ts

+6 -1
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,18 @@ const ALL_EVAL_MODELS = [
2323
"gemini-2.5-pro-exp-03-25",
2424
"gemini-1.5-pro",
2525
"gemini-1.5-flash-8b",
26+
"gemini-2.5-flash-preview-04-17",
27+
"gemini-2.5-pro-preview-03-25",
2628
// ANTHROPIC
2729
"claude-3-5-sonnet-latest",
2830
"claude-3-7-sonnet-latest",
2931
// OPENAI
3032
"gpt-4o-mini",
3133
"gpt-4o",
3234
"gpt-4.5-preview",
35+
"o3",
36+
"o3-mini",
37+
"o4-mini",
3338
// TOGETHER - META
3439
"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
3540
"meta-llama/Llama-3.3-70B-Instruct-Turbo",
@@ -95,7 +100,7 @@ if (filterByEvalName && !tasksByName[filterByEvalName]) {
95100
*/
96101
const DEFAULT_EVAL_MODELS = process.env.EVAL_MODELS
97102
? process.env.EVAL_MODELS.split(",")
98-
: ["claude-3-5-sonnet-latest", "gpt-4o-mini", "gpt-4o"];
103+
: ["gemini-2.5-pro-preview-03-25", "o3"];
99104

100105
/**
101106
* getModelList:

0 commit comments

Comments
 (0)