Skip to content

Commit 1b78fd6

Browse files
committed
track inference time
1 parent abefe82 commit 1b78fd6

File tree

4 files changed

+36
-3
lines changed

4 files changed

+36
-3
lines changed

lib/agent/AnthropicCUAClient.ts

+21 -1
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,9 @@ export class AnthropicCUAClient extends AgentClient {
108108
});
109109

110110
try {
111+
let totalInputTokens = 0;
112+
let totalOutputTokens = 0;
113+
let totalInferenceTime = 0;
111114
// Execute steps until completion or max steps reached
112115
while (!completed && currentStep < maxSteps) {
113116
logger({
@@ -117,6 +120,9 @@ export class AnthropicCUAClient extends AgentClient {
117120
});
118121

119122
const result = await this.executeStep(inputItems, logger);
123+
totalInputTokens += result.usage.input_tokens;
124+
totalOutputTokens += result.usage.output_tokens;
125+
totalInferenceTime += result.usage.inference_time_ms;
120126

121127
// Add actions to the list
122128
if (result.actions.length > 0) {
@@ -158,6 +164,11 @@ export class AnthropicCUAClient extends AgentClient {
158164
actions,
159165
message: finalMessage,
160166
completed,
167+
usage: {
168+
input_tokens: totalInputTokens,
169+
output_tokens: totalOutputTokens,
170+
inference_time_ms: totalInferenceTime,
171+
},
161172
};
162173
} catch (error) {
163174
const errorMessage =
@@ -185,7 +196,11 @@ export class AnthropicCUAClient extends AgentClient {
185196
message: string;
186197
completed: boolean;
187198
nextInputItems: ResponseInputItem[];
188-
usage: { input_tokens: number; output_tokens: number };
199+
usage: {
200+
input_tokens: number;
201+
output_tokens: number;
202+
inference_time_ms: number;
203+
};
189204
}> {
190205
try {
191206
// Get response from the model
@@ -194,6 +209,7 @@ export class AnthropicCUAClient extends AgentClient {
194209
const usage = {
195210
input_tokens: result.usage.input_tokens,
196211
output_tokens: result.usage.output_tokens,
212+
inference_time_ms: result.usage.inference_time_ms,
197213
};
198214

199215
logger({
@@ -424,12 +440,16 @@ export class AnthropicCUAClient extends AgentClient {
424440
);
425441
}
426442

443+
const startTime = Date.now();
427444
// Create the message using the Anthropic Messages API
428445
// @ts-expect-error - The Anthropic SDK types are stricter than what we need
429446
const response = await this.client.beta.messages.create(requestParams);
447+
const endTime = Date.now();
448+
const elapsedMs = endTime - startTime;
430449
const usage = {
431450
input_tokens: response.usage.input_tokens,
432451
output_tokens: response.usage.output_tokens,
452+
inference_time_ms: elapsedMs,
433453
};
434454

435455
// Store the message ID for future use

lib/agent/OpenAICUAClient.ts

+13 -1
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@ export class OpenAICUAClient extends AgentClient {
102102
// Execute steps until completion or max steps reached
103103
let totalInputTokens = 0;
104104
let totalOutputTokens = 0;
105+
let totalInferenceTime = 0;
105106
while (!completed && currentStep < maxSteps) {
106107
logger({
107108
category: "agent",
@@ -116,6 +117,7 @@ export class OpenAICUAClient extends AgentClient {
116117
);
117118
totalInputTokens += result.usage.input_tokens;
118119
totalOutputTokens += result.usage.output_tokens;
120+
totalInferenceTime += result.usage.inference_time_ms;
119121

120122
// Add actions to the list
121123
actions.push(...result.actions);
@@ -150,6 +152,7 @@ export class OpenAICUAClient extends AgentClient {
150152
usage: {
151153
input_tokens: totalInputTokens,
152154
output_tokens: totalOutputTokens,
155+
inference_time_ms: totalInferenceTime,
153156
},
154157
};
155158
} catch (error) {
@@ -184,7 +187,11 @@ export class OpenAICUAClient extends AgentClient {
184187
completed: boolean;
185188
nextInputItems: ResponseInputItem[];
186189
responseId: string;
187-
usage: { input_tokens: number; output_tokens: number };
190+
usage: {
191+
input_tokens: number;
192+
output_tokens: number;
193+
inference_time_ms: number;
194+
};
188195
}> {
189196
try {
190197
// Get response from the model
@@ -194,6 +201,7 @@ export class OpenAICUAClient extends AgentClient {
194201
const usage = {
195202
input_tokens: result.usage.input_tokens,
196203
output_tokens: result.usage.output_tokens,
204+
inference_time_ms: result.usage.inference_time_ms,
197205
};
198206

199207
// Add any reasoning items to our map
@@ -328,14 +336,18 @@ export class OpenAICUAClient extends AgentClient {
328336
requestParams.previous_response_id = previousResponseId;
329337
}
330338

339+
const startTime = Date.now();
331340
// Create the response using the OpenAI Responses API
332341
// @ts-expect-error - Force type to match what the OpenAI SDK expects
333342
const response = await this.client.responses.create(requestParams);
343+
const endTime = Date.now();
344+
const elapsedMs = endTime - startTime;
334345

335346
// Extract the token counts and attach the measured request latency
336347
const usage = {
337348
input_tokens: response.usage.input_tokens,
338349
output_tokens: response.usage.output_tokens,
350+
inference_time_ms: elapsedMs,
339351
};
340352

341353
// Store the response ID for future use

lib/handlers/agentHandler.ts

+1 -1
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ export class StagehandAgentHandler {
184184
StagehandFunctionName.AGENT,
185185
result.usage.input_tokens,
186186
result.usage.output_tokens,
187-
0,
187+
result.usage.inference_time_ms,
188188
);
189189
}
190190

types/agent.ts

+1 -0
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@ export interface AgentResult {
1414
usage?: {
1515
input_tokens: number;
1616
output_tokens: number;
17+
inference_time_ms: number;
1718
};
1819
}
1920

0 commit comments

Comments
 (0)