Skip to content

Commit 73d6736

Browse files
miguelg719 and kamath authored
Computer Use Agents (CUA) support (#571)
* first draft of CU support * prettier * added anthropic cua * prettier * first cleanup * more fixes * removing scratchpad * missing @ import * fixing test changes for improving local * cleanup example * fixes for initialization and logs * fixed local issues * resolving last comments * Update .changeset/tender-rats-cheat.md Co-authored-by: Anirudh Kamath <[email protected]> --------- Co-authored-by: Anirudh Kamath <[email protected]>
1 parent c87d1d0 commit 73d6736

14 files changed

+4111
-808
lines changed

Diff for: .changeset/tender-rats-cheat.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
You can now use Computer Using Agents (CUA) natively in Stagehand for both Anthropic and OpenAI models! This unlocks a brand new frontier of applications for Stagehand users 🤘

Diff for: examples/cua-example.ts

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import { Stagehand } from "@/dist";
2+
import dotenv from "dotenv";
3+
import StagehandConfig from "@/stagehand.config";
4+
import chalk from "chalk";
5+
6+
// Load environment variables through dotenv so that values such as
// OPENAI_API_KEY are available on process.env before main() runs.
dotenv.config();
8+
9+
/**
 * Runs the Computer Use Agent (CUA) demo.
 *
 * Initializes Stagehand, then drives a single OpenAI computer-use agent
 * through two tasks: a Google search, and filling in (without submitting) a
 * job application on the Browserbase careers page. Progress and results are
 * printed to the console.
 *
 * Errors raised while the agent is running are logged, not rethrown; the
 * process exit code is set to 1 so the failure is still visible to callers.
 * The browser is closed in every case.
 */
async function main(): Promise<void> {
  console.log(
    `\n${chalk.bold("Stagehand 🤘 Computer Use Agent (CUA) Demo")}\n`,
  );

  // Initialize Stagehand
  console.log(`${chalk.cyan("→")} Initializing Stagehand...`);
  const stagehand = new Stagehand({
    ...StagehandConfig,
  });

  await stagehand.init();
  console.log(`${chalk.green("✓")} Stagehand initialized`);

  try {
    const page = stagehand.page;

    console.log(`\n${chalk.magenta.bold("⚡ First Agent Execution")}`);

    console.log(`${chalk.yellow("→")} Navigating to Google...`);
    await page.goto("https://www.google.com");
    console.log(`${chalk.green("✓")} Loaded: ${chalk.dim(page.url())}`);

    // Create the agent only after navigating: the instructions embed
    // page.url(), which would otherwise capture the pre-navigation URL.
    const agent = stagehand.agent({
      provider: "openai",
      model: "computer-use-preview-2025-02-04",
      instructions: [
        "You are a helpful assistant that can use a web browser.",
        `You are currently on the following page: ${page.url()}.`,
        "Do not ask follow up questions, the user will trust your judgement.",
      ].join("\n"),
      options: {
        apiKey: process.env.OPENAI_API_KEY,
      },
    });

    // First task: passed as a plain instruction string
    const firstInstruction =
      "Search for openai news on google and extract the name of the first 3 results";
    console.log(
      `${chalk.cyan("↳")} Instruction: ${chalk.white(firstInstruction)}`,
    );

    const result1 = await agent.execute(firstInstruction);

    console.log(`${chalk.green("✓")} Execution complete`);
    console.log(`${chalk.yellow("⤷")} Result:`);
    console.log(chalk.white(JSON.stringify(result1, null, 2)));

    console.log(`\n${chalk.magenta.bold("⚡ Second Agent Execution")}`);

    console.log(
      `\n${chalk.yellow("→")} Navigating to Browserbase careers page...`,
    );
    await page.goto("https://www.browserbase.com/careers");
    console.log(`${chalk.green("✓")} Loaded: ${chalk.dim(page.url())}`);

    // Second task: same agent, this time using the options-object form
    const instruction =
      "Apply for the full-stack engineer position with mock data. Don't submit the form.";
    console.log(`${chalk.cyan("↳")} Instruction: ${chalk.white(instruction)}`);

    const result = await agent.execute({
      instruction,
      maxSteps: 20,
    });

    console.log(`${chalk.green("✓")} Execution complete`);
    console.log(`${chalk.yellow("⤷")} Result:`);
    console.log(chalk.white(JSON.stringify(result, null, 2)));
  } catch (error) {
    console.log(`${chalk.red("✗")} Error: ${error}`);
    if (error instanceof Error && error.stack) {
      // The first stack line repeats the message already printed above.
      console.log(chalk.dim(error.stack.split("\n").slice(1).join("\n")));
    }
    // The error is swallowed here, so mark the run as failed explicitly.
    process.exitCode = 1;
  } finally {
    // Close the browser
    console.log(`\n${chalk.yellow("→")} Closing browser...`);
    await stagehand.close();
    console.log(`${chalk.green("✓")} Browser closed\n`);
  }
}
88+
89+
// Safety net for rejections that escape main(), i.e. failures outside its
// try/catch such as Stagehand initialization or closing the browser.
main().catch((error) => {
  console.log(`${chalk.red("✗")} Unhandled error in main function`);
  console.log(chalk.red(error));
  // Report the failure to the caller instead of exiting with status 0.
  process.exitCode = 1;
});

Diff for: lib/agent/AgentClient.ts

+44
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import {
2+
AgentAction,
3+
AgentResult,
4+
AgentType,
5+
AgentExecutionOptions,
6+
} from "@/types/agent";
7+
8+
/**
9+
* Abstract base class for agent clients
10+
* This provides a common interface for all agent implementations
11+
*/
12+
/**
 * Base contract shared by every agent client implementation.
 *
 * Holds the agent type, the model name, the optional user-provided
 * instructions and an initially empty client-options bag. Everything else is
 * left abstract for concrete clients to implement.
 */
export abstract class AgentClient {
  /** Agent type this client was constructed with. */
  public type: AgentType;
  /** Name of the model this client was constructed with. */
  public modelName: string;
  /** Client options; starts out as an empty object. */
  public clientOptions: Record<string, unknown>;
  /** Instructions supplied by the user, if any. */
  public userProvidedInstructions?: string;

  /**
   * @param type - Agent type to record on the client.
   * @param modelName - Model name to record on the client.
   * @param userProvidedInstructions - Optional instructions from the user.
   */
  constructor(type: AgentType, modelName: string, userProvidedInstructions?: string) {
    this.type = type;
    this.modelName = modelName;
    this.userProvidedInstructions = userProvidedInstructions;
    this.clientOptions = {};
  }

  /** Executes the agent with the given options and resolves with its result. */
  abstract execute(options: AgentExecutionOptions): Promise<AgentResult>;

  /** Captures a screenshot; the resolved value's shape is up to the implementation. */
  abstract captureScreenshot(options?: Record<string, unknown>): Promise<unknown>;

  /** Sets the viewport width and height used by the client. */
  abstract setViewport(width: number, height: number): void;

  /** Sets the URL the client should treat as current. */
  abstract setCurrentUrl(url: string): void;

  /** Registers the async callback that supplies a screenshot as a string. */
  abstract setScreenshotProvider(provider: () => Promise<string>): void;

  /** Registers the async callback invoked with each agent action. */
  abstract setActionHandler(handler: (action: AgentAction) => Promise<void>): void;
}

Diff for: lib/agent/AgentProvider.ts

+81
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
import { LogLine } from "@/types/log";
2+
import { AgentClient } from "./AgentClient";
3+
import { AgentType } from "@/types/agent";
4+
import { OpenAICUAClient } from "./OpenAICUAClient";
5+
import { AnthropicCUAClient } from "./AnthropicCUAClient";
6+
7+
// Maps each supported computer-use model name to its provider type.
const modelToAgentProviderMap: Record<string, AgentType> = {
  "computer-use-preview-2025-02-04": "openai",
  "claude-3-5-sonnet-20240620": "anthropic",
  "claude-3-7-sonnet-20250219": "anthropic",
};

/**
 * Provider for agent clients.
 *
 * Resolves a model name to its provider type and constructs the matching
 * agent client.
 */
export class AgentProvider {
  private logger: (message: LogLine) => void;

  /**
   * Create a new agent provider.
   *
   * @param logger - Callback that receives the log lines emitted while
   *   resolving and constructing clients.
   */
  constructor(logger: (message: LogLine) => void) {
    this.logger = logger;
  }

  /**
   * Builds the agent client for the given model.
   *
   * @param modelName - Model name; must be a key of the model-to-provider map.
   * @param clientOptions - Options passed through to the client constructor.
   * @param userProvidedInstructions - Optional instructions passed through to
   *   the client constructor.
   * @returns The constructed OpenAI or Anthropic CUA client.
   * @throws Error if the model name is unknown, or if the resolved type has no
   *   client. Errors thrown while constructing the client are logged at
   *   level 0 and rethrown.
   */
  getClient(
    modelName: string,
    clientOptions?: Record<string, unknown>,
    userProvidedInstructions?: string,
  ): AgentClient {
    const type = AgentProvider.getAgentProvider(modelName);
    this.logger({
      category: "agent",
      message: `Getting agent client for type: ${type}, model: ${modelName}`,
      level: 2,
    });

    try {
      switch (type) {
        case "openai":
          return new OpenAICUAClient(
            type,
            modelName,
            userProvidedInstructions,
            clientOptions,
          );
        case "anthropic":
          return new AnthropicCUAClient(
            type,
            modelName,
            userProvidedInstructions,
            clientOptions,
          );
        default:
          throw new Error(`Unknown agent type: ${type}`);
      }
    } catch (error) {
      const errorMessage =
        error instanceof Error ? error.message : String(error);
      this.logger({
        category: "agent",
        message: `Error creating agent client: ${errorMessage}`,
        level: 0,
      });
      throw error;
    }
  }

  /**
   * Resolves a model name to its provider type.
   *
   * @param modelName - Exact model name to look up.
   * @returns The provider type registered for the model.
   * @throws Error if the model name is not registered; there is no fallback
   *   provider.
   */
  static getAgentProvider(modelName: string): AgentType {
    // Own-property check only: the `in` operator would also match keys
    // inherited from Object.prototype (e.g. "toString", "constructor") and
    // hand back a function instead of rejecting the name.
    if (
      Object.prototype.hasOwnProperty.call(modelToAgentProviderMap, modelName)
    ) {
      return modelToAgentProviderMap[modelName];
    }

    throw new Error(`Unknown model name: ${modelName}`);
  }
}

0 commit comments

Comments
 (0)