Skip to content

Commit 516725f

Browse files
sameelarifmiguelg719kamath
authored
use observe in act (#518)
* use observe in act * changeset * fix act from observe (#531) * fix act from observe * remove arbitrary log * refactor into actHandler and solved recursing issue * adding the missing piece to observe prompt --------- Co-authored-by: miguel <[email protected]> Co-authored-by: Anirudh Kamath <[email protected]>
1 parent c820bfc commit 516725f

File tree

8 files changed

+77
-32
lines changed

8 files changed

+77
-32
lines changed

Diff for: .changeset/wild-hats-turn.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
`act()` now uses `observe()` under the hood. This results in significant performance improvements.

Diff for: evals/tasks/nonsense_action.ts

+2-13
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,11 @@ export const nonsense_action: EvalFunction = async ({ modelName, logger }) => {
1313
await stagehand.page.goto("https://www.homedepot.com/");
1414

1515
const result = await stagehand.page.act({
16-
action: "click on the first banana",
16+
action: "what is the capital of the moon?",
1717
});
18-
console.log("result", result);
19-
20-
// Assert the output
21-
const expectedResult = {
22-
success: false,
23-
message: "Action was not able to be completed.",
24-
action: "click on the first banana",
25-
};
26-
27-
const isResultCorrect =
28-
JSON.stringify(result) === JSON.stringify(expectedResult);
2918

3019
return {
31-
_success: isResultCorrect,
20+
_success: !result.success, // We expect this to fail
3221
debugUrl,
3322
sessionUrl,
3423
logs: logger.getLogs(),

Diff for: evals/tasks/simple_google_search.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,11 @@ export const simple_google_search: EvalFunction = async ({
1515
await stagehand.page.goto("https://www.google.com");
1616

1717
await stagehand.page.act({
18-
action: 'Search for "OpenAI"',
18+
action: 'type "OpenAI" into the search bar',
1919
});
2020

21+
await stagehand.page.act("click the search button");
22+
2123
const expectedUrl = "https://www.google.com/search?q=OpenAI";
2224
const currentUrl = stagehand.page.url();
2325

Diff for: lib/StagehandPage.ts

+5
Original file line numberDiff line numberDiff line change
@@ -465,6 +465,7 @@ export class StagehandPage {
465465
useVision, // still destructure this but will not pass it on
466466
variables = {},
467467
domSettleTimeoutMs,
468+
slowDomBasedAct = false,
468469
} = actionOrOptions;
469470

470471
if (typeof useVision !== "undefined") {
@@ -487,6 +488,10 @@ export class StagehandPage {
487488
? this.stagehand.llmProvider.getClient(modelName, modelClientOptions)
488489
: this.llmClient;
489490

491+
if (!slowDomBasedAct) {
492+
return this.actHandler.observeAct(action);
493+
}
494+
490495
this.stagehand.log({
491496
category: "act",
492497
message: "running act",

Diff for: lib/handlers/actHandler.ts

+35-5
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ import { LLMProvider } from "../llm/LLMProvider";
1111
import { StagehandContext } from "../StagehandContext";
1212
import { StagehandPage } from "../StagehandPage";
1313
import { generateId } from "../utils";
14-
import { ObserveResult } from "@/types/stagehand";
14+
import { ActResult, ObserveResult } from "@/types/stagehand";
15+
import { SupportedPlaywrightAction } from "@/types/act";
1516

1617
/**
1718
* NOTE: Vision support has been removed from this version of Stagehand.
@@ -71,7 +72,7 @@ export class StagehandActHandler {
7172
*/
7273
public async actFromObserveResult(
7374
observe: ObserveResult,
74-
): Promise<{ success: boolean; message: string; action: string }> {
75+
): Promise<ActResult> {
7576
this.logger({
7677
category: "action",
7778
message: "Performing act from an ObserveResult",
@@ -98,7 +99,10 @@ export class StagehandActHandler {
9899
action: observe.description || `ObserveResult action (${method})`,
99100
};
100101
} catch (err) {
101-
if (!this.selfHeal) {
102+
if (
103+
!this.selfHeal ||
104+
err instanceof PlaywrightCommandMethodNotSupportedException
105+
) {
102106
this.logger({
103107
category: "action",
104108
message: "Error performing act from an ObserveResult",
@@ -136,11 +140,14 @@ export class StagehandActHandler {
136140
? `${method} ${observe.description}`
137141
: observe.description;
138142
// Call act with the ObserveResult description
139-
await this.stagehandPage.act(actCommand);
143+
await this.stagehandPage.act({
144+
action: actCommand,
145+
slowDomBasedAct: true,
146+
});
140147
} catch (err) {
141148
this.logger({
142149
category: "action",
143-
message: "Error performing act from an ObserveResult",
150+
message: "Error performing act from an ObserveResult on fallback",
144151
level: 1,
145152
auxiliary: {
146153
error: { value: err.message, type: "string" },
@@ -156,6 +163,29 @@ export class StagehandActHandler {
156163
}
157164
}
158165

166+
/**
167+
* Perform an act based on a natural-language instruction by delegating to observe().
168+
* Asks observe() for the single most relevant element/action, then runs actFromObserveResult on the first result; returns a `success: false` ActResult without acting when observe returns an empty array.
169+
*/
170+
public async observeAct(instruction: string): Promise<ActResult> {
171+
const observeResults = await this.stagehandPage.observe(
172+
`Find the most relevant element to perform an action on given the following action: ${instruction}.
173+
Provide an action for this element such as ${Object.values(SupportedPlaywrightAction).join(", ")}, or any other playwright locator method. Remember that to users, buttons and links look the same in most cases.
174+
If the action is completely unrelated to a potential action to be taken on the page, return an empty array.
175+
ONLY return one action. If multiple actions are relevant, return the most relevant one.`,
176+
);
177+
if (observeResults.length === 0) {
178+
return {
179+
success: false,
180+
message: `Failed to perform act: No observe results found for action`,
181+
action: instruction,
182+
};
183+
}
184+
// The prompt asks for exactly one action, so only the first observe result is acted on
185+
const element = observeResults[0];
186+
return this.actFromObserveResult(element);
187+
}
188+
159189
private async _recordAction(action: string, result: string): Promise<string> {
160190
const id = generateId(action);
161191

Diff for: package.json

+13-13
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,24 @@
66
"module": "./dist/index.js",
77
"types": "./dist/index.d.ts",
88
"scripts": {
9-
"2048": "npm run build-dom-scripts && tsx examples/2048.ts",
10-
"popup": "npm run build-dom-scripts && tsx examples/popup.ts",
11-
"example": "npm run build-dom-scripts && tsx examples/example.ts",
12-
"langchain": "npm run build-dom-scripts && tsx examples/langchain.ts",
13-
"debug-url": "npm run build-dom-scripts && tsx examples/debugUrl.ts",
14-
"external-client": "npm run build-dom-scripts && tsx examples/external_client.ts",
15-
"instructions": "npm run build-dom-scripts && tsx examples/instructions.ts",
16-
"ai-sdk-client": "npm run build-dom-scripts && tsx examples/ai_sdk_example.ts",
17-
"actionable_observe_example": "npm run build-dom-scripts && tsx examples/actionable_observe_example.ts",
9+
"2048": "npm run build && tsx examples/2048.ts",
10+
"popup": "npm run build && tsx examples/popup.ts",
11+
"example": "npm run build && tsx examples/example.ts",
12+
"langchain": "npm run build && tsx examples/langchain.ts",
13+
"debug-url": "npm run build && tsx examples/debugUrl.ts",
14+
"external-client": "npm run build && tsx examples/external_client.ts",
15+
"instructions": "npm run build && tsx examples/instructions.ts",
16+
"ai-sdk-client": "npm run build && tsx examples/ai_sdk_example.ts",
17+
"actionable_observe_example": "npm run build && tsx examples/actionable_observe_example.ts",
1818
"format": "prettier --write .",
1919
"prettier": "prettier --check .",
2020
"prettier:fix": "prettier --write .",
2121
"eslint": "eslint .",
2222
"cache:clear": "rm -rf .cache",
23-
"evals": "npm run build-dom-scripts && tsx evals/index.eval.ts",
24-
"e2e": "npm run build-dom-scripts && cd evals/deterministic && npx playwright test --config=e2e.playwright.config.ts",
25-
"e2e:bb": "npm run build-dom-scripts && cd evals/deterministic && npx playwright test --config=bb.playwright.config.ts",
26-
"e2e:local": "npm run build-dom-scripts && cd evals/deterministic && npx playwright test --config=local.playwright.config.ts",
23+
"evals": "npm run build && tsx evals/index.eval.ts",
24+
"e2e": "npm run build && cd evals/deterministic && npx playwright test --config=e2e.playwright.config.ts",
25+
"e2e:bb": "npm run build && cd evals/deterministic && npx playwright test --config=bb.playwright.config.ts",
26+
"e2e:local": "npm run build && cd evals/deterministic && npx playwright test --config=local.playwright.config.ts",
2727
"build-dom-scripts": "tsx lib/dom/genDomScripts.ts",
2828
"build-types": "tsc --emitDeclarationOnly --outDir dist",
2929
"build-js": "tsup lib/index.ts --dts",

Diff for: types/act.ts

+7
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,10 @@ export interface ActCommandResult {
2626
step: string;
2727
why?: string;
2828
}
29+
30+
// We can use this enum to list the actions supported in performPlaywrightMethod
31+
export enum SupportedPlaywrightAction {
32+
CLICK = "click",
33+
FILL = "fill",
34+
TYPE = "type",
35+
}

Diff for: types/stagehand.ts

+7
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,13 @@ export interface ActOptions {
7777
useVision?: boolean;
7878
variables?: Record<string, string>;
7979
domSettleTimeoutMs?: number;
80+
/**
81+
* If true, `act()` skips the default `observe()`-based path and runs the older DOM-based act loop instead.
82+
* This path is slower; the act handler also sets it when it falls back from a failed observe-based action.
83+
*
84+
* @default false
85+
*/
86+
slowDomBasedAct?: boolean;
8087
}
8188

8289
export interface ActResult {

0 commit comments

Comments
 (0)