Skip to content

Commit 1032d7d

Browse files
Allow act to accept an ObserveResult (#441)
* flagged return action v0.1
* isVisible flag added to observe
* prettier and cleanup of fs functions
* simplifying observe flagging
* two flags: useAccessibilityTree and returnAction. No more processAlDOM on a11y context
* github eval
* add back in a11y param (whoops)
* google search observe eval (returnAction)
* fix my terrible grammar in the instruction
* amazon actionable eval
* add gh eval to config
* vtj eval
* added evals to config.json
* fixing lint/build issues
* compare element handles
* yc eval
* changed useAccessibilityTree to onlyVisible
* removing useAccessibilityTree from evals
* mostly removing comments
* accept multiple selectors
* added changeset
* allow act to accept ObserveResult
* actionable observe example
* changeset

---------

Co-authored-by: Miguel <[email protected]>
1 parent eaed594 commit 1032d7d

File tree

6 files changed

+169
-6
lines changed

6 files changed

+169
-6
lines changed

Diff for: .changeset/ninety-drinks-warn.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
allow act to accept observe output

Diff for: examples/actionable_observe_example.ts

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/**
2+
* This file is meant to be used as a scratchpad for trying out actionable observe.
3+
* To create a Stagehand project with best practices and configuration, run:
4+
*
5+
* npx create-browser-app@latest my-browser-app
6+
*/
7+
8+
import { Stagehand } from "@/dist";
9+
import stagehandConfig from "@/stagehand.config";
10+
11+
/**
 * Walks through the apartments.com filter UI using "actionable observe":
 * each step asks `page.observe(...)` to locate an element and then hands the
 * first returned observation straight to `page.act(...)`.
 *
 * After all steps have run, the final URL is compared against the expected
 * filtered-search URL and a success/failure message is printed.
 *
 * @throws Error if `observe` returns no results for one of the instructions.
 *   The browser session is still closed in that case.
 */
async function example() {
  // Instructions are executed in order; each one depends on the UI state
  // produced by the previous step.
  const instructions = [
    "find the 'all filters' button",
    "find the '1+' button in the 'beds' section",
    "find the 'apartments' button in the 'home type' section",
    "find the pet policy dropdown to click on.",
    "find the 'Dog Friendly' option to click on",
    "find the 'see results' section",
  ];

  const stagehand = new Stagehand(stagehandConfig);
  await stagehand.init();

  let currentUrl: string;
  try {
    await stagehand.page.goto("https://www.apartments.com/san-francisco-ca/");

    for (const instruction of instructions) {
      // Fixed pause before each observation so the page can settle after the
      // previous navigation/click.
      await new Promise((resolve) => setTimeout(resolve, 3000));

      const [observation] = await stagehand.page.observe({ instruction });
      if (!observation) {
        // Fail with the offending instruction instead of passing `undefined`
        // on to `act`.
        throw new Error(
          `observe() returned no results for instruction: "${instruction}"`,
        );
      }

      const result = await stagehand.page.act(observation);
      if (!result.success) {
        // Keep going (the final URL check reports the overall outcome), but
        // do not hide the failed step.
        console.warn(`act() failed for "${instruction}": ${result.message}`);
      }
    }

    currentUrl = stagehand.page.url();
  } finally {
    // Always release the browser session, even when a step throws.
    await stagehand.close();
  }

  if (
    currentUrl.includes(
      "https://www.apartments.com/apartments/san-francisco-ca/min-1-bedrooms-pet-friendly-dog/",
    )
  ) {
    console.log("✅ Success! we made it to the correct page");
  } else {
    console.log(
      "❌ Whoops, looks like we didnt make it to the correct page. " +
        "\nThanks for testing out this new Stagehand feature!" +
        "\nReach us on Slack if you have any feedback/questions/suggestions!",
    );
  }
}
68+
69+
// Script entry point: run the example and report any failure explicitly
// instead of leaving the returned promise's rejection unhandled.
example().catch((err: unknown) => {
  console.error("actionable observe example failed:", err);
  process.exitCode = 1;
});

Diff for: lib/StagehandPage.ts

+32-6
Original file line numberDiff line numberDiff line change
@@ -285,15 +285,41 @@ export class StagehandPage {
285285
}
286286
}
287287

288-
async act(actionOrOptions: string | ActOptions): Promise<ActResult> {
288+
async act(
289+
actionOrOptions: string | ActOptions | ObserveResult,
290+
): Promise<ActResult> {
289291
if (!this.actHandler) {
290292
throw new Error("Act handler not initialized");
291293
}
292294

293-
const options: ActOptions =
294-
typeof actionOrOptions === "string"
295-
? { action: actionOrOptions }
296-
: actionOrOptions;
295+
// If actionOrOptions is an ObserveResult, we call actFromObserveResult.
296+
// We need to ensure there is both a selector and a method in the ObserveResult.
297+
if (typeof actionOrOptions === "object" && actionOrOptions !== null) {
298+
// If it has selector AND method => treat as ObserveResult
299+
if ("selector" in actionOrOptions && "method" in actionOrOptions) {
300+
const observeResult = actionOrOptions as ObserveResult;
301+
// validate observeResult.method, etc.
302+
return this.actHandler.actFromObserveResult(observeResult);
303+
} else {
304+
// If it's an object but no selector/method,
305+
// check that it’s truly ActOptions (i.e., has an `action` field).
306+
if (!("action" in actionOrOptions)) {
307+
throw new Error(
308+
"Invalid argument. Valid arguments are: a string, an ActOptions object, " +
309+
"or an ObserveResult WITH 'selector' and 'method' fields.",
310+
);
311+
}
312+
}
313+
} else if (typeof actionOrOptions === "string") {
314+
// Convert string to ActOptions
315+
actionOrOptions = { action: actionOrOptions };
316+
} else {
317+
throw new Error(
318+
"Invalid argument: you may have called act with an empty ObserveResult.\n" +
319+
"Valid arguments are: a string, an ActOptions object, or an ObserveResult " +
320+
"WITH 'selector' and 'method' fields.",
321+
);
322+
}
297323

298324
const {
299325
action,
@@ -302,7 +328,7 @@ export class StagehandPage {
302328
useVision, // still destructure this but will not pass it on
303329
variables = {},
304330
domSettleTimeoutMs,
305-
} = options;
331+
} = actionOrOptions;
306332

307333
if (typeof useVision !== "undefined") {
308334
this.stagehand.log({

Diff for: lib/handlers/actHandler.ts

+59
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import { LLMProvider } from "../llm/LLMProvider";
1111
import { StagehandContext } from "../StagehandContext";
1212
import { StagehandPage } from "../StagehandPage";
1313
import { generateId } from "../utils";
14+
import { ObserveResult } from "@/types/stagehand";
1415

1516
/**
1617
* NOTE: Vision support has been removed from this version of Stagehand.
@@ -56,6 +57,57 @@ export class StagehandActHandler {
5657
this.userProvidedInstructions = userProvidedInstructions;
5758
}
5859

60+
/**
 * Perform an immediate Playwright action based on an ObserveResult object
 * that was returned from `page.observe(...)`.
 *
 * The observation's `method` and `arguments` are forwarded to
 * `_performPlaywrightMethod` together with its selector (with any leading
 * `xpath=` prefix removed). Failures are logged and reported through the
 * returned object rather than thrown.
 *
 * @param observe - The observation to act on; its `selector` and `method`
 *   are read unconditionally, `arguments` defaults to an empty list.
 * @returns `success: true` with a confirmation message when the Playwright
 *   call resolves, otherwise `success: false` with the failure message.
 *   `action` is the observation's description, or a generic label built from
 *   the method name when no description is present.
 */
public async actFromObserveResult(
  observe: ObserveResult,
): Promise<{ success: boolean; message: string; action: string }> {
  this.logger({
    category: "action",
    message: "Performing act from an ObserveResult",
    level: 1,
    auxiliary: {
      observeResult: {
        value: JSON.stringify(observe),
        type: "object",
      },
    },
  });

  const method = observe.method;
  const args = observe.arguments ?? [];
  // Remove the xpath prefix on the selector. Anchored so that only a leading
  // "xpath=" is stripped and the same text elsewhere in the selector is kept.
  // NOTE(review): presumably _performPlaywrightMethod expects a bare XPath — confirm.
  const selector = observe.selector.replace(/^xpath=/, "");
  const action = observe.description || `ObserveResult action (${method})`;

  try {
    await this._performPlaywrightMethod(method, args, selector);

    return {
      success: true,
      message: `Action [${method}] performed successfully on selector: ${selector}`,
      action,
    };
  } catch (err: unknown) {
    // Anything can be thrown, so narrow before reading Error fields.
    const errorMessage = err instanceof Error ? err.message : String(err);
    const errorStack = err instanceof Error ? (err.stack ?? "") : "";

    this.logger({
      category: "action",
      message: "Error performing act from an ObserveResult",
      level: 1,
      auxiliary: {
        error: { value: errorMessage, type: "string" },
        trace: { value: errorStack, type: "string" },
        observeResult: { value: JSON.stringify(observe), type: "object" },
      },
    });
    return {
      success: false,
      message: `Failed to perform act: ${errorMessage}`,
      action,
    };
  }
}
110+
59111
private async _recordAction(action: string, result: string): Promise<string> {
60112
const id = generateId(action);
61113

@@ -361,6 +413,13 @@ export class StagehandActHandler {
361413
},
362414
},
363415
});
416+
// try {
417+
// // Force-click here
418+
// await locator.click({ force: true });
419+
// } catch (e) {
420+
// // handle/log exception
421+
// throw new PlaywrightCommandException(e.message);
422+
// }
364423

365424
// NAVIDNOTE: Should this happen before we wait for locator[method]?
366425
const newOpenedTab = await Promise.race([

Diff for: package.json

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
"external-client": "npm run build-dom-scripts && tsx examples/external_client.ts",
1414
"instructions": "npm run build-dom-scripts && tsx examples/instructions.ts",
1515
"ai-sdk-client": "npm run build-dom-scripts && tsx examples/ai_sdk_example.ts",
16+
"actionable_observe_example": "npm run build-dom-scripts && tsx examples/actionable_observe_example.ts",
1617
"format": "prettier --write .",
1718
"prettier": "prettier --check .",
1819
"prettier:fix": "prettier --write .",

Diff for: types/page.ts

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ export const defaultExtractSchema = z.object({
2020
export interface Page extends Omit<PlaywrightPage, "on"> {
2121
act(action: string): Promise<ActResult>;
2222
act(options: ActOptions): Promise<ActResult>;
23+
act(observation: ObserveResult): Promise<ActResult>;
2324

2425
extract(
2526
instruction: string,

0 commit comments

Comments
 (0)