Skip to content

Commit bbbcee7

Browse files
Observe perform candidates (#426)
* flagged return action v0.1
* isVisible flag added to observe
* prettier and cleanup of fs functions
* simplifying observe flagging
* two flags: useAccessibilityTree and returnAction. No more processAlDOM on a11y context
* github eval
* add back in a11y param (whoops)
* google search observe eval (returnAction)
* fix my terrible grammar in the instruction
* amazon actionable eval
* add gh eval to config
* vtj eval
* added evals to config.json
* fixing lint/build issues
* compare element handles
* yc eval
* changed useAccessibilityTree to onlyVisible
* removing useAccessibilityTree from evals
* mostly removing comments
* accept multiple selectors
* added changeset
* return action defaults to false, waitForSettledDom before getAccessibilityTree
* fixes to xpath generation and more evals (observe form fields)

---------

Co-authored-by: seanmcguire12 <[email protected]>
1 parent 8e84664 commit bbbcee7

18 files changed

+943
-139
lines changed

Diff for: .changeset/chilled-jokes-teach.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
`observe` got a major upgrade. When `returnAction` is enabled (it defaults to `false`), each generated candidate element now comes with a suggested Playwright method and any arguments that method needs. This release also includes a major speedup when accessibility (a11y) tree processing is used for context.

Diff for: evals/evals.config.json

+24
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,30 @@
215215
{
216216
"name": "extract_zillow",
217217
"categories": ["text_extract"]
218+
},
219+
{
220+
"name": "observe_github",
221+
"categories": ["observe"]
222+
},
223+
{
224+
"name": "observe_vantechjournal",
225+
"categories": ["observe"]
226+
},
227+
{
228+
"name": "observe_amazon_add_to_cart",
229+
"categories": ["observe"]
230+
},
231+
{
232+
"name": "observe_simple_google_search",
233+
"categories": ["observe"]
234+
},
235+
{
236+
"name": "observe_yc_startup",
237+
"categories": ["observe"]
238+
},
239+
{
240+
"name": "observe_taxes",
241+
"categories": ["observe"]
218242
}
219243
]
220244
}

Diff for: evals/tasks/ionwave_observe.ts

+2-6
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
11
import { initStagehand } from "@/evals/initStagehand";
22
import { EvalFunction } from "@/types/evals";
33

4-
export const ionwave_observe: EvalFunction = async ({
5-
modelName,
6-
logger,
7-
useAccessibilityTree,
8-
}) => {
4+
export const ionwave_observe: EvalFunction = async ({ modelName, logger }) => {
95
const { stagehand, initResponse } = await initStagehand({
106
modelName,
117
logger,
@@ -15,7 +11,7 @@ export const ionwave_observe: EvalFunction = async ({
1511

1612
await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx");
1713

18-
const observations = await stagehand.page.observe({ useAccessibilityTree });
14+
const observations = await stagehand.page.observe({ onlyVisible: true });
1915

2016
if (observations.length === 0) {
2117
await stagehand.close();

Diff for: evals/tasks/observe_amazon_add_to_cart.ts

+75
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import { EvalFunction } from "@/types/evals";
2+
import { initStagehand } from "@/evals/initStagehand";
3+
import { performPlaywrightMethod } from "@/lib/a11y/utils";
4+
5+
/**
 * Eval: reach Amazon's sign-in wall from a product page using only `observe`
 * with `returnAction: true`, executing the first suggested action each time.
 *
 * Flow: open the product page, ask `observe` for the "Add to Cart" action and
 * perform it, then ask for the "Proceed to checkout" action and perform it.
 * The eval succeeds when the final URL starts with the Amazon sign-in URL.
 *
 * The browser session is always closed, even when navigation, observation or
 * the Playwright action throws; the error is then re-thrown to the caller.
 *
 * @param modelName - model identifier forwarded to `initStagehand`.
 * @param logger - eval logger; its collected logs are returned in the result.
 * @returns `_success`, the final `currentUrl`, the session URLs and the logs.
 */
export const observe_amazon_add_to_cart: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  let currentUrl = "";
  try {
    await stagehand.page.goto(
      "https://www.amazon.com/Laptop-MacBook-Surface-Water-Resistant-Accessories/dp/B0D5M4H5CD",
    );

    // Fixed pause before observing — presumably to let the page settle.
    await stagehand.page.waitForTimeout(5000);

    const observations1 = await stagehand.page.observe({
      instruction: "Find and click the 'Add to Cart' button",
      onlyVisible: false,
      returnAction: true,
    });

    console.log(observations1);

    // Example of using performPlaywrightMethod if you have the xpath
    if (observations1.length > 0) {
      const action1 = observations1[0];
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action1.method,
        action1.arguments,
        // The selector is passed without its "xpath=" prefix.
        action1.selector.replace("xpath=", ""),
      );
    }

    await stagehand.page.waitForTimeout(2000);

    const observations2 = await stagehand.page.observe({
      instruction: "Find and click the 'Proceed to checkout' button",
      onlyVisible: false,
      returnAction: true,
    });

    // Example of using performPlaywrightMethod if you have the xpath
    if (observations2.length > 0) {
      const action2 = observations2[0];
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action2.method,
        action2.arguments,
        action2.selector.replace("xpath=", ""),
      );
    }
    await stagehand.page.waitForTimeout(2000);

    // Read the URL while the page is still open.
    currentUrl = stagehand.page.url();
  } finally {
    // Release the browser session on both the success and the failure path.
    await stagehand.close();
  }

  const expectedUrlPrefix = "https://www.amazon.com/ap/signin";

  return {
    _success: currentUrl.startsWith(expectedUrlPrefix),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};

Diff for: evals/tasks/observe_github.ts

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import { initStagehand } from "@/evals/initStagehand";
2+
import { EvalFunction } from "@/types/evals";
3+
4+
/**
 * Eval: check that `observe` can locate the scrollable file-tree container on
 * a GitHub repository page.
 *
 * The observed selectors are compared against a short list of known-good CSS
 * locators by DOM node identity (not by selector text), so any selector that
 * resolves to the same element counts as a match.
 *
 * Failure handling:
 * - A reference locator that cannot be resolved is skipped with a warning
 *   instead of aborting the eval (`locator.elementHandle()` rejects when
 *   nothing matches or when several elements match).
 * - An observation whose selector cannot be resolved is skipped with a warning.
 * - The browser session is always closed, including when an error propagates.
 *
 * @param modelName - model identifier forwarded to `initStagehand`.
 * @param logger - eval logger; its collected logs are returned in the result.
 * @returns `_success`, the matched reference locator (when the comparison
 *   ran), the raw observations, the session URLs and the logs.
 */
export const observe_github: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Catch variables are `unknown` under strict settings; narrow before use.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // Runs the page interaction and returns the eval-specific result fields.
  const evaluate = async () => {
    await stagehand.page.goto(
      "https://github.com/browserbase/stagehand/tree/main/lib",
    );

    const observations = await stagehand.page.observe({
      instruction: "find the scrollable element that holds the repos file tree",
    });

    if (observations.length === 0) {
      return { _success: false, observations };
    }

    // NOTE(review): these paths rely on generated class names
    // (`Box-sc-…`, `jbQqON`) and will presumably break when GitHub's markup
    // changes — confirm and refresh as needed.
    const possibleLocators = [
      `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav > ul`,
      `#repos-file-tree > div.Box-sc-g0xbh4-0.jbQqON > div > div > div > nav`,
    ];

    const possibleHandles = [];
    for (const locatorStr of possibleLocators) {
      try {
        const handle = await stagehand.page.locator(locatorStr).elementHandle();
        if (handle) {
          possibleHandles.push({ locatorStr, handle });
        }
      } catch (error) {
        console.warn(
          `Failed to resolve reference locator ${locatorStr}:`,
          describeError(error),
        );
      }
    }

    // Without at least one reference node there is nothing to compare against.
    if (possibleHandles.length === 0) {
      return { _success: false, matchedLocator: null, observations };
    }

    for (const observation of observations) {
      try {
        const observationHandle = await stagehand.page
          .locator(observation.selector)
          .first()
          .elementHandle();
        if (!observationHandle) {
          continue;
        }

        for (const { locatorStr, handle: candidateHandle } of possibleHandles) {
          // Compare node identity inside the page context.
          const isSameNode = await observationHandle.evaluate(
            (node, otherNode) => node === otherNode,
            candidateHandle,
          );
          if (isSameNode) {
            return {
              _success: true,
              matchedLocator: locatorStr,
              observations,
            };
          }
        }
      } catch (error) {
        console.warn(
          `Failed to check observation with selector ${observation.selector}:`,
          describeError(error),
        );
      }
    }

    return { _success: false, matchedLocator: null, observations };
  };

  let outcome: Awaited<ReturnType<typeof evaluate>>;
  try {
    outcome = await evaluate();
  } finally {
    // Release the browser session on both the success and the failure path.
    await stagehand.close();
  }

  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};

Diff for: evals/tasks/observe_simple_google_search.ts

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
import { EvalFunction } from "@/types/evals";
2+
import { initStagehand } from "@/evals/initStagehand";
3+
import { performPlaywrightMethod } from "@/lib/a11y/utils";
4+
5+
/**
 * Eval: perform a Google search for "OpenAI" using only `observe` with
 * `returnAction: true` (the `observe`-driven counterpart of
 * `act({ action: 'Search for "OpenAI"' })`).
 *
 * Flow: open google.com, ask `observe` for the action that fills the search
 * bar and perform it, then ask for the action that clicks the search button
 * in the suggestions dropdown and perform it. The eval succeeds when the
 * final URL starts with the expected search URL.
 *
 * The browser session is always closed, even when navigation, observation or
 * the Playwright action throws; the error is then re-thrown to the caller.
 *
 * @param modelName - model identifier forwarded to `initStagehand`.
 * @param logger - eval logger; its collected logs are returned in the result.
 * @returns `_success`, the final `currentUrl`, the session URLs and the logs.
 */
export const observe_simple_google_search: EvalFunction = async ({
  modelName,
  logger,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  let currentUrl = "";
  try {
    await stagehand.page.goto("https://www.google.com");

    const observation1 = await stagehand.page.observe({
      instruction: "Find the search bar and enter 'OpenAI'",
      onlyVisible: false,
      returnAction: true,
    });
    console.log(observation1);

    if (observation1.length > 0) {
      const action1 = observation1[0];
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action1.method,
        action1.arguments,
        // The selector is passed without its "xpath=" prefix.
        action1.selector.replace("xpath=", ""),
      );
    }
    // Fixed pause — presumably to let the suggestions dropdown render.
    await stagehand.page.waitForTimeout(5000);

    const observation2 = await stagehand.page.observe({
      instruction: "Click the search button in the suggestions dropdown",
      onlyVisible: false,
      returnAction: true,
    });
    console.log(observation2);

    if (observation2.length > 0) {
      const action2 = observation2[0];
      await performPlaywrightMethod(
        stagehand.page,
        stagehand.logger,
        action2.method,
        action2.arguments,
        action2.selector.replace("xpath=", ""),
      );
    }
    await stagehand.page.waitForTimeout(5000);

    // Read the URL while the page is still open.
    currentUrl = stagehand.page.url();
  } finally {
    // Release the browser session on both the success and the failure path.
    await stagehand.close();
  }

  const expectedUrl = "https://www.google.com/search?q=OpenAI";

  return {
    _success: currentUrl.startsWith(expectedUrl),
    currentUrl,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};

Diff for: evals/tasks/observe_taxes.ts

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import { EvalFunction } from "@/types/evals";
2+
import { initStagehand } from "@/evals/initStagehand";
3+
4+
/**
 * Eval: check that `observe` finds the form fields of the "Income" section on
 * the 1040.com tax estimator.
 *
 * The eval fails outright when fewer than 13 candidates are returned.
 * Otherwise it succeeds when at least one observed selector resolves to an
 * element whose inner text equals the inner text of the reference element
 * `#tpWages`.
 *
 * An observation whose selector cannot be resolved is skipped with a warning.
 * The browser session is always closed, including when an error propagates
 * (for example when the reference element cannot be read).
 *
 * @param modelName - model identifier forwarded to `initStagehand`.
 * @param logger - eval logger; its collected logs are returned in the result.
 * @returns `_success`, the `expected` reference text (when the comparison
 *   ran), the raw observations, the session URLs and the logs.
 */
export const observe_taxes: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  // Presumably the number of form fields in the "Income" section — TODO confirm.
  const minExpectedObservations = 13;

  // Runs the page interaction and returns the eval-specific result fields.
  const evaluate = async () => {
    await stagehand.page.goto("https://file.1040.com/estimate/");

    const observations = await stagehand.page.observe({
      instruction: "Find all the form elements under the 'Income' section",
    });

    // Covers both "nothing observed" and "too few candidates observed".
    if (observations.length < minExpectedObservations) {
      return { _success: false, observations };
    }

    const expectedLocator = `#tpWages`;

    // NOTE(review): if `#tpWages` has empty inner text (e.g. it is an input),
    // any observation resolving to an empty-text element will match — confirm
    // that this comparison is strict enough.
    const expectedResult = await stagehand.page
      .locator(expectedLocator)
      .first()
      .innerText();

    let foundMatch = false;
    for (const observation of observations) {
      try {
        const observationResult = await stagehand.page
          .locator(observation.selector)
          .first()
          .innerText();

        if (observationResult === expectedResult) {
          foundMatch = true;
          break;
        }
      } catch (error) {
        // Catch variables are `unknown` under strict settings; narrow first.
        console.warn(
          `Failed to check observation with selector ${observation.selector}:`,
          error instanceof Error ? error.message : String(error),
        );
      }
    }

    return { _success: foundMatch, expected: expectedResult, observations };
  };

  let outcome: Awaited<ReturnType<typeof evaluate>>;
  try {
    outcome = await evaluate();
  } finally {
    // Release the browser session on both the success and the failure path.
    await stagehand.close();
  }

  return {
    ...outcome,
    debugUrl,
    sessionUrl,
    logs: logger.getLogs(),
  };
};

0 commit comments

Comments
 (0)