Skip to content

Commit 4aa4813

Browse files
authored
Miguel/observe a11y (#412)
* first steps towards accessibility backbone * cleanup working for accessibility tree * accessibility backbone eval task updates * added accessibility tree to evals typing * migrated extract construct to new stagehand page location * fixing linting * first try * new observe logic for indexing elements (using nodeId-DOM nodeid) * work in progress * selector for a11y tree now in xpath format * testing not returning * generating xpath for elements not in selectormap * passing evals locally * adjusting evals * merged main * PR cleanup * deleted unnecessary evals for now * fixing linting * removing useAccessibilityTree from extract evals * changes for review * fixing lint errors * final review fixes * prettify * resolved comments * changeset * added final comment
1 parent fe3b044 commit 4aa4813

18 files changed

+460
-44
lines changed

Diff for: .changeset/empty-peas-smell.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
Adds a new way to gather website context using accessibility (a11y) trees. This context is opt-in for observe tasks and is enabled with the useAccessibilityTree flag.

Diff for: evals/args.ts

+2
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ if (extractMethodArg) {
3535
// Set the extraction method in the process environment so tasks can reference it.
3636
process.env.EXTRACT_METHOD = extractMethod;
3737
const useTextExtract = process.env.EXTRACT_METHOD === "textExtract";
38+
const useAccessibilityTree = process.env.EXTRACT_METHOD === "accessibilityTree";
3839

3940
/**
4041
* Variables for filtering which tasks to run:
@@ -75,5 +76,6 @@ export {
7576
filterByCategory,
7677
filterByEvalName,
7778
useTextExtract,
79+
useAccessibilityTree,
7880
DEFAULT_EVAL_CATEGORIES,
7981
};

Diff for: evals/evals.config.json

-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,6 @@
192192
"name": "vanta_h",
193193
"categories": ["observe"]
194194
},
195-
196195
{
197196
"name": "extract_area_codes",
198197
"categories": ["text_extract"]

Diff for: evals/index.eval.ts

+7-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,12 @@ import { env } from "./env";
1919
import { generateExperimentName } from "./utils";
2020
import { exactMatch, errorMatch } from "./scoring";
2121
import { tasksByName, MODELS } from "./taskConfig";
22-
import { filterByCategory, filterByEvalName, useTextExtract } from "./args";
22+
import {
23+
filterByCategory,
24+
filterByEvalName,
25+
useTextExtract,
26+
useAccessibilityTree,
27+
} from "./args";
2328
import { Eval } from "braintrust";
2429
import { EvalFunction, SummaryResult, Testcase } from "../types/evals";
2530
import { EvalLogger } from "./logger";
@@ -221,6 +226,7 @@ const generateFilteredTestcases = (): Testcase[] => {
221226
modelName: input.modelName,
222227
logger,
223228
useTextExtract,
229+
useAccessibilityTree,
224230
});
225231

226232
// Log result to console

Diff for: evals/tasks/ionwave_observe.ts

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import { initStagehand } from "../initStagehand";
22
import { EvalFunction } from "../../types/evals";
33

4-
export const ionwave_observe: EvalFunction = async ({ modelName, logger }) => {
4+
export const ionwave_observe: EvalFunction = async ({
5+
modelName,
6+
logger,
7+
useAccessibilityTree,
8+
}) => {
59
const { stagehand, initResponse } = await initStagehand({
610
modelName,
711
logger,
@@ -11,7 +15,7 @@ export const ionwave_observe: EvalFunction = async ({ modelName, logger }) => {
1115

1216
await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx");
1317

14-
const observations = await stagehand.page.observe();
18+
const observations = await stagehand.page.observe({ useAccessibilityTree });
1519

1620
if (observations.length === 0) {
1721
await stagehand.close();

Diff for: evals/tasks/panamcs.ts

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import { initStagehand } from "../initStagehand";
22
import { EvalFunction } from "../../types/evals";
33

4-
export const panamcs: EvalFunction = async ({ modelName, logger }) => {
4+
export const panamcs: EvalFunction = async ({
5+
modelName,
6+
logger,
7+
useAccessibilityTree,
8+
}) => {
59
const { stagehand, initResponse } = await initStagehand({
610
modelName,
711
logger,
@@ -11,7 +15,7 @@ export const panamcs: EvalFunction = async ({ modelName, logger }) => {
1115

1216
await stagehand.page.goto("https://panamcs.org/about/staff/");
1317

14-
const observations = await stagehand.page.observe();
18+
const observations = await stagehand.page.observe({ useAccessibilityTree });
1519

1620
if (observations.length === 0) {
1721
await stagehand.close();

Diff for: evals/tasks/shopify_homepage.ts

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import { initStagehand } from "../initStagehand";
22
import { EvalFunction } from "../../types/evals";
33

4-
export const shopify_homepage: EvalFunction = async ({ modelName, logger }) => {
4+
export const shopify_homepage: EvalFunction = async ({
5+
modelName,
6+
logger,
7+
useAccessibilityTree,
8+
}) => {
59
const { stagehand, initResponse } = await initStagehand({
610
modelName,
711
logger,
@@ -11,7 +15,7 @@ export const shopify_homepage: EvalFunction = async ({ modelName, logger }) => {
1115

1216
await stagehand.page.goto("https://www.shopify.com/");
1317

14-
const observations = await stagehand.page.observe();
18+
const observations = await stagehand.page.observe({ useAccessibilityTree });
1519

1620
if (observations.length === 0) {
1721
await stagehand.close();

Diff for: evals/tasks/vanta.ts

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import { initStagehand } from "../initStagehand";
22
import { EvalFunction } from "../../types/evals";
33

4-
export const vanta: EvalFunction = async ({ modelName, logger }) => {
4+
export const vanta: EvalFunction = async ({
5+
modelName,
6+
logger,
7+
useAccessibilityTree,
8+
}) => {
59
const { stagehand, initResponse } = await initStagehand({
610
modelName,
711
logger,
@@ -12,7 +16,7 @@ export const vanta: EvalFunction = async ({ modelName, logger }) => {
1216
await stagehand.page.goto("https://www.vanta.com/");
1317
await stagehand.page.act({ action: "close the cookies popup" });
1418

15-
const observations = await stagehand.page.observe();
19+
const observations = await stagehand.page.observe({ useAccessibilityTree });
1620

1721
if (observations.length === 0) {
1822
await stagehand.close();

Diff for: evals/tasks/vanta_h.ts

+6-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,11 @@
11
import { initStagehand } from "../initStagehand";
22
import { EvalFunction } from "../../types/evals";
33

4-
export const vanta_h: EvalFunction = async ({ modelName, logger }) => {
4+
export const vanta_h: EvalFunction = async ({
5+
modelName,
6+
logger,
7+
useAccessibilityTree,
8+
}) => {
59
const { stagehand, initResponse } = await initStagehand({
610
modelName,
711
logger,
@@ -13,6 +17,7 @@ export const vanta_h: EvalFunction = async ({ modelName, logger }) => {
1317

1418
const observations = await stagehand.page.observe({
1519
instruction: "find the buy now button if it is available",
20+
useAccessibilityTree,
1621
});
1722

1823
await stagehand.close();

Diff for: lib/StagehandPage.ts

+34
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import type {
22
Page as PlaywrightPage,
33
BrowserContext as PlaywrightContext,
4+
CDPSession,
45
} from "@playwright/test";
56
import { LLMClient } from "./llm/LLMClient";
67
import { ActOptions, ActResult, GotoOptions, Stagehand } from "./index";
@@ -25,6 +26,7 @@ export class StagehandPage {
2526
private extractHandler: StagehandExtractHandler;
2627
private observeHandler: StagehandObserveHandler;
2728
private llmClient: LLMClient;
29+
private cdpClient: CDPSession | null = null;
2830

2931
constructor(
3032
page: PlaywrightPage,
@@ -460,6 +462,10 @@ export class StagehandPage {
460462
value: llmClient.modelName,
461463
type: "string",
462464
},
465+
useAccessibilityTree: {
466+
value: options?.useAccessibilityTree ? "true" : "false",
467+
type: "boolean",
468+
},
463469
},
464470
});
465471

@@ -473,6 +479,7 @@ export class StagehandPage {
473479
fullPage: false,
474480
requestId,
475481
domSettleTimeoutMs: options?.domSettleTimeoutMs,
482+
useAccessibilityTree: options?.useAccessibilityTree ?? false,
476483
})
477484
.catch((e) => {
478485
this.stagehand.log({
@@ -506,4 +513,31 @@ export class StagehandPage {
506513
throw e;
507514
});
508515
}
516+
517+
async getCDPClient(): Promise<CDPSession> {
518+
if (!this.cdpClient) {
519+
this.cdpClient = await this.context.newCDPSession(this.page);
520+
}
521+
return this.cdpClient;
522+
}
523+
524+
async sendCDP<T>(
525+
command: string,
526+
args?: Record<string, unknown>,
527+
): Promise<T> {
528+
const client = await this.getCDPClient();
529+
// Type assertion needed because CDP command strings are not fully typed
530+
return client.send(
531+
command as Parameters<CDPSession["send"]>[0],
532+
args || {},
533+
) as Promise<T>;
534+
}
535+
536+
async enableCDP(domain: string): Promise<void> {
537+
await this.sendCDP(`${domain}.enable`, {});
538+
}
539+
540+
async disableCDP(domain: string): Promise<void> {
541+
await this.sendCDP(`${domain}.disable`, {});
542+
}
509543
}

0 commit comments

Comments
 (0)