Skip to content

Commit 8c9445f

Browse files
xpath -> extract (#483)
* add xpath to AXNode type * rebase main * allow ObserveResult in page.extract() * pass observation to textExtract * update extract handler * update function signatures * observe -> extract * prettier * parameterize scrolling back to top * rm debug code * rm logging * rm comments * revert * rm comments * rm logging * revert unnecessary changes in extractHandler.ts * delete handler.ts * xpath -> node function * shared chunk collection function * abstract class instead of interface/shared function * documentation * dont scroll to bottom unless necessary * block: start when scrolling into view * scroll all the way to the bottom when needed * align bottom edge for scroll into view * subtract 25% viewport height from scroll location * add scrollTo param * evals * add scrollTo param in collectDomChunks * changeset * scrollTo = true in processDom * accept xpath string instead of ObserveResult * update changeset * xpath -> selector rename * rm observeResult from page.extract
1 parent 33644b3 commit 8c9445f

18 files changed

+1213
-481
lines changed

Diff for: .changeset/cuddly-bulldogs-juggle.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
You can now do targeted extraction by passing an XPath string into `extract`. This limits the DOM processing step to the target element, reducing token usage and increasing speed.

Diff for: evals/evals.config.json

+8
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,14 @@
247247
{
248248
"name": "observe_iframes2",
249249
"categories": ["observe"]
250+
},
251+
{
252+
"name": "extract_hamilton_weather",
253+
"categories": ["text_extract"]
254+
},
255+
{
256+
"name": "extract_regulations_table",
257+
"categories": ["text_extract"]
250258
}
251259
]
252260
}

Diff for: evals/tasks/extract_hamilton_weather.ts

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
import { EvalFunction } from "@/types/evals";
2+
import { initStagehand } from "@/evals/initStagehand";
3+
import { z } from "zod";
4+
5+
/**
 * Eval: targeted (`selector`-scoped) extraction on the Hamilton weather page.
 *
 * Navigates to the page, calls `page.extract` with `selector` set to the
 * xpath of a table element, and checks that all six extracted fields for
 * "Sun, Feb 23 at 11PM" exactly match the expected strings.
 *
 * @param modelName - Forwarded to `initStagehand` and to `extract`.
 * @param logger - Forwarded to `initStagehand`; its collected logs are
 *   included in the result.
 * @param useTextExtract - Forwarded to `extract`.
 * @returns An object with `_success`, the extracted `weatherData` (or a
 *   serialized `error` on failure), `debugUrl`, `sessionUrl` and `logs`.
 */
export const extract_hamilton_weather: EvalFunction = async ({
  modelName,
  logger,
  useTextExtract,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  try {
    await stagehand.page.goto("https://hamilton-weather.surge.sh/");
    const xpath =
      "/html/body[1]/div[5]/main[1]/article[1]/div[6]/div[2]/div[1]/table[1]";

    const weatherData = await stagehand.page.extract({
      instruction: "extract the weather data for Sun, Feb 23 at 11PM",
      schema: z.object({
        temperature: z.string(),
        weather_description: z.string(),
        wind: z.string(),
        humidity: z.string(),
        barometer: z.string(),
        visibility: z.string(),
      }),
      modelName,
      useTextExtract,
      selector: xpath,
    });

    // Define the expected weather data
    const expectedWeatherData = {
      temperature: "27 °F",
      weather_description: "Light snow. Overcast.",
      wind: "6 mph",
      humidity: "93%",
      barometer: '30.07 "Hg',
      visibility: "10 mi",
    };

    // Check that every field matches the expected value
    const isWeatherCorrect =
      weatherData.temperature === expectedWeatherData.temperature &&
      weatherData.weather_description ===
        expectedWeatherData.weather_description &&
      weatherData.wind === expectedWeatherData.wind &&
      weatherData.humidity === expectedWeatherData.humidity &&
      weatherData.barometer === expectedWeatherData.barometer &&
      weatherData.visibility === expectedWeatherData.visibility;

    await stagehand.close();

    return {
      _success: isWeatherCorrect,
      weatherData,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    console.error("Error or timeout occurred:", error);

    await stagehand.close();

    return {
      _success: false,
      // `JSON.stringify` skips an Error's non-enumerable `name`, `message`
      // and `stack`, which would leave `{}` here; copy them explicitly so the
      // failure reason survives. Non-Error values keep the JSON round-trip,
      // with `undefined` mapped to `null` so `JSON.parse` cannot throw on it.
      error:
        error instanceof Error
          ? {
              ...JSON.parse(JSON.stringify(error)),
              name: error.name,
              message: error.message,
              stack: error.stack,
            }
          : JSON.parse(JSON.stringify(error ?? null)),
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  }
};

Diff for: evals/tasks/extract_regulations_table.ts

+96
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
import { EvalFunction } from "@/types/evals";
2+
import { initStagehand } from "@/evals/initStagehand";
3+
import { z } from "zod";
4+
5+
/**
 * Eval: targeted (`selector`-scoped) extraction of the allottee table on the
 * NCC numbering page.
 *
 * Navigates to the page, calls `page.extract` with `selector` set to an
 * xpath, and passes when the first entry, the last entry and the total
 * number of extracted allottees all match the expected values.
 *
 * @param modelName - Forwarded to `initStagehand` and to `extract`.
 * @param logger - Forwarded to `initStagehand`; its collected logs are
 *   included in the result.
 * @param useTextExtract - Forwarded to `extract`.
 * @returns An object with `_success`, the extracted `regulationsData` (or a
 *   serialized `error` on failure), `debugUrl`, `sessionUrl` and `logs`.
 */
export const extract_regulations_table: EvalFunction = async ({
  modelName,
  logger,
  useTextExtract,
}) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
  });

  const { debugUrl, sessionUrl } = initResponse;

  try {
    await stagehand.page.goto(
      "https://www.ncc.gov.ng/technical-regulation/standards/numbering",
    );

    const xpath =
      "/html/body/div[2]/section[4]/div/div/div[1]/main/div[2]/div/div/div/div/div/div/div[2]/div[2]/div[3]/div[1]";

    const allottees = await stagehand.page.extract({
      instruction:
        "Extract ALL of the Allottees and their corresponding name, area, and area code.",
      schema: z.object({
        allottee_list: z.array(
          z.object({
            allottee_name: z.string(),
            area: z.string(),
            area_code: z.string(),
            access_code: z.string(),
          }),
        ),
      }),
      modelName,
      useTextExtract,
      selector: xpath,
    });

    // Define the expected first and last allottee entries
    const allottees_expected_first = {
      allottee_name: "101 Communications Limited",
      area: "Lagos",
      area_code: "0201",
      access_code: "249",
    };

    const allottees_expected_last = {
      allottee_name: "21st Century Technologies Limited",
      area: "Lagos",
      area_code: "0201",
      access_code: "278",
    };

    const expected_length = 10;

    const allotteeList = allottees.allottee_list;

    // Check that the first entry, last entry, and total number match
    // expectations. Comparing via JSON.stringify also requires the keys to
    // appear in the same order as in the expected objects.
    const isFirstCorrect =
      JSON.stringify(allotteeList[0]) ===
      JSON.stringify(allottees_expected_first);
    const isLastCorrect =
      JSON.stringify(allotteeList[allotteeList.length - 1]) ===
      JSON.stringify(allottees_expected_last);
    const isLengthCorrect = allotteeList.length === expected_length;

    const isRegulationsCorrect =
      isFirstCorrect && isLastCorrect && isLengthCorrect;

    await stagehand.close();

    return {
      _success: isRegulationsCorrect,
      regulationsData: allottees,
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  } catch (error) {
    console.error("Error or timeout occurred:", error);

    await stagehand.close();

    return {
      _success: false,
      // `JSON.stringify` skips an Error's non-enumerable `name`, `message`
      // and `stack`, which would leave `{}` here; copy them explicitly so the
      // failure reason survives. Non-Error values keep the JSON round-trip,
      // with `undefined` mapped to `null` so `JSON.parse` cannot throw on it.
      error:
        error instanceof Error
          ? {
              ...JSON.parse(JSON.stringify(error)),
              name: error.name,
              message: error.message,
              stack: error.stack,
            }
          : JSON.parse(JSON.stringify(error ?? null)),
      debugUrl,
      sessionUrl,
      logs: logger.getLogs(),
    };
  }
};

Diff for: lib/StagehandPage.ts

+14-2
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,10 @@ export class StagehandPage {
246246
};
247247
}
248248
if (prop === "extract") {
249-
return async (options: ExtractOptions<z.AnyZodObject>) => {
250-
return this.extract(options);
249+
return async (
250+
instructionOrOptions: string | ExtractOptions<z.AnyZodObject>,
251+
) => {
252+
return this.extract(instructionOrOptions);
251253
};
252254
}
253255
if (prop === "observe") {
@@ -566,8 +568,17 @@ export class StagehandPage {
566568
modelClientOptions,
567569
domSettleTimeoutMs,
568570
useTextExtract,
571+
selector,
569572
} = options;
570573

574+
// Throw a "not implemented" error if the user passed in a `selector` (an xpath)
575+
// and `useTextExtract` is not explicitly `true`
576+
if (selector && useTextExtract !== true) {
577+
throw new Error(
578+
"NotImplementedError: Passing an xpath into extract is only supported when `useTextExtract: true`.",
579+
);
580+
}
581+
571582
if (this.api) {
572583
return this.api.extract<T>(options);
573584
}
@@ -605,6 +616,7 @@ export class StagehandPage {
605616
requestId,
606617
domSettleTimeoutMs,
607618
useTextExtract,
619+
selector,
608620
})
609621
.catch((e) => {
610622
this.stagehand.log({

Diff for: lib/dom/DomChunk.ts

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
/**
 * Describes one chunk of processed DOM content.
 *
 * NOTE(review): the producer of these objects is not visible in this file;
 * the field notes below describe only the declared types — confirm the
 * exact semantics against the code that builds chunks.
 */
export interface DomChunk {
  /** Numeric offset at which the chunk starts (unit not shown here — TODO confirm). */
  startOffset: number;
  /** Numeric offset at which the chunk ends (unit not shown here — TODO confirm). */
  endOffset: number;
  /** Text output produced for this chunk. */
  outputString: string;
  /**
   * Map from a numeric key to a list of strings; presumably element index to
   * candidate selectors/xpaths — verify against the caller.
   */
  selectorMap: Record<number, string[]>;
}

Diff for: lib/dom/ElementContainer.ts

+67-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,33 @@
11
import { StagehandContainer } from "./StagehandContainer";
22

3-
export class ElementContainer implements StagehandContainer {
4-
constructor(private el: HTMLElement) {}
3+
/**
4+
* The ElementContainer class is a container implementation for a specific
5+
* HTML element.
6+
*
7+
* Unlike `GlobalPageContainer`, which manages the entire page,
8+
* this class focuses on one particular `HTMLElement`. Operations
9+
* such as `scrollTo` and `scrollIntoView` apply to that element
10+
* rather than `window`.
11+
*/
12+
export class ElementContainer extends StagehandContainer {
13+
/**
14+
* Creates an instance of `ElementContainer` tied to a specific element.
15+
* @param el - The scrollable `HTMLElement` that this container controls.
16+
*/
17+
constructor(private el: HTMLElement) {
18+
super();
19+
}
20+
21+
public getRootElement(): HTMLElement {
22+
return this.el;
23+
}
524

25+
/**
26+
* Retrieves the height of the visible viewport within this element
27+
* (`el.clientHeight`).
28+
*
29+
* @returns The visible (client) height of the element, in pixels.
30+
*/
631
public getViewportHeight(): number {
732
return this.el.clientHeight;
833
}
@@ -11,12 +36,50 @@ export class ElementContainer implements StagehandContainer {
1136
return this.el.scrollHeight;
1237
}
1338

39+
/**
40+
* Returns the element's current vertical scroll offset.
41+
*/
42+
public getScrollPosition(): number {
43+
return this.el.scrollTop;
44+
}
45+
46+
/**
47+
* Smoothly scrolls this element to the specified vertical offset, and
48+
* waits for the scrolling to complete.
49+
*
50+
* @param offset - The scroll offset (in pixels) from the top of the element.
51+
* @returns A promise that resolves once scrolling is finished.
52+
*/
1453
public async scrollTo(offset: number): Promise<void> {
15-
await new Promise((resolve) => setTimeout(resolve, 1500));
16-
this.el.scrollTo({ top: offset, left: 0, behavior: "smooth" });
54+
this.el.scrollTo({ top: offset, behavior: "smooth" });
55+
await this.waitForScrollEnd();
56+
}
57+
58+
/**
59+
* Scrolls this element so that the given `element` is visible, or
60+
* scrolls to the top if none is provided. Smoothly animates the scroll
61+
* and waits until it finishes.
62+
*
63+
* @param element - The child element to bring into view. If omitted, scrolls to top.
64+
* @returns A promise that resolves once scrolling completes.
65+
*/
66+
public async scrollIntoView(element?: HTMLElement): Promise<void> {
67+
if (!element) {
68+
this.el.scrollTo({ top: 0, behavior: "smooth" });
69+
} else {
70+
element.scrollIntoView({ behavior: "smooth", block: "end" });
71+
}
1772
await this.waitForScrollEnd();
1873
}
1974

75+
/**
76+
* Internal helper that waits until scrolling in this element has
77+
* fully stopped. It listens for scroll events on the element,
78+
* resetting a short timer every time a scroll occurs, and resolves
79+
* once there's no scroll for ~100ms.
80+
*
81+
* @returns A promise that resolves when scrolling has finished.
82+
*/
2083
private async waitForScrollEnd(): Promise<void> {
2184
return new Promise<void>((resolve) => {
2285
let scrollEndTimer: number;

0 commit comments

Comments
 (0)