Skip to content

Commit 71ee10d

Browse files
scroll to next chunk (#608)
* scroll to next chunk * changeset * handle root case * evals
1 parent 7a514a8 commit 71ee10d

File tree

10 files changed

+309
-1
lines changed

10 files changed

+309
-1
lines changed

Diff for: .changeset/clean-cups-join.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
added support for "scrolling to next/previous chunk"

Diff for: evals/evals.config.json

+8
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,14 @@
291291
{
292292
"name": "scroll_75",
293293
"categories": ["act", "regression_dom_extract"]
294+
},
295+
{
296+
"name": "nextChunk",
297+
"categories": ["act"]
298+
},
299+
{
300+
"name": "prevChunk",
301+
"categories": ["act"]
294302
}
295303
]
296304
}

Diff for: evals/tasks/nextChunk.ts

+74
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import { initStagehand } from "@/evals/initStagehand";
2+
import { EvalFunction } from "@/types/evals";
3+
4+
/**
 * Eval: asks Stagehand to "scroll down one chunk" inside the apartments.com
 * filters modal and checks that the modal's scroll container moved by roughly
 * its own visible height.
 *
 * The eval fails (rather than trivially passing on all-zero measurements) when
 * the scroll container cannot be found or has no height.
 *
 * @returns An eval result with `_success`, the collected logs, the session
 *   URLs and a human-readable `message` describing the measured scroll.
 */
export const nextChunk: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
    domSettleTimeoutMs: 3000,
  });

  const { debugUrl, sessionUrl } = initResponse;

  type ContainerMetrics = { scrollTop: number; height: number } | null;

  // Reads the modal's scroll offset and visible height; null when the modal
  // container is not present in the page.
  const readContainer = (): Promise<ContainerMetrics> =>
    stagehand.page.evaluate(() => {
      const container = document.querySelector<HTMLElement>(
        "#advancedFilters > div",
      );
      if (!container) {
        return null;
      }
      return {
        scrollTop: container.scrollTop,
        height: container.getBoundingClientRect().height,
      };
    });

  let before: ContainerMetrics = null;
  let after: ContainerMetrics = null;

  try {
    await stagehand.page.goto("https://www.apartments.com/san-francisco-ca/");
    await stagehand.page.act({
      action: "click on the all filters button",
      slowDomBasedAct: false,
    });

    before = await readContainer();

    // Without a container there is nothing to measure, so skip the scroll.
    if (before) {
      await stagehand.page.act({
        action: "scroll down one chunk on the filters modal",
        slowDomBasedAct: false,
      });

      // Give the smooth scroll time to finish before measuring again.
      await new Promise((resolve) => setTimeout(resolve, 2000));

      after = await readContainer();
    }
  } finally {
    // Always release the browser session, even when navigation or act throws.
    await stagehand.close();
  }

  if (!before || !after) {
    return {
      _success: false,
      logs: logger.getLogs(),
      debugUrl,
      sessionUrl,
      message:
        "Could not find #advancedFilters > div, so the scroll could not be measured.",
    };
  }

  const chunkHeight = before.height;
  const actualDiff = after.scrollTop - before.scrollTop;
  const threshold = 20; // allowable difference in px
  // A zero-height container would make "scrolled by 0" look like a success.
  const scrolledOneChunk =
    chunkHeight > 0 && Math.abs(actualDiff - chunkHeight) <= threshold;

  const evaluationResult = scrolledOneChunk
    ? {
        _success: true,
        logs: logger.getLogs(),
        debugUrl,
        sessionUrl,
        message: `Successfully scrolled ~one chunk: expected ~${chunkHeight}, got ${actualDiff}`,
      }
    : {
        _success: false,
        logs: logger.getLogs(),
        debugUrl,
        sessionUrl,
        message: `Scroll difference expected ~${chunkHeight} but only scrolled ${actualDiff}.`,
      };

  return evaluationResult;
};

Diff for: evals/tasks/prevChunk.ts

+65
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
import { initStagehand } from "@/evals/initStagehand";
2+
import { EvalFunction } from "@/types/evals";
3+
4+
/**
 * Eval: jumps to the middle of aigrant.com, asks Stagehand to "scroll up one
 * chunk", and passes when the window scrolled up by roughly one viewport
 * height (within a 20px tolerance).
 */
export const prevChunk: EvalFunction = async ({ modelName, logger }) => {
  // Fixed delay used to let the page and scroll animations settle.
  const pause = (ms: number) =>
    new Promise((resolve) => setTimeout(resolve, ms));

  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
    domSettleTimeoutMs: 3000,
  });
  const { debugUrl, sessionUrl } = initResponse;

  await stagehand.page.goto("https://aigrant.com/");
  await pause(2000);

  // Jump to the vertical midpoint, then record where we are and how tall one
  // chunk (the viewport) is.
  const start = await stagehand.page.evaluate(() => {
    window.scrollTo({
      top: document.body.scrollHeight / 2,
      left: 0,
      behavior: "instant",
    });

    return {
      initialScrollTop: window.scrollY,
      chunkHeight: window.innerHeight,
    };
  });

  await pause(2000);
  await stagehand.page.act({
    action: "scroll up one chunk",
    slowDomBasedAct: false,
  });
  await pause(5000);

  const endScrollY = await stagehand.page.evaluate(() => window.scrollY);

  await stagehand.close();

  const expectedPx = start.chunkHeight;
  const scrolledUpPx = start.initialScrollTop - endScrollY;
  const tolerancePx = 20;

  if (Math.abs(scrolledUpPx - expectedPx) <= tolerancePx) {
    return {
      _success: true,
      logs: logger.getLogs(),
      debugUrl,
      sessionUrl,
      message: `Successfully scrolled ~one chunk UP: expected ~${expectedPx}, got ${scrolledUpPx}.`,
    };
  }

  return {
    _success: false,
    logs: logger.getLogs(),
    debugUrl,
    sessionUrl,
    message: `Scroll difference expected ~${expectedPx} but only scrolled ${scrolledUpPx}.`,
  };
};

Diff for: lib/dom/global.d.ts

+1
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,6 @@ declare global {
3232
}>;
3333
getScrollableElementXpaths: (topN?: number) => Promise<string[]>;
3434
getNodeFromXpath: (xpath: string) => Node | null;
35+
waitForElementScrollEnd: (element: HTMLElement) => Promise<void>;
3536
}
3637
}

Diff for: lib/dom/process.ts

+2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
canElementScroll,
55
getNodeFromXpath,
66
waitForDomSettle,
7+
waitForElementScrollEnd,
78
} from "./utils";
89
import { createStagehandContainer } from "./containerFactory";
910
import { StagehandContainer } from "./StagehandContainer";
@@ -530,6 +531,7 @@ window.getElementBoundingBoxes = getElementBoundingBoxes;
530531
window.createStagehandContainer = createStagehandContainer;
531532
window.getScrollableElementXpaths = getScrollableElementXpaths;
532533
window.getNodeFromXpath = getNodeFromXpath;
534+
window.waitForElementScrollEnd = waitForElementScrollEnd;
533535

534536
async function pickChunk(chunksSeen: Array<number>) {
535537
const viewportHeight = calculateViewportHeight();

Diff for: lib/dom/utils.ts

+20
Original file line numberDiff line numberDiff line change
@@ -69,3 +69,23 @@ export function getNodeFromXpath(xpath: string) {
6969
null,
7070
).singleNodeValue;
7171
}
72+
73+
/**
 * Resolves once `element` has gone `idleMs` milliseconds without a "scroll"
 * event. The idle timer is armed immediately, so the promise also resolves
 * (after `idleMs`) when no scroll happens at all.
 *
 * Viewport scrolling dispatches "scroll" at the Document rather than at the
 * `<html>`/`<body>` element, so when `element` is the document's scrolling
 * element (or its root element) the owner document is observed as well.
 * Without this, a smooth scroll of the page would be reported as finished
 * while it is still animating.
 *
 * @param element - The scroll container to observe.
 * @param idleMs - Quiet period, in milliseconds, that counts as "scroll ended".
 * @returns A promise that resolves when scrolling has been idle for `idleMs`.
 */
export function waitForElementScrollEnd(
  element: HTMLElement,
  idleMs = 100,
): Promise<void> {
  return new Promise<void>((resolve) => {
    let scrollEndTimer: number | undefined;

    const ownerDocument = element.ownerDocument;
    const scrollsViewport =
      !!ownerDocument &&
      (element === ownerDocument.scrollingElement ||
        element === ownerDocument.documentElement);

    const handleScroll = () => {
      // Every scroll event restarts the quiet-period countdown.
      clearTimeout(scrollEndTimer);
      scrollEndTimer = window.setTimeout(() => {
        element.removeEventListener("scroll", handleScroll);
        if (scrollsViewport) {
          ownerDocument.removeEventListener("scroll", handleScroll);
        }
        resolve();
      }, idleMs);
    };

    element.addEventListener("scroll", handleScroll, { passive: true });
    if (scrollsViewport) {
      ownerDocument.addEventListener("scroll", handleScroll, {
        passive: true,
      });
    }

    // Arm the timer up front so the promise settles even without any scroll.
    handleScroll();
  });
}

Diff for: lib/handlers/handlerUtils/actHandlerUtils.ts

+129
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,137 @@ export const methodHandlerMap: Record<
2121
type: fillOrType,
2222
press: pressKey,
2323
click: clickElement,
24+
nextChunk: scrollToNextChunk,
25+
prevChunk: scrollToPreviousChunk,
2426
};
2527

28+
/**
 * Shared implementation for the `nextChunk` / `prevChunk` act methods.
 *
 * Inside the page, the node at `ctx.xpath` is resolved and scrolled smoothly
 * by one "chunk": the visual viewport height when the target is `<html>` or
 * `<body>`, otherwise the element's own bounding-box height. The call waits
 * for the scroll to go idle via `window.waitForElementScrollEnd`.
 *
 * If the xpath does not resolve to an element, a warning is logged in the
 * page console and nothing is scrolled.
 *
 * @param ctx - Handler context providing the page, target xpath and logger.
 * @param direction - `1` to scroll down (next chunk), `-1` to scroll up.
 * @throws PlaywrightCommandException when the in-page evaluation fails.
 */
async function scrollByChunk(
  ctx: MethodHandlerContext,
  direction: 1 | -1,
): Promise<void> {
  const { stagehandPage, xpath, logger } = ctx;
  const chunkLabel = direction === 1 ? "next" : "previous";

  logger({
    category: "action",
    message: `scrolling to ${chunkLabel} chunk`,
    level: 2,
    auxiliary: {
      xpath: { value: xpath, type: "string" },
    },
  });

  try {
    await stagehandPage.page.evaluate(
      ({ xpath, direction }) => {
        const elementNode = getNodeFromXpath(xpath);
        if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
          console.warn(`Could not locate element to scroll by its height.`);
          return Promise.resolve();
        }

        const element = elementNode as HTMLElement;
        const tagName = element.tagName.toLowerCase();

        if (tagName === "html" || tagName === "body") {
          // visualViewport may be null; fall back to the layout viewport.
          const height = window.visualViewport?.height ?? window.innerHeight;
          window.scrollBy({
            top: direction * height,
            left: 0,
            behavior: "smooth",
          });

          const scrollingEl =
            document.scrollingElement || document.documentElement;
          return window.waitForElementScrollEnd(scrollingEl as HTMLElement);
        }

        const height = element.getBoundingClientRect().height;
        element.scrollBy({
          top: direction * height,
          left: 0,
          behavior: "smooth",
        });

        return window.waitForElementScrollEnd(element);
      },
      { xpath, direction },
    );
  } catch (e) {
    // Thrown values are not guaranteed to be Error instances; normalise first.
    const error = e instanceof Error ? e : new Error(String(e));
    logger({
      category: "action",
      message: `error scrolling to ${chunkLabel} chunk`,
      level: 1,
      auxiliary: {
        error: { value: error.message, type: "string" },
        trace: { value: error.stack ?? "", type: "string" },
        xpath: { value: xpath, type: "string" },
      },
    });
    throw new PlaywrightCommandException(error.message);
  }
}

/**
 * Scrolls the element at `ctx.xpath` down by one chunk (one viewport height
 * for the page root, one element height otherwise) and waits for the scroll
 * to settle.
 *
 * @throws PlaywrightCommandException when the in-page evaluation fails.
 */
export async function scrollToNextChunk(ctx: MethodHandlerContext) {
  await scrollByChunk(ctx, 1);
}

/**
 * Scrolls the element at `ctx.xpath` up by one chunk (one viewport height
 * for the page root, one element height otherwise) and waits for the scroll
 * to settle.
 *
 * @throws PlaywrightCommandException when the in-page evaluation fails.
 */
export async function scrollToPreviousChunk(ctx: MethodHandlerContext) {
  await scrollByChunk(ctx, -1);
}
154+
26155
export async function scrollElementIntoView(ctx: MethodHandlerContext) {
27156
const { locator, xpath, logger } = ctx;
28157

Diff for: lib/prompt.ts

+3-1
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,9 @@ export function buildActObservePrompt(
401401
let instruction = `Find the most relevant element to perform an action on given the following action: ${action}.
402402
Provide an action for this element such as ${supportedActions.join(", ")}, or any other playwright locator method. Remember that to users, buttons and links look the same in most cases.
403403
If the action is completely unrelated to a potential action to be taken on the page, return an empty array.
404-
ONLY return one action. If multiple actions are relevant, return the most relevant one. If the user is asking to scroll to a position on the page, e.g., 'halfway' or 0.75, etc, you must return the argument formatted as the correct percentage, e.g., '50%' or '75%', etc.`;
404+
ONLY return one action. If multiple actions are relevant, return the most relevant one.
405+
If the user is asking to scroll to a position on the page, e.g., 'halfway' or 0.75, etc, you must return the argument formatted as the correct percentage, e.g., '50%' or '75%', etc.
406+
If the user is asking to scroll to the next chunk/previous chunk, choose the nextChunk/prevChunk method. No arguments are required here.`;
405407

406408
// Add variable names (not values) to the instruction if any
407409
if (variables && Object.keys(variables).length > 0) {

Diff for: types/act.ts

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ export enum SupportedPlaywrightAction {
3636
FILL = "fill",
3737
TYPE = "type",
3838
SCROLL = "scrollTo",
39+
NEXT_CHUNK = "nextChunk",
40+
PREV_CHUNK = "prevChunk",
3941
}
4042

4143
/**

0 commit comments

Comments
 (0)