Skip to content

Commit 98166d7

Browse files
support scrolling (#563)
* support scrolling
* changeset
* move window declaration to process.ts
* use existing getNodeFromXpath
* log warning if xpath not found
* handle mouse.wheel
* evals
* add range tolerance of +- 200px
1 parent 73d6736 commit 98166d7

File tree

9 files changed

+191
-3
lines changed

9 files changed

+191
-3
lines changed

Diff for: .changeset/plenty-ties-float.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": patch
3+
---
4+
5+
support scrolling in `act`

Diff for: evals/evals.config.json

+8
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,14 @@
279279
{
280280
"name": "extract_geniusee_2",
281281
"categories": ["targeted_extract"]
282+
},
283+
{
284+
"name": "scroll_50",
285+
"categories": ["act"]
286+
},
287+
{
288+
"name": "scroll_75",
289+
"categories": ["act"]
282290
}
283291
]
284292
}

Diff for: evals/tasks/scroll_50.ts

+48
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import { initStagehand } from "@/evals/initStagehand";
2+
import { EvalFunction } from "@/types/evals";
3+
4+
/**
 * Eval: asks `act` to scroll 50% down https://aigrant.com/ and checks that the
 * vertical middle of the viewport lands within 200px of half the document's
 * scroll height.
 *
 * @returns An object with `_success`, the captured logs, and the debug/session
 *   URLs; on failure it also carries a `message` with the measured position.
 */
export const scroll_50: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
    domSettleTimeoutMs: 3000,
  });

  const { debugUrl, sessionUrl } = initResponse;

  let scrollInfo: { scrollTop: number; scrollHeight: number };
  try {
    await stagehand.page.goto("https://aigrant.com/");
    await stagehand.page.act({
      action: "Scroll 50% down the page",
      slowDomBasedAct: false,
    });

    // Fixed pause before measuring — presumably gives the scroll time to finish.
    await new Promise((resolve) => setTimeout(resolve, 5000));

    // Get the current scroll position and total scroll height.
    // Note: `scrollTop` here is the document offset of the viewport's middle,
    // not the raw `window.scrollY`.
    scrollInfo = await stagehand.page.evaluate(() => {
      return {
        scrollTop: window.scrollY + window.innerHeight / 2,
        scrollHeight: document.documentElement.scrollHeight,
      };
    });
  } finally {
    // Always release the session, even when navigation, `act`, or the
    // measurement throws; the error itself still propagates to the caller.
    await stagehand.close();
  }

  const halfwayScroll = scrollInfo.scrollHeight / 2;
  const halfwayReached = Math.abs(scrollInfo.scrollTop - halfwayScroll) <= 200;
  const evaluationResult = halfwayReached
    ? {
        _success: true,
        logs: logger.getLogs(),
        debugUrl,
        sessionUrl,
      }
    : {
        _success: false,
        logs: logger.getLogs(),
        debugUrl,
        sessionUrl,
        message: `Scroll position (${scrollInfo.scrollTop}px) is not halfway down the page (${halfwayScroll}px).`,
      };

  return evaluationResult;
};

Diff for: evals/tasks/scroll_75.ts

+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import { initStagehand } from "@/evals/initStagehand";
2+
import { EvalFunction } from "@/types/evals";
3+
4+
/**
 * Eval: asks `act` to scroll 75% down https://aigrant.com/ and checks that the
 * point three quarters of the way down the viewport lands within 200px of 75%
 * of the document's scroll height.
 *
 * @returns An object with `_success`, the captured logs, and the debug/session
 *   URLs; on failure it also carries a `message` with the measured position.
 */
export const scroll_75: EvalFunction = async ({ modelName, logger }) => {
  const { stagehand, initResponse } = await initStagehand({
    modelName,
    logger,
    domSettleTimeoutMs: 3000,
  });

  const { debugUrl, sessionUrl } = initResponse;

  let scrollInfo: { scrollTop: number; scrollHeight: number };
  try {
    await stagehand.page.goto("https://aigrant.com/");
    await stagehand.page.act({
      action: "Scroll 75% down the page",
      slowDomBasedAct: false,
    });

    // Fixed pause before measuring — presumably gives the scroll time to finish.
    await new Promise((resolve) => setTimeout(resolve, 5000));

    // Get the current scroll position and total scroll height.
    // Note: `scrollTop` here is the document offset of the point 75% down the
    // viewport, not the raw `window.scrollY`.
    scrollInfo = await stagehand.page.evaluate(() => {
      return {
        scrollTop: window.scrollY + window.innerHeight * 0.75,
        scrollHeight: document.documentElement.scrollHeight,
      };
    });
  } finally {
    // Always release the session, even when navigation, `act`, or the
    // measurement throws; the error itself still propagates to the caller.
    await stagehand.close();
  }

  const threeQuartersScroll = scrollInfo.scrollHeight * 0.75;
  const threeQuartersReached =
    Math.abs(scrollInfo.scrollTop - threeQuartersScroll) <= 200;
  const evaluationResult = threeQuartersReached
    ? {
        _success: true,
        logs: logger.getLogs(),
        debugUrl,
        sessionUrl,
      }
    : {
        _success: false,
        logs: logger.getLogs(),
        debugUrl,
        sessionUrl,
        message: `Scroll position (${scrollInfo.scrollTop}px) is not three quarters down the page (${threeQuartersScroll}px).`,
      };

  return evaluationResult;
};

Diff for: lib/dom/process.ts

+2
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import {
33
calculateViewportHeight,
44
canElementScroll,
55
getNodeFromXpath,
6+
waitForDomSettle,
67
} from "./utils";
78
import { createStagehandContainer } from "./containerFactory";
89
import { StagehandContainer } from "./StagehandContainer";
@@ -518,6 +519,7 @@ export function getElementBoundingBoxes(xpath: string): Array<{
518519
return boundingBoxes;
519520
}
520521

522+
window.waitForDomSettle = waitForDomSettle;
521523
window.processDom = processDom;
522524
window.processAllOfDom = processAllOfDom;
523525
window.storeDOM = storeDOM;

Diff for: lib/dom/utils.ts

-2
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,6 @@ export async function waitForDomSettle() {
1414
});
1515
}
1616

17-
window.waitForDomSettle = waitForDomSettle;
18-
1917
export function calculateViewportHeight() {
2018
return Math.ceil(window.innerHeight * 0.75);
2119
}

Diff for: lib/handlers/actHandler.ts

+77
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import {
1919
} from "@/types/stagehand";
2020
import { SupportedPlaywrightAction } from "@/types/act";
2121
import { buildActObservePrompt } from "../prompt";
22+
import { getNodeFromXpath } from "@/lib/dom/utils";
2223
/**
2324
* NOTE: Vision support has been removed from this version of Stagehand.
2425
* If useVision or verifierUseVision is set to true, a warning is logged and
@@ -448,6 +449,82 @@ export class StagehandActHandler {
448449

449450
throw new PlaywrightCommandException(e.message);
450451
}
452+
} else if (
453+
method === "scrollTo" ||
454+
method === "scroll" ||
455+
method === "mouse.wheel"
456+
) {
457+
this.logger({
458+
category: "action",
459+
message: "scrolling element vertically to specified percentage",
460+
level: 2,
461+
auxiliary: {
462+
xpath: { value: xpath, type: "string" },
463+
coordinate: { value: JSON.stringify(args), type: "string" },
464+
},
465+
});
466+
467+
try {
468+
const [yArg = "0%"] = args as string[];
469+
470+
await this.stagehandPage.page.evaluate(
471+
({ xpath, yArg }) => {
472+
/**
 * Converts a percentage argument such as "50%", " 75 ", or 50 into a number
 * clamped to the inclusive range [0, 100].
 *
 * @param val - Percentage as text (an optional "%" sign and surrounding
 *   whitespace are allowed) or as a plain number.
 * @returns The parsed percentage, or 0 when the value cannot be parsed.
 */
function parsePercent(val: string | number): number {
  // Coerce first: the argument arrives via `args as string[]`, which is only a
  // compile-time cast, so a numeric value would otherwise throw on `.trim()`.
  const cleaned = String(val).trim().replace("%", "");
  const num = parseFloat(cleaned);
  // Unparseable input falls back to 0; everything else is clamped to 0..100.
  return Number.isNaN(num) ? 0 : Math.max(0, Math.min(num, 100));
}
477+
478+
const elementNode = getNodeFromXpath(xpath);
479+
if (!elementNode || elementNode.nodeType !== Node.ELEMENT_NODE) {
480+
console.warn(`Could not locate element to scroll on.`);
481+
return;
482+
}
483+
484+
const element = elementNode as HTMLElement;
485+
const yPct = parsePercent(yArg);
486+
487+
// Determine if <html> is actually the scrolled container
488+
if (element.tagName.toLowerCase() === "html") {
489+
// Scroll the entire page (window)
490+
const scrollHeight = document.body.scrollHeight;
491+
const viewportHeight = window.innerHeight;
492+
const scrollTop = (scrollHeight - viewportHeight) * (yPct / 100);
493+
494+
window.scrollTo({
495+
top: scrollTop,
496+
left: window.scrollX,
497+
behavior: "smooth",
498+
});
499+
} else {
500+
// Otherwise, scroll the element itself
501+
const scrollHeight = element.scrollHeight;
502+
const clientHeight = element.clientHeight;
503+
const scrollTop = (scrollHeight - clientHeight) * (yPct / 100);
504+
505+
element.scrollTo({
506+
top: scrollTop,
507+
left: element.scrollLeft,
508+
behavior: "smooth",
509+
});
510+
}
511+
},
512+
{ xpath, yArg },
513+
);
514+
} catch (e) {
515+
this.logger({
516+
category: "action",
517+
message: "error scrolling element vertically to percentage",
518+
level: 1,
519+
auxiliary: {
520+
error: { value: (e as Error).message, type: "string" },
521+
trace: { value: (e as Error).stack, type: "string" },
522+
xpath: { value: xpath, type: "string" },
523+
args: { value: JSON.stringify(args), type: "object" },
524+
},
525+
});
526+
throw new PlaywrightCommandException((e as Error).message);
527+
}
451528
} else if (method === "fill" || method === "type") {
452529
try {
453530
await locator.fill("");

Diff for: lib/prompt.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -401,7 +401,7 @@ export function buildActObservePrompt(
401401
let instruction = `Find the most relevant element to perform an action on given the following action: ${action}.
402402
Provide an action for this element such as ${supportedActions.join(", ")}, or any other playwright locator method. Remember that to users, buttons and links look the same in most cases.
403403
If the action is completely unrelated to a potential action to be taken on the page, return an empty array.
404-
ONLY return one action. If multiple actions are relevant, return the most relevant one.`;
404+
ONLY return one action. If multiple actions are relevant, return the most relevant one. If the user is asking to scroll to a position on the page, e.g., 'halfway' or 0.75, etc, you must return the argument formatted as the correct percentage, e.g., '50%' or '75%', etc.`;
405405

406406
// Add variable names (not values) to the instruction if any
407407
if (variables && Object.keys(variables).length > 0) {

Diff for: types/act.ts

+1
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,5 @@ export enum SupportedPlaywrightAction {
3232
CLICK = "click",
3333
FILL = "fill",
3434
TYPE = "type",
35+
SCROLL = "scrollTo",
3536
}

0 commit comments

Comments
 (0)