@@ -241,6 +241,9 @@ export async function getAccessibilityTree(
241
241
await page . enableCDP ( "Accessibility" ) ;
242
242
243
243
try {
244
+ // Identify which elements are scrollable and get their backendNodeIds
245
+ const scrollableBackendIds = await findScrollableElementIds ( page ) ;
246
+
244
247
// Fetch the full accessibility tree from Chrome DevTools Protocol
245
248
const { nodes } = await page . sendCDP < { nodes : AXNode [ ] } > (
246
249
"Accessibility.getFullAXTree" ,
@@ -249,16 +252,28 @@ export async function getAccessibilityTree(
249
252
250
253
// Transform into hierarchical structure
251
254
const hierarchicalTree = await buildHierarchicalTree (
252
- nodes . map ( ( node ) => ( {
253
- role : node . role ?. value ,
254
- name : node . name ?. value ,
255
- description : node . description ?. value ,
256
- value : node . value ?. value ,
257
- nodeId : node . nodeId ,
258
- backendDOMNodeId : node . backendDOMNodeId ,
259
- parentId : node . parentId ,
260
- childIds : node . childIds ,
261
- } ) ) ,
255
+ nodes . map ( ( node ) => {
256
+ let roleValue = node . role ?. value || "" ;
257
+
258
+ if ( scrollableBackendIds . has ( node . backendDOMNodeId ) ) {
259
+ if ( roleValue === "generic" || roleValue === "none" ) {
260
+ roleValue = "scrollable" ;
261
+ } else {
262
+ roleValue = roleValue ? `scrollable, ${ roleValue } ` : "scrollable" ;
263
+ }
264
+ }
265
+
266
+ return {
267
+ role : roleValue ,
268
+ name : node . name ?. value ,
269
+ description : node . description ?. value ,
270
+ value : node . value ?. value ,
271
+ nodeId : node . nodeId ,
272
+ backendDOMNodeId : node . backendDOMNodeId ,
273
+ parentId : node . parentId ,
274
+ childIds : node . childIds ,
275
+ } ;
276
+ } ) ,
262
277
page ,
263
278
logger ,
264
279
) ;
@@ -360,6 +375,69 @@ export async function getXPathByResolvedObjectId(
360
375
return result . value || "" ;
361
376
}
362
377
378
/**
 * `findScrollableElementIds` identifies elements in the browser that are
 * deemed "scrollable" and returns their CDP `backendNodeId`s.
 *
 * At a high level, it does the following:
 * - Calls the browser-side `window.getScrollableElementXpaths()` function,
 *   which returns a list of XPaths for scrollable containers.
 * - Iterates over the returned XPaths (skipping empty entries) and, for each,
 *   calls `Runtime.evaluate` to run `document.evaluate(...)`, obtaining a
 *   `RemoteObject` reference to the first matching node if one exists.
 * - For each valid object reference, calls `DOM.describeNode` to retrieve the
 *   element's `backendNodeId`.
 * - Collects all resulting `backendNodeId`s in a Set.
 * - Releases every remote object handle created along the way with a single
 *   `Runtime.releaseObjectGroup` call, so repeated invocations do not
 *   accumulate handles in the page's execution context.
 *
 * @param stagehandPage - A StagehandPage instance with built-in CDP helpers.
 * @returns A Promise that resolves to a Set of unique `backendNodeId`s
 * corresponding to scrollable elements in the DOM.
 */
export async function findScrollableElementIds(
  stagehandPage: StagehandPage,
): Promise<Set<number>> {
  // Get the XPaths of the scrollable elements from the browser-side helper.
  const xpaths = await stagehandPage.page.evaluate(() => {
    return window.getScrollableElementXpaths();
  });

  const scrollableBackendIds = new Set<number>();

  // Every handle returned by `Runtime.evaluate` is tagged with this group so
  // that all of them can be released in one round trip. The name is unique per
  // call so that overlapping invocations never release each other's handles.
  const objectGroup = `scrollable-elements-${Date.now()}-${Math.random()
    .toString(36)
    .slice(2)}`;
  let createdHandle = false;

  try {
    for (const xpath of xpaths) {
      if (!xpath) continue;

      // Resolve the XPath to a remote object reference inside the page.
      // JSON.stringify safely quotes the XPath for embedding in the expression.
      const { result } = await stagehandPage.sendCDP<{
        result?: { objectId?: string };
      }>("Runtime.evaluate", {
        expression: `
        (function() {
          const res = document.evaluate(${JSON.stringify(
            xpath,
          )}, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null);
          return res.singleNodeValue;
        })();
      `,
        returnByValue: false,
        objectGroup,
      });

      // No objectId means the XPath did not resolve to a node.
      if (!result?.objectId) continue;
      createdHandle = true;

      // Translate the remote object reference into its backendNodeId.
      const { node } = await stagehandPage.sendCDP<{
        node?: { backendNodeId?: number };
      }>("DOM.describeNode", {
        objectId: result.objectId,
      });

      if (node?.backendNodeId) {
        scrollableBackendIds.add(node.backendNodeId);
      }
    }
  } finally {
    if (createdHandle) {
      try {
        await stagehandPage.sendCDP<unknown>("Runtime.releaseObjectGroup", {
          objectGroup,
        });
      } catch {
        // Best-effort cleanup: a failed release (e.g. the execution context was
        // already destroyed by a navigation) must not mask the result or the
        // original error from the loop above.
      }
    }
  }

  return scrollableBackendIds;
}
440
+
363
441
export async function performPlaywrightMethod (
364
442
stagehandPage : Page ,
365
443
logger : ( logLine : LogLine ) => void ,
0 commit comments