@@ -7,6 +7,7 @@ import { formatText } from "../utils";
7
7
import { StagehandPage } from "../StagehandPage" ;
8
8
import { Stagehand , StagehandFunctionName } from "../index" ;
9
9
import { pageTextSchema } from "../../types/page" ;
10
+ import { getAccessibilityTree } from "@/lib/a11y/utils" ;
10
11
11
12
const PROXIMITY_THRESHOLD = 15 ;
12
13
@@ -51,31 +52,6 @@ const PROXIMITY_THRESHOLD = 15;
51
52
* **10. Handle the extraction response and log the results.**
52
53
* - Processes the output from the LLM and logs relevant information.
53
54
*
54
- *
55
- * Here is what `domExtract` does at a high level:
56
- *
57
- * **1. Wait for the DOM to settle and start DOM debugging.**
58
- * - Ensures the page is fully loaded and stable before extraction.
59
- *
60
- * **2. Process the DOM in chunks.**
61
- * - The `processDom` function:
62
- * - Divides the page into vertical "chunks" based on viewport height.
63
- * - Picks the next chunk that hasn't been processed yet.
64
- * - Scrolls to that chunk and extracts candidate elements.
65
- * - Returns `outputString` (HTML snippets of candidate elements),
66
- * `selectorMap` (the XPaths of the candidate elements),
67
- * `chunk` (the current chunk index), and `chunks` (the array of all chunk indices).
68
- * - This chunk-based approach ensures that large or lengthy pages can be processed in smaller, manageable sections.
69
- *
70
- * **3. Pass the extracted DOM elements (in `outputString`) to the LLM for structured data extraction.**
71
- * - Uses the instructions, schema, and previously extracted content as context to
72
- * guide the LLM in extracting the structured data.
73
- *
74
- * **4. Check if extraction is complete.**
75
- * - If the extraction is complete (all chunks have been processed or the LLM determines
76
- * that we do not need to continue), return the final result.
77
- * - If not, repeat steps 1-4 with the next chunk until extraction is complete or no more chunks remain.
78
- *
79
55
* @remarks
80
56
* Each step corresponds to specific code segments, as noted in the comments throughout the code.
81
57
*/
@@ -112,7 +88,6 @@ export class StagehandExtractHandler {
112
88
instruction,
113
89
schema,
114
90
content = { } ,
115
- chunksSeen = [ ] ,
116
91
llmClient,
117
92
requestId,
118
93
domSettleTimeoutMs,
@@ -154,7 +129,6 @@ export class StagehandExtractHandler {
154
129
instruction,
155
130
schema,
156
131
content,
157
- chunksSeen,
158
132
llmClient,
159
133
requestId,
160
134
domSettleTimeoutMs,
@@ -415,22 +389,20 @@ export class StagehandExtractHandler {
415
389
instruction,
416
390
schema,
417
391
content = { } ,
418
- chunksSeen = [ ] ,
419
392
llmClient,
420
393
requestId,
421
394
domSettleTimeoutMs,
422
395
} : {
423
396
instruction : string ;
424
397
schema : T ;
425
398
content ?: z . infer < T > ;
426
- chunksSeen ?: Array < number > ;
427
399
llmClient : LLMClient ;
428
400
requestId ?: string ;
429
401
domSettleTimeoutMs ?: number ;
430
402
} ) : Promise < z . infer < T > > {
431
403
this . logger ( {
432
404
category : "extraction" ,
433
- message : "starting extraction using old approach " ,
405
+ message : "starting extraction using a11y tree " ,
434
406
level : 1 ,
435
407
auxiliary : {
436
408
instruction : {
@@ -440,56 +412,24 @@ export class StagehandExtractHandler {
440
412
} ,
441
413
} ) ;
442
414
443
- // **1:** Wait for the DOM to settle and start DOM debugging
444
- // This ensures the page is stable before extracting any data.
445
415
await this . stagehandPage . _waitForSettledDom ( domSettleTimeoutMs ) ;
446
-
447
- // **2:** Call processDom() to handle chunk-based extraction
448
- // processDom determines which chunk of the page to process next.
449
- // It will:
450
- // - Identify all chunks (vertical segments of the page),
451
- // - Pick the next unprocessed chunk,
452
- // - Scroll to that chunk's region,
453
- // - Extract candidate elements and their text,
454
- // - Return the extracted text (outputString), a selectorMap (for referencing elements),
455
- // the current chunk index, and the full list of chunks.
456
- const { outputString, chunk, chunks } = await this . stagehand . page . evaluate (
457
- ( chunksSeen ?: number [ ] ) => window . processDom ( chunksSeen ?? [ ] ) ,
458
- chunksSeen ,
459
- ) ;
460
-
416
+ const tree = await getAccessibilityTree ( this . stagehandPage , this . logger ) ;
461
417
this . logger ( {
462
418
category : "extraction" ,
463
- message : "received output from processDom." ,
464
- auxiliary : {
465
- chunk : {
466
- value : chunk . toString ( ) ,
467
- type : "integer" ,
468
- } ,
469
- chunks_left : {
470
- value : ( chunks . length - chunksSeen . length ) . toString ( ) ,
471
- type : "integer" ,
472
- } ,
473
- chunks_total : {
474
- value : chunks . length . toString ( ) ,
475
- type : "integer" ,
476
- } ,
477
- } ,
419
+ message : "Getting accessibility tree data" ,
420
+ level : 1 ,
478
421
} ) ;
422
+ const outputString = tree . simplified ;
479
423
480
- // **3:** Pass the list of candidate HTML snippets to the LLM
481
- // The LLM uses the provided instruction and schema to parse and extract
482
- // structured data.
483
424
const extractionResponse = await extract ( {
484
425
instruction,
485
426
previouslyExtractedContent : content ,
486
427
domElements : outputString ,
487
428
schema,
429
+ chunksSeen : 1 ,
430
+ chunksTotal : 1 ,
488
431
llmClient,
489
- chunksSeen : chunksSeen . length ,
490
- chunksTotal : chunks . length ,
491
432
requestId,
492
- isUsingTextExtract : false ,
493
433
userProvidedInstructions : this . userProvidedInstructions ,
494
434
logger : this . logger ,
495
435
logInferenceToFile : this . stagehand . logInferenceToFile ,
@@ -521,48 +461,32 @@ export class StagehandExtractHandler {
521
461
} ,
522
462
} ) ;
523
463
524
- // Mark the current chunk as processed by adding it to chunksSeen
525
- chunksSeen . push ( chunk ) ;
526
-
527
- // **4:** Check if extraction is complete
528
- // If the LLM deems the extraction complete or we've processed all chunks, return the final result.
529
- // Otherwise, call domExtract again for the next chunk.
530
- if ( completed || chunksSeen . length === chunks . length ) {
464
+ if ( completed ) {
531
465
this . logger ( {
532
466
category : "extraction" ,
533
- message : "got response" ,
467
+ message : "extraction completed successfully" ,
468
+ level : 1 ,
534
469
auxiliary : {
535
470
extraction_response : {
536
471
value : JSON . stringify ( extractionResponse ) ,
537
472
type : "object" ,
538
473
} ,
539
474
} ,
540
475
} ) ;
541
-
542
- return output ;
543
476
} else {
544
477
this . logger ( {
545
478
category : "extraction" ,
546
- message : "continuing extraction" ,
479
+ message : "extraction incomplete after processing all data" ,
480
+ level : 1 ,
547
481
auxiliary : {
548
482
extraction_response : {
549
483
value : JSON . stringify ( extractionResponse ) ,
550
484
type : "object" ,
551
485
} ,
552
486
} ,
553
487
} ) ;
554
- await this . stagehandPage . _waitForSettledDom ( domSettleTimeoutMs ) ;
555
-
556
- // Recursively continue with the next chunk
557
- return this . domExtract ( {
558
- instruction,
559
- schema,
560
- content : output ,
561
- chunksSeen,
562
- llmClient,
563
- domSettleTimeoutMs,
564
- } ) ;
565
488
}
489
+ return output ;
566
490
}
567
491
568
492
/**
0 commit comments