@@ -6,6 +6,7 @@ import { LLMClient } from "../llm/LLMClient";
6
6
import { formatText } from "../utils" ;
7
7
import { StagehandPage } from "../StagehandPage" ;
8
8
import { Stagehand } from "../index" ;
9
+ import { pageTextSchema } from "../../types/page" ;
9
10
10
11
const PROXIMITY_THRESHOLD = 15 ;
11
12
@@ -118,16 +119,26 @@ export class StagehandExtractHandler {
118
119
useTextExtract = false ,
119
120
selector,
120
121
} : {
121
- instruction : string ;
122
- schema : T ;
122
+ instruction ? : string ;
123
+ schema ? : T ;
123
124
content ?: z . infer < T > ;
124
125
chunksSeen ?: Array < number > ;
125
- llmClient : LLMClient ;
126
+ llmClient ? : LLMClient ;
126
127
requestId ?: string ;
127
128
domSettleTimeoutMs ?: number ;
128
129
useTextExtract ?: boolean ;
129
130
selector ?: string ;
130
- } ) : Promise < z . infer < T > > {
131
+ } = { } ) : Promise < z . infer < T > > {
132
+ const noArgsCalled = ! instruction && ! schema && ! llmClient && ! selector ;
133
+ if ( noArgsCalled ) {
134
+ this . logger ( {
135
+ category : "extraction" ,
136
+ message : "Extracting the entire page text." ,
137
+ level : 1 ,
138
+ } ) ;
139
+ return this . extractPageText ( ) ;
140
+ }
141
+
131
142
if ( useTextExtract ) {
132
143
return this . textExtract ( {
133
144
instruction,
@@ -151,6 +162,52 @@ export class StagehandExtractHandler {
151
162
}
152
163
}
153
164
165
/**
 * Extracts the text of the whole page without involving an LLM.
 *
 * Waits for the DOM to settle, snapshots it, collects text annotations for
 * every entry in the selector map, removes near-duplicate annotations, and
 * renders the remainder with `formatText`.
 *
 * The DOM snapshot is restored and DOM debugging is cleaned up in `finally`
 * blocks, so a failure part-way through does not leave the page in its
 * instrumented state.
 *
 * @returns The formatted page text under `page_text`, validated by
 *   `pageTextSchema`.
 * @throws Whatever the page evaluation, annotation collection, or schema
 *   parsing throws; cleanup still runs first.
 */
private async extractPageText(): Promise<{ page_text?: string }> {
  await this.stagehandPage._waitForSettledDom();
  await this.stagehandPage.startDomDebug();

  try {
    // Snapshot taken before the page is processed; handed back to
    // `window.restoreDOM` below.
    const originalDOM = await this.stagehandPage.page.evaluate(() =>
      window.storeDOM(undefined),
    );

    try {
      const { selectorMap }: { selectorMap: Record<number, string[]> } =
        await this.stagehand.page.evaluate(() =>
          window.processAllOfDom(undefined),
        );

      await this.stagehand.page.evaluate(() =>
        window.createTextBoundingBoxes(undefined),
      );

      const containerDims = await this.getTargetDimensions();

      const allAnnotations = await this.collectAllAnnotations(
        selectorMap,
        containerDims.width,
        containerDims.height,
        containerDims.offsetLeft,
        containerDims.offsetTop,
      );

      const deduplicatedTextAnnotations =
        this.deduplicateAnnotations(allAnnotations);

      // NOTE(review): formatting now happens before the DOM is restored.
      // It only receives the already-collected annotations and a width, so
      // it presumably does not touch the page; confirm against `formatText`.
      const formattedText = formatText(
        deduplicatedTextAnnotations,
        containerDims.width,
      );

      return pageTextSchema.parse({ page_text: formattedText });
    } finally {
      // Always put the original DOM back, even when extraction failed.
      await this.stagehandPage.page.evaluate(
        (dom) => window.restoreDOM(dom, undefined),
        originalDOM,
      );
    }
  } finally {
    // Always pair `startDomDebug` with its cleanup.
    await this.stagehandPage.cleanupDomDebug();
  }
}
210
+
154
211
private async textExtract < T extends z . AnyZodObject > ( {
155
212
instruction,
156
213
schema,
@@ -160,10 +217,10 @@ export class StagehandExtractHandler {
160
217
domSettleTimeoutMs,
161
218
selector,
162
219
} : {
163
- instruction : string ;
164
- schema : T ;
220
+ instruction ? : string ;
221
+ schema ? : T ;
165
222
content ?: z . infer < T > ;
166
- llmClient : LLMClient ;
223
+ llmClient ? : LLMClient ;
167
224
requestId ?: string ;
168
225
domSettleTimeoutMs ?: number ;
169
226
selector ?: string ;
@@ -611,4 +668,41 @@ export class StagehandExtractHandler {
611
668
612
669
return allAnnotations ;
613
670
}
671
+
672
/**
 * Removes near-duplicate text annotations.
 *
 * Two annotations are duplicates when they carry the same `text` and the
 * Euclidean distance between their `bottom_left` corners is strictly less
 * than `PROXIMITY_THRESHOLD`. The first occurrence of each cluster is kept.
 *
 * The result is ordered by the first appearance of each distinct text, and
 * by input order within the same text.
 *
 * @param annotations - Annotations to deduplicate; not mutated.
 * @returns A new array containing the retained annotations.
 */
private deduplicateAnnotations(
  annotations: TextAnnotation[],
): TextAnnotation[] {
  // Retained annotations per text. Map iteration follows insertion order,
  // which gives the grouped output order described above.
  const keptByText = new Map<string, TextAnnotation[]>();

  for (const annotation of annotations) {
    const kept = keptByText.get(annotation.text);
    if (kept === undefined) {
      // First annotation with this text is always retained.
      keptByText.set(annotation.text, [annotation]);
      continue;
    }

    // Only annotations with the same text can be duplicates, so compare
    // against this text's retained list alone.
    const isDuplicate = kept.some((existing) => {
      const dx = existing.bottom_left.x - annotation.bottom_left.x;
      const dy = existing.bottom_left.y - annotation.bottom_left.y;
      return Math.hypot(dx, dy) < PROXIMITY_THRESHOLD;
    });

    if (!isDuplicate) {
      kept.push(annotation);
    }
  }

  const deduplicated: TextAnnotation[] = [];
  for (const kept of keptByText.values()) {
    for (const annotation of kept) {
      deduplicated.push(annotation);
    }
  }

  return deduplicated;
}
614
708
}
0 commit comments