Skip to content

Commit 55c9673

Browse files
extract() -> full page text (#544)
* extract page text
* changeset
1 parent 3714533 commit 55c9673

File tree

4 files changed

+117
-8
lines changed

4 files changed

+117
-8
lines changed

Diff for: .changeset/seven-glasses-drop.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
Deterministically get the full text representation of a webpage by calling `extract()` with no arguments.

Diff for: lib/StagehandPage.ts

+6-1
Original file line numberDiff line numberDiff line change
@@ -574,14 +574,19 @@ export class StagehandPage {
574574
}
575575

576576
async extract<T extends z.AnyZodObject = typeof defaultExtractSchema>(
577-
instructionOrOptions: string | ExtractOptions<T>,
577+
instructionOrOptions?: string | ExtractOptions<T>,
578578
): Promise<ExtractResult<T>> {
579579
if (!this.extractHandler) {
580580
throw new Error("Extract handler not initialized");
581581
}
582582

583583
await clearOverlays(this.page);
584584

585+
// check if user called extract() with no arguments
586+
if (!instructionOrOptions) {
587+
return this.extractHandler.extract();
588+
}
589+
585590
const options: ExtractOptions<T> =
586591
typeof instructionOrOptions === "string"
587592
? {

Diff for: lib/handlers/extractHandler.ts

+101-7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { LLMClient } from "../llm/LLMClient";
66
import { formatText } from "../utils";
77
import { StagehandPage } from "../StagehandPage";
88
import { Stagehand } from "../index";
9+
import { pageTextSchema } from "../../types/page";
910

1011
const PROXIMITY_THRESHOLD = 15;
1112

@@ -118,16 +119,26 @@ export class StagehandExtractHandler {
118119
useTextExtract = false,
119120
selector,
120121
}: {
121-
instruction: string;
122-
schema: T;
122+
instruction?: string;
123+
schema?: T;
123124
content?: z.infer<T>;
124125
chunksSeen?: Array<number>;
125-
llmClient: LLMClient;
126+
llmClient?: LLMClient;
126127
requestId?: string;
127128
domSettleTimeoutMs?: number;
128129
useTextExtract?: boolean;
129130
selector?: string;
130-
}): Promise<z.infer<T>> {
131+
} = {}): Promise<z.infer<T>> {
132+
const noArgsCalled = !instruction && !schema && !llmClient && !selector;
133+
if (noArgsCalled) {
134+
this.logger({
135+
category: "extraction",
136+
message: "Extracting the entire page text.",
137+
level: 1,
138+
});
139+
return this.extractPageText();
140+
}
141+
131142
if (useTextExtract) {
132143
return this.textExtract({
133144
instruction,
@@ -151,6 +162,52 @@ export class StagehandExtractHandler {
151162
}
152163
}
153164

165+
/**
 * Produces a plain-text rendering of the page held by `stagehandPage` and
 * returns it as `{ page_text }`, validated against `pageTextSchema`.
 *
 * Sequence:
 *  1. wait for the DOM to settle and start DOM debugging;
 *  2. snapshot the DOM with `window.storeDOM`;
 *  3. run `window.processAllOfDom` and `window.createTextBoundingBoxes`
 *     in the page, then collect and deduplicate the text annotations;
 *  4. format the annotations with `formatText`;
 *  5. put the snapshot back with `window.restoreDOM` and clean up the DOM
 *     debug state.
 *
 * Step 5 runs in `finally` blocks, so a failure in steps 3-4 no longer skips
 * the restore/cleanup calls.
 * NOTE(review): the snapshot/restore pair suggests the in-page helpers mutate
 * the DOM; confirm against their implementation.
 *
 * @returns the object `{ page_text }` as returned by `pageTextSchema.parse`.
 * @throws any error raised by the page evaluations, the annotation helpers,
 *   or `pageTextSchema.parse`; restore/cleanup are still attempted first.
 */
private async extractPageText(): Promise<{ page_text?: string }> {
  await this.stagehandPage._waitForSettledDom();
  await this.stagehandPage.startDomDebug();

  try {
    const originalDOM = await this.stagehandPage.page.evaluate(() =>
      window.storeDOM(undefined),
    );

    try {
      const { selectorMap }: { selectorMap: Record<number, string[]> } =
        await this.stagehand.page.evaluate(() =>
          window.processAllOfDom(undefined),
        );

      await this.stagehand.page.evaluate(() =>
        window.createTextBoundingBoxes(undefined),
      );

      const containerDims = await this.getTargetDimensions();

      const allAnnotations = await this.collectAllAnnotations(
        selectorMap,
        containerDims.width,
        containerDims.height,
        containerDims.offsetLeft,
        containerDims.offsetTop,
      );

      const deduplicatedTextAnnotations =
        this.deduplicateAnnotations(allAnnotations);

      // formatText only receives the in-memory annotations and a width, so
      // running it before the DOM is restored does not change its input.
      const formattedText = formatText(
        deduplicatedTextAnnotations,
        containerDims.width,
      );

      const result = { page_text: formattedText };
      return pageTextSchema.parse(result);
    } finally {
      // Always put the stored DOM snapshot back, even when extraction failed.
      await this.stagehandPage.page.evaluate(
        (dom) => window.restoreDOM(dom, undefined),
        originalDOM,
      );
    }
  } finally {
    // Pairs with startDomDebug() above; must run on both success and failure.
    await this.stagehandPage.cleanupDomDebug();
  }
}
210+
154211
private async textExtract<T extends z.AnyZodObject>({
155212
instruction,
156213
schema,
@@ -160,10 +217,10 @@ export class StagehandExtractHandler {
160217
domSettleTimeoutMs,
161218
selector,
162219
}: {
163-
instruction: string;
164-
schema: T;
220+
instruction?: string;
221+
schema?: T;
165222
content?: z.infer<T>;
166-
llmClient: LLMClient;
223+
llmClient?: LLMClient;
167224
requestId?: string;
168225
domSettleTimeoutMs?: number;
169226
selector?: string;
@@ -611,4 +668,41 @@ export class StagehandExtractHandler {
611668

612669
return allAnnotations;
613670
}
671+
672+
/**
 * Removes near-duplicate text annotations.
 *
 * Two annotations are duplicates when they carry exactly the same `text` and
 * the Euclidean distance between their `bottom_left` points is strictly less
 * than `PROXIMITY_THRESHOLD`. The first annotation seen in input order is the
 * one that is kept.
 *
 * The result is ordered by text group (in order of each text's first
 * appearance in `annotations`), and by input order within a group.
 *
 * @param annotations annotations to filter; the array is not modified.
 * @returns a new array holding the annotations that were kept.
 */
private deduplicateAnnotations(
  annotations: TextAnnotation[],
): TextAnnotation[] {
  // Kept annotations, bucketed by their text. Only annotations with equal
  // text can be duplicates, so each candidate is compared against its own
  // bucket rather than against every annotation kept so far.
  const keptByText = new Map<string, TextAnnotation[]>();

  for (const annotation of annotations) {
    let kept = keptByText.get(annotation.text);
    if (kept === undefined) {
      kept = [];
      keptByText.set(annotation.text, kept);
    }

    const isDuplicate = kept.some((existing) => {
      const dx = existing.bottom_left.x - annotation.bottom_left.x;
      const dy = existing.bottom_left.y - annotation.bottom_left.y;
      return Math.hypot(dx, dy) < PROXIMITY_THRESHOLD;
    });

    if (!isDuplicate) {
      kept.push(annotation);
    }
  }

  // Map iteration follows insertion order, so groups come out in the order
  // their text first appeared.
  const deduplicated: TextAnnotation[] = [];
  for (const kept of keptByText.values()) {
    for (const annotation of kept) {
      deduplicated.push(annotation);
    }
  }

  return deduplicated;
}
614708
}

Diff for: types/page.ts

+5
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ export const defaultExtractSchema = z.object({
1717
extraction: z.string(),
1818
});
1919

20+
/**
 * Zod object schema with a single required string field, `page_text`.
 * It is the schema type named in the zero-argument `extract()` overload's
 * result type.
 */
export const pageTextSchema = z.object({
21+
page_text: z.string(),
22+
});
23+
2024
export interface Page extends Omit<PlaywrightPage, "on"> {
2125
act(action: string): Promise<ActResult>;
2226
act(options: ActOptions): Promise<ActResult>;
@@ -28,6 +32,7 @@ export interface Page extends Omit<PlaywrightPage, "on"> {
2832
extract<T extends z.AnyZodObject>(
2933
options: ExtractOptions<T>,
3034
): Promise<ExtractResult<T>>;
35+
extract(): Promise<ExtractResult<typeof pageTextSchema>>;
3136

3237
observe(): Promise<ObserveResult[]>;
3338
observe(instruction: string): Promise<ObserveResult[]>;

0 commit comments

Comments
 (0)