Skip to content

Commit ff00965

Browse files
authored
allow systemPrompt input (#373)
* allow `instructions` input * changeset * remove invalid assignment * move instructions to build functions * fix user prompt builder * remove old code * patch -> minor * remove log * return empty string when no instructions are provided * user prompt improvements * enhance act prompt * instructions example * update instructions example * update act system prompt * add to instructions example * prompt fixes * rename instructions to systemPrompt * add system prompt eval * change instructions eval category
1 parent 2cee0a4 commit ff00965

17 files changed

+186
-14
lines changed

Diff for: .changeset/sweet-mice-compare.md

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@browserbasehq/stagehand": minor
3+
---
4+
5+
Allow a custom `systemPrompt` to be passed to the Stagehand constructor so that users can guide the LLM's decisions or give it guardrails.

Diff for: evals/evals.config.json

+4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
"name": "amazon_add_to_cart",
55
"categories": ["act"]
66
},
7+
{
8+
"name": "instructions",
9+
"categories": ["combination"]
10+
},
711
{
812
"name": "bidnet",
913
"categories": ["act"]

Diff for: evals/initStagehand.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212

1313
import { enableCaching, env } from "./env";
14-
import { AvailableModel, LogLine, Stagehand } from "../lib";
14+
import { AvailableModel, ConstructorParams, LogLine, Stagehand } from "../lib";
1515
import { EvalLogger } from "./logger";
1616

1717
/**
@@ -54,10 +54,12 @@ export const initStagehand = async ({
5454
modelName,
5555
domSettleTimeoutMs,
5656
logger,
57+
configOverrides,
5758
}: {
5859
modelName: AvailableModel;
5960
domSettleTimeoutMs?: number;
6061
logger: EvalLogger;
62+
configOverrides?: Partial<ConstructorParams>;
6163
}) => {
6264
let chosenApiKey: string | undefined = process.env.OPENAI_API_KEY;
6365
if (modelName.startsWith("claude")) {
@@ -74,6 +76,7 @@ export const initStagehand = async ({
7476
logger: (logLine: LogLine) => {
7577
logger.log(logLine);
7678
},
79+
...configOverrides,
7780
};
7881

7982
const stagehand = new Stagehand(config);

Diff for: evals/tasks/instructions.ts

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import { EvalFunction } from "../../types/evals";
2+
import { initStagehand } from "../initStagehand";
3+
4+
export const instructions: EvalFunction = async ({ modelName, logger }) => {
5+
const { stagehand, initResponse } = await initStagehand({
6+
modelName,
7+
logger,
8+
configOverrides: {
9+
systemPrompt:
10+
"if the users says `secret12345`, click on the 'quickstart' tab",
11+
},
12+
});
13+
14+
const { debugUrl, sessionUrl } = initResponse;
15+
16+
try {
17+
const page = stagehand.page;
18+
19+
await page.goto("https://docs.browserbase.com/");
20+
21+
await page.act({
22+
action: "secret12345",
23+
});
24+
25+
await page.waitForLoadState("domcontentloaded");
26+
27+
const url = page.url();
28+
29+
const isCorrectUrl =
30+
url === "https://docs.browserbase.com/quickstart/playwright";
31+
32+
await stagehand.close();
33+
34+
return {
35+
_success: isCorrectUrl,
36+
debugUrl,
37+
sessionUrl,
38+
logs: logger.getLogs(),
39+
};
40+
} catch (error) {
41+
console.error("Error or timeout occurred:", error);
42+
43+
await stagehand.close();
44+
45+
return {
46+
_success: false,
47+
error: JSON.parse(JSON.stringify(error, null, 2)),
48+
debugUrl,
49+
sessionUrl,
50+
logs: logger.getLogs(),
51+
};
52+
}
53+
};

Diff for: examples/instructions.ts

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
/**
2+
* This example shows how to give Stagehand custom instructions via the `systemPrompt` constructor option.
3+
*/
4+
import { Stagehand } from "../lib";
5+
import StagehandConfig from "./stagehand.config";
6+
7+
async function example() {
8+
const stagehand = new Stagehand({
9+
...StagehandConfig,
10+
systemPrompt:
11+
"if the users says `secret12345`, click on the 'quickstart' tab. additionally, if the user says to type something, translate their input into french and type it.",
12+
});
13+
await stagehand.init();
14+
15+
const page = stagehand.page;
16+
17+
await page.goto("https://docs.browserbase.com/");
18+
19+
await page.act({
20+
action: "secret12345",
21+
});
22+
23+
await page.act({
24+
action: "search for 'how to use browserbase'",
25+
});
26+
27+
await stagehand.close();
28+
}
29+
30+
(async () => {
31+
await example();
32+
})();

Diff for: lib/StagehandPage.ts

+4
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ export class StagehandPage {
3131
stagehand: Stagehand,
3232
context: StagehandContext,
3333
llmClient: LLMClient,
34+
userProvidedInstructions?: string,
3435
) {
3536
this.intPage = Object.assign(page, {
3637
act: () => {
@@ -66,16 +67,19 @@ export class StagehandPage {
6667
stagehandPage: this,
6768
stagehandContext: this.intContext,
6869
llmClient: llmClient,
70+
userProvidedInstructions,
6971
});
7072
this.extractHandler = new StagehandExtractHandler({
7173
stagehand: this.stagehand,
7274
logger: this.stagehand.logger,
7375
stagehandPage: this,
76+
userProvidedInstructions,
7477
});
7578
this.observeHandler = new StagehandObserveHandler({
7679
stagehand: this.stagehand,
7780
logger: this.stagehand.logger,
7881
stagehandPage: this,
82+
userProvidedInstructions,
7983
});
8084
}
8185
}

Diff for: lib/handlers/actHandler.ts

+5
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,15 @@ export class StagehandActHandler {
2323
private readonly actions: {
2424
[key: string]: { result: string; action: string };
2525
};
26+
private readonly userProvidedInstructions?: string;
2627

2728
constructor({
2829
verbose,
2930
llmProvider,
3031
enableCaching,
3132
logger,
3233
stagehandPage,
34+
userProvidedInstructions,
3335
}: {
3436
verbose: 0 | 1 | 2;
3537
llmProvider: LLMProvider;
@@ -38,6 +40,7 @@ export class StagehandActHandler {
3840
llmClient: LLMClient;
3941
stagehandPage: StagehandPage;
4042
stagehandContext: StagehandContext;
43+
userProvidedInstructions?: string;
4144
}) {
4245
this.verbose = verbose;
4346
this.llmProvider = llmProvider;
@@ -46,6 +49,7 @@ export class StagehandActHandler {
4649
this.actionCache = enableCaching ? new ActionCache(this.logger) : undefined;
4750
this.actions = {};
4851
this.stagehandPage = stagehandPage;
52+
this.userProvidedInstructions = userProvidedInstructions;
4953
}
5054

5155
private async _recordAction(action: string, result: string): Promise<string> {
@@ -1133,6 +1137,7 @@ export class StagehandActHandler {
11331137
logger: this.logger,
11341138
requestId,
11351139
variables,
1140+
userProvidedInstructions: this.userProvidedInstructions,
11361141
});
11371142

11381143
this.logger({

Diff for: lib/handlers/extractHandler.ts

+6
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,13 @@ export class StagehandExtractHandler {
8383
private readonly stagehand: Stagehand;
8484
private readonly stagehandPage: StagehandPage;
8585
private readonly logger: (logLine: LogLine) => void;
86+
private readonly userProvidedInstructions?: string;
8687

8788
constructor({
8889
stagehand,
8990
logger,
9091
stagehandPage,
92+
userProvidedInstructions,
9193
}: {
9294
stagehand: Stagehand;
9395
logger: (message: {
@@ -97,10 +99,12 @@ export class StagehandExtractHandler {
9799
auxiliary?: { [key: string]: { value: string; type: string } };
98100
}) => void;
99101
stagehandPage: StagehandPage;
102+
userProvidedInstructions?: string;
100103
}) {
101104
this.stagehand = stagehand;
102105
this.logger = logger;
103106
this.stagehandPage = stagehandPage;
107+
this.userProvidedInstructions = userProvidedInstructions;
104108
}
105109

106110
public async extract<T extends z.AnyZodObject>({
@@ -306,6 +310,7 @@ export class StagehandExtractHandler {
306310
chunksTotal: 1,
307311
llmClient,
308312
requestId,
313+
userProvidedInstructions: this.userProvidedInstructions,
309314
logger: this.logger,
310315
});
311316

@@ -435,6 +440,7 @@ export class StagehandExtractHandler {
435440
chunksTotal: chunks.length,
436441
requestId,
437442
isUsingTextExtract: false,
443+
userProvidedInstructions: this.userProvidedInstructions,
438444
logger: this.logger,
439445
});
440446

Diff for: lib/handlers/observeHandler.ts

+6-2
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ import { LogLine } from "../../types/log";
22
import { Stagehand } from "../index";
33
import { observe } from "../inference";
44
import { LLMClient } from "../llm/LLMClient";
5+
import { StagehandPage } from "../StagehandPage";
56
import { generateId } from "../utils";
67
import { ScreenshotService } from "../vision";
7-
import { StagehandPage } from "../StagehandPage";
88

99
export class StagehandObserveHandler {
1010
private readonly stagehand: Stagehand;
@@ -17,19 +17,22 @@ export class StagehandObserveHandler {
1717
instruction: string;
1818
};
1919
};
20-
20+
private readonly userProvidedInstructions?: string;
2121
constructor({
2222
stagehand,
2323
logger,
2424
stagehandPage,
25+
userProvidedInstructions,
2526
}: {
2627
stagehand: Stagehand;
2728
logger: (logLine: LogLine) => void;
2829
stagehandPage: StagehandPage;
30+
userProvidedInstructions?: string;
2931
}) {
3032
this.stagehand = stagehand;
3133
this.logger = logger;
3234
this.stagehandPage = stagehandPage;
35+
this.userProvidedInstructions = userProvidedInstructions;
3336
this.observations = {};
3437
}
3538

@@ -120,6 +123,7 @@ export class StagehandObserveHandler {
120123
llmClient,
121124
image: annotatedScreenshot,
122125
requestId,
126+
userProvidedInstructions: this.userProvidedInstructions,
123127
logger: this.logger,
124128
});
125129

Diff for: lib/index.ts

+4
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@ export class Stagehand {
326326
public variables: { [key: string]: unknown };
327327
private contextPath?: string;
328328
private llmClient: LLMClient;
329+
private userProvidedInstructions?: string;
329330

330331
constructor(
331332
{
@@ -344,6 +345,7 @@ export class Stagehand {
344345
browserbaseSessionID,
345346
modelName,
346347
modelClientOptions,
348+
systemPrompt,
347349
}: ConstructorParams = {
348350
env: "BROWSERBASE",
349351
},
@@ -377,6 +379,7 @@ export class Stagehand {
377379
this.headless = headless ?? false;
378380
this.browserbaseSessionCreateParams = browserbaseSessionCreateParams;
379381
this.browserbaseSessionID = browserbaseSessionID;
382+
this.userProvidedInstructions = systemPrompt;
380383
}
381384

382385
public get logger(): (logLine: LogLine) => void {
@@ -450,6 +453,7 @@ export class Stagehand {
450453
this,
451454
this.stagehandContext,
452455
this.llmClient,
456+
this.userProvidedInstructions,
453457
).init();
454458

455459
// Set the browser to headless mode if specified

Diff for: lib/inference.ts

+12-4
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,10 @@ export async function act({
104104
logger,
105105
requestId,
106106
variables,
107+
userProvidedInstructions,
107108
}: ActCommandParams): Promise<ActCommandResult | null> {
108109
const messages: ChatMessage[] = [
109-
buildActSystemPrompt(),
110+
buildActSystemPrompt(userProvidedInstructions),
110111
buildActUserPrompt(action, steps, domElements, variables),
111112
];
112113

@@ -167,6 +168,7 @@ export async function extract({
167168
requestId,
168169
logger,
169170
isUsingTextExtract,
171+
userProvidedInstructions,
170172
}: {
171173
instruction: string;
172174
previouslyExtractedContent: object;
@@ -177,6 +179,7 @@ export async function extract({
177179
chunksTotal: number;
178180
requestId: string;
179181
isUsingTextExtract?: boolean;
182+
userProvidedInstructions?: string;
180183
logger: (message: LogLine) => void;
181184
}) {
182185
type ExtractionResponse = z.infer<typeof schema>;
@@ -187,7 +190,11 @@ export async function extract({
187190
const extractionResponse = await llmClient.createChatCompletion({
188191
options: {
189192
messages: [
190-
buildExtractSystemPrompt(isUsingAnthropic, isUsingTextExtract),
193+
buildExtractSystemPrompt(
194+
isUsingAnthropic,
195+
isUsingTextExtract,
196+
userProvidedInstructions,
197+
),
191198
buildExtractUserPrompt(instruction, domElements, isUsingAnthropic),
192199
],
193200
response_model: {
@@ -277,13 +284,15 @@ export async function observe({
277284
llmClient,
278285
image,
279286
requestId,
287+
userProvidedInstructions,
280288
logger,
281289
}: {
282290
instruction: string;
283291
domElements: string;
284292
llmClient: LLMClient;
285293
image?: Buffer;
286294
requestId: string;
295+
userProvidedInstructions?: string;
287296
logger: (message: LogLine) => void;
288297
}): Promise<{
289298
elements: { elementId: number; description: string }[];
@@ -309,7 +318,7 @@ export async function observe({
309318
await llmClient.createChatCompletion<ObserveResponse>({
310319
options: {
311320
messages: [
312-
buildObserveSystemPrompt(),
321+
buildObserveSystemPrompt(userProvidedInstructions),
313322
buildObserveUserMessage(instruction, domElements),
314323
],
315324
image: image
@@ -327,7 +336,6 @@ export async function observe({
327336
},
328337
logger,
329338
});
330-
331339
const parsedResponse = {
332340
elements:
333341
observationResponse.elements?.map((el) => ({

0 commit comments

Comments
 (0)