Skip to content

Commit

Permalink
Add act evals on stagehand.page (#328)
Browse files Browse the repository at this point in the history
* move act evals to stagehand.page

* add basic act and make act necessary in type
  • Loading branch information
kamath authored Dec 23, 2024
1 parent d8ab6e5 commit 027c6a6
Show file tree
Hide file tree
Showing 37 changed files with 96 additions and 92 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ jobs:
- name: Run E2E Tests
run: npm run e2e

run-act-evals:
run-act-evals:
runs-on: ubuntu-latest
timeout-minutes: 25
needs: [run-text-extract-evals]
Expand Down Expand Up @@ -248,8 +248,6 @@ run-act-evals:
exit 1
fi

run-observe-evals:
runs-on: ubuntu-latest
timeout-minutes: 25
Expand Down
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ const stagehand = new Stagehand({
```javascript
await stagehand.init();
await stagehand.page.goto("https://github.com/browserbase/stagehand");
await stagehand.act({ action: "click on the contributors" });
await stagehand.page.act({ action: "click on the contributors" });
const contributor = await stagehand.extract({
instruction: "extract the top contributor",
schema: z.object({
Expand Down Expand Up @@ -412,7 +412,7 @@ Prompting Stagehand is more literal and atomic than other higher level framework
- **Use specific and concise actions**

```javascript
await stagehand.act({ action: "click the login button" });
await stagehand.page.act({ action: "click the login button" });

const productInfo = await stagehand.extract({
instruction: "find the red shoes",
Expand All @@ -429,16 +429,16 @@ Instead of combining actions:

```javascript
// Avoid this
await stagehand.act({ action: "log in and purchase the first item" });
await stagehand.page.act({ action: "log in and purchase the first item" });
```

Split them into individual steps:

```javascript
await stagehand.act({ action: "click the login button" });
await stagehand.page.act({ action: "click the login button" });
// ...additional steps to log in...
await stagehand.act({ action: "click on the first item" });
await stagehand.act({ action: "click the purchase button" });
await stagehand.page.act({ action: "click on the first item" });
await stagehand.page.act({ action: "click the purchase button" });
```

- **Use `observe()` to get actionable suggestions from the current page**
Expand All @@ -454,21 +454,21 @@ console.log("Possible actions:", actions);

```javascript
// Too vague
await stagehand.act({ action: "find something interesting on the page" });
await stagehand.page.act({ action: "find something interesting on the page" });
```

- **Combine multiple actions into one instruction**

```javascript
// Avoid combining actions
await stagehand.act({ action: "fill out the form and submit it" });
await stagehand.page.act({ action: "fill out the form and submit it" });
```

- **Expect Stagehand to perform high-level planning or reasoning**

```javascript
// Outside Stagehand's scope
await stagehand.act({ action: "book the cheapest flight available" });
await stagehand.page.act({ action: "book the cheapest flight available" });
```

By following these guidelines, you'll increase the reliability and effectiveness of your web automations with Stagehand. Remember, Stagehand excels at executing precise, well-defined actions, so keeping your instructions atomic will lead to the best outcomes.
Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/allrecipes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export const allrecipes: EvalFunction = async ({
waitUntil: "domcontentloaded",
});

await stagehand.act({
await stagehand.page.act({
action: 'Search for "chocolate chip cookies" using the search bar',
});

Expand Down
4 changes: 2 additions & 2 deletions evals/tasks/amazon_add_to_cart.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@ export const amazon_add_to_cart: EvalFunction = async ({

await stagehand.page.waitForTimeout(5000);

await stagehand.act({
await stagehand.page.act({
action: "click the 'Add to Cart' button",
});

await stagehand.page.waitForTimeout(2000);

await stagehand.act({
await stagehand.page.act({
action: "click the 'Proceed to checkout' button",
});

Expand Down
14 changes: 7 additions & 7 deletions evals/tasks/apple.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,18 @@ export const apple: EvalFunction = async ({ modelName, logger }) => {

await stagehand.page.goto("https://www.apple.com/iphone-16-pro/");

await stagehand.act({ action: "click on the buy button" });
await stagehand.act({ action: "select the Pro Max model" });
await stagehand.act({ action: "select the natural titanium color" });
await stagehand.act({ action: "select the 256GB storage option" });
await stagehand.act({
await stagehand.page.act({ action: "click on the buy button" });
await stagehand.page.act({ action: "select the Pro Max model" });
await stagehand.page.act({ action: "select the natural titanium color" });
await stagehand.page.act({ action: "select the 256GB storage option" });
await stagehand.page.act({
action: "click on the 'select a smartphone' trade-in option",
});

await stagehand.act({
await stagehand.page.act({
action: "select the iPhone 13 mini model from the dropdown",
});
await stagehand.act({
await stagehand.page.act({
action: "select the iPhone 13 mini is in good condition",
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/arxiv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export const arxiv: EvalFunction = async ({
try {
await stagehand.page.goto("https://arxiv.org/search/");

await stagehand.act({
await stagehand.page.act({
action: "search for papers about web agents with multimodal models",
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/bidnet.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export const bidnet: EvalFunction = async ({ modelName, logger }) => {

await stagehand.page.goto("https://www.bidnetdirect.com/");

await stagehand.act({
await stagehand.page.act({
action: 'Click on the "Construction" keyword',
});

Expand Down
6 changes: 3 additions & 3 deletions evals/tasks/combination_sauce.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ export const combination_sauce: EvalFunction = async ({
useTextExtract,
});

await stagehand.act({
await stagehand.page.act({
action: `enter username 'standard_user'`,
});

await stagehand.act({
await stagehand.page.act({
action: `enter password '${password}'`,
});

await stagehand.act({
await stagehand.page.act({
action: "click on 'login'",
});

Expand Down
4 changes: 2 additions & 2 deletions evals/tasks/costar.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ export const costar: EvalFunction = async ({
),
]);

await stagehand.act({ action: "click on the first article" });
await stagehand.page.act({ action: "click on the first article" });

await stagehand.act({
await stagehand.page.act({
action: "click on the learn more button for the first job",
});

Expand Down
10 changes: 5 additions & 5 deletions evals/tasks/expedia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,14 @@ export const expedia: EvalFunction = async ({ modelName, logger }) => {

try {
await stagehand.page.goto("https://www.expedia.com/flights");
await stagehand.act({
await stagehand.page.act({
action:
"find round-trip flights from San Francisco (SFO) to Toronto (YYZ) for Jan 1, 2025 (up to one to two weeks)",
});
await stagehand.act({ action: "Go to the first non-stop flight" });
await stagehand.act({ action: "select the cheapest flight" });
await stagehand.act({ action: "click on the first non-stop flight" });
await stagehand.act({ action: "Take me to the checkout page" });
await stagehand.page.act({ action: "Go to the first non-stop flight" });
await stagehand.page.act({ action: "select the cheapest flight" });
await stagehand.page.act({ action: "click on the first non-stop flight" });
await stagehand.page.act({ action: "Take me to the checkout page" });

const url = stagehand.page.url();
return {
Expand Down
10 changes: 5 additions & 5 deletions evals/tasks/expedia_search.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,18 @@ export const expedia_search: EvalFunction = async ({ modelName, logger }) => {
try {
await stagehand.page.goto("https://www.expedia.com/flights");

await stagehand.act({
await stagehand.page.act({
action:
"find round-trip flights from San Francisco (SFO) to Toronto (YYZ) for Jan 1, 2025 (up to one to two weeks)",
});

await stagehand.act({ action: "Go to the first non-stop flight" });
await stagehand.page.act({ action: "Go to the first non-stop flight" });

await stagehand.act({ action: "select the cheapest flight" });
await stagehand.page.act({ action: "select the cheapest flight" });

await stagehand.act({ action: "click on the first non-stop flight" });
await stagehand.page.act({ action: "click on the first non-stop flight" });

await stagehand.act({
await stagehand.page.act({
action: "Take me to the checkout page",
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/extract_collaborators.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ export const extract_collaborators: EvalFunction = async ({

try {
await stagehand.page.goto("https://github.com/facebook/react");
await stagehand.act({
await stagehand.page.act({
action: "find the contributors section",
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/extract_github_commits.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export const extract_github_commits: EvalFunction = async ({
try {
await stagehand.page.goto("https://github.com/facebook/react");

await stagehand.act({
await stagehand.page.act({
action:
"find commit history, generally described by the number of commits",
});
Expand Down
6 changes: 3 additions & 3 deletions evals/tasks/extract_partners.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,15 @@ export const extract_partners: EvalFunction = async ({
try {
await stagehand.page.goto("https://ramp.com");

await stagehand.act({
await stagehand.page.act({
action: "move down to the bottom of the page.",
});

await stagehand.act({
await stagehand.page.act({
action: "Close the popup.",
});

await stagehand.act({
await stagehand.page.act({
action: "Find and click on the link that leads to the partners page.",
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/extract_snowshoeing_destinations.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ export const extract_snowshoeing_destinations: EvalFunction = async ({
"https://www.cbisland.com/blog/10-snowshoeing-adventures-on-cape-breton-island/",
);

await stagehand.act({ action: "reject the cookies" });
await stagehand.page.act({ action: "reject the cookies" });

const snowshoeing_regions = await stagehand.extract({
instruction:
Expand Down
12 changes: 6 additions & 6 deletions evals/tasks/google_jobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@ export const google_jobs: EvalFunction = async ({

try {
await stagehand.page.goto("https://www.google.com/");
await stagehand.act({ action: "click on the about page" });
await stagehand.act({ action: "click on the careers page" });
await stagehand.act({ action: "input data scientist into role" });
await stagehand.act({ action: "input new york city into location" });
await stagehand.act({ action: "click on the search button" });
await stagehand.act({ action: "click on the first job link" });
await stagehand.page.act({ action: "click on the about page" });
await stagehand.page.act({ action: "click on the careers page" });
await stagehand.page.act({ action: "input data scientist into role" });
await stagehand.page.act({ action: "input new york city into location" });
await stagehand.page.act({ action: "click on the search button" });
await stagehand.page.act({ action: "click on the first job link" });

const jobDetails = await stagehand.extract({
instruction:
Expand Down
8 changes: 4 additions & 4 deletions evals/tasks/homedepot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ export const homedepot: EvalFunction = async ({

try {
await stagehand.page.goto("https://www.homedepot.com/");
await stagehand.act({ action: "search for gas grills" });
await stagehand.act({ action: "click on the best selling gas grill" });
await stagehand.act({ action: "click on the Product Details" });
await stagehand.act({ action: "find the Primary Burner BTU" });
await stagehand.page.act({ action: "search for gas grills" });
await stagehand.page.act({ action: "click on the best selling gas grill" });
await stagehand.page.act({ action: "click on the Product Details" });
await stagehand.page.act({ action: "find the Primary Burner BTU" });

const productSpecs = await stagehand.extract({
instruction: "Extract the Primary exact Burner BTU of the product",
Expand Down
4 changes: 2 additions & 2 deletions evals/tasks/ibm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ export const ibm: EvalFunction = async ({ modelName, logger }) => {
try {
await stagehand.page.goto("https://www.ibm.com/artificial-intelligence");

await stagehand.act({
await stagehand.page.act({
action: "if there is a cookies popup, accept it",
});

Expand All @@ -24,7 +24,7 @@ export const ibm: EvalFunction = async ({ modelName, logger }) => {
}),
});

await stagehand.act({
await stagehand.page.act({
action: "click on the 'explore AI use cases' button",
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/imdb_movie_details.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ export const imdb_movie_details: EvalFunction = async ({
await stagehand.page.goto("https://www.imdb.com/title/tt0111161/", {
waitUntil: "domcontentloaded",
});
await stagehand.act({
await stagehand.page.act({
action: "click on the movie ratings",
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/ionwave.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export const ionwave: EvalFunction = async ({ modelName, logger }) => {

await stagehand.page.goto("https://elpasotexas.ionwave.net/Login.aspx");

await stagehand.act({
await stagehand.page.act({
action: 'Click on "Closed Bids"',
});

Expand Down
10 changes: 5 additions & 5 deletions evals/tasks/laroche_form.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ export const laroche_form: EvalFunction = async ({ modelName, logger }) => {
"https://www.laroche-posay.us/offers/anthelios-melt-in-milk-sunscreen-sample.html",
);

await stagehand.act({ action: "close the privacy policy popup" });
await stagehand.page.act({ action: "close the privacy policy popup" });
await stagehand.page
.waitForNavigation({ waitUntil: "domcontentloaded", timeout: 10000 })
.catch(() => {});

await stagehand.act({ action: "fill the last name field" });
await stagehand.act({ action: "fill address 1 field" });
await stagehand.act({ action: "select a state" });
await stagehand.act({ action: "select a skin type" });
await stagehand.page.act({ action: "fill the last name field" });
await stagehand.page.act({ action: "fill address 1 field" });
await stagehand.page.act({ action: "select a state" });
await stagehand.page.act({ action: "select a skin type" });

return {
_success: true,
Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/nonsense_action.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ export const nonsense_action: EvalFunction = async ({ modelName, logger }) => {
try {
await stagehand.page.goto("https://www.homedepot.com/");

const result = await stagehand.act({
const result = await stagehand.page.act({
action: "click on the first banana",
});
console.log("result", result);
Expand Down
4 changes: 2 additions & 2 deletions evals/tasks/peeler_complex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ export const peeler_complex: EvalFunction = async ({
try {
await stagehand.page.goto(`https://chefstoys.com/`, { timeout: 60000 });

await stagehand.act({
await stagehand.page.act({
action: "search for %search_query%",
variables: {
search_query: "peeler",
},
});

await stagehand.act({
await stagehand.page.act({
action: 'click on the first "OXO" brand peeler',
});

Expand Down
2 changes: 1 addition & 1 deletion evals/tasks/peeler_simple.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export const peeler_simple: EvalFunction = async ({ modelName, logger }) => {
}

await stagehand.page.goto(`file://${process.cwd()}/evals/assets/peeler.html`);
await stagehand.act({ action: "add the peeler to cart" });
await stagehand.page.act({ action: "add the peeler to cart" });

const successMessageLocator = stagehand.page.locator(
'text="Congratulations, you have 1 A in your cart"',
Expand Down
Loading

0 comments on commit 027c6a6

Please sign in to comment.