Add page assert tool, improve agent prompt, and add structured output to agent

gurvinder-dhillon · gurvinder-dhillon · commit 2585b1f7d333 · 2025-04-07T18:39:49.000-07:00
diff --git a/src/auto.ts b/src/auto.ts
@@ -1,4 +1,5 @@
 import { test as base } from '@playwright/test';
+import { z } from "zod";
 import { AutoConfig } from './types';
 import { sessionManager, context } from './browser';
 import { createReactAgent } from '@langchain/langgraph/prebuilt';
@@ -21,8 +22,16 @@ import {
     browser_choose_file,
     browser_go_forward,
     browser_assert,
+    browser_page_assert,
 } from './tools';
 
+// Zod schema for the agent's structured reply (passed as responseFormat.schema below): three required string fields — action, error, output.
+const AutoResponseSchema = z.object({
+    action: z.string().describe("The type of action performed (assert, click, type, etc)"),
+    error: z.string().describe("Error message if any, empty string if none"),
+    output: z.string().describe("Raw output from the action")
+});
+
 // Extend base test to automatically track page
 export const test = base.extend({
     page: async ({ page }, use) => {
@@ -51,8 +60,15 @@ const initializeAgent = () => {
         - For pressing keys, use the pressKey tool
         - For saving PDFs, use the savePDF tool
         - For choosing files, use the chooseFile tool
-        - For verification and assertions, use the assert tool
-        Return the operation result or content as requested.`;
+        - While calling the verification and assertion tools, DO NOT assume or make up any expected values. Use the values as provided in the instruction only.
+        - For verification and assertions like {"isVisible", "hasText", "isEnabled", "isChecked"}, use the browser_assert tool
+        - For page assertions like {page title, current page url} use the browser_page_assert tools
+        Return a stringified JSON object with exactly these fields:
+            {
+                "action": "<type of action performed>",
+                "error": "<error message or empty string>",
+                "output": "<your output message>"
+            }`;
 
     const agent = createReactAgent({
         llm: model,
@@ -73,8 +89,18 @@ const initializeAgent = () => {
             browser_choose_file,
             browser_assert,
             browser_go_forward,
+            browser_page_assert,
         ],
         stateModifier: prompt,
+        responseFormat: {
+            prompt: `Return a stringified JSON object with exactly these fields:
+            {
+                "action": "<type of action performed>",
+                "error": "<error message or empty string>",
+                "output": "<your output message>"
+            }`,
+            schema: AutoResponseSchema
+        }
     });
 
     return { agent };
@@ -87,13 +113,17 @@ export async function auto(
 ): Promise<any> {
     console.log(`[Auto] Processing instruction: "${instruction}"`);
 
-    if (config?.page) {
+    if (config?.page)
+    {
         sessionManager.setPage(config.page);
         console.log(`[Auto] Page set from config`);
-    } else {
-        try {
+    } else
+    {
+        try
+        {
             sessionManager.getPage();
-        } catch {
+        } catch
+        {
             // In standalone mode, create a new page
             console.log(`[Auto] No existing page, creating new page`);
             await context.createPage();
@@ -107,14 +137,16 @@ export async function auto(
         messages: [new HumanMessage(instruction)],
     });
 
-    console.log('Agent result:', result);
+    //console.log('Agent result:', result);
     // Process agent result
     const response = result.messages?.[-1]?.content;
     console.log(`[Auto] Agent response:`, response);
 
-    if (typeof response === 'string') {
+    if (typeof response === 'string')
+    {
         // If it's a success message, return null to match original behavior
-        if (response.startsWith('Successfully')) {
+        if (response.startsWith('Successfully'))
+        {
             console.log(`[Auto] Detected success message, returning null`);
             return null;
         }
diff --git a/src/tools/browser_assert.ts b/src/tools/browser_assert.ts
@@ -1,8 +1,8 @@
 import { tool } from '@langchain/core/tools';
 import { z } from 'zod';
+import { test, expect } from '@playwright/test';
 import { runAndWait } from './utils';
 import { context } from '../browser/context';
-import { expect } from '@playwright/test';
 
 /**
  * Schema for assertions with descriptions for the AI model
@@ -27,53 +27,66 @@ const assertSchema = z.object({
 
 export const browser_assert = tool(
     async ({ element, ref, assertion, expected }) => {
-        try {
+        try
+        {
             console.log(`[Assert Tool] Starting operation:`, {
                 element,
                 ref,
                 assertion,
                 expected,
             });
 
-            const result = await runAndWait(
-                context,
-                `Asserted "${element}" ${assertion}${expected ? ` equals "${expected}"` : ''}`,
+            const result = await test.step(
+                `Assert "${element}" ${assertion}${expected ? ` equals "${expected}"` : ''}`,
                 async () => {
-                    const locator = context.refLocator(ref);
-                    console.log(`[Assert Tool] Performing assertion`);
+                    return await runAndWait(
+                        context,
+                        `Asserted "${element}" ${assertion}${expected ? ` equals "${expected}"` : ''}`,
+                        async () => {
+                            const locator = context.refLocator(ref);
+                            console.log(`[Assert Tool] Performing assertion`);
 
-                    switch (assertion.toLowerCase()) {
-                        case 'isvisible':
-                            await expect(locator).toBeVisible();
-                            return 'Element is visible';
-                        case 'hastext':
-                            if (!expected)
-                                throw new Error(
-                                    'Expected value required for hasText assertion',
-                                );
-                            await expect(locator).toHaveText(expected);
-                            return `Element has text "${expected}"`;
-                        case 'isenabled':
-                            await expect(locator).toBeEnabled();
-                            return 'Element is enabled';
-                        case 'ischecked':
-                            await expect(locator).toBeChecked();
-                            return 'Element is checked';
-                        default:
-                            throw new Error(
-                                `Unsupported assertion type: ${assertion}`,
-                            );
-                    }
+                            // Create descriptive message for both success and error cases
+                            const message = `${element} should ${assertion}${expected ? ` with text "${expected}"` : ''}`;
+
+                            switch (assertion.toLowerCase())
+                            {
+                                case 'isvisible':
+                                    await expect(locator, message).toBeVisible();
+                                    return message;
+                                case 'hastext':
+                                    if (!expected)
+                                        throw new Error(
+                                            'Expected value required for hasText assertion',
+                                        );
+                                    await expect(locator, message).toHaveText(expected);
+                                    return message;
+                                case 'isenabled':
+                                    await expect(locator, message).toBeEnabled();
+                                    return message;
+                                case 'ischecked':
+                                    await expect(locator, message).toBeChecked();
+                                    return message;
+                                default:
+                                    throw new Error(
+                                        `Unsupported assertion type: ${assertion}`,
+                                    );
+                            }
+                        },
+                        true,
+                    );
                 },
-                true,
             );
 
             console.log(`[Assert Tool] Operation completed`);
             return result;
-        } catch (error) {
+        } catch (error)
+        {
+            // Simple error handling using Playwright's built-in error messages
             const errorMessage = `Assertion failed: ${error instanceof Error ? error.message : 'Unknown error'}`;
             console.error(`[Assert Tool] Error:`, errorMessage);
             return errorMessage;
+
         }
     },
     {
diff --git a/src/tools/browser_page_assert.ts b/src/tools/browser_page_assert.ts
@@ -0,0 +1,87 @@
+import { tool } from '@langchain/core/tools';
+import { z } from 'zod';
+import { test, expect } from '@playwright/test';
+import { runAndWait } from './utils';
+import { context } from '../browser/context';
+
+/**
+ * Input schema for the page_assert tool: `assertion` is a required string naming the check; `expected` is an optional string that the tool requires for hasTitle/hasURL.
+ */
+const pageAssertSchema = z.object({
+    assertion: z
+        .string()
+        .describe('Type of assertion to perform (e.g., "hasTitle", "hasURL", "isOK")'), // "isOK" is listed here, but the tool's isok branch currently always throws (not yet implemented)
+    expected: z
+        .string()
+        .optional()
+        .describe('Expected value for title/URL assertions'),
+});
+// LangChain tool 'page_assert': runs a Playwright page-level assertion inside a test.step; on success returns runAndWait's result, on failure returns an error string instead of throwing.
+export const browser_page_assert = tool(
+    async ({ assertion, expected }) => {
+        try
+        {
+            console.log(`[Page Assert Tool] Starting operation:`, {
+                assertion,
+                expected,
+            });
+
+            const result = await test.step(
+                `Assert page ${assertion}${expected ? ` equals "${expected}"` : ''}`,
+                async () => {
+                    return await runAndWait(
+                        context,
+                        `Asserted page ${assertion}${expected ? ` equals "${expected}"` : ''}`,
+                        async () => {
+                            const page = context.existingPage();
+                            console.log(`[Page Assert Tool] Performing assertion`);
+
+                            // One message serves as expect()'s custom failure message and as this callback's return value on success
+                            const message = `Page should ${assertion}${expected ? ` "${expected}"` : ''}`;
+
+                            switch (assertion.toLowerCase()) // assertion names are matched case-insensitively
+                            {
+                                case 'hastitle':
+                                    if (!expected) // truthiness check: an empty string is rejected as well as undefined
+                                        throw new Error(
+                                            'Expected value required for hasTitle assertion',
+                                        );
+                                    await expect(page, message).toHaveTitle(expected);
+                                    return message;
+                                case 'hasurl':
+                                    if (!expected) // truthiness check: an empty string is rejected as well as undefined
+                                        throw new Error(
+                                            'Expected value required for hasURL assertion',
+                                        );
+                                    await expect(page, message).toHaveURL(expected);
+                                    return message;
+                                case 'isok': {
+                                    // TODO: Implement response tracking in context; until then this branch always throws
+                                    throw new Error('Response assertions not yet implemented');
+                                }
+                                default:
+                                    throw new Error(
+                                        `Unsupported page assertion type: ${assertion}`,
+                                    );
+                            }
+                        },
+                        true, // NOTE(review): third runAndWait argument; presumably a snapshot-capture flag — confirm in ./utils
+                    );
+                },
+            );
+
+            console.log(`[Page Assert Tool] Operation completed`);
+            return result;
+        } catch (error)
+        {
+            const errorMessage = `Page assertion failed: ${error instanceof Error ? error.message : 'Unknown error'}`;
+            console.error(`[Page Assert Tool] Error:`, errorMessage);
+            return errorMessage; // returned rather than rethrown: the tool call resolves with the failure text
+        }
+    },
+    {
+        name: 'page_assert', // NOTE(review): the agent prompt in src/auto.ts refers to "browser_page_assert" — confirm the differing name is intended
+        description: "Assert conditions on the page or response using Playwright's assertions",
+        schema: pageAssertSchema
+    }
+);
diff --git a/src/tools/browser_snapshot.ts b/src/tools/browser_snapshot.ts
@@ -2,7 +2,7 @@ import { tool } from '@langchain/core/tools';
 import { z } from 'zod';
 import { test } from '@playwright/test';
 import { context } from '../browser/context';
-import { captureAriaSnapshot } from './utils';
+import { run } from './utils';
 
 /**
  * Schema with dummy property to satisfy Gemini's API requirement for non-empty object properties
@@ -16,15 +16,20 @@ const snapshotSchema = z.object({
 
 export const browser_snapshot = tool(
     async () => {
-        try {
+        try
+        {
             console.log(`[Aria Snapshot] Starting snapshot operation`);
-            const result =
-                await test.step(`Capture Accessibility Snapshot`, async () => {
-                    return await captureAriaSnapshot(context);
+            const result = await test.step(`Capture Accessibility Snapshot`, async () => {
+                return await run(context, {
+                    callback: async () => { }, // Empty callback since we just want the snapshot
+                    captureSnapshot: true
                 });
+            });
+
             console.log(`[Aria Snapshot] Operation completed successfully`);
             return result;
-        } catch (error) {
+        } catch (error)
+        {
             const errorMessage = `Failed to capture snapshot: ${error instanceof Error ? error.message : 'Unknown error'}`;
             console.error(`[Aria Snapshot] Error:`, errorMessage);
             return errorMessage;
diff --git a/src/tools/index.ts b/src/tools/index.ts
@@ -14,3 +14,4 @@ export { browser_save_pdf } from './browser_save_pdf';
 export { browser_choose_file } from './browser_choose_file';
 export { browser_assert } from './browser_assert';
 export { browser_go_forward } from './browser_go_forward';
+export { browser_page_assert } from './browser_page_assert';
diff --git a/src/tools/utils.ts b/src/tools/utils.ts
@@ -179,13 +179,11 @@ class PageSnapshot {
             `- Page URL: ${page.url()}`,
             `- Page Title: ${await page.title()}`
         );
-        lines.push(
-            `- Page Snapshot`,
-            '```yaml',
-            yamlDocument.toString().trim(),
-            '```',
-            ''
-        );
+        lines.push(`- Page Snapshot`);
+        yamlDocument.toString().trim().split('\n').forEach(line => {
+            lines.push(`    ${line}`); // 4-space indentation
+        });
+        lines.push('');
         this._text = lines.join('\n');
     }