From 1cfaac5fcfa568a7d155c98f889240528146563e Mon Sep 17 00:00:00 2001 From: yutao Date: Fri, 7 Nov 2025 16:13:03 +0800 Subject: [PATCH 1/3] chore(core): refine error processing of agent --- packages/core/src/agent/task-builder.ts | 67 ++++++++++--------- packages/core/src/agent/utils.ts | 11 ++- packages/core/src/service/index.ts | 2 - packages/core/tests/unit-test/utils.test.ts | 54 +++++++++++++++ packages/shared/src/extractor/dom-util.ts | 3 - packages/shared/src/types/index.ts | 2 - .../src/puppeteer/base-page.ts | 2 + .../tests/ai/web/puppeteer/e2e.test.ts | 16 ++--- 8 files changed, 105 insertions(+), 52 deletions(-) diff --git a/packages/core/src/agent/task-builder.ts b/packages/core/src/agent/task-builder.ts index 7f34c71b0..7dbc27464 100644 --- a/packages/core/src/agent/task-builder.ts +++ b/packages/core/src/agent/task-builder.ts @@ -23,7 +23,11 @@ import { generateElementByPosition } from '@midscene/shared/extractor'; import { getDebug } from '@midscene/shared/logger'; import { assert } from '@midscene/shared/utils'; import type { TaskCache } from './task-cache'; -import { matchElementFromCache, matchElementFromPlan } from './utils'; +import { + ifPlanLocateParamIsBbox, + matchElementFromCache, + matchElementFromPlan, +} from './utils'; const debug = getDebug('agent:task-builder'); @@ -201,22 +205,33 @@ export class TaskBuilder { locateFields.forEach((field) => { if (param[field]) { - const locatePlan = locatePlanForLocate(param[field]); - debug( - 'will prepend locate param for field', - `action.type=${planType}`, - `param=${JSON.stringify(param[field])}`, - `locatePlan=${JSON.stringify(locatePlan)}`, - ); - const locateTask = this.createLocateTask( - locatePlan, - param[field], - context, - (result) => { - param[field] = result; - }, - ); - context.tasks.push(locateTask); + if (ifPlanLocateParamIsBbox(param[field])) { + debug( + 'plan locate param is bbox, will match element from plan', + param[field], + ); + const elementFromPlan = matchElementFromPlan(param[field]); + if (elementFromPlan) { + param[field] = elementFromPlan; + } + } else { + const locatePlan = locatePlanForLocate(param[field]); + debug( + 'will prepend locate param for field', + `action.type=${planType}`, + `param=${JSON.stringify(param[field])}`, + `locatePlan=${JSON.stringify(locatePlan)}`, + ); + const locateTask = this.createLocateTask( + locatePlan, + param[field], + context, + (result) => { + param[field] = result; + }, + ); + context.tasks.push(locateTask); + } } else { assert( !requiredLocateFields.includes(field), @@ -419,11 +434,7 @@ export class TaskBuilder { ); const cacheHitFlag = !!elementFromCache; - const elementFromPlan = - !userExpectedPathHitFlag && !cacheHitFlag - ? matchElementFromPlan(param) - : undefined; - const planHitFlag = !!elementFromPlan; + const planHitFlag = false; let elementFromAiLocate: LocateResultElement | null | undefined; if (!userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag) { @@ -446,10 +457,7 @@ export class TaskBuilder { } const element = - elementFromXpath || - elementFromCache || - elementFromPlan || - elementFromAiLocate; + elementFromXpath || elementFromCache || elementFromAiLocate; let currentCacheEntry: ElementCacheFeature | undefined; if ( @@ -526,13 +534,6 @@ export class TaskBuilder { cacheToSave: currentCacheEntry, }, }; - } else if (planHitFlag) { - hitBy = { - from: 'Planning', - context: { - rect: elementFromPlan?.rect, - }, - }; } onResult?.(element); diff --git a/packages/core/src/agent/utils.ts b/packages/core/src/agent/utils.ts index e4a234ece..4ff957b94 100644 --- a/packages/core/src/agent/utils.ts +++ b/packages/core/src/agent/utils.ts @@ -117,6 +117,16 @@ export function generateCacheId(fileName?: string): string { return `${taskFile}-${testFileIndex.get(taskFile)}`; } +export function ifPlanLocateParamIsBbox( + planLocateParam: PlanningLocateParam, +): boolean { + return !!( + planLocateParam.bbox && + Array.isArray(planLocateParam.bbox) && + planLocateParam.bbox.length === 4 + ); +} + export function matchElementFromPlan( planLocateParam: PlanningLocateParam, ): LocateResultElement | undefined { @@ -170,7 +180,6 @@ export async function matchElementFromCache( const rect = await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry); const element: LocateResultElement = { - id: uuid(), center: [ Math.round(rect.left + rect.width / 2), Math.round(rect.top + rect.height / 2), diff --git a/packages/core/src/service/index.ts b/packages/core/src/service/index.ts index 2ec871a25..2c9148bc1 100644 --- a/packages/core/src/service/index.ts +++ b/packages/core/src/service/index.ts @@ -188,10 +188,8 @@ export default class Service { if (elements.length === 1) { return { element: { - id: elements[0]!.id, center: elements[0]!.center, rect: elements[0]!.rect, - isOrderSensitive: elements[0]!.isOrderSensitive, }, rect, dump, diff --git a/packages/core/tests/unit-test/utils.test.ts b/packages/core/tests/unit-test/utils.test.ts index 939475d72..eecd62668 100644 --- a/packages/core/tests/unit-test/utils.test.ts +++ b/packages/core/tests/unit-test/utils.test.ts @@ -25,6 +25,7 @@ import { describe, expect, it } from 'vitest'; import { z } from 'zod'; // @ts-ignore no types in es folder import { reportHTMLContent, writeDumpReport } from '../../dist/es/utils'; // use modules from dist, otherwise we will miss the template file +import { ifPlanLocateParamIsBbox } from '../../src/agent/utils'; import { getTmpDir, getTmpFile, @@ -1542,3 +1543,56 @@ describe('loadActionParam and dumpActionParam integration', () => { `); }); }); + +describe('ifPlanLocateParamIsBbox', () => { + it('should return true when bbox is valid array with 4 elements', () => { + const param = { + prompt: 'test element', + bbox: [100, 200, 300, 400] as [number, number, number, number], + }; + expect(ifPlanLocateParamIsBbox(param)).toBe(true); + }); + + it('should return false when bbox is undefined', () => { + const param = { + prompt: 'test element', + }; + expect(ifPlanLocateParamIsBbox(param)).toBe(false); + }); + + it('should return false when bbox is not an array', () => { + const param = { + prompt: 'test element', + bbox: 'not an array' as any, + }; + expect(ifPlanLocateParamIsBbox(param)).toBe(false); + }); + + it('should return false when bbox array length is not 4', () => { + const param1 = { + prompt: 'test element', + bbox: [100, 200] as any, + }; + expect(ifPlanLocateParamIsBbox(param1)).toBe(false); + + const param2 = { + prompt: 'test element', + bbox: [100, 200, 300] as any, + }; + expect(ifPlanLocateParamIsBbox(param2)).toBe(false); + + const param3 = { + prompt: 'test element', + bbox: [100, 200, 300, 400, 500] as any, + }; + expect(ifPlanLocateParamIsBbox(param3)).toBe(false); + }); + + it('should return false when bbox is null', () => { + const param = { + prompt: 'test element', + bbox: null as any, + }; + expect(ifPlanLocateParamIsBbox(param)).toBe(false); + }); +}); diff --git a/packages/shared/src/extractor/dom-util.ts b/packages/shared/src/extractor/dom-util.ts index 2f37dbee5..910c871fe 100644 --- a/packages/shared/src/extractor/dom-util.ts +++ b/packages/shared/src/extractor/dom-util.ts @@ -143,11 +143,8 @@ export function generateElementByPosition(position: { width: edgeSize, height: edgeSize, }; - const id = generateHashId(rect); const element = { - id, rect, - content: '', center: [position.x, position.y] as [number, number], }; diff --git a/packages/shared/src/types/index.ts b/packages/shared/src/types/index.ts index 6769ef393..e976ef88f 100644 --- a/packages/shared/src/types/index.ts +++ b/packages/shared/src/types/index.ts @@ -49,6 +49,4 @@ export interface WebElementInfo extends ElementInfo { export type LocateResultElement = { center: [number, number]; rect: Rect; - id: string; - isOrderSensitive?: boolean; }; diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts index e24980bd4..0f706f4c0 100644 --- a/packages/web-integration/src/puppeteer/base-page.ts +++ b/packages/web-integration/src/puppeteer/base-page.ts @@ -572,6 +572,8 @@ export class Page< } async afterInvokeAction(name: string, param: any): Promise { + await this.waitForNavigation(); + await this.waitForNetworkIdle(); if (this.onAfterInvokeAction) { await this.onAfterInvokeAction(name, param); } diff --git a/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts b/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts index 4084425df..42c3984a6 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts @@ -41,7 +41,7 @@ describe( }).rejects.toThrowError(); }); - it('Sauce Demo by Swag Lab', async () => { + it.only('Sauce Demo by Swag Lab', async () => { const { originPage, reset } = await launchPage( 'https://www.saucedemo.com/', ); @@ -67,7 +67,9 @@ describe( await sleep(10 * 1000); - agent.setAIActionContext('这是 Sauce Demo by Swag Lab 的测试'); + agent.setAIActionContext( + 'This is a testing application for Sauce Demo by Swag Lab', + ); const flag = await agent.aiBoolean('this is a login page'); expect(flag).toBe(true); @@ -86,15 +88,7 @@ describe( expect(beforeInvokeAction.mock.calls.length).toEqual( afterInvokeAction.mock.calls.length, ); - expect( - beforeInvokeAction.mock.calls.map((call) => call[0]), - ).toMatchInlineSnapshot(` - [ - "Input", - "Input", - "Tap", - ] - `); + expect(beforeInvokeAction.mock.calls.length).toBeGreaterThan(2); expect(onTaskStartTip.mock.calls.length).toBeGreaterThan(1); From 454547e39ae6bd021c29adc10093059f7ad3785d Mon Sep 17 00:00:00 2001 From: yutao Date: Mon, 10 Nov 2025 11:43:54 +0800 Subject: [PATCH 2/3] chore(core): fix lint --- packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts b/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts index 42c3984a6..c08ba4a91 100644 --- a/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts +++ b/packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts @@ -41,7 +41,7 @@ describe( }).rejects.toThrowError(); }); - it.only('Sauce Demo by Swag Lab', async () => { + it('Sauce Demo by Swag Lab', async () => { const { originPage, reset } = await launchPage( 'https://www.saucedemo.com/', ); From 25149b94777b5cd8a0d18982cad36561f175af14 Mon Sep 17 00:00:00 2001 From: yutao Date: Mon, 10 Nov 2025 11:47:37 +0800 Subject: [PATCH 3/3] chore(core): fix lint --- packages/web-integration/src/puppeteer/base-page.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/web-integration/src/puppeteer/base-page.ts b/packages/web-integration/src/puppeteer/base-page.ts index 0f706f4c0..81f52733f 100644 --- a/packages/web-integration/src/puppeteer/base-page.ts +++ b/packages/web-integration/src/puppeteer/base-page.ts @@ -564,8 +564,6 @@ export class Page< } async beforeInvokeAction(name: string, param: any): Promise { - await this.waitForNavigation(); - await this.waitForNetworkIdle(); if (this.onBeforeInvokeAction) { await this.onBeforeInvokeAction(name, param); }