Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions packages/core/src/agent/task-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ import { generateElementByPosition } from '@midscene/shared/extractor';
import { getDebug } from '@midscene/shared/logger';
import { assert } from '@midscene/shared/utils';
import type { TaskCache } from './task-cache';
import { matchElementFromCache, matchElementFromPlan } from './utils';
import {
ifPlanLocateParamIsBbox,
matchElementFromCache,
matchElementFromPlan,
} from './utils';

const debug = getDebug('agent:task-builder');

Expand Down Expand Up @@ -201,22 +205,33 @@ export class TaskBuilder {

locateFields.forEach((field) => {
if (param[field]) {
const locatePlan = locatePlanForLocate(param[field]);
debug(
'will prepend locate param for field',
`action.type=${planType}`,
`param=${JSON.stringify(param[field])}`,
`locatePlan=${JSON.stringify(locatePlan)}`,
);
const locateTask = this.createLocateTask(
locatePlan,
param[field],
context,
(result) => {
param[field] = result;
},
);
context.tasks.push(locateTask);
if (ifPlanLocateParamIsBbox(param[field])) {
debug(
'plan locate param is bbox, will match element from plan',
param[field],
);
const elementFromPlan = matchElementFromPlan(param[field]);
if (elementFromPlan) {
param[field] = elementFromPlan;
}
} else {
const locatePlan = locatePlanForLocate(param[field]);
debug(
'will prepend locate param for field',
`action.type=${planType}`,
`param=${JSON.stringify(param[field])}`,
`locatePlan=${JSON.stringify(locatePlan)}`,
);
const locateTask = this.createLocateTask(
locatePlan,
param[field],
context,
(result) => {
param[field] = result;
},
);
context.tasks.push(locateTask);
}
} else {
assert(
!requiredLocateFields.includes(field),
Expand Down Expand Up @@ -419,11 +434,7 @@ export class TaskBuilder {
);
const cacheHitFlag = !!elementFromCache;

const elementFromPlan =
!userExpectedPathHitFlag && !cacheHitFlag
? matchElementFromPlan(param)
: undefined;
const planHitFlag = !!elementFromPlan;
const planHitFlag = false;

let elementFromAiLocate: LocateResultElement | null | undefined;
if (!userExpectedPathHitFlag && !cacheHitFlag && !planHitFlag) {
Expand All @@ -446,10 +457,7 @@ export class TaskBuilder {
}

const element =
elementFromXpath ||
elementFromCache ||
elementFromPlan ||
elementFromAiLocate;
elementFromXpath || elementFromCache || elementFromAiLocate;

let currentCacheEntry: ElementCacheFeature | undefined;
if (
Expand Down Expand Up @@ -526,13 +534,6 @@ export class TaskBuilder {
cacheToSave: currentCacheEntry,
},
};
} else if (planHitFlag) {
hitBy = {
from: 'Planning',
context: {
rect: elementFromPlan?.rect,
},
};
}

onResult?.(element);
Expand Down
11 changes: 10 additions & 1 deletion packages/core/src/agent/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,16 @@ export function generateCacheId(fileName?: string): string {
return `${taskFile}-${testFileIndex.get(taskFile)}`;
}

/**
 * Tells whether a planning locate param carries a usable bounding box.
 *
 * @param planLocateParam - The locate param produced by planning.
 * @returns `true` only when `bbox` is an array holding exactly four entries;
 *   `false` when it is missing, `null`, not an array, or of any other length.
 */
export function ifPlanLocateParamIsBbox(
  planLocateParam: PlanningLocateParam,
): boolean {
  const { bbox } = planLocateParam;
  // Array.isArray already rejects undefined / null / non-array values.
  return Array.isArray(bbox) && bbox.length === 4;
}

export function matchElementFromPlan(
planLocateParam: PlanningLocateParam,
): LocateResultElement | undefined {
Expand Down Expand Up @@ -170,7 +180,6 @@ export async function matchElementFromCache(
const rect =
await context.interfaceInstance.rectMatchesCacheFeature(cacheEntry);
const element: LocateResultElement = {
id: uuid(),
center: [
Math.round(rect.left + rect.width / 2),
Math.round(rect.top + rect.height / 2),
Expand Down
2 changes: 0 additions & 2 deletions packages/core/src/service/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,8 @@ export default class Service {
if (elements.length === 1) {
return {
element: {
id: elements[0]!.id,
center: elements[0]!.center,
rect: elements[0]!.rect,
isOrderSensitive: elements[0]!.isOrderSensitive,
},
rect,
dump,
Expand Down
54 changes: 54 additions & 0 deletions packages/core/tests/unit-test/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { describe, expect, it } from 'vitest';
import { z } from 'zod';
// @ts-ignore no types in es folder
import { reportHTMLContent, writeDumpReport } from '../../dist/es/utils'; // use modules from dist, otherwise we will miss the template file
import { ifPlanLocateParamIsBbox } from '../../src/agent/utils';
import {
getTmpDir,
getTmpFile,
Expand Down Expand Up @@ -1542,3 +1543,56 @@ describe('loadActionParam and dumpActionParam integration', () => {
`);
});
});

// Unit tests for ifPlanLocateParamIsBbox: only a bbox that is an array of
// exactly four entries should be reported as a bbox locate param.
describe('ifPlanLocateParamIsBbox', () => {
  // Every case shares the same prompt; only the bbox shape varies.
  const prompt = 'test element';

  it('should return true when bbox is valid array with 4 elements', () => {
    const bbox: [number, number, number, number] = [100, 200, 300, 400];
    expect(ifPlanLocateParamIsBbox({ prompt, bbox })).toBe(true);
  });

  it('should return false when bbox is undefined', () => {
    expect(ifPlanLocateParamIsBbox({ prompt })).toBe(false);
  });

  it('should return false when bbox is not an array', () => {
    const bbox = 'not an array' as any;
    expect(ifPlanLocateParamIsBbox({ prompt, bbox })).toBe(false);
  });

  it('should return false when bbox array length is not 4', () => {
    // Too short (2 and 3 entries) and too long (5 entries).
    const wrongLengths: number[][] = [
      [100, 200],
      [100, 200, 300],
      [100, 200, 300, 400, 500],
    ];
    for (const candidate of wrongLengths) {
      const bbox = candidate as any;
      expect(ifPlanLocateParamIsBbox({ prompt, bbox })).toBe(false);
    }
  });

  it('should return false when bbox is null', () => {
    const bbox = null as any;
    expect(ifPlanLocateParamIsBbox({ prompt, bbox })).toBe(false);
  });
});
3 changes: 0 additions & 3 deletions packages/shared/src/extractor/dom-util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,8 @@ export function generateElementByPosition(position: {
width: edgeSize,
height: edgeSize,
};
const id = generateHashId(rect);
const element = {
id,
rect,
content: '',
center: [position.x, position.y] as [number, number],
};

Expand Down
2 changes: 0 additions & 2 deletions packages/shared/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,4 @@ export interface WebElementInfo extends ElementInfo {
export type LocateResultElement = {
center: [number, number];
rect: Rect;
id: string;
isOrderSensitive?: boolean;
};
2 changes: 2 additions & 0 deletions packages/web-integration/src/puppeteer/base-page.ts
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,8 @@ export class Page<
}

async afterInvokeAction(name: string, param: any): Promise<void> {
await this.waitForNavigation();
await this.waitForNetworkIdle();
if (this.onAfterInvokeAction) {
await this.onAfterInvokeAction(name, param);
}
Expand Down
14 changes: 4 additions & 10 deletions packages/web-integration/tests/ai/web/puppeteer/e2e.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ describe(

await sleep(10 * 1000);

agent.setAIActionContext('这是 Sauce Demo by Swag Lab 的测试');
agent.setAIActionContext(
'This is a testing application for Sauce Demo by Swag Lab',
);

const flag = await agent.aiBoolean('this is a login page');
expect(flag).toBe(true);
Expand All @@ -86,15 +88,7 @@ describe(
expect(beforeInvokeAction.mock.calls.length).toEqual(
afterInvokeAction.mock.calls.length,
);
expect(
beforeInvokeAction.mock.calls.map((call) => call[0]),
).toMatchInlineSnapshot(`
[
"Input",
"Input",
"Tap",
]
`);
expect(beforeInvokeAction.mock.calls.length).toBeGreaterThan(2);

expect(onTaskStartTip.mock.calls.length).toBeGreaterThan(1);

Expand Down