Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion src/ai/conversation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export class Conversation {
model: any;
telemetryFunctionId?: string;
protectedPrefixCount = 0;
private toolExecutions: ToolExecution[] = [];
private autoTrimRules: Map<string, number>;

constructor(messages: ModelMessage[] = [], model?: any, telemetryFunctionId?: string) {
Expand Down Expand Up @@ -83,7 +84,9 @@ export class Conversation {
}

/**
 * Creates a copy of this conversation.
 *
 * The message list and the recorded tool executions are copied into new
 * arrays, so adding entries to the copy does not touch this instance (the
 * individual entries themselves are shared). `model` and
 * `telemetryFunctionId` are passed to the new instance unchanged.
 *
 * @returns A new Conversation carrying the same messages and tool executions.
 */
clone(): Conversation {
  const clone = new Conversation([...this.messages], this.model, this.telemetryFunctionId);
  // The recorded executions must travel with the copy; an earlier bare
  // `return new Conversation(...)` ahead of this point made the copy unreachable.
  clone.toolExecutions = [...this.toolExecutions];
  return clone;
}

cleanupTag(tagName: string, replacement: string, keepLast = 0): void {
Expand Down Expand Up @@ -227,6 +230,10 @@ export class Conversation {
}

getToolExecutions(): ToolExecution[] {
if (this.toolExecutions.length > 0) {
return [...this.toolExecutions];
}

const toolCalls = new Map<string, any>();
for (const message of this.messages) {
if (message.role !== 'assistant') continue;
Expand Down Expand Up @@ -256,4 +263,9 @@ export class Conversation {

return executions;
}

/**
 * Appends the given tool executions to the list recorded on this conversation.
 * Passing an empty array leaves the list untouched.
 *
 * @param executions Executions to record, in order.
 */
addToolExecutions(executions: ToolExecution[]): void {
  for (const execution of executions) {
    this.toolExecutions.push(execution);
  }
}
}
49 changes: 39 additions & 10 deletions src/ai/historian/experience.ts
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,8 @@ export function WithExperience<T extends Constructor>(Base: T) {
if (!CODECEPT_TOOLS.includes(exec.toolName as any)) continue;
if (!exec.output?.code) continue;

this.saveFallbackAttempts(exec, initialState);

if (!exec.wasSuccessful) {
const bucket = failedByTool.get(exec.toolName) || [];
bucket.push(exec);
Expand Down Expand Up @@ -280,16 +282,7 @@ export function WithExperience<T extends Constructor>(Base: T) {
const candidate = candidates[pattern.candidateIndex];
if (!candidate) continue;

const url = candidate.success.output?.pageDiff?.currentUrl;
let state: ActionResult = initialState;

if (url && url !== initialState.url) {
const transition = this.stateManager.getLastVisitToPath(url);
if (transition) {
state = ActionResult.fromState(transition.toState);
}
}

const state = this.resolveActionState(candidate.success, initialState);
if (isNonReusableCode(candidate.success.output.code)) continue;
this.experienceTracker.writeAction(state, { title: pattern.intent, code: candidate.success.output.code, explanation: pattern.explanation });
}
Expand All @@ -300,6 +293,42 @@ export function WithExperience<T extends Constructor>(Base: T) {
}
}

/**
 * Records the command that finally worked when a successful tool execution
 * needed fallback attempts.
 *
 * Nothing is written unless the execution succeeded, reported at least two
 * attempts, has a successful attempt with a command, has at least one failed
 * attempt, and the winning command passes the `isNonReusableCode` filter.
 *
 * @param exec Tool execution whose `output.attempts` list is inspected.
 * @param initialState State used when no better state can be resolved for the execution.
 */
private saveFallbackAttempts(exec: ToolExecution, initialState: ActionResult): void {
  if (!exec.wasSuccessful) return;

  const attempts = exec.output?.attempts;
  if (!Array.isArray(attempts) || attempts.length < 2) return;

  // Prefer the successful attempt whose command equals the code the tool
  // reported; otherwise fall back to the first successful attempt.
  const winner = attempts.find((item) => item.success && item.command === exec.output.code) || attempts.find((item) => item.success);
  if (!winner?.command) return;

  const losers = attempts.filter((item) => !item.success);
  if (losers.length === 0) return;
  if (isNonReusableCode(winner.command)) return;

  const state = this.resolveActionState(exec, initialState);
  // Second argument is presumably the label used when the execution has none — confirm against getExecutionLabel.
  const title = getExecutionLabel(exec, `${exec.toolName} target element`);

  // Attempts without a command are left out of the explanation.
  const failedCommands = losers
    .map((item) => item.command)
    .filter(Boolean)
    .join(', ');

  let explanation = 'Use this locator after fallback attempts failed.';
  if (failedCommands) {
    explanation = `Use this locator after these alternatives failed: ${failedCommands}`;
  }

  this.experienceTracker.writeAction(state, { title, code: winner.command, explanation });
}

/**
 * Chooses the state an execution's experience should be recorded against.
 *
 * The URL is taken from `exec.output.url`, or from
 * `exec.output.pageDiff.previousUrl` when the former is missing or falsy.
 * When that URL differs from the initial state's URL and the state manager
 * knows a last visit to it, the visit's `toState` is used; in every other
 * case `initialState` is returned.
 *
 * @param exec Tool execution whose output carries the URL information.
 * @param initialState State returned when nothing more specific is found.
 * @returns The resolved state.
 */
private resolveActionState(exec: ToolExecution, initialState: ActionResult): ActionResult {
  const output = exec.output;
  const url = output?.url || output?.pageDiff?.previousUrl;

  if (url && url !== initialState.url) {
    const transition = this.stateManager.getLastVisitToPath(url);
    if (transition) {
      return ActionResult.fromState(transition.toState);
    }
  }

  return initialState;
}

private async analyzeDiscoveries(stepsWithDiffs: Array<{ step: SessionStep; ariaDiff: string | null }>): Promise<void> {
if (!stepsWithDiffs.some((s) => s.ariaDiff)) return;

Expand Down
13 changes: 13 additions & 0 deletions src/ai/pilot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,19 @@ export class Pilot implements Agent {
const state = this.explorer.getStateManager().getCurrentState();
if (!state) return '';
const actionResult = ActionResult.fromState(state);
const successful = this.experienceTracker.getSuccessfulExperience(actionResult);
if (successful.length > 0) {
return dedent`
<experience>
Past successful recipes recorded from prior runs for this page.
Prefer these solutions first when they match the current scenario. Use the exact code blocks as the first attempt before trying alternative locators.
If a saved locator misses, then fall back to ARIA/UI-map.

${successful.join('\n\n')}
</experience>
`;
}

const toc = this.experienceTracker.getExperienceTableOfContents(actionResult);
return renderExperienceToc(toc);
}
Expand Down
20 changes: 14 additions & 6 deletions src/ai/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,16 +253,24 @@ export class Provider {
const toolCalls = response.toolCalls || [];
const toolResults = response.toolResults || [];

const toolExecutions = toolCalls.map((call: any, index: number) => ({
toolName: call.toolName || '',
input: call.input,
output: toolResults[index]?.output,
wasSuccessful: toolResults[index]?.output?.success || false,
}));
const toolExecutions = toolCalls.map((call: any, index: number) => {
const output = this.unwrapToolOutput(toolResults[index]?.output);
return {
toolName: call.toolName || '',
input: call.input,
output,
wasSuccessful: Boolean(output) && output.success !== false,
};
});
conversation.addToolExecutions(toolExecutions);

return { conversation, response, toolExecutions };
}

/**
 * Unwraps a tool output of the form `{ type: 'json', value }`.
 *
 * @param output Raw tool output; may be nullish.
 * @returns `output.value` when `output.type` is `'json'` and `value` is truthy;
 *   otherwise `output` itself (so a wrapper with a falsy `value` is returned as-is).
 */
private unwrapToolOutput(output: any): any {
  const isJsonWrapper = output?.type === 'json';
  if (isJsonWrapper && output?.value) {
    return output.value;
  }
  return output;
}

async chat(messages: ModelMessage[], model: any, options: any = {}): Promise<any> {
const modelName = this.getModelName(model);
setActivity(`🤖 Asking ${modelName}`, 'ai');
Expand Down
7 changes: 7 additions & 0 deletions src/ai/tester.ts
Original file line number Diff line number Diff line change
Expand Up @@ -581,6 +581,7 @@ export class Tester extends TaskAgent implements Agent {
}

if (isNewUrl) {
const experience = this.getExperience(currentState);
const alreadySeenUiMap = this.seenUiMapUrls.has(currentUrl);
let research = '';
if (!alreadySeenUiMap) {
Expand Down Expand Up @@ -625,6 +626,8 @@ export class Tester extends TaskAgent implements Agent {
However, <page_ui_map> is not always up to date, use <page_aria> and <page_html> to understand the ACTUAL state of the page
Do not interact with elements that are not listed in <page_aria> and <page_html>
Refer to information on page sections in <page_ui_map> and use container CSS locators to interact with elements inside sections

${experience}
`;
return context;
}
Expand Down Expand Up @@ -784,6 +787,7 @@ export class Tester extends TaskAgent implements Agent {
- Use tool input schemas exactly as documented. Do not invent parameter names or add fields not listed by the tool schema.
- Use click() for buttons, links, and clickable elements ONLY - do NOT include I.fillField() or I.type() commands in click() tool
- click() commands array is for FALLBACK LOCATORS of the SAME element, NOT for clicking different elements in sequence. If you need to click two different elements, make two separate click() calls.
- If <experience> contains an ACTION/FLOW code block that matches the current step, put that saved command FIRST in the relevant tool's commands array. Add new fallback locators only after the saved command.
- Use form() for text input (I.fillField, I.type), dropdown selection (I.selectOption), file uploads (I.attachFile), and multi-step form interactions
- Use pressKey() for pressing special keys (Enter, Escape, Tab, Arrow keys) or key combinations with modifiers (Ctrl+A, Shift+Delete, etc.)
- Use container CSS locators from <page_ui_map> to interact with elements inside sections
Expand Down Expand Up @@ -853,6 +857,7 @@ export class Tester extends TaskAgent implements Agent {

private buildScenarioBlock(task: Test, actionResult: ActionResult): string {
const knowledge = this.getKnowledge(actionResult);
const experience = this.getExperience(actionResult);

return dedent`
<task>
Expand Down Expand Up @@ -883,6 +888,8 @@ export class Tester extends TaskAgent implements Agent {
${this.buildAvailableFiles()}

${knowledge}

${experience}

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this can quickly overflow the entire context!

`;
}

Expand Down
28 changes: 23 additions & 5 deletions src/experience-tracker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,10 @@ export class ExperienceTracker {

/**
 * Reads the experience file belonging to a state hash.
 *
 * The hash is turned into a path with `getExperienceFilePath` and the read is
 * delegated to `readExperienceFileByPath`.
 *
 * @param stateHash Hash identifying the state whose experience file is read.
 * @returns The file's `content` and `data` as produced by `readExperienceFileByPath`.
 */
readExperienceFile(stateHash: string): { content: string; data: any } {
  return this.readExperienceFileByPath(this.getExperienceFilePath(stateHash));
}

private readExperienceFileByPath(filePath: string): { content: string; data: any } {
const fileContent = readFileSync(filePath, 'utf8');
const { content, data } = matter(fileContent);
return { content, data };
Expand Down Expand Up @@ -269,9 +273,23 @@ export class ExperienceTracker {
return this.getAllExperience()
.filter((experience) => {
const experienceState = experience.data as WebPageState;
return state.isRelevantExperienceRecord(experienceState, {
includeDescendantExperience: options?.includeDescendantExperience,
});
if (
state.isRelevantExperienceRecord(experienceState, {
includeDescendantExperience: options?.includeDescendantExperience,
})
) {
return true;
}

const related = Array.isArray(experience.data.related) ? experience.data.related : [];
return related.some((url) =>
state.isRelevantExperienceRecord(
{ url },
{
includeDescendantExperience: options?.includeDescendantExperience,
}
)
);
})
.map((experience) => {
const lines = experience.content.split('\n');
Expand Down Expand Up @@ -350,7 +368,7 @@ export class ExperienceTracker {
const filePath = this.findExperienceFileByHash(entry.fileHash);
if (!filePath) return null;

const { content } = this.readExperienceFile(entry.fileHash);
const { content } = this.readExperienceFileByPath(filePath);
const extracted = extractHeadingSection(content, sectionIndex);
if (!extracted) return null;

Expand Down Expand Up @@ -424,7 +442,7 @@ export class ExperienceTracker {
const filePath = this.findExperienceFileByHash(entry.fileHash);
if (!filePath) return null;

const { content } = this.readExperienceFile(entry.fileHash);
const { content } = this.readExperienceFileByPath(filePath);
const extracted = extractHeadingSection(content, sectionIndex);
if (!extracted) return null;

Expand Down
8 changes: 7 additions & 1 deletion src/explorbot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,13 @@ export class ExplorBot {
// Build the tester agent. Its tools receive the experience tracker and a
// `getState` accessor in addition to the explorer, researcher and navigator.
this.agents.tester = this.createAgent(({ ai, explorer }) => {
  const researcher = this.agentResearcher();
  const navigator = this.agentNavigator();
  const stateManager = explorer.getStateManager();
  const experienceTracker = stateManager.getExperienceTracker();
  // Evaluated on every call so the tools see the state current at that moment;
  // yields null while the state manager has no current state.
  const getState = () => {
    const state = stateManager.getCurrentState();
    return state ? ActionResult.fromState(state) : null;
  };
  // Single `tools` declaration: the older three-argument createAgentTools call
  // redeclared the same const in this scope and has been removed.
  const tools = createAgentTools({ explorer, researcher, navigator, experienceTracker, getState });
  return new Tester(explorer, ai, researcher, navigator, tools);
});

Expand Down
30 changes: 30 additions & 0 deletions tests/unit/conversation.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -405,4 +405,34 @@ describe('Conversation', () => {
expect(output.pageDiff.htmlParts).toBeDefined();
});
});

// Executions recorded through addToolExecutions must come back from
// getToolExecutions unchanged after compactToolResults has run.
it('preserves recorded tool executions after compacting prompt tool results', () => {
  const recordedOutput = {
    success: true,
    code: 'I.click("Run")',
    attempts: [
      { command: 'I.click("Missing")', success: false },
      { command: 'I.click("Run")', success: true },
    ],
    pageDiff: {
      htmlParts: [{ subtree: '<div>large</div>' }],
      ariaChanges: 'x'.repeat(600),
    },
  };
  const recorded = {
    toolName: 'click',
    input: { explanation: 'Open run' },
    wasSuccessful: true,
    output: recordedOutput,
  };

  const conversation = new Conversation();
  conversation.addToolExecutions([recorded]);
  conversation.compactToolResults(0);

  const [execution] = conversation.getToolExecutions();
  const { attempts, pageDiff } = execution.output;
  expect(attempts).toHaveLength(2);
  expect(pageDiff.htmlParts).toHaveLength(1);
  expect(pageDiff.ariaChanges).toHaveLength(600);
});
});
30 changes: 26 additions & 4 deletions tests/unit/experience-tracker.test.ts
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
import { existsSync, rmSync } from 'node:fs';
import { join } from 'node:path';
import { ActionResult } from '../../src/action-result';
import { ConfigParser } from '../../src/config';
import { ExperienceTracker } from '../../src/experience-tracker';

describe('ExperienceTracker', () => {
let experienceTracker: ExperienceTracker;
const testDir = '/tmp/experience';
const testRoot = join(process.cwd(), 'tmp', 'experience-tracker-test');
const testDir = join(testRoot, 'experience');

beforeEach(() => {
if (existsSync(testDir)) {
Expand All @@ -24,7 +26,7 @@ describe('ExperienceTracker', () => {

const configParser = ConfigParser.getInstance();
(configParser as any).config = mockConfig;
(configParser as any).configPath = '/tmp/config.js';
(configParser as any).configPath = join(testRoot, 'config.js');

experienceTracker = new ExperienceTracker();
});
Expand Down Expand Up @@ -123,6 +125,26 @@ describe('ExperienceTracker', () => {
const withDesc = experienceTracker.getRelevantExperience(parent, { includeDescendantExperience: true });
expect(withDesc).toHaveLength(2);
});

// A flow written for the list page with the suite path passed as the third
// writeFlow argument must be returned when asking for experience relevant to the suite page.
it('includes experience when current state matches related URL', () => {
  const flowBody = '## FLOW: open suite\n\n* Open suite\n\n```js\nI.click("Suite")\n```\n\n---\n';
  const relatedUrls = ['/projects/demo/suite/123'];

  const list = new ActionResult({
    html: '<html><body>List</body></html>',
    url: 'https://example.com/projects/demo',
    title: 'List',
  });
  const suite = new ActionResult({
    html: '<html><body>Suite</body></html>',
    url: 'https://example.com/projects/demo/suite/123',
    title: 'Suite',
  });

  experienceTracker.writeFlow(list, flowBody, relatedUrls);

  const relevant = experienceTracker.getRelevantExperience(suite);
  expect(relevant).toHaveLength(1);
  expect(relevant[0].content).toContain('## FLOW: open suite');
});
});

describe('readExperienceFile', () => {
Expand Down Expand Up @@ -238,7 +260,7 @@ describe('ExperienceTracker', () => {
experienceTracker.writeFlow(state, '');
experienceTracker.writeFlow(state, ' \n ');
const stateHash = state.getStateHash();
const filePath = `/tmp/experience/${stateHash}.md`;
const filePath = join(testDir, `${stateHash}.md`);
if (existsSync(filePath)) {
const { content } = experienceTracker.readExperienceFile(stateHash);
expect(content.trim()).toBe('');
Expand Down Expand Up @@ -266,7 +288,7 @@ describe('ExperienceTracker', () => {
const state = makeState();
disabledTracker.writeFlow(state, sampleBody);
const stateHash = state.getStateHash();
const filePath = `/tmp/experience/${stateHash}.md`;
const filePath = join(testDir, `${stateHash}.md`);
expect(existsSync(filePath)).toBe(false);
});
});
Expand Down
Loading
Loading