feat: add pluggable multi-CLI backend system
Implement the BackendAdapter interface with four CLI backends:
- ClaudeCodeBackend (extracted from AgentRuntime)
- CodexBackend (OpenAI Codex CLI)
- GeminiBackend (Google Gemini CLI)
- OpenCodeBackend (OpenCode CLI)

Add BackendRegistry for backend resolution/creation via the AGENT_BACKEND env var.
Refactor AgentRuntime to delegate to a BackendAdapter instead of hardcoding the Claude CLI.
Update GatewayConfig with new env vars (AGENT_BACKEND, BACKEND_CLI_PATH, BACKEND_MODEL, BACKEND_MAX_TURNS).
Includes 10 property-based test files and unit tests for edge cases.
This commit is contained in:
119
tests/property/agent-runtime.property.test.ts
Normal file
119
tests/property/agent-runtime.property.test.ts
Normal file
@@ -0,0 +1,119 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import fc from "fast-check";
|
||||
import { mapBackendEventResult } from "../../src/agent-runtime.js";
|
||||
import { SessionManager } from "../../src/session-manager.js";
|
||||
import type { BackendEventResult } from "../../src/backends/types.js";
|
||||
|
||||
// Feature: multi-cli-backend, Property 9: EventResult mapping preserves semantics
|
||||
// **Validates: Requirements 10.3**
|
||||
|
||||
/**
 * Wraps an arbitrary so that it may also yield `undefined`
 * (fast-check's `option` with `undefined` as the "nil" value).
 */
const orUndefined = <T>(arb: fc.Arbitrary<T>): fc.Arbitrary<T | undefined> =>
  fc.option(arb, { nil: undefined });

/**
 * Arbitrary BackendEventResult: optional response text (0-500 chars),
 * optional session ID (1-100 chars) and an arbitrary error flag.
 */
const backendEventResult: fc.Arbitrary<BackendEventResult> = fc.record({
  responseText: orUndefined(fc.string({ maxLength: 500, minLength: 0 })),
  sessionId: orUndefined(fc.string({ maxLength: 100, minLength: 1 })),
  isError: fc.boolean(),
});

/** Arbitrary channel ID: either `undefined` or a string of 1-50 chars. */
const channelId = orUndefined(fc.string({ maxLength: 50, minLength: 1 }));
|
||||
|
||||
describe("Property 9: EventResult mapping preserves semantics", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  // Backend results split by their error flag.
  const failedResult = backendEventResult.filter((candidate) => candidate.isError);
  const successfulResult = backendEventResult.filter((candidate) => !candidate.isError);

  it("sets error to responseText when isError is true, with no responseText on gateway result", () => {
    const errorMapping = fc.property(failedResult, channelId, (backendResult, targetChannel) => {
      const gatewayResult = mapBackendEventResult(backendResult, targetChannel);

      // The backend's text moves into `error`; the success fields stay empty.
      expect(gatewayResult.error).toBe(backendResult.responseText);
      expect(gatewayResult.responseText).toBeUndefined();
      expect(gatewayResult.sessionId).toBeUndefined();
      expect(gatewayResult.targetChannelId).toBe(targetChannel);
    });

    fc.assert(errorMapping, { numRuns });
  });

  it("sets responseText and sessionId when isError is false, with no error on gateway result", () => {
    const successMapping = fc.property(successfulResult, channelId, (backendResult, targetChannel) => {
      const gatewayResult = mapBackendEventResult(backendResult, targetChannel);

      // Text and session ID pass through unchanged; `error` stays empty.
      expect(gatewayResult.responseText).toBe(backendResult.responseText);
      expect(gatewayResult.sessionId).toBe(backendResult.sessionId);
      expect(gatewayResult.error).toBeUndefined();
      expect(gatewayResult.targetChannelId).toBe(targetChannel);
    });

    fc.assert(successMapping, { numRuns });
  });

  it("always sets targetChannelId regardless of isError", () => {
    const channelPassThrough = fc.property(backendEventResult, channelId, (backendResult, targetChannel) => {
      const { targetChannelId } = mapBackendEventResult(backendResult, targetChannel);
      expect(targetChannelId).toBe(targetChannel);
    });

    fc.assert(channelPassThrough, { numRuns });
  });
});
|
||||
|
||||
|
||||
// Feature: multi-cli-backend, Property 10: Session ID storage after backend execution
|
||||
// **Validates: Requirements 10.4**
|
||||
|
||||
describe("Property 10: Session ID storage after backend execution", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  /** Channel IDs used as SessionManager keys: 1-50 chars. */
  const anyChannelId = fc.string({ minLength: 1, maxLength: 50 });

  /**
   * Stores the result's session ID under `chId` when both are truthy.
   *
   * NOTE(review): this re-implements, inside the test, the step the original
   * comment attributes to AgentRuntime.processMessage. The properties below
   * therefore exercise SessionManager only, not the runtime itself - confirm
   * that is the intended coverage.
   */
  const storeSessionIfPresent = (
    manager: SessionManager,
    chId: string,
    result: BackendEventResult,
  ): void => {
    if (!result.sessionId || !chId) return;
    manager.setSessionId(chId, result.sessionId);
  };

  it("stores sessionId in SessionManager when BackendEventResult has a sessionId", () => {
    const storesSession = fc.property(
      anyChannelId,
      fc.string({ minLength: 1, maxLength: 100 }),
      (chId, sessionId) => {
        const manager = new SessionManager();

        storeSessionIfPresent(manager, chId, {
          responseText: "some response",
          sessionId,
          isError: false,
        });

        expect(manager.getSessionId(chId)).toBe(sessionId);
      },
    );

    fc.assert(storesSession, { numRuns });
  });

  it("does not update SessionManager when sessionId is undefined", () => {
    const leavesManagerUntouched = fc.property(anyChannelId, (chId) => {
      const manager = new SessionManager();

      storeSessionIfPresent(manager, chId, {
        responseText: "some response",
        sessionId: undefined,
        isError: false,
      });

      expect(manager.getSessionId(chId)).toBeUndefined();
    });

    fc.assert(leavesManagerUntouched, { numRuns });
  });
});
|
||||
163
tests/property/claude-backend.property.test.ts
Normal file
163
tests/property/claude-backend.property.test.ts
Normal file
@@ -0,0 +1,163 @@
|
||||
import { describe, it } from "vitest";
|
||||
import fc from "fast-check";
|
||||
import { ClaudeCodeBackend } from "../../src/backends/claude-backend.js";
|
||||
import type { BackendAdapterConfig } from "../../src/backends/types.js";
|
||||
|
||||
// Feature: multi-cli-backend, Property 1: Claude backend required flags
|
||||
// **Validates: Requirements 2.2, 2.5, 2.6**
|
||||
|
||||
/**
 * Arbitrary string of 1-200 characters, used for prompts and system-prompt
 * file paths. Only the length is constrained; no characters are filtered out.
 */
const nonEmptyString = fc.string({ maxLength: 200, minLength: 1 });

/** Arbitrary tool name: a letter followed by up to 49 of `A-Za-z0-9_.-`. */
const toolName = fc.stringMatching(/^[A-Za-z][A-Za-z0-9_.-]{0,49}$/);

/** Arbitrary allowed-tools list with 0-10 entries (duplicates are possible). */
const toolsList = fc.array(toolName, { maxLength: 10, minLength: 0 });

/** Arbitrary max-turns setting in the range 1..1000. */
const maxTurns = fc.integer({ max: 1000, min: 1 });
|
||||
|
||||
/**
 * Builds a ClaudeCodeBackend with fixed test settings.
 *
 * @param allowedTools - tool names placed in the config's `allowedTools`
 * @param turns - value placed in the config's `maxTurns`
 * @returns a backend constructed from that config
 */
function createBackend(allowedTools: string[], turns: number): ClaudeCodeBackend {
  const settings: BackendAdapterConfig = {
    maxTurns: turns,
    allowedTools,
    queryTimeoutMs: 60000,
    workingDir: "/tmp",
    cliPath: "claude",
  };

  return new ClaudeCodeBackend(settings);
}
|
||||
|
||||
describe("Property 1: Claude backend required flags", () => {
  /** Whatever ClaudeCodeBackend.buildArgs returns (used as an array of strings below). */
  type BuiltArgs = ReturnType<ClaudeCodeBackend["buildArgs"]>;

  /** The generated inputs a single property run was built from. */
  interface Inputs {
    prompt: string;
    systemPromptFile: string;
    tools: string[];
    turns: number;
  }

  /**
   * Asserts that `predicate` holds for the args built from 100 generated
   * combinations of prompt, system-prompt file, tool list and max turns.
   */
  const holdsForAllInputs = (predicate: (args: BuiltArgs, inputs: Inputs) => boolean): void => {
    fc.assert(
      fc.property(
        nonEmptyString,
        nonEmptyString,
        toolsList,
        maxTurns,
        (prompt, systemPromptFile, tools, turns) => {
          const args = createBackend(tools, turns).buildArgs(prompt, systemPromptFile);
          return predicate(args, { prompt, systemPromptFile, tools, turns });
        },
      ),
      { numRuns: 100 },
    );
  };

  /**
   * Returns the element right after the first occurrence of `flag`,
   * or `undefined` when the flag is absent or is the last element.
   */
  const flagValue = (args: BuiltArgs, flag: string): string | undefined => {
    const at = args.indexOf(flag);
    return at === -1 ? undefined : args[at + 1];
  };

  it("generated args always contain -p flag with the prompt", () => {
    holdsForAllInputs((args, { prompt }) => flagValue(args, "-p") === prompt);
  });

  it("generated args always contain --output-format json", () => {
    holdsForAllInputs((args) => flagValue(args, "--output-format") === "json");
  });

  it("generated args always contain --dangerously-skip-permissions", () => {
    holdsForAllInputs((args) => args.includes("--dangerously-skip-permissions"));
  });

  it("generated args always contain --append-system-prompt-file with the file path", () => {
    holdsForAllInputs(
      (args, { systemPromptFile }) =>
        flagValue(args, "--append-system-prompt-file") === systemPromptFile,
    );
  });

  it("generated args always contain --max-turns with the configured value", () => {
    holdsForAllInputs((args, { turns }) => flagValue(args, "--max-turns") === String(turns));
  });

  it("generated args contain one --allowedTools entry per configured tool", () => {
    holdsForAllInputs((args, { tools }) => {
      // Collect the value following every --allowedTools flag.
      const emitted: string[] = [];
      for (let i = 0; i < args.length; i++) {
        if (args[i] !== "--allowedTools") continue;
        const value = args[i + 1];
        // A flag with nothing after it can never correspond to a configured tool.
        if (value === undefined) return false;
        emitted.push(value);
      }

      // Compare as multisets: the generated tool list may contain duplicates,
      // and each occurrence must be matched by exactly one emitted entry.
      // Order is deliberately not checked.
      const expected = [...tools].sort();
      const actual = [...emitted].sort();
      return (
        actual.length === expected.length &&
        actual.every((value, i) => value === expected[i])
      );
    });
  });
});
|
||||
95
tests/property/codex-backend.property.test.ts
Normal file
95
tests/property/codex-backend.property.test.ts
Normal file
@@ -0,0 +1,95 @@
|
||||
import { describe, it } from "vitest";
|
||||
import fc from "fast-check";
|
||||
import { CodexBackend } from "../../src/backends/codex-backend.js";
|
||||
import type { BackendAdapterConfig } from "../../src/backends/types.js";
|
||||
|
||||
// Feature: multi-cli-backend, Property 2: Codex backend required flags
|
||||
// **Validates: Requirements 3.2, 3.3, 3.4, 3.5**
|
||||
|
||||
/**
 * Arbitrary string of 1-200 characters, used as the prompt.
 * Only the length is constrained; no characters are filtered out.
 */
const nonEmptyString = fc.string({ maxLength: 200, minLength: 1 });

/**
 * Arbitrary absolute-looking path: a leading `/` followed by 1-100
 * characters drawn from `A-Za-z0-9_/.-`.
 */
const workingDir = fc.stringMatching(/^\/[A-Za-z0-9_/.-]{1,100}$/);
|
||||
|
||||
/**
 * Builds a CodexBackend with fixed test settings.
 *
 * @param workDir - value placed in the config's `workingDir`
 * @returns a backend constructed from that config
 */
function createBackend(workDir: string): CodexBackend {
  const settings: BackendAdapterConfig = {
    maxTurns: 25,
    allowedTools: [],
    queryTimeoutMs: 60000,
    workingDir: workDir,
    cliPath: "codex",
  };

  return new CodexBackend(settings);
}
|
||||
|
||||
describe("Property 2: Codex backend required flags", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  /** Args produced for `prompt` by a backend configured with `workDir`. */
  const buildArgsFor = (prompt: string, workDir: string) =>
    createBackend(workDir).buildArgs(prompt);

  it("generated args always contain the exec subcommand", () => {
    // The check pins `exec` to the first position, not merely its presence.
    const startsWithExec = fc.property(nonEmptyString, workingDir, (prompt, workDir) => {
      const [first] = buildArgsFor(prompt, workDir);
      return first === "exec";
    });

    fc.assert(startsWithExec, { numRuns });
  });

  it("generated args always contain --json", () => {
    const hasJsonFlag = fc.property(nonEmptyString, workingDir, (prompt, workDir) =>
      buildArgsFor(prompt, workDir).includes("--json"),
    );

    fc.assert(hasJsonFlag, { numRuns });
  });

  it("generated args always contain --dangerously-bypass-approvals-and-sandbox", () => {
    const hasBypassFlag = fc.property(nonEmptyString, workingDir, (prompt, workDir) =>
      buildArgsFor(prompt, workDir).includes("--dangerously-bypass-approvals-and-sandbox"),
    );

    fc.assert(hasBypassFlag, { numRuns });
  });

  it("generated args always contain --cd with the configured working directory", () => {
    const hasCdWithWorkDir = fc.property(nonEmptyString, workingDir, (prompt, workDir) => {
      const args = buildArgsFor(prompt, workDir);
      const at = args.indexOf("--cd");
      if (at === -1) return false;
      // The directory must be the element right after the flag.
      return args[at + 1] === workDir;
    });

    fc.assert(hasCdWithWorkDir, { numRuns });
  });
});
|
||||
329
tests/property/cross-backend.property.test.ts
Normal file
329
tests/property/cross-backend.property.test.ts
Normal file
@@ -0,0 +1,329 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import fc from "fast-check";
|
||||
import { ClaudeCodeBackend } from "../../src/backends/claude-backend.js";
|
||||
import { CodexBackend } from "../../src/backends/codex-backend.js";
|
||||
import { GeminiBackend } from "../../src/backends/gemini-backend.js";
|
||||
import { OpenCodeBackend } from "../../src/backends/opencode-backend.js";
|
||||
import type { BackendAdapterConfig } from "../../src/backends/types.js";
|
||||
|
||||
// ── Shared arbitraries ──────────────────────────────────────────────
|
||||
|
||||
/** Arbitrary string of 1-200 characters, used for prompts and system prompts. */
const nonEmptyString = fc.string({ maxLength: 200, minLength: 1 });

/** Arbitrary session ID: 1-64 characters drawn from `A-Za-z0-9_-`. */
const sessionId = fc.stringMatching(/^[A-Za-z0-9_-]{1,64}$/);

/** Arbitrary response text of 1-500 characters. */
const responseText = fc.string({ maxLength: 500, minLength: 1 });

/** Arbitrary failing exit code in the range 1..255. */
const nonZeroExitCode = fc.integer({ max: 255, min: 1 });

/** Arbitrary stderr content of 0-500 characters (may be empty). */
const stderrString = fc.string({ maxLength: 500, minLength: 0 });
|
||||
|
||||
// ── Helpers ─────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Produces a BackendAdapterConfig from fixed test defaults.
 *
 * @param overrides - optional fields that replace the matching defaults
 * @returns the defaults with any overrides applied on top
 */
function makeConfig(overrides?: Partial<BackendAdapterConfig>): BackendAdapterConfig {
  const defaults: BackendAdapterConfig = {
    cliPath: "/usr/bin/test",
    workingDir: "/tmp",
    queryTimeoutMs: 60000,
    allowedTools: [],
    maxTurns: 25,
  };

  // Spreading `undefined` is a no-op, so a missing `overrides` leaves the defaults intact.
  return { ...defaults, ...overrides };
}
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// Feature: multi-cli-backend, Property 5: Session resume args across backends
|
||||
// **Validates: Requirements 2.3, 3.7, 4.5, 5.4**
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
describe("Property 5: Session resume args across backends", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  /** True when `flag` occurs in `args` and its first occurrence is directly followed by `expected`. */
  const flagIsFollowedBy = (args: readonly string[], flag: string, expected: string): boolean => {
    const at = args.indexOf(flag);
    return at !== -1 && args[at + 1] === expected;
  };

  describe("Claude: --resume <id> when session provided, absent otherwise", () => {
    it("includes --resume <id> when session ID is provided", () => {
      const resumesSession = fc.property(
        nonEmptyString,
        nonEmptyString,
        sessionId,
        (prompt, sysFile, sid) => {
          const args = new ClaudeCodeBackend(makeConfig()).buildArgs(prompt, sysFile, sid);
          return flagIsFollowedBy(args, "--resume", sid);
        },
      );

      fc.assert(resumesSession, { numRuns });
    });

    it("does not include --resume when no session ID is provided", () => {
      const omitsResume = fc.property(nonEmptyString, nonEmptyString, (prompt, sysFile) => {
        const args = new ClaudeCodeBackend(makeConfig()).buildArgs(prompt, sysFile);
        return !args.includes("--resume");
      });

      fc.assert(omitsResume, { numRuns });
    });
  });

  describe("Codex: resume <id> subcommand when session provided, absent otherwise", () => {
    it("includes resume <id> after exec when session ID is provided", () => {
      const resumesAfterExec = fc.property(
        nonEmptyString,
        nonEmptyString,
        sessionId,
        (prompt, sysPr, sid) => {
          const args = new CodexBackend(makeConfig()).buildArgs(prompt, sysPr, sid);
          const execAt = args.indexOf("exec");
          const resumeAt = args.indexOf("resume");

          // Both tokens must be present, with `resume <id>` after `exec`.
          if (execAt === -1 || resumeAt === -1) return false;
          return resumeAt > execAt && args[resumeAt + 1] === sid;
        },
      );

      fc.assert(resumesAfterExec, { numRuns });
    });

    it("does not include resume when no session ID is provided", () => {
      const omitsResume = fc.property(nonEmptyString, nonEmptyString, (prompt, sysPr) => {
        const args = new CodexBackend(makeConfig()).buildArgs(prompt, sysPr);
        return !args.includes("resume");
      });

      fc.assert(omitsResume, { numRuns });
    });
  });

  describe("Gemini: --resume <id> when session provided, absent otherwise", () => {
    it("includes --resume <id> when session ID is provided", () => {
      const resumesSession = fc.property(
        nonEmptyString,
        nonEmptyString,
        sessionId,
        (prompt, sysPr, sid) => {
          const args = new GeminiBackend(makeConfig()).buildArgs(prompt, sysPr, sid);
          return flagIsFollowedBy(args, "--resume", sid);
        },
      );

      fc.assert(resumesSession, { numRuns });
    });

    it("does not include --resume when no session ID is provided", () => {
      const omitsResume = fc.property(nonEmptyString, nonEmptyString, (prompt, sysPr) => {
        const args = new GeminiBackend(makeConfig()).buildArgs(prompt, sysPr);
        return !args.includes("--resume");
      });

      fc.assert(omitsResume, { numRuns });
    });
  });

  describe("OpenCode: --session <id> --continue when session provided, absent otherwise", () => {
    it("includes --session <id> --continue when session ID is provided", () => {
      const continuesSession = fc.property(
        nonEmptyString,
        nonEmptyString,
        sessionId,
        (prompt, sysPr, sid) => {
          const args = new OpenCodeBackend(makeConfig()).buildArgs(prompt, sysPr, sid);
          return flagIsFollowedBy(args, "--session", sid) && args.includes("--continue");
        },
      );

      fc.assert(continuesSession, { numRuns });
    });

    it("does not include --session or --continue when no session ID is provided", () => {
      const omitsSessionFlags = fc.property(nonEmptyString, nonEmptyString, (prompt, sysPr) => {
        const args = new OpenCodeBackend(makeConfig()).buildArgs(prompt, sysPr);
        return !(args.includes("--session") || args.includes("--continue"));
      });

      fc.assert(omitsSessionFlags, { numRuns });
    });
  });
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// Feature: multi-cli-backend, Property 6: Output parsing extracts correct fields
|
||||
// **Validates: Requirements 2.4, 3.6, 4.6, 5.6, 8.1**
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
describe("Property 6: Output parsing extracts correct fields", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  it("Claude: parses JSON array with system/init session_id and result text", () => {
    const parsesClaudeOutput = fc.property(responseText, sessionId, (text, sid) => {
      // Two events: an init event carrying the session ID, then the result text.
      const events = [
        { type: "system", subtype: "init", session_id: sid },
        { type: "result", result: text },
      ];
      const parsed = new ClaudeCodeBackend(makeConfig()).parseOutput(JSON.stringify(events));

      if (parsed.isError !== false) return false;
      return parsed.responseText === text && parsed.sessionId === sid;
    });

    fc.assert(parsesClaudeOutput, { numRuns });
  });

  it("Codex: parses NDJSON with assistant message and session_id", () => {
    const parsesCodexOutput = fc.property(responseText, sessionId, (text, sid) => {
      // A single NDJSON line, so no newline separator is needed.
      const ndjson = JSON.stringify({
        type: "message",
        role: "assistant",
        content: text,
        session_id: sid,
      });
      const parsed = new CodexBackend(makeConfig()).parseOutput(ndjson);

      if (parsed.isError !== false) return false;
      return parsed.responseText === text && parsed.sessionId === sid;
    });

    fc.assert(parsesCodexOutput, { numRuns });
  });

  it("Gemini: parses JSON object with response and session_id", () => {
    const parsesGeminiOutput = fc.property(responseText, sessionId, (text, sid) => {
      const payload = JSON.stringify({ response: text, session_id: sid });
      const parsed = new GeminiBackend(makeConfig()).parseOutput(payload);

      if (parsed.isError !== false) return false;
      return parsed.responseText === text && parsed.sessionId === sid;
    });

    fc.assert(parsesGeminiOutput, { numRuns });
  });

  it("OpenCode: parses NDJSON with result type and session_id", () => {
    const parsesOpenCodeOutput = fc.property(responseText, sessionId, (text, sid) => {
      // A single NDJSON line, so no newline separator is needed.
      const ndjson = JSON.stringify({ type: "result", text, session_id: sid });
      const parsed = new OpenCodeBackend(makeConfig()).parseOutput(ndjson);

      if (parsed.isError !== false) return false;
      return parsed.responseText === text && parsed.sessionId === sid;
    });

    fc.assert(parsesOpenCodeOutput, { numRuns });
  });
});
|
||||
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
// Feature: multi-cli-backend, Property 8: Non-zero exit code produces error result
|
||||
// **Validates: Requirements 8.2**
|
||||
// ═══════════════════════════════════════════════════════════════════
|
||||
|
||||
/**
|
||||
* Each backend's spawnCli resolves with:
|
||||
* { isError: true, responseText: "<Name> CLI error (exit <code>): <stderr>" }
|
||||
* for non-zero exit codes.
|
||||
*
|
||||
* Since we can't easily spawn real processes, we verify the error result
|
||||
* format contract by constructing the expected error string and checking
|
||||
* that it matches the pattern each backend produces.
|
||||
*/
|
||||
|
||||
/**
 * Maps backend name → the prefix used in error messages.
 *
 * NOTE(review): the prefixes are meant to match what each backend emits;
 * that is not verified against the backend sources from this file.
 */
const backendErrorPrefixes: Record<string, string> = {
  claude: "Claude CLI error",
  codex: "Codex CLI error",
  gemini: "Gemini CLI error",
  opencode: "OpenCode CLI error",
};

/**
 * Builds the error result expected from a backend for a non-zero exit code:
 * `"<prefix> (exit <code>): <stderr>"`.
 *
 * @param backendName - key of `backendErrorPrefixes` ("claude", "codex", "gemini", "opencode")
 * @param exitCode - exit code embedded in the message
 * @param stderr - stderr text; only the first 500 characters are used, and an
 *   empty string is replaced by "unknown error"
 * @returns an object with `isError: true` and the formatted `responseText`
 * @throws Error when `backendName` is not a key of `backendErrorPrefixes`
 */
function simulateErrorResult(
  backendName: string,
  exitCode: number,
  stderr: string,
): { isError: boolean; responseText: string } {
  // Own-property check: a plain index lookup would silently produce an
  // "undefined (exit …)" message for unknown names, and would pick up
  // inherited members for names such as "toString".
  if (!Object.prototype.hasOwnProperty.call(backendErrorPrefixes, backendName)) {
    const known = Object.keys(backendErrorPrefixes).join(", ");
    throw new Error(`Unknown backend "${backendName}"; expected one of: ${known}`);
  }

  const prefix = backendErrorPrefixes[backendName];
  const truncatedStderr = stderr.slice(0, 500) || "unknown error";

  return {
    isError: true,
    responseText: `${prefix} (exit ${exitCode}): ${truncatedStderr}`,
  };
}
|
||||
|
||||
describe("Property 8: Non-zero exit code produces error result", () => {
  const backendNames = ["claude", "codex", "gemini", "opencode"] as const;

  // Number of generated cases per property.
  const numRuns = 100;

  it("for any backend, non-zero exit code and stderr, result has isError=true and responseText contains stderr", () => {
    const containsAllParts = fc.property(
      fc.constantFrom(...backendNames),
      nonZeroExitCode,
      stderrString,
      (backend, exitCode, stderr) => {
        const { isError, responseText: message } = simulateErrorResult(backend, exitCode, stderr);
        const prefix = backendErrorPrefixes[backend];

        // Non-empty stderr appears (cut to 500 chars); empty stderr becomes "unknown error".
        const expectedDetail = stderr.length > 0 ? stderr.slice(0, 500) : "unknown error";

        return (
          isError &&
          message.includes(prefix) &&
          message.includes(`exit ${exitCode}`) &&
          message.includes(expectedDetail)
        );
      },
    );

    fc.assert(containsAllParts, { numRuns });
  });

  it("error format matches each backend's actual spawnCli pattern", () => {
    const matchesExactFormat = fc.property(
      fc.constantFrom(...backendNames),
      nonZeroExitCode,
      stderrString,
      (backend, exitCode, stderr) => {
        const { responseText: actual } = simulateErrorResult(backend, exitCode, stderr);

        // Rebuild the expected message independently of the helper's result.
        const detail = stderr.slice(0, 500) || "unknown error";
        const expected = `${backendErrorPrefixes[backend]} (exit ${exitCode}): ${detail}`;

        return actual === expected;
      },
    );

    fc.assert(matchesExactFormat, { numRuns });
  });
});
|
||||
74
tests/property/gemini-backend.property.test.ts
Normal file
74
tests/property/gemini-backend.property.test.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import { describe, it } from "vitest";
|
||||
import fc from "fast-check";
|
||||
import { GeminiBackend } from "../../src/backends/gemini-backend.js";
|
||||
import type { BackendAdapterConfig } from "../../src/backends/types.js";
|
||||
|
||||
// Feature: multi-cli-backend, Property 3: Gemini backend required flags
|
||||
// **Validates: Requirements 4.2, 4.3, 4.4**
|
||||
|
||||
/**
 * Arbitrary string of 1-200 characters, used as the prompt.
 * Only the length is constrained; no characters are filtered out.
 */
const nonEmptyString = fc.string({ maxLength: 200, minLength: 1 });
|
||||
|
||||
/**
 * Builds a GeminiBackend with fixed test settings.
 *
 * @returns a backend constructed from the fixed config
 */
function createBackend(): GeminiBackend {
  const settings: BackendAdapterConfig = {
    maxTurns: 25,
    allowedTools: [],
    queryTimeoutMs: 60000,
    workingDir: "/workspace",
    cliPath: "gemini",
  };

  return new GeminiBackend(settings);
}
|
||||
|
||||
describe("Property 3: Gemini backend required flags", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  /** Args a freshly created backend produces for `prompt`. */
  const buildArgsFor = (prompt: string) => createBackend().buildArgs(prompt);

  it("generated args always contain the prompt as a positional argument", () => {
    const mentionsPrompt = fc.property(nonEmptyString, (prompt) => {
      // Passes when any single arg contains the prompt as a substring.
      // NOTE(review): this does not check the arg's position, nor that it is
      // not itself a flag or a flag value - confirm whether that is sufficient.
      for (const arg of buildArgsFor(prompt)) {
        if (arg.includes(prompt)) return true;
      }
      return false;
    });

    fc.assert(mentionsPrompt, { numRuns });
  });

  it("generated args always contain --output-format json", () => {
    const requestsJson = fc.property(nonEmptyString, (prompt) => {
      const args = buildArgsFor(prompt);
      const at = args.indexOf("--output-format");
      if (at === -1) return false;
      return args[at + 1] === "json";
    });

    fc.assert(requestsJson, { numRuns });
  });

  it("generated args always contain --approval-mode yolo", () => {
    const usesYoloMode = fc.property(nonEmptyString, (prompt) => {
      const args = buildArgsFor(prompt);
      const at = args.indexOf("--approval-mode");
      if (at === -1) return false;
      return args[at + 1] === "yolo";
    });

    fc.assert(usesYoloMode, { numRuns });
  });
});
|
||||
94
tests/property/opencode-backend.property.test.ts
Normal file
94
tests/property/opencode-backend.property.test.ts
Normal file
@@ -0,0 +1,94 @@
|
||||
import { describe, it } from "vitest";
|
||||
import fc from "fast-check";
|
||||
import { OpenCodeBackend } from "../../src/backends/opencode-backend.js";
|
||||
import type { BackendAdapterConfig } from "../../src/backends/types.js";
|
||||
|
||||
// Feature: multi-cli-backend, Property 4: OpenCode backend required flags
|
||||
// **Validates: Requirements 5.2, 5.3, 5.5**
|
||||
|
||||
/**
 * Arbitrary string of 1-200 characters, used as the prompt.
 * Only the length is constrained; no characters are filtered out.
 */
const nonEmptyString = fc.string({ maxLength: 200, minLength: 1 });

/**
 * Arbitrary model identifier shaped like `provider/model`:
 * 1-20 lowercase letters, a slash, then 1-40 of `a-z0-9-`.
 */
const modelString = fc.stringMatching(/^[a-z]{1,20}\/[a-z0-9-]{1,40}$/);
|
||||
|
||||
/**
 * Builds an OpenCodeBackend with fixed test settings.
 *
 * @param model - optional value placed in the config's `model`
 * @returns a backend constructed from that config
 */
function createBackend(model?: string): OpenCodeBackend {
  const settings: BackendAdapterConfig = {
    model,
    maxTurns: 25,
    allowedTools: [],
    queryTimeoutMs: 60000,
    workingDir: "/tmp",
    cliPath: "opencode",
  };

  return new OpenCodeBackend(settings);
}
|
||||
|
||||
describe("Property 4: OpenCode backend required flags", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  it("generated args always start with the run subcommand", () => {
    const startsWithRun = fc.property(nonEmptyString, (prompt) => {
      const [first] = createBackend().buildArgs(prompt);
      return first === "run";
    });

    fc.assert(startsWithRun, { numRuns });
  });

  it("generated args always contain --format json", () => {
    const requestsJson = fc.property(nonEmptyString, (prompt) => {
      const args = createBackend().buildArgs(prompt);
      const at = args.indexOf("--format");
      if (at === -1) return false;
      return args[at + 1] === "json";
    });

    fc.assert(requestsJson, { numRuns });
  });

  it("generated args contain --model when a model is configured", () => {
    const passesModel = fc.property(nonEmptyString, modelString, (prompt, model) => {
      const args = createBackend(model).buildArgs(prompt);
      const at = args.indexOf("--model");
      if (at === -1) return false;
      // The configured model must be the element right after the flag.
      return args[at + 1] === model;
    });

    fc.assert(passesModel, { numRuns });
  });

  it("generated args do not contain --model when no model is configured", () => {
    const omitsModel = fc.property(nonEmptyString, (prompt) => {
      const args = createBackend(undefined).buildArgs(prompt);
      return !args.includes("--model");
    });

    fc.assert(omitsModel, { numRuns });
  });
});
|
||||
84
tests/property/registry.property.test.ts
Normal file
84
tests/property/registry.property.test.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
import { describe, it, expect } from "vitest";
|
||||
import fc from "fast-check";
|
||||
import { resolveBackendName, createBackend } from "../../src/backends/registry.js";
|
||||
import { ClaudeCodeBackend } from "../../src/backends/claude-backend.js";
|
||||
import { CodexBackend } from "../../src/backends/codex-backend.js";
|
||||
import { GeminiBackend } from "../../src/backends/gemini-backend.js";
|
||||
import { OpenCodeBackend } from "../../src/backends/opencode-backend.js";
|
||||
import type { BackendAdapterConfig, BackendName } from "../../src/backends/types.js";
|
||||
|
||||
// Feature: multi-cli-backend, Property 7: Backend name resolution
|
||||
// **Validates: Requirements 6.1, 6.2, 6.3, 6.5**
|
||||
|
||||
/** The four backend names the properties below treat as valid. */
const VALID_NAMES: BackendName[] = ["claude", "codex", "gemini", "opencode"];

/** Arbitrary that picks one of the valid backend names. */
const validBackendName = fc.constantFrom(...VALID_NAMES);

/** Arbitrary string of 1-100 characters that is not exactly one of the valid names. */
const invalidBackendName = fc
  .string({ maxLength: 100, minLength: 1 })
  .filter((candidate) => VALID_NAMES.every((valid) => valid !== candidate));
|
||||
|
||||
describe("Property 7: Backend name resolution", () => {
  // Number of generated cases per property.
  const numRuns = 100;

  it("returns the corresponding BackendName for any valid backend name string", () => {
    fc.assert(
      fc.property(validBackendName, (name) => resolveBackendName(name) === name),
      { numRuns },
    );
  });

  it("returns 'claude' when input is undefined", () => {
    expect(resolveBackendName(undefined)).toBe("claude");
  });

  it("throws a descriptive error for any invalid string value", () => {
    fc.assert(
      fc.property(invalidBackendName, (name) => {
        try {
          resolveBackendName(name);
        } catch (err) {
          // Narrow instead of casting: a thrown non-Error value has no usable
          // `message`, so it counts as a failure of the "descriptive error" contract.
          if (!(err instanceof Error)) return false;

          // The message must mention the rejected value and list every valid option.
          const { message } = err;
          return (
            message.includes(name) &&
            VALID_NAMES.every((valid) => message.includes(valid))
          );
        }

        // Reaching this point means nothing was thrown for an invalid name.
        return false;
      }),
      { numRuns },
    );
  });

  it("createBackend returns the correct implementation for each valid name", () => {
    const config: BackendAdapterConfig = {
      cliPath: "/usr/bin/test",
      workingDir: "/tmp",
      queryTimeoutMs: 30000,
      allowedTools: [],
      maxTurns: 25,
    };

    // Expected concrete class for each backend name.
    const expectedTypes: Record<BackendName, new (cfg: BackendAdapterConfig) => unknown> = {
      claude: ClaudeCodeBackend,
      codex: CodexBackend,
      gemini: GeminiBackend,
      opencode: OpenCodeBackend,
    };

    fc.assert(
      fc.property(validBackendName, (name) => {
        const backend = createBackend(name, config);
        // Both the concrete class and the self-reported name must match.
        return backend instanceof expectedTypes[name] && backend.name() === name;
      }),
      { numRuns },
    );
  });
});
|
||||
Reference in New Issue
Block a user