Implement BackendAdapter interface with four CLI backends:
- ClaudeCodeBackend (extracted from AgentRuntime)
- CodexBackend (OpenAI Codex CLI)
- GeminiBackend (Google Gemini CLI)
- OpenCodeBackend (OpenCode CLI)

Add BackendRegistry for resolution/creation via AGENT_BACKEND env var.
Refactor AgentRuntime to delegate to BackendAdapter instead of hardcoding Claude CLI.
Update GatewayConfig with new env vars (AGENT_BACKEND, BACKEND_CLI_PATH, BACKEND_MODEL, BACKEND_MAX_TURNS).
Includes 10 property-based test files and unit tests for edge cases.
330 lines
13 KiB
TypeScript
330 lines
13 KiB
TypeScript
import { describe, it, expect } from "vitest";
|
|
import fc from "fast-check";
|
|
import { ClaudeCodeBackend } from "../../src/backends/claude-backend.js";
|
|
import { CodexBackend } from "../../src/backends/codex-backend.js";
|
|
import { GeminiBackend } from "../../src/backends/gemini-backend.js";
|
|
import { OpenCodeBackend } from "../../src/backends/opencode-backend.js";
|
|
import type { BackendAdapterConfig } from "../../src/backends/types.js";
|
|
|
|
// ── Shared arbitraries ──────────────────────────────────────────────
|
|
|
|
/** Free-form text of length 1–200, used below for prompt-like arguments. */
const nonEmptyString = fc.string({ minLength: 1, maxLength: 200 });

/**
 * Session identifier of length 1–64 built only from letters, digits, `_`
 * and `-`, so it never contains whitespace.
 */
const sessionId = fc.stringMatching(/^[A-Za-z0-9_-]{1,64}$/);

/** Response body text of length 1–500. */
const responseText = fc.string({ minLength: 1, maxLength: 500 });

/** Process exit code in the failing range 1–255. */
const nonZeroExitCode = fc.integer({ max: 255, min: 1 });

/** Captured stderr of length 0–500; the empty string is a valid sample. */
const stderrString = fc.string({ maxLength: 500 });
|
|
|
|
// ── Helpers ─────────────────────────────────────────────────────────
|
|
|
|
/**
 * Builds a `BackendAdapterConfig` for use in tests.
 *
 * A fixed baseline is created on every call and the caller's values are
 * layered on top, so any key present in `overrides` wins over the baseline.
 *
 * @param overrides - Optional subset of config fields to replace.
 * @returns A new config object; nothing is shared between calls.
 */
function makeConfig(overrides?: Partial<BackendAdapterConfig>): BackendAdapterConfig {
  const baseline: BackendAdapterConfig = {
    cliPath: "/usr/bin/test",
    workingDir: "/tmp",
    queryTimeoutMs: 60000,
    allowedTools: [],
    maxTurns: 25,
  };

  // Spreading `undefined` is a no-op, so calling with no argument yields the baseline.
  return { ...baseline, ...overrides };
}
|
|
|
|
// ═══════════════════════════════════════════════════════════════════
|
|
// Feature: multi-cli-backend, Property 5: Session resume args across backends
|
|
// **Validates: Requirements 2.3, 3.7, 4.5, 5.4**
|
|
// ═══════════════════════════════════════════════════════════════════
|
|
|
|
describe("Property 5: Session resume args across backends", () => {
  /**
   * Argv tokens the assertions below search for by exact match.
   *
   * NOTE(review): this assumes `buildArgs` passes the prompt / system text
   * through as argv entries (not visible from this file). If it does, an
   * unconstrained generated string equal to one of these tokens would make a
   * property fail for reasons unrelated to session handling.
   */
  const reservedTokens = ["--resume", "resume", "exec", "--session", "--continue"];

  /** Prompt / system text that can never be mistaken for a token under test. */
  const freeText = nonEmptyString.filter((s) => !reservedTokens.includes(s));

  /**
   * Asserts that `flag` occurs in `args` and that the entry right after its
   * first occurrence is `value`.
   */
  function expectFlagWithValue(args: readonly string[], flag: string, value: string): void {
    expect(args).toContain(flag);
    expect(args[args.indexOf(flag) + 1]).toBe(value);
  }

  // Each property throws via `expect` instead of returning a boolean so a
  // failing run reports the exact mismatch alongside the counterexample.

  describe("Claude: --resume <id> when session provided, absent otherwise", () => {
    it("includes --resume <id> when session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, sessionId, (prompt, sysFile, sid) => {
          const args = new ClaudeCodeBackend(makeConfig()).buildArgs(prompt, sysFile, sid);
          expectFlagWithValue(args, "--resume", sid);
        }),
        { numRuns: 100 },
      );
    });

    it("does not include --resume when no session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, (prompt, sysFile) => {
          const args = new ClaudeCodeBackend(makeConfig()).buildArgs(prompt, sysFile);
          expect(args).not.toContain("--resume");
        }),
        { numRuns: 100 },
      );
    });
  });

  describe("Codex: resume <id> subcommand when session provided, absent otherwise", () => {
    it("includes resume <id> after exec when session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, sessionId, (prompt, sysPr, sid) => {
          const args = new CodexBackend(makeConfig()).buildArgs(prompt, sysPr, sid);
          const execIdx = args.indexOf("exec");
          const resumeIdx = args.indexOf("resume");

          expect(execIdx).not.toBe(-1);
          // `resume` must exist and come after `exec`.
          expect(resumeIdx).not.toBe(-1);
          expect(resumeIdx).toBeGreaterThan(execIdx);
          expect(args[resumeIdx + 1]).toBe(sid);
        }),
        { numRuns: 100 },
      );
    });

    it("does not include resume when no session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, (prompt, sysPr) => {
          const args = new CodexBackend(makeConfig()).buildArgs(prompt, sysPr);
          expect(args).not.toContain("resume");
        }),
        { numRuns: 100 },
      );
    });
  });

  describe("Gemini: --resume <id> when session provided, absent otherwise", () => {
    it("includes --resume <id> when session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, sessionId, (prompt, sysPr, sid) => {
          const args = new GeminiBackend(makeConfig()).buildArgs(prompt, sysPr, sid);
          expectFlagWithValue(args, "--resume", sid);
        }),
        { numRuns: 100 },
      );
    });

    it("does not include --resume when no session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, (prompt, sysPr) => {
          const args = new GeminiBackend(makeConfig()).buildArgs(prompt, sysPr);
          expect(args).not.toContain("--resume");
        }),
        { numRuns: 100 },
      );
    });
  });

  describe("OpenCode: --session <id> --continue when session provided, absent otherwise", () => {
    it("includes --session <id> --continue when session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, sessionId, (prompt, sysPr, sid) => {
          const args = new OpenCodeBackend(makeConfig()).buildArgs(prompt, sysPr, sid);
          expectFlagWithValue(args, "--session", sid);
          expect(args).toContain("--continue");
        }),
        { numRuns: 100 },
      );
    });

    it("does not include --session or --continue when no session ID is provided", () => {
      fc.assert(
        fc.property(freeText, freeText, (prompt, sysPr) => {
          const args = new OpenCodeBackend(makeConfig()).buildArgs(prompt, sysPr);
          expect(args).not.toContain("--session");
          expect(args).not.toContain("--continue");
        }),
        { numRuns: 100 },
      );
    });
  });
});
|
|
|
|
// ═══════════════════════════════════════════════════════════════════
|
|
// Feature: multi-cli-backend, Property 6: Output parsing extracts correct fields
|
|
// **Validates: Requirements 2.4, 3.6, 4.6, 5.6, 8.1**
|
|
// ═══════════════════════════════════════════════════════════════════
|
|
|
|
describe("Property 6: Output parsing extracts correct fields", () => {
  // Every case feeds one backend a payload in the shape this test constructs
  // for it, then checks that `parseOutput` hands back the same text and
  // session ID with `isError` false. `toMatchObject` is used so a failing run
  // shows a diff of all three fields instead of "returned false".

  it("Claude: parses JSON array with system/init session_id and result text", () => {
    fc.assert(
      fc.property(responseText, sessionId, (text, sid) => {
        const backend = new ClaudeCodeBackend(makeConfig());
        // A JSON array: an init event carrying the session ID, then the result.
        const json = JSON.stringify([
          { type: "system", subtype: "init", session_id: sid },
          { type: "result", result: text },
        ]);

        expect(backend.parseOutput(json)).toMatchObject({
          isError: false,
          responseText: text,
          sessionId: sid,
        });
      }),
      { numRuns: 100 },
    );
  });

  it("Codex: parses NDJSON with assistant message and session_id", () => {
    fc.assert(
      fc.property(responseText, sessionId, (text, sid) => {
        const backend = new CodexBackend(makeConfig());
        // Newline-delimited JSON; this sample contains a single line.
        const lines = [
          JSON.stringify({ type: "message", role: "assistant", content: text, session_id: sid }),
        ].join("\n");

        expect(backend.parseOutput(lines)).toMatchObject({
          isError: false,
          responseText: text,
          sessionId: sid,
        });
      }),
      { numRuns: 100 },
    );
  });

  it("Gemini: parses JSON object with response and session_id", () => {
    fc.assert(
      fc.property(responseText, sessionId, (text, sid) => {
        const backend = new GeminiBackend(makeConfig());
        const json = JSON.stringify({ response: text, session_id: sid });

        expect(backend.parseOutput(json)).toMatchObject({
          isError: false,
          responseText: text,
          sessionId: sid,
        });
      }),
      { numRuns: 100 },
    );
  });

  it("OpenCode: parses NDJSON with result type and session_id", () => {
    fc.assert(
      fc.property(responseText, sessionId, (text, sid) => {
        const backend = new OpenCodeBackend(makeConfig());
        // Newline-delimited JSON; this sample contains a single line.
        const lines = [JSON.stringify({ type: "result", text, session_id: sid })].join("\n");

        expect(backend.parseOutput(lines)).toMatchObject({
          isError: false,
          responseText: text,
          sessionId: sid,
        });
      }),
      { numRuns: 100 },
    );
  });
});
|
|
|
|
// ═══════════════════════════════════════════════════════════════════
|
|
// Feature: multi-cli-backend, Property 8: Non-zero exit code produces error result
|
|
// **Validates: Requirements 8.2**
|
|
// ═══════════════════════════════════════════════════════════════════
|
|
|
|
/**
|
|
* Each backend's spawnCli resolves with:
|
|
* { isError: true, responseText: "<Name> CLI error (exit <code>): <stderr>" }
|
|
* for non-zero exit codes.
|
|
*
|
|
* Since we can't easily spawn real processes, we verify the error result
|
|
* format contract by constructing the expected error string and checking
|
|
* that it matches the pattern each backend produces.
|
|
*/
|
|
|
|
/** Backend identifiers that have an entry in `backendErrorPrefixes`. */
type BackendName = "claude" | "codex" | "gemini" | "opencode";

/** Maps backend name → the prefix used in error messages. */
const backendErrorPrefixes: Record<BackendName, string> = {
  claude: "Claude CLI error",
  codex: "Codex CLI error",
  gemini: "Gemini CLI error",
  opencode: "OpenCode CLI error",
};

/**
 * Type guard: true only for names that are own keys of the prefix table, so
 * inherited keys such as `toString` are rejected.
 */
function isBackendName(name: string): name is BackendName {
  return Object.prototype.hasOwnProperty.call(backendErrorPrefixes, name);
}

/**
 * Builds the error result expected for a backend whose CLI exited non-zero.
 *
 * The message has the form `<prefix> (exit <code>): <detail>`, where
 * `<detail>` is the first 500 characters of `stderr`, or `"unknown error"`
 * when `stderr` is empty.
 *
 * NOTE(review): this is a test-local simulation intended to mirror each
 * backend's spawnCli close handler; that correspondence cannot be confirmed
 * from this file.
 *
 * @param backendName - One of the keys of `backendErrorPrefixes`.
 * @param exitCode - Exit code to embed in the message.
 * @param stderr - Raw stderr text; may be empty.
 * @returns An object with `isError: true` and the formatted `responseText`.
 * @throws Error if `backendName` has no registered prefix. Without this
 *   check the message would silently start with "undefined".
 */
function simulateErrorResult(
  backendName: string,
  exitCode: number,
  stderr: string,
): { isError: boolean; responseText: string } {
  if (!isBackendName(backendName)) {
    const known = Object.keys(backendErrorPrefixes).join(", ");
    throw new Error(`Unknown backend "${backendName}"; expected one of: ${known}`);
  }

  const prefix = backendErrorPrefixes[backendName];
  // `||` is deliberate: an empty stderr must fall back to the placeholder.
  const truncatedStderr = stderr.slice(0, 500) || "unknown error";

  return {
    isError: true,
    responseText: `${prefix} (exit ${exitCode}): ${truncatedStderr}`,
  };
}
|
|
|
|
describe("Property 8: Non-zero exit code produces error result", () => {
  // NOTE(review): both tests below call `simulateErrorResult`, a helper
  // defined in this test file, rather than a backend instance. They pin the
  // expected message format but do not exercise backend code.
  const backendNames = ["claude", "codex", "gemini", "opencode"] as const;

  it("for any backend, non-zero exit code and stderr, result has isError=true and responseText contains stderr", () => {
    fc.assert(
      fc.property(
        fc.constantFrom(...backendNames),
        nonZeroExitCode,
        stderrString,
        (backend, exitCode, stderr) => {
          const result = simulateErrorResult(backend, exitCode, stderr);

          // `expect` is used instead of returning a boolean so a failing run
          // names the condition that broke.
          expect(result.isError).toBe(true);
          expect(result.responseText).toContain(backendErrorPrefixes[backend]);
          expect(result.responseText).toContain(`exit ${exitCode}`);

          // Stderr is expected truncated to 500 characters; an empty stderr
          // is expected to be replaced by the "unknown error" placeholder.
          const expectedDetail = stderr.length > 0 ? stderr.slice(0, 500) : "unknown error";
          expect(result.responseText).toContain(expectedDetail);
        },
      ),
      { numRuns: 100 },
    );
  });

  it("error format matches each backend's actual spawnCli pattern", () => {
    fc.assert(
      fc.property(
        fc.constantFrom(...backendNames),
        nonZeroExitCode,
        stderrString,
        (backend, exitCode, stderr) => {
          const result = simulateErrorResult(backend, exitCode, stderr);

          // Rebuild the full message independently and require an exact match.
          const detail = stderr.slice(0, 500) || "unknown error";
          const expected = `${backendErrorPrefixes[backend]} (exit ${exitCode}): ${detail}`;
          expect(result.responseText).toBe(expected);
        },
      ),
      { numRuns: 100 },
    );
  });
});
|