feat: per-group queue, SQLite state, graceful shutdown (#111)
* fix: wire up queue processMessagesFn before recovery to prevent silent message loss

  recoverPendingMessages() was called after startMessageLoop(), which meant:

  1. Recovery could race with the message loop's first iteration.
  2. processMessagesFn was set inside startMessageLoop, so recovery enqueues would fire runForGroup with processMessagesFn still null, silently skipping message processing.

  Move setProcessMessagesFn and recoverPendingMessages before startMessageLoop so the queue is fully wired before any messages are enqueued.

  https://claude.ai/code/session_01PCY8zNjDa2N29jvBAV5vfL

* feat: structured agent output to fix infinite retry on silent responses (#113)

  Use Agent SDK's outputFormat with json_schema to get typed responses from the agent. The agent now returns { status: 'responded' | 'silent', userMessage?, internalLog? } instead of a plain string. This fixes a critical bug where a null/empty agent response caused infinite 5-second retry loops by conflating "nothing to say" with "error".

  - Agent runner: add AGENT_RESPONSE_SCHEMA and parse structured_output
  - Host: advance lastAgentTimestamp on both responded AND silent status
  - GroupQueue: add exponential backoff (5s-80s) with max 5 retries for actual errors, replacing unbounded fixed-interval retries

  https://claude.ai/code/session_014SLc8MxP9BYhEhDCLox9U8

  Co-authored-by: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -17,9 +17,35 @@ interface ContainerInput {
|
|||||||
isScheduledTask?: boolean;
|
isScheduledTask?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
interface AgentResponse {
|
||||||
|
status: 'responded' | 'silent';
|
||||||
|
userMessage?: string;
|
||||||
|
internalLog?: string;
|
||||||
|
}
|
||||||
|
|
||||||
|
const AGENT_RESPONSE_SCHEMA = {
|
||||||
|
type: 'object',
|
||||||
|
properties: {
|
||||||
|
status: {
|
||||||
|
type: 'string',
|
||||||
|
enum: ['responded', 'silent'],
|
||||||
|
description: 'Use "responded" when you have a message for the user. Use "silent" when the messages don\'t require a response (e.g. the conversation is between other people and doesn\'t involve you, or no trigger/mention was directed at you).',
|
||||||
|
},
|
||||||
|
userMessage: {
|
||||||
|
type: 'string',
|
||||||
|
description: 'The message to send to the user. Required when status is "responded".',
|
||||||
|
},
|
||||||
|
internalLog: {
|
||||||
|
type: 'string',
|
||||||
|
description: 'Optional internal note about why you chose this status (for logging, not shown to users).',
|
||||||
|
},
|
||||||
|
},
|
||||||
|
required: ['status'],
|
||||||
|
} as const;
|
||||||
|
|
||||||
interface ContainerOutput {
|
interface ContainerOutput {
|
||||||
status: 'success' | 'error';
|
status: 'success' | 'error';
|
||||||
result: string | null;
|
result: AgentResponse | null;
|
||||||
newSessionId?: string;
|
newSessionId?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
@@ -222,7 +248,7 @@ async function main(): Promise<void> {
|
|||||||
isMain: input.isMain
|
isMain: input.isMain
|
||||||
});
|
});
|
||||||
|
|
||||||
let result: string | null = null;
|
let result: AgentResponse | null = null;
|
||||||
let newSessionId: string | undefined;
|
let newSessionId: string | undefined;
|
||||||
|
|
||||||
// Add context for scheduled tasks
|
// Add context for scheduled tasks
|
||||||
@@ -253,6 +279,10 @@ async function main(): Promise<void> {
|
|||||||
},
|
},
|
||||||
hooks: {
|
hooks: {
|
||||||
PreCompact: [{ hooks: [createPreCompactHook()] }]
|
PreCompact: [{ hooks: [createPreCompactHook()] }]
|
||||||
|
},
|
||||||
|
outputFormat: {
|
||||||
|
type: 'json_schema',
|
||||||
|
schema: AGENT_RESPONSE_SCHEMA,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
})) {
|
})) {
|
||||||
@@ -261,15 +291,25 @@ async function main(): Promise<void> {
|
|||||||
log(`Session initialized: ${newSessionId}`);
|
log(`Session initialized: ${newSessionId}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ('result' in message && message.result) {
|
if (message.type === 'result') {
|
||||||
result = message.result as string;
|
if (message.subtype === 'success' && message.structured_output) {
|
||||||
|
result = message.structured_output as AgentResponse;
|
||||||
|
log(`Agent result: status=${result.status}${result.internalLog ? `, log=${result.internalLog}` : ''}`);
|
||||||
|
} else if (message.subtype === 'error_max_structured_output_retries') {
|
||||||
|
// Agent couldn't produce valid structured output — fall back to text result
|
||||||
|
log('Agent failed to produce structured output, falling back to text');
|
||||||
|
const textResult = 'result' in message ? (message as { result?: string }).result : null;
|
||||||
|
if (textResult) {
|
||||||
|
result = { status: 'responded', userMessage: textResult };
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
log('Agent completed successfully');
|
log('Agent completed successfully');
|
||||||
writeOutput({
|
writeOutput({
|
||||||
status: 'success',
|
status: 'success',
|
||||||
result,
|
result: result ?? { status: 'silent' },
|
||||||
newSessionId
|
newSessionId
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -40,9 +40,15 @@ export interface ContainerInput {
|
|||||||
isMain: boolean;
|
isMain: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface AgentResponse {
|
||||||
|
status: 'responded' | 'silent';
|
||||||
|
userMessage?: string;
|
||||||
|
internalLog?: string;
|
||||||
|
}
|
||||||
|
|
||||||
export interface ContainerOutput {
|
export interface ContainerOutput {
|
||||||
status: 'success' | 'error';
|
status: 'success' | 'error';
|
||||||
result: string | null;
|
result: AgentResponse | null;
|
||||||
newSessionId?: string;
|
newSessionId?: string;
|
||||||
error?: string;
|
error?: string;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,12 +9,16 @@ interface QueuedTask {
|
|||||||
fn: () => Promise<void>;
|
fn: () => Promise<void>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const MAX_RETRIES = 5;
|
||||||
|
const BASE_RETRY_MS = 5000;
|
||||||
|
|
||||||
interface GroupState {
|
interface GroupState {
|
||||||
active: boolean;
|
active: boolean;
|
||||||
pendingMessages: boolean;
|
pendingMessages: boolean;
|
||||||
pendingTasks: QueuedTask[];
|
pendingTasks: QueuedTask[];
|
||||||
process: ChildProcess | null;
|
process: ChildProcess | null;
|
||||||
containerName: string | null;
|
containerName: string | null;
|
||||||
|
retryCount: number;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class GroupQueue {
|
export class GroupQueue {
|
||||||
@@ -34,6 +38,7 @@ export class GroupQueue {
|
|||||||
pendingTasks: [],
|
pendingTasks: [],
|
||||||
process: null,
|
process: null,
|
||||||
containerName: null,
|
containerName: null,
|
||||||
|
retryCount: 0,
|
||||||
};
|
};
|
||||||
this.groups.set(groupJid, state);
|
this.groups.set(groupJid, state);
|
||||||
}
|
}
|
||||||
@@ -126,22 +131,15 @@ export class GroupQueue {
|
|||||||
try {
|
try {
|
||||||
if (this.processMessagesFn) {
|
if (this.processMessagesFn) {
|
||||||
const success = await this.processMessagesFn(groupJid);
|
const success = await this.processMessagesFn(groupJid);
|
||||||
if (!success) {
|
if (success) {
|
||||||
logger.info({ groupJid }, 'Processing failed, scheduling retry');
|
state.retryCount = 0;
|
||||||
setTimeout(() => {
|
} else {
|
||||||
if (!this.shuttingDown) {
|
this.scheduleRetry(groupJid, state);
|
||||||
this.enqueueMessageCheck(groupJid);
|
|
||||||
}
|
|
||||||
}, 5000);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error({ groupJid, err }, 'Error processing messages for group');
|
logger.error({ groupJid, err }, 'Error processing messages for group');
|
||||||
setTimeout(() => {
|
this.scheduleRetry(groupJid, state);
|
||||||
if (!this.shuttingDown) {
|
|
||||||
this.enqueueMessageCheck(groupJid);
|
|
||||||
}
|
|
||||||
}, 5000);
|
|
||||||
} finally {
|
} finally {
|
||||||
state.active = false;
|
state.active = false;
|
||||||
state.process = null;
|
state.process = null;
|
||||||
@@ -174,6 +172,29 @@ export class GroupQueue {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private scheduleRetry(groupJid: string, state: GroupState): void {
|
||||||
|
state.retryCount++;
|
||||||
|
if (state.retryCount > MAX_RETRIES) {
|
||||||
|
logger.error(
|
||||||
|
{ groupJid, retryCount: state.retryCount },
|
||||||
|
'Max retries exceeded, dropping messages (will retry on next incoming message)',
|
||||||
|
);
|
||||||
|
state.retryCount = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const delayMs = BASE_RETRY_MS * Math.pow(2, state.retryCount - 1);
|
||||||
|
logger.info(
|
||||||
|
{ groupJid, retryCount: state.retryCount, delayMs },
|
||||||
|
'Scheduling retry with backoff',
|
||||||
|
);
|
||||||
|
setTimeout(() => {
|
||||||
|
if (!this.shuttingDown) {
|
||||||
|
this.enqueueMessageCheck(groupJid);
|
||||||
|
}
|
||||||
|
}, delayMs);
|
||||||
|
}
|
||||||
|
|
||||||
private drainGroup(groupJid: string): void {
|
private drainGroup(groupJid: string): void {
|
||||||
if (this.shuttingDown) return;
|
if (this.shuttingDown) return;
|
||||||
|
|
||||||
|
|||||||
38
src/index.ts
38
src/index.ts
@@ -21,6 +21,7 @@ import {
|
|||||||
TRIGGER_PATTERN,
|
TRIGGER_PATTERN,
|
||||||
} from './config.js';
|
} from './config.js';
|
||||||
import {
|
import {
|
||||||
|
AgentResponse,
|
||||||
AvailableGroup,
|
AvailableGroup,
|
||||||
runContainerAgent,
|
runContainerAgent,
|
||||||
writeGroupsSnapshot,
|
writeGroupsSnapshot,
|
||||||
@@ -236,22 +237,35 @@ async function processGroupMessages(chatJid: string): Promise<boolean> {
|
|||||||
const response = await runAgent(group, prompt, chatJid);
|
const response = await runAgent(group, prompt, chatJid);
|
||||||
await setTyping(chatJid, false);
|
await setTyping(chatJid, false);
|
||||||
|
|
||||||
if (response) {
|
if (response === 'error') {
|
||||||
// Fix batching bug: advance to latest message in batch, not just the trigger
|
// Container or agent error — signal failure so queue can retry with backoff
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Agent processed messages successfully (whether it responded or stayed silent)
|
||||||
lastAgentTimestamp[chatJid] =
|
lastAgentTimestamp[chatJid] =
|
||||||
missedMessages[missedMessages.length - 1].timestamp;
|
missedMessages[missedMessages.length - 1].timestamp;
|
||||||
saveState();
|
saveState();
|
||||||
await sendMessage(chatJid, `${ASSISTANT_NAME}: ${response}`);
|
|
||||||
return true;
|
if (response.status === 'responded' && response.userMessage) {
|
||||||
|
await sendMessage(chatJid, `${ASSISTANT_NAME}: ${response.userMessage}`);
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
|
if (response.internalLog) {
|
||||||
|
logger.info(
|
||||||
|
{ group: group.name, agentStatus: response.status },
|
||||||
|
`Agent: ${response.internalLog}`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
async function runAgent(
|
async function runAgent(
|
||||||
group: RegisteredGroup,
|
group: RegisteredGroup,
|
||||||
prompt: string,
|
prompt: string,
|
||||||
chatJid: string,
|
chatJid: string,
|
||||||
): Promise<string | null> {
|
): Promise<AgentResponse | 'error'> {
|
||||||
const isMain = group.folder === MAIN_GROUP_FOLDER;
|
const isMain = group.folder === MAIN_GROUP_FOLDER;
|
||||||
const sessionId = sessions[group.folder];
|
const sessionId = sessions[group.folder];
|
||||||
|
|
||||||
@@ -303,13 +317,13 @@ async function runAgent(
|
|||||||
{ group: group.name, error: output.error },
|
{ group: group.name, error: output.error },
|
||||||
'Container agent error',
|
'Container agent error',
|
||||||
);
|
);
|
||||||
return null;
|
return 'error';
|
||||||
}
|
}
|
||||||
|
|
||||||
return output.result;
|
return output.result ?? { status: 'silent' };
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
logger.error({ group: group.name, err }, 'Agent error');
|
logger.error({ group: group.name, err }, 'Agent error');
|
||||||
return null;
|
return 'error';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -740,8 +754,9 @@ async function connectWhatsApp(): Promise<void> {
|
|||||||
onProcess: (groupJid, proc, containerName) => queue.registerProcess(groupJid, proc, containerName),
|
onProcess: (groupJid, proc, containerName) => queue.registerProcess(groupJid, proc, containerName),
|
||||||
});
|
});
|
||||||
startIpcWatcher();
|
startIpcWatcher();
|
||||||
startMessageLoop();
|
queue.setProcessMessagesFn(processGroupMessages);
|
||||||
recoverPendingMessages();
|
recoverPendingMessages();
|
||||||
|
startMessageLoop();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -783,9 +798,6 @@ async function startMessageLoop(): Promise<void> {
|
|||||||
}
|
}
|
||||||
messageLoopRunning = true;
|
messageLoopRunning = true;
|
||||||
|
|
||||||
// Wire up the queue's message processing function
|
|
||||||
queue.setProcessMessagesFn(processGroupMessages);
|
|
||||||
|
|
||||||
logger.info(`NanoClaw running (trigger: @${ASSISTANT_NAME})`);
|
logger.info(`NanoClaw running (trigger: @${ASSISTANT_NAME})`);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
|
|||||||
@@ -103,8 +103,8 @@ async function runTask(
|
|||||||
|
|
||||||
if (output.status === 'error') {
|
if (output.status === 'error') {
|
||||||
error = output.error || 'Unknown error';
|
error = output.error || 'Unknown error';
|
||||||
} else {
|
} else if (output.result) {
|
||||||
result = output.result;
|
result = output.result.userMessage || output.result.internalLog || null;
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
|
|||||||
Reference in New Issue
Block a user