feat: per-group queue, SQLite state, graceful shutdown (#111)
* fix: wire up queue processMessagesFn before recovery to prevent silent message loss

  recoverPendingMessages() was called after startMessageLoop(), which meant:
  1. Recovery could race with the message loop's first iteration.
  2. processMessagesFn was set inside startMessageLoop, so recovery enqueues would fire runForGroup with processMessagesFn still null, silently skipping message processing.

  Move setProcessMessagesFn and recoverPendingMessages before startMessageLoop so the queue is fully wired before any messages are enqueued.

  https://claude.ai/code/session_01PCY8zNjDa2N29jvBAV5vfL

* feat: structured agent output to fix infinite retry on silent responses (#113)

  Use Agent SDK's outputFormat with json_schema to get typed responses from the agent. The agent now returns { status: 'responded' | 'silent', userMessage?, internalLog? } instead of a plain string. This fixes a critical bug where a null/empty agent response caused infinite 5-second retry loops by conflating "nothing to say" with "error".

  - Agent runner: add AGENT_RESPONSE_SCHEMA and parse structured_output
  - Host: advance lastAgentTimestamp on both responded AND silent status
  - GroupQueue: add exponential backoff (5s-80s) with max 5 retries for actual errors, replacing unbounded fixed-interval retries

  https://claude.ai/code/session_014SLc8MxP9BYhEhDCLox9U8

  Co-authored-by: Claude <noreply@anthropic.com>

---------

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -17,9 +17,35 @@ interface ContainerInput {
|
||||
isScheduledTask?: boolean;
|
||||
}
|
||||
|
||||
/**
 * Structured reply produced by the agent for one run.
 *
 * Separates "the agent has something to say" from "the agent deliberately
 * stayed quiet", so that an empty reply is not mistaken for a failure.
 */
interface AgentResponse {
|
||||
// 'responded' = there is a message for the user; 'silent' = no reply is needed.
status: 'responded' | 'silent';
|
||||
// Text to send to the user; expected when status is 'responded'.
userMessage?: string;
|
||||
// Optional note explaining the chosen status; intended for logs, not for users.
internalLog?: string;
|
||||
}
|
||||
|
||||
/**
 * JSON Schema describing the structured reply the agent must produce.
 *
 * Only `status` is required; `userMessage` and `internalLog` are optional
 * strings. The `description` texts are part of the schema itself, so they
 * are runtime data rather than documentation.
 */
const AGENT_RESPONSE_SCHEMA = {
|
||||
type: 'object',
|
||||
properties: {
|
||||
status: {
|
||||
type: 'string',
|
||||
enum: ['responded', 'silent'],
|
||||
description: 'Use "responded" when you have a message for the user. Use "silent" when the messages don\'t require a response (e.g. the conversation is between other people and doesn\'t involve you, or no trigger/mention was directed at you).',
|
||||
},
|
||||
userMessage: {
|
||||
type: 'string',
|
||||
description: 'The message to send to the user. Required when status is "responded".',
|
||||
},
|
||||
internalLog: {
|
||||
type: 'string',
|
||||
description: 'Optional internal note about why you chose this status (for logging, not shown to users).',
|
||||
},
|
||||
},
|
||||
// userMessage is not listed here even though its description calls it
|
||||
// "required" for 'responded' — that rule is stated only in prose.
required: ['status'],
|
||||
} as const;
|
||||
|
||||
interface ContainerOutput {
|
||||
status: 'success' | 'error';
|
||||
result: string | null;
|
||||
result: AgentResponse | null;
|
||||
newSessionId?: string;
|
||||
error?: string;
|
||||
}
|
||||
@@ -222,7 +248,7 @@ async function main(): Promise<void> {
|
||||
isMain: input.isMain
|
||||
});
|
||||
|
||||
let result: string | null = null;
|
||||
let result: AgentResponse | null = null;
|
||||
let newSessionId: string | undefined;
|
||||
|
||||
// Add context for scheduled tasks
|
||||
@@ -253,6 +279,10 @@ async function main(): Promise<void> {
|
||||
},
|
||||
hooks: {
|
||||
PreCompact: [{ hooks: [createPreCompactHook()] }]
|
||||
},
|
||||
outputFormat: {
|
||||
type: 'json_schema',
|
||||
schema: AGENT_RESPONSE_SCHEMA,
|
||||
}
|
||||
}
|
||||
})) {
|
||||
@@ -261,15 +291,25 @@ async function main(): Promise<void> {
|
||||
log(`Session initialized: ${newSessionId}`);
|
||||
}
|
||||
|
||||
if ('result' in message && message.result) {
|
||||
result = message.result as string;
|
||||
if (message.type === 'result') {
|
||||
if (message.subtype === 'success' && message.structured_output) {
|
||||
result = message.structured_output as AgentResponse;
|
||||
log(`Agent result: status=${result.status}${result.internalLog ? `, log=${result.internalLog}` : ''}`);
|
||||
} else if (message.subtype === 'error_max_structured_output_retries') {
|
||||
// Agent couldn't produce valid structured output — fall back to text result
|
||||
log('Agent failed to produce structured output, falling back to text');
|
||||
const textResult = 'result' in message ? (message as { result?: string }).result : null;
|
||||
if (textResult) {
|
||||
result = { status: 'responded', userMessage: textResult };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log('Agent completed successfully');
|
||||
writeOutput({
|
||||
status: 'success',
|
||||
result,
|
||||
result: result ?? { status: 'silent' },
|
||||
newSessionId
|
||||
});
|
||||
|
||||
|
||||
@@ -40,9 +40,15 @@ export interface ContainerInput {
|
||||
isMain: boolean;
|
||||
}
|
||||
|
||||
/**
 * Structured reply carried in a container's output.
 *
 * `status` tells the host whether the agent produced a user-facing message
 * ('responded') or intentionally produced none ('silent').
 */
export interface AgentResponse {
|
||||
// Whether the agent has a message for the user or chose to stay quiet.
status: 'responded' | 'silent';
|
||||
// Message text for the user; may be absent, notably when status is 'silent'.
userMessage?: string;
|
||||
// Optional free-form note from the agent, meant for logging.
internalLog?: string;
|
||||
}
|
||||
|
||||
export interface ContainerOutput {
|
||||
status: 'success' | 'error';
|
||||
result: string | null;
|
||||
result: AgentResponse | null;
|
||||
newSessionId?: string;
|
||||
error?: string;
|
||||
}
|
||||
|
||||
@@ -9,12 +9,16 @@ interface QueuedTask {
|
||||
fn: () => Promise<void>;
|
||||
}
|
||||
|
||||
const MAX_RETRIES = 5;
|
||||
const BASE_RETRY_MS = 5000;
|
||||
|
||||
interface GroupState {
|
||||
active: boolean;
|
||||
pendingMessages: boolean;
|
||||
pendingTasks: QueuedTask[];
|
||||
process: ChildProcess | null;
|
||||
containerName: string | null;
|
||||
retryCount: number;
|
||||
}
|
||||
|
||||
export class GroupQueue {
|
||||
@@ -34,6 +38,7 @@ export class GroupQueue {
|
||||
pendingTasks: [],
|
||||
process: null,
|
||||
containerName: null,
|
||||
retryCount: 0,
|
||||
};
|
||||
this.groups.set(groupJid, state);
|
||||
}
|
||||
@@ -126,22 +131,15 @@ export class GroupQueue {
|
||||
try {
|
||||
if (this.processMessagesFn) {
|
||||
const success = await this.processMessagesFn(groupJid);
|
||||
if (!success) {
|
||||
logger.info({ groupJid }, 'Processing failed, scheduling retry');
|
||||
setTimeout(() => {
|
||||
if (!this.shuttingDown) {
|
||||
this.enqueueMessageCheck(groupJid);
|
||||
}
|
||||
}, 5000);
|
||||
if (success) {
|
||||
state.retryCount = 0;
|
||||
} else {
|
||||
this.scheduleRetry(groupJid, state);
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
logger.error({ groupJid, err }, 'Error processing messages for group');
|
||||
setTimeout(() => {
|
||||
if (!this.shuttingDown) {
|
||||
this.enqueueMessageCheck(groupJid);
|
||||
}
|
||||
}, 5000);
|
||||
this.scheduleRetry(groupJid, state);
|
||||
} finally {
|
||||
state.active = false;
|
||||
state.process = null;
|
||||
@@ -174,6 +172,29 @@ export class GroupQueue {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Schedule another message check for a group after a failed processing run,
 * using exponential backoff and a bounded number of attempts.
 *
 * Each call increments `state.retryCount`. While the count is within
 * MAX_RETRIES, a timer is armed for BASE_RETRY_MS * 2^(retryCount - 1)
 * milliseconds. Once the count exceeds MAX_RETRIES, the failure is logged,
 * the counter is reset to 0, and no timer is armed.
 *
 * @param groupJid Identifier of the group whose processing failed.
 * @param state Mutable per-group queue state; its `retryCount` is updated in place.
 */
private scheduleRetry(groupJid: string, state: GroupState): void {
|
||||
state.retryCount++;
|
||||
// Give up once the attempt budget is spent; returning before setTimeout
|
||||
// means nothing further is scheduled on this path.
if (state.retryCount > MAX_RETRIES) {
|
||||
logger.error(
|
||||
{ groupJid, retryCount: state.retryCount },
|
||||
'Max retries exceeded, dropping messages (will retry on next incoming message)',
|
||||
);
|
||||
// Reset so a later failure starts again from the shortest delay.
state.retryCount = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
// The delay doubles per attempt: the first retry waits BASE_RETRY_MS, the second twice that, and so on.
const delayMs = BASE_RETRY_MS * Math.pow(2, state.retryCount - 1);
|
||||
logger.info(
|
||||
{ groupJid, retryCount: state.retryCount, delayMs },
|
||||
'Scheduling retry with backoff',
|
||||
);
|
||||
setTimeout(() => {
|
||||
// The shutdown flag is checked when the timer fires, not when it is armed.
if (!this.shuttingDown) {
|
||||
this.enqueueMessageCheck(groupJid);
|
||||
}
|
||||
}, delayMs);
|
||||
}
|
||||
|
||||
private drainGroup(groupJid: string): void {
|
||||
if (this.shuttingDown) return;
|
||||
|
||||
|
||||
38
src/index.ts
38
src/index.ts
@@ -21,6 +21,7 @@ import {
|
||||
TRIGGER_PATTERN,
|
||||
} from './config.js';
|
||||
import {
|
||||
AgentResponse,
|
||||
AvailableGroup,
|
||||
runContainerAgent,
|
||||
writeGroupsSnapshot,
|
||||
@@ -236,22 +237,35 @@ async function processGroupMessages(chatJid: string): Promise<boolean> {
|
||||
const response = await runAgent(group, prompt, chatJid);
|
||||
await setTyping(chatJid, false);
|
||||
|
||||
if (response) {
|
||||
// Fix batching bug: advance to latest message in batch, not just the trigger
|
||||
if (response === 'error') {
|
||||
// Container or agent error — signal failure so queue can retry with backoff
|
||||
return false;
|
||||
}
|
||||
|
||||
// Agent processed messages successfully (whether it responded or stayed silent)
|
||||
lastAgentTimestamp[chatJid] =
|
||||
missedMessages[missedMessages.length - 1].timestamp;
|
||||
saveState();
|
||||
await sendMessage(chatJid, `${ASSISTANT_NAME}: ${response}`);
|
||||
return true;
|
||||
|
||||
if (response.status === 'responded' && response.userMessage) {
|
||||
await sendMessage(chatJid, `${ASSISTANT_NAME}: ${response.userMessage}`);
|
||||
}
|
||||
return false;
|
||||
|
||||
if (response.internalLog) {
|
||||
logger.info(
|
||||
{ group: group.name, agentStatus: response.status },
|
||||
`Agent: ${response.internalLog}`,
|
||||
);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async function runAgent(
|
||||
group: RegisteredGroup,
|
||||
prompt: string,
|
||||
chatJid: string,
|
||||
): Promise<string | null> {
|
||||
): Promise<AgentResponse | 'error'> {
|
||||
const isMain = group.folder === MAIN_GROUP_FOLDER;
|
||||
const sessionId = sessions[group.folder];
|
||||
|
||||
@@ -303,13 +317,13 @@ async function runAgent(
|
||||
{ group: group.name, error: output.error },
|
||||
'Container agent error',
|
||||
);
|
||||
return null;
|
||||
return 'error';
|
||||
}
|
||||
|
||||
return output.result;
|
||||
return output.result ?? { status: 'silent' };
|
||||
} catch (err) {
|
||||
logger.error({ group: group.name, err }, 'Agent error');
|
||||
return null;
|
||||
return 'error';
|
||||
}
|
||||
}
|
||||
|
||||
@@ -740,8 +754,9 @@ async function connectWhatsApp(): Promise<void> {
|
||||
onProcess: (groupJid, proc, containerName) => queue.registerProcess(groupJid, proc, containerName),
|
||||
});
|
||||
startIpcWatcher();
|
||||
startMessageLoop();
|
||||
queue.setProcessMessagesFn(processGroupMessages);
|
||||
recoverPendingMessages();
|
||||
startMessageLoop();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -783,9 +798,6 @@ async function startMessageLoop(): Promise<void> {
|
||||
}
|
||||
messageLoopRunning = true;
|
||||
|
||||
// Wire up the queue's message processing function
|
||||
queue.setProcessMessagesFn(processGroupMessages);
|
||||
|
||||
logger.info(`NanoClaw running (trigger: @${ASSISTANT_NAME})`);
|
||||
|
||||
while (true) {
|
||||
|
||||
@@ -103,8 +103,8 @@ async function runTask(
|
||||
|
||||
if (output.status === 'error') {
|
||||
error = output.error || 'Unknown error';
|
||||
} else {
|
||||
result = output.result;
|
||||
} else if (output.result) {
|
||||
result = output.result.userMessage || output.result.internalLog || null;
|
||||
}
|
||||
|
||||
logger.info(
|
||||
|
||||
Reference in New Issue
Block a user