From db216a459e51acd319c53c5c88b18c72d27447cf Mon Sep 17 00:00:00 2001 From: gavrielc Date: Fri, 6 Feb 2026 07:10:26 +0200 Subject: [PATCH] fix: proper container lifecycle management to prevent stopped container accumulation - Name containers (nanoclaw-{group}-{timestamp}) for trackability - Replace SIGKILL timeout with graceful `container stop` so --rm fires - Add startup sweep to clean up stopped nanoclaw containers from previous runs Co-Authored-By: Claude Opus 4.6 --- src/container-runner.ts | 55 ++++++++++++++++++++++++++++++++--------- src/index.ts | 18 ++++++++++++++ 2 files changed, 62 insertions(+), 11 deletions(-) diff --git a/src/container-runner.ts b/src/container-runner.ts index cfcda9e..7c45446 100644 --- a/src/container-runner.ts +++ b/src/container-runner.ts @@ -2,7 +2,7 @@ * Container Runner for NanoClaw * Spawns agent execution in Apple Container and handles IPC */ -import { spawn } from 'child_process'; +import { exec, spawn } from 'child_process'; import fs from 'fs'; import os from 'os'; import path from 'path'; @@ -162,8 +162,8 @@ function buildVolumeMounts( return mounts; } -function buildContainerArgs(mounts: VolumeMount[]): string[] { - const args: string[] = ['run', '-i', '--rm']; +function buildContainerArgs(mounts: VolumeMount[], containerName: string): string[] { + const args: string[] = ['run', '-i', '--rm', '--name', containerName]; // Apple Container: --mount for readonly, -v for read-write for (const mount of mounts) { @@ -192,11 +192,14 @@ export async function runContainerAgent( fs.mkdirSync(groupDir, { recursive: true }); const mounts = buildVolumeMounts(group, input.isMain); - const containerArgs = buildContainerArgs(mounts); + const safeName = group.folder.replace(/[^a-zA-Z0-9-]/g, '-'); + const containerName = `nanoclaw-${safeName}-${Date.now()}`; + const containerArgs = buildContainerArgs(mounts, containerName); logger.debug( { group: group.name, + containerName, mounts: mounts.map( (m) => `${m.hostPath} -> ${m.containerPath}${m.readonly ? ' (ro)' : ''}`, @@ -209,6 +212,7 @@ export async function runContainerAgent( logger.info( { group: group.name, + containerName, mountCount: mounts.length, isMain: input.isMain, }, @@ -267,13 +271,17 @@ export async function runContainerAgent( } }); + let timedOut = false; + const timeout = setTimeout(() => { - logger.error({ group: group.name }, 'Container timeout, killing'); - container.kill('SIGKILL'); - resolve({ - status: 'error', - result: null, - error: `Container timed out after ${CONTAINER_TIMEOUT}ms`, + timedOut = true; + logger.error({ group: group.name, containerName }, 'Container timeout, stopping gracefully'); + // Graceful stop: sends SIGTERM, waits, then SIGKILL — lets --rm fire + exec(`container stop ${containerName}`, { timeout: 15000 }, (err) => { + if (err) { + logger.warn({ group: group.name, containerName, err }, 'Graceful stop failed, force killing'); + container.kill('SIGKILL'); + } }); }, group.containerConfig?.timeout || CONTAINER_TIMEOUT); @@ -281,6 +289,31 @@ export async function runContainerAgent( clearTimeout(timeout); const duration = Date.now() - startTime; + if (timedOut) { + const ts = new Date().toISOString().replace(/[:.]/g, '-'); + const timeoutLog = path.join(logsDir, `container-${ts}.log`); + fs.writeFileSync(timeoutLog, [ + `=== Container Run Log (TIMEOUT) ===`, + `Timestamp: ${new Date().toISOString()}`, + `Group: ${group.name}`, + `Container: ${containerName}`, + `Duration: ${duration}ms`, + `Exit Code: ${code}`, + ].join('\n')); + + logger.error( + { group: group.name, containerName, duration, code }, + 'Container timed out', + ); + + resolve({ + status: 'error', + result: null, + error: `Container timed out after ${group.containerConfig?.timeout || CONTAINER_TIMEOUT}ms`, + }); + return; + } + const timestamp = new Date().toISOString().replace(/[:.]/g, '-'); const logFile = path.join(logsDir, `container-${timestamp}.log`); const isVerbose = @@ -414,7 +447,7 @@ export async function runContainerAgent( container.on('error', (err) => { clearTimeout(timeout); - logger.error({ group: group.name, error: err }, 'Container spawn error'); + logger.error({ group: group.name, containerName, error: err }, 'Container spawn error'); resolve({ status: 'error', result: null, diff --git a/src/index.ts b/src/index.ts index 7a1f516..e137e58 100644 --- a/src/index.ts +++ b/src/index.ts @@ -832,6 +832,24 @@ function ensureContainerSystemRunning(): void { throw new Error('Apple Container system is required but failed to start'); } } + + // Clean up stopped NanoClaw containers from previous runs + try { + const output = execSync('container ls -a --format {{.Names}}', { + stdio: ['pipe', 'pipe', 'pipe'], + encoding: 'utf-8', + }); + const stale = output + .split('\n') + .map((n) => n.trim()) + .filter((n) => n.startsWith('nanoclaw-')); + if (stale.length > 0) { + execSync(`container rm ${stale.join(' ')}`, { stdio: 'pipe' }); + logger.info({ count: stale.length }, 'Cleaned up stopped containers'); + } + } catch { + // No stopped containers or ls/rm not supported + } } async function main(): Promise {