fix: manage container lifecycle properly to prevent stopped containers from accumulating
- Name containers (nanoclaw-{group}-{timestamp}) for trackability
- On timeout, replace the immediate SIGKILL with a graceful `container stop` (falling back to SIGKILL only if the stop fails) so `--rm` fires
- Add startup sweep to clean up stopped nanoclaw containers from previous runs
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -2,7 +2,7 @@
|
|||||||
* Container Runner for NanoClaw
|
* Container Runner for NanoClaw
|
||||||
* Spawns agent execution in Apple Container and handles IPC
|
* Spawns agent execution in Apple Container and handles IPC
|
||||||
*/
|
*/
|
||||||
import { spawn } from 'child_process';
|
import { exec, spawn } from 'child_process';
|
||||||
import fs from 'fs';
|
import fs from 'fs';
|
||||||
import os from 'os';
|
import os from 'os';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
@@ -162,8 +162,8 @@ function buildVolumeMounts(
|
|||||||
return mounts;
|
return mounts;
|
||||||
}
|
}
|
||||||
|
|
||||||
function buildContainerArgs(mounts: VolumeMount[]): string[] {
|
function buildContainerArgs(mounts: VolumeMount[], containerName: string): string[] {
|
||||||
const args: string[] = ['run', '-i', '--rm'];
|
const args: string[] = ['run', '-i', '--rm', '--name', containerName];
|
||||||
|
|
||||||
// Apple Container: --mount for readonly, -v for read-write
|
// Apple Container: --mount for readonly, -v for read-write
|
||||||
for (const mount of mounts) {
|
for (const mount of mounts) {
|
||||||
@@ -192,11 +192,14 @@ export async function runContainerAgent(
|
|||||||
fs.mkdirSync(groupDir, { recursive: true });
|
fs.mkdirSync(groupDir, { recursive: true });
|
||||||
|
|
||||||
const mounts = buildVolumeMounts(group, input.isMain);
|
const mounts = buildVolumeMounts(group, input.isMain);
|
||||||
const containerArgs = buildContainerArgs(mounts);
|
const safeName = group.folder.replace(/[^a-zA-Z0-9-]/g, '-');
|
||||||
|
const containerName = `nanoclaw-${safeName}-${Date.now()}`;
|
||||||
|
const containerArgs = buildContainerArgs(mounts, containerName);
|
||||||
|
|
||||||
logger.debug(
|
logger.debug(
|
||||||
{
|
{
|
||||||
group: group.name,
|
group: group.name,
|
||||||
|
containerName,
|
||||||
mounts: mounts.map(
|
mounts: mounts.map(
|
||||||
(m) =>
|
(m) =>
|
||||||
`${m.hostPath} -> ${m.containerPath}${m.readonly ? ' (ro)' : ''}`,
|
`${m.hostPath} -> ${m.containerPath}${m.readonly ? ' (ro)' : ''}`,
|
||||||
@@ -209,6 +212,7 @@ export async function runContainerAgent(
|
|||||||
logger.info(
|
logger.info(
|
||||||
{
|
{
|
||||||
group: group.name,
|
group: group.name,
|
||||||
|
containerName,
|
||||||
mountCount: mounts.length,
|
mountCount: mounts.length,
|
||||||
isMain: input.isMain,
|
isMain: input.isMain,
|
||||||
},
|
},
|
||||||
@@ -267,13 +271,17 @@ export async function runContainerAgent(
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let timedOut = false;
|
||||||
|
|
||||||
const timeout = setTimeout(() => {
|
const timeout = setTimeout(() => {
|
||||||
logger.error({ group: group.name }, 'Container timeout, killing');
|
timedOut = true;
|
||||||
|
logger.error({ group: group.name, containerName }, 'Container timeout, stopping gracefully');
|
||||||
|
// Graceful stop: sends SIGTERM, waits, then SIGKILL — lets --rm fire
|
||||||
|
exec(`container stop ${containerName}`, { timeout: 15000 }, (err) => {
|
||||||
|
if (err) {
|
||||||
|
logger.warn({ group: group.name, containerName, err }, 'Graceful stop failed, force killing');
|
||||||
container.kill('SIGKILL');
|
container.kill('SIGKILL');
|
||||||
resolve({
|
}
|
||||||
status: 'error',
|
|
||||||
result: null,
|
|
||||||
error: `Container timed out after ${CONTAINER_TIMEOUT}ms`,
|
|
||||||
});
|
});
|
||||||
}, group.containerConfig?.timeout || CONTAINER_TIMEOUT);
|
}, group.containerConfig?.timeout || CONTAINER_TIMEOUT);
|
||||||
|
|
||||||
@@ -281,6 +289,31 @@ export async function runContainerAgent(
|
|||||||
clearTimeout(timeout);
|
clearTimeout(timeout);
|
||||||
const duration = Date.now() - startTime;
|
const duration = Date.now() - startTime;
|
||||||
|
|
||||||
|
if (timedOut) {
|
||||||
|
const ts = new Date().toISOString().replace(/[:.]/g, '-');
|
||||||
|
const timeoutLog = path.join(logsDir, `container-${ts}.log`);
|
||||||
|
fs.writeFileSync(timeoutLog, [
|
||||||
|
`=== Container Run Log (TIMEOUT) ===`,
|
||||||
|
`Timestamp: ${new Date().toISOString()}`,
|
||||||
|
`Group: ${group.name}`,
|
||||||
|
`Container: ${containerName}`,
|
||||||
|
`Duration: ${duration}ms`,
|
||||||
|
`Exit Code: ${code}`,
|
||||||
|
].join('\n'));
|
||||||
|
|
||||||
|
logger.error(
|
||||||
|
{ group: group.name, containerName, duration, code },
|
||||||
|
'Container timed out',
|
||||||
|
);
|
||||||
|
|
||||||
|
resolve({
|
||||||
|
status: 'error',
|
||||||
|
result: null,
|
||||||
|
error: `Container timed out after ${group.containerConfig?.timeout || CONTAINER_TIMEOUT}ms`,
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
|
||||||
const logFile = path.join(logsDir, `container-${timestamp}.log`);
|
const logFile = path.join(logsDir, `container-${timestamp}.log`);
|
||||||
const isVerbose =
|
const isVerbose =
|
||||||
@@ -414,7 +447,7 @@ export async function runContainerAgent(
|
|||||||
|
|
||||||
container.on('error', (err) => {
|
container.on('error', (err) => {
|
||||||
clearTimeout(timeout);
|
clearTimeout(timeout);
|
||||||
logger.error({ group: group.name, error: err }, 'Container spawn error');
|
logger.error({ group: group.name, containerName, error: err }, 'Container spawn error');
|
||||||
resolve({
|
resolve({
|
||||||
status: 'error',
|
status: 'error',
|
||||||
result: null,
|
result: null,
|
||||||
|
|||||||
18
src/index.ts
18
src/index.ts
@@ -832,6 +832,24 @@ function ensureContainerSystemRunning(): void {
|
|||||||
throw new Error('Apple Container system is required but failed to start');
|
throw new Error('Apple Container system is required but failed to start');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Clean up stopped NanoClaw containers from previous runs
|
||||||
|
try {
|
||||||
|
const output = execSync('container ls -a --format {{.Names}}', {
|
||||||
|
stdio: ['pipe', 'pipe', 'pipe'],
|
||||||
|
encoding: 'utf-8',
|
||||||
|
});
|
||||||
|
const stale = output
|
||||||
|
.split('\n')
|
||||||
|
.map((n) => n.trim())
|
||||||
|
.filter((n) => n.startsWith('nanoclaw-'));
|
||||||
|
if (stale.length > 0) {
|
||||||
|
execSync(`container rm ${stale.join(' ')}`, { stdio: 'pipe' });
|
||||||
|
logger.info({ count: stale.length }, 'Cleaned up stopped containers');
|
||||||
|
}
|
||||||
|
} catch {
|
||||||
|
// No stopped containers or ls/rm not supported
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function main(): Promise<void> {
|
async function main(): Promise<void> {
|
||||||
|
|||||||
Reference in New Issue
Block a user