Add containerized agent execution with Apple Container

- Agents run in isolated Linux VMs via Apple Container
- All groups get Bash access (safe - sandboxed in container)
- Browser automation via agent-browser + Chromium
- Per-group configurable additional directory mounts
- File-based IPC for messages and scheduled tasks
- Container image with Node.js 22, Chromium, agent-browser

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
gavrielc
2026-01-31 22:55:57 +02:00
parent fa13b14dae
commit 09c0e8142e
14 changed files with 1252 additions and 114 deletions

57
container/Dockerfile Normal file
View File

@@ -0,0 +1,57 @@
# NanoClaw Agent Container
# Runs Claude Agent SDK in isolated Linux VM with browser automation
#
# Layout produced by this image:
#   /app        - compiled agent-runner (entry point: /app/dist/index.js)
#   /workspace  - group/, global/, extra/ and ipc/{messages,tasks} directories
#
# NOTE(review): there is no USER instruction, so the entry point presumably
# runs as the base image's default user (root) - confirm that is intended.
FROM node:22-slim
# Install system dependencies for Chromium
# (the browser itself, fonts, and shared libraries, plus curl and git).
# The apt package lists are removed in the same layer to keep the image small.
RUN apt-get update && apt-get install -y \
chromium \
fonts-liberation \
fonts-noto-color-emoji \
libgbm1 \
libnss3 \
libatk-bridge2.0-0 \
libgtk-3-0 \
libx11-xcb1 \
libxcomposite1 \
libxdamage1 \
libxrandr2 \
libasound2 \
libpangocairo-1.0-0 \
libcups2 \
libdrm2 \
libxshmfence1 \
curl \
git \
&& rm -rf /var/lib/apt/lists/*
# Set Chromium path for agent-browser
# Both variables point at the apt-installed binary.
# NOTE(review): presumably this avoids a separate browser download - confirm
# against the agent-browser / Playwright documentation.
ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium
ENV PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
# Install agent-browser globally
# (no version is pinned, so each build takes whatever npm resolves).
RUN npm install -g agent-browser
# Create app directory
WORKDIR /app
# Copy package files first for better caching:
# the dependency layer below is reused when only source files change.
COPY agent-runner/package*.json ./
# Install dependencies
# (devDependencies are included; the build step below needs the TypeScript compiler).
RUN npm install
# Copy source code
COPY agent-runner/ ./
# Build TypeScript
# ("npm run build" runs tsc, which emits into ./dist per the runner's tsconfig).
RUN npm run build
# Create workspace directories
# including the ipc/messages and ipc/tasks directories the runner writes into.
RUN mkdir -p /workspace/group /workspace/global /workspace/extra /workspace/ipc/messages /workspace/ipc/tasks
# Set working directory to group workspace
# (matches the cwd the agent runner passes to the SDK).
WORKDIR /workspace/group
# Entry point reads JSON from stdin, outputs JSON to stdout
ENTRYPOINT ["node", "/app/dist/index.js"]

View File

@@ -0,0 +1,19 @@
{
"name": "nanoclaw-agent-runner",
"version": "1.0.0",
"type": "module",
"description": "Container-side agent runner for NanoClaw",
"main": "dist/index.js",
"scripts": {
"build": "tsc",
"start": "node dist/index.js"
},
"dependencies": {
"@anthropic-ai/claude-agent-sdk": "^0.1.9",
"zod": "^3.24.2"
},
"devDependencies": {
"@types/node": "^22.10.7",
"typescript": "^5.7.3"
}
}

View File

@@ -0,0 +1,124 @@
/**
* NanoClaw Agent Runner
* Runs inside a container, receives config via stdin, outputs result to stdout
*/
import { query } from '@anthropic-ai/claude-agent-sdk';
import { createIpcMcp } from './ipc-mcp.js';
/**
 * Shape of the JSON document read from stdin when the runner starts.
 */
interface ContainerInput {
// Prompt text handed to the agent query.
prompt: string;
// When present, passed to the SDK as the `resume` option.
sessionId?: string;
// Group folder name; forwarded to the IPC MCP server and logged on startup.
groupFolder: string;
// Chat identifier forwarded to the IPC MCP server.
chatJid: string;
// Whether this run belongs to the main group; forwarded to the IPC MCP server.
isMain: boolean;
}
/**
 * Shape of the JSON document the runner writes to stdout.
 */
interface ContainerOutput {
// 'success' when the agent loop finished, 'error' when input parsing or the agent threw.
status: 'success' | 'error';
// Last truthy `result` seen on an agent message, or null if none was seen or on error.
result: string | null;
// Session id captured from the SDK's init system message, if one was received.
newSessionId?: string;
// Error message; only set on the error paths.
error?: string;
}
/**
 * Collect everything written to stdin as a single UTF-8 string.
 *
 * @returns A promise that resolves with the full input once stdin ends and
 *   rejects if the stream emits an error.
 */
async function readStdin(): Promise<string> {
  const { stdin } = process;
  return new Promise<string>((resolve, reject) => {
    const pieces: string[] = [];
    // With an encoding set, 'data' events deliver decoded strings.
    stdin.setEncoding('utf8');
    stdin.on('data', chunk => {
      pieces.push(String(chunk));
    });
    stdin.on('end', () => {
      resolve(pieces.join(''));
    });
    stdin.on('error', reject);
  });
}
/**
 * Emit the run result as one line of JSON on stdout.
 * Stdout is the channel the host process reads results from, so nothing else
 * should be printed there.
 *
 * @param output - Result object to serialize.
 */
function writeOutput(output: ContainerOutput): void {
  const serialized = JSON.stringify(output);
  console.log(serialized);
}
/**
 * Print a diagnostic line, prefixed with "[agent-runner] ", on stderr.
 * Stderr is used so diagnostics never mix with the JSON result on stdout.
 *
 * @param message - Text to log.
 */
function log(message: string): void {
  const prefix = '[agent-runner] ';
  console.error(prefix + message);
}
/**
 * Check that a parsed stdin payload carries the fields the runner requires.
 * `sessionId` is optional and is passed through without inspection.
 *
 * @param value - Result of `JSON.parse` on the stdin contents.
 * @throws Error naming the first missing or mistyped field.
 */
function assertContainerInput(value: unknown): asserts value is ContainerInput {
  if (typeof value !== 'object' || value === null || Array.isArray(value)) {
    throw new Error('expected a JSON object');
  }
  const fields = value as Record<string, unknown>;
  for (const key of ['prompt', 'groupFolder', 'chatJid']) {
    if (typeof fields[key] !== 'string') {
      throw new Error(`"${key}" must be a string`);
    }
  }
  if (typeof fields['isMain'] !== 'boolean') {
    throw new Error('"isMain" must be a boolean');
  }
}

/**
 * Runner entry point.
 *
 * 1. Reads and validates the JSON job description from stdin.
 * 2. Runs the agent with the IPC MCP server attached, remembering the session
 *    id from the init message and the last non-empty `result`.
 * 3. Writes exactly one JSON result object to stdout.
 *
 * On invalid input or an agent failure, a `status: 'error'` object is written
 * and the process exits with code 1.
 */
async function main(): Promise<void> {
  let input: ContainerInput;
  try {
    const parsed: unknown = JSON.parse(await readStdin());
    // Reject structurally wrong payloads here, before the agent is started
    // with undefined fields.
    assertContainerInput(parsed);
    input = parsed;
    log(`Received input for group: ${input.groupFolder}`);
  } catch (err) {
    writeOutput({
      status: 'error',
      result: null,
      error: `Failed to parse input: ${err instanceof Error ? err.message : String(err)}`
    });
    process.exit(1);
  }

  let result: string | null = null;
  let newSessionId: string | undefined;
  try {
    // Created inside the try so that a failure here still yields a JSON
    // error result instead of an unhandled rejection.
    const ipcMcp = createIpcMcp({
      chatJid: input.chatJid,
      groupFolder: input.groupFolder,
      isMain: input.isMain
    });

    log('Starting agent...');
    for await (const message of query({
      prompt: input.prompt,
      options: {
        cwd: '/workspace/group',
        resume: input.sessionId,
        allowedTools: [
          'Bash', // Safe - sandboxed in container!
          'Read', 'Write', 'Edit', 'Glob', 'Grep',
          'WebSearch', 'WebFetch',
          'mcp__nanoclaw__*',
          'mcp__gmail__*'
        ],
        permissionMode: 'bypassPermissions',
        settingSources: ['project'],
        mcpServers: {
          nanoclaw: ipcMcp,
          gmail: { command: 'npx', args: ['-y', '@gongrzhe/server-gmail-autoauth-mcp'] }
        }
      }
    })) {
      // Capture session ID from init message
      if (message.type === 'system' && message.subtype === 'init') {
        newSessionId = message.session_id;
        log(`Session initialized: ${newSessionId}`);
      }
      // Capture final result (the last non-empty one wins)
      if ('result' in message && message.result) {
        result = message.result as string;
      }
    }
    log('Agent completed successfully');
    writeOutput({
      status: 'success',
      result,
      newSessionId
    });
  } catch (err) {
    log(`Agent error: ${err instanceof Error ? err.message : String(err)}`);
    writeOutput({
      status: 'error',
      result: null,
      newSessionId,
      error: err instanceof Error ? err.message : String(err)
    });
    process.exit(1);
  }
}

// Last-resort handler, e.g. if writing the result itself fails.
main().catch((err: unknown) => {
  log(`Fatal error: ${err instanceof Error ? err.message : String(err)}`);
  process.exit(1);
});

View File

@@ -0,0 +1,245 @@
/**
* IPC-based MCP Server for NanoClaw
* Writes messages and tasks to files for the host process to pick up
*/
import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
import { z } from 'zod';
import fs from 'fs';
import path from 'path';
// Root of the IPC area inside the container; list_tasks reads current_tasks.json from here.
const IPC_DIR = '/workspace/ipc';
// One JSON file per outgoing message (written by send_message).
const MESSAGES_DIR = path.join(IPC_DIR, 'messages');
// One JSON file per task request (schedule, pause, resume, cancel).
const TASKS_DIR = path.join(IPC_DIR, 'tasks');
/**
 * Per-run context captured by the tools that createIpcMcp builds.
 */
export interface IpcMcpContext {
// Chat identifier written into message and schedule_task payloads.
chatJid: string;
// Group folder of the calling agent; the default task target and the filter key for list_tasks.
groupFolder: string;
// When true, schedule_task may target another group and list_tasks is not filtered.
isMain: boolean;
}
/**
 * Persist `data` as a pretty-printed JSON file inside `dir`.
 *
 * The file name is `<epoch-ms>-<random base36 suffix>.json`. The content is
 * first written to a sibling `.tmp` file and then renamed into place, so a
 * reader never sees a partially written `.json` file.
 *
 * @param dir - Target directory; created (recursively) if missing.
 * @param data - Value to serialize.
 * @returns The generated file name (not the full path).
 */
function writeIpcFile(dir: string, data: object): string {
  fs.mkdirSync(dir, { recursive: true });

  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 8);
  const filename = `${stamp}-${suffix}.json`;

  const finalPath = path.join(dir, filename);
  const stagingPath = finalPath + '.tmp';
  const serialized = JSON.stringify(data, null, 2);

  fs.writeFileSync(stagingPath, serialized);
  fs.renameSync(stagingPath, finalPath);

  return filename;
}
/**
 * Build the "nanoclaw" SDK MCP server whose tools talk to the host through
 * JSON files under the IPC directory.
 *
 * Tools:
 * - `send_message`: queues a message file in the messages directory.
 * - `schedule_task`: queues a task request in the tasks directory.
 * - `list_tasks`: reads `current_tasks.json` from the IPC root.
 * - `pause_task`, `resume_task`, `cancel_task`: queue a control request in
 *   the tasks directory.
 *
 * @param ctx - Chat id, group folder and main-group flag of the calling agent.
 * @returns The server object produced by `createSdkMcpServer`.
 */
export function createIpcMcp(ctx: IpcMcpContext) {
  const { chatJid, groupFolder, isMain } = ctx;

  // Every tool replies with a single text content item.
  const textReply = (text: string) => ({
    content: [{ type: 'text' as const, text }]
  });

  // Timestamp stamped onto each IPC payload at the moment it is built.
  const isoNow = () => new Date().toISOString();

  // Send a message to the WhatsApp group
  const sendMessage = tool(
    'send_message',
    'Send a message to the current WhatsApp group. Use this to proactively share information or updates.',
    {
      text: z.string().describe('The message text to send')
    },
    async (args) => {
      const filename = writeIpcFile(MESSAGES_DIR, {
        type: 'message',
        chatJid,
        text: args.text,
        groupFolder,
        timestamp: isoNow()
      });
      return textReply(`Message queued for delivery (${filename})`);
    }
  );

  // Schedule a new task
  const scheduleTask = tool(
    'schedule_task',
    'Schedule a recurring or one-time task. The task will run as a full agent with access to all tools.',
    {
      prompt: z.string().describe('What the agent should do when the task runs'),
      schedule_type: z.enum(['cron', 'interval', 'once']).describe('Type of schedule'),
      schedule_value: z.string().describe('Cron expression, interval in ms, or ISO timestamp'),
      target_group: z.string().optional().describe('Target group folder (main only, defaults to current group)')
    },
    async (args) => {
      // Only the main group may schedule on behalf of another group.
      let targetGroup = groupFolder;
      if (isMain && args.target_group) {
        targetGroup = args.target_group;
      }
      const filename = writeIpcFile(TASKS_DIR, {
        type: 'schedule_task',
        prompt: args.prompt,
        schedule_type: args.schedule_type,
        schedule_value: args.schedule_value,
        groupFolder: targetGroup,
        chatJid,
        createdBy: groupFolder,
        timestamp: isoNow()
      });
      return textReply(`Task scheduled (${filename}): ${args.schedule_type} - ${args.schedule_value}`);
    }
  );

  // List tasks (reads from a mounted file that host keeps updated)
  const listTasks = tool(
    'list_tasks',
    'List all scheduled tasks. From main: shows all tasks. From other groups: shows only that group\'s tasks.',
    {},
    async () => {
      type TaskSummary = {
        id: string;
        prompt: string;
        schedule_type: string;
        schedule_value: string;
        status: string;
        next_run: string;
      };
      const describeTask = (t: TaskSummary) =>
        `- [${t.id}] ${t.prompt.slice(0, 50)}... (${t.schedule_type}: ${t.schedule_value}) - ${t.status}, next: ${t.next_run || 'N/A'}`;

      // Host process writes current tasks to this file
      const snapshotPath = path.join(IPC_DIR, 'current_tasks.json');
      try {
        if (!fs.existsSync(snapshotPath)) {
          return textReply('No scheduled tasks found.');
        }
        const allTasks = JSON.parse(fs.readFileSync(snapshotPath, 'utf-8'));
        // Non-main groups only see their own tasks.
        let visible = allTasks;
        if (!isMain) {
          visible = allTasks.filter((t: { groupFolder: string }) => t.groupFolder === groupFolder);
        }
        if (visible.length === 0) {
          return textReply('No scheduled tasks found.');
        }
        const formatted = visible.map(describeTask).join('\n');
        return textReply(`Scheduled tasks:\n${formatted}`);
      } catch (err) {
        return textReply(`Error reading tasks: ${err instanceof Error ? err.message : String(err)}`);
      }
    }
  );

  // pause/resume/cancel differ only in name, wording and acknowledgement;
  // the request `type` written to the file equals the tool name.
  const taskControlTool = (
    action: 'pause_task' | 'resume_task' | 'cancel_task',
    description: string,
    idDescription: string,
    acknowledge: (taskId: string) => string
  ) =>
    tool(
      action,
      description,
      {
        task_id: z.string().describe(idDescription)
      },
      async (args) => {
        writeIpcFile(TASKS_DIR, {
          type: action,
          taskId: args.task_id,
          groupFolder,
          isMain,
          timestamp: isoNow()
        });
        return textReply(acknowledge(args.task_id));
      }
    );

  // Pause a task
  const pauseTask = taskControlTool(
    'pause_task',
    'Pause a scheduled task. It will not run until resumed.',
    'The task ID to pause',
    (taskId) => `Task ${taskId} pause requested.`
  );

  // Resume a task
  const resumeTask = taskControlTool(
    'resume_task',
    'Resume a paused task.',
    'The task ID to resume',
    (taskId) => `Task ${taskId} resume requested.`
  );

  // Cancel a task
  const cancelTask = taskControlTool(
    'cancel_task',
    'Cancel and delete a scheduled task.',
    'The task ID to cancel',
    (taskId) => `Task ${taskId} cancellation requested.`
  );

  return createSdkMcpServer({
    name: 'nanoclaw',
    version: '1.0.0',
    tools: [sendMessage, scheduleTask, listTasks, pauseTask, resumeTask, cancelTask]
  });
}

View File

@@ -0,0 +1,15 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "NodeNext",
"moduleResolution": "NodeNext",
"outDir": "./dist",
"rootDir": "./src",
"strict": true,
"esModuleInterop": true,
"skipLibCheck": true,
"declaration": true
},
"include": ["src/**/*"],
"exclude": ["node_modules", "dist"]
}

23
container/build.sh Executable file
View File

@@ -0,0 +1,23 @@
#!/bin/bash
# Builds the NanoClaw agent container image with the Apple Container CLI.
# Usage: build.sh [tag]    (tag defaults to "latest")
set -e

# Work from the directory holding this script so "." is the build context.
script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$script_dir"

# Full image reference: fixed name plus the optional tag argument.
image_ref="nanoclaw-agent:${1:-latest}"

printf '%s\n' \
  "Building NanoClaw agent container image..." \
  "Image: ${image_ref}"

# Build with Apple Container
container build -t "${image_ref}" .

# Summary plus a one-line smoke test the user can paste.
cat <<EOF

Build complete!
Image: ${image_ref}

Test with:
 echo '{"prompt":"What is 2+2?","groupFolder":"test","chatJid":"test@g.us","isMain":false}' | container run -i ${image_ref}
EOF

View File

@@ -0,0 +1,159 @@
---
name: agent-browser
description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
allowed-tools: Bash(agent-browser:*)
---
# Browser Automation with agent-browser
## Quick start
```bash
agent-browser open <url> # Navigate to page
agent-browser snapshot -i # Get interactive elements with refs
agent-browser click @e1 # Click element by ref
agent-browser fill @e2 "text" # Fill input by ref
agent-browser close # Close browser
```
## Core workflow
1. Navigate: `agent-browser open <url>`
2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
3. Interact using refs from the snapshot
4. Re-snapshot after navigation or significant DOM changes
## Commands
### Navigation
```bash
agent-browser open <url> # Navigate to URL
agent-browser back # Go back
agent-browser forward # Go forward
agent-browser reload # Reload page
agent-browser close # Close browser
```
### Snapshot (page analysis)
```bash
agent-browser snapshot # Full accessibility tree
agent-browser snapshot -i # Interactive elements only (recommended)
agent-browser snapshot -c # Compact output
agent-browser snapshot -d 3 # Limit depth to 3
agent-browser snapshot -s "#main" # Scope to CSS selector
```
### Interactions (use @refs from snapshot)
```bash
agent-browser click @e1 # Click
agent-browser dblclick @e1 # Double-click
agent-browser fill @e2 "text" # Clear and type
agent-browser type @e2 "text" # Type without clearing
agent-browser press Enter # Press key
agent-browser hover @e1 # Hover
agent-browser check @e1 # Check checkbox
agent-browser uncheck @e1 # Uncheck checkbox
agent-browser select @e1 "value" # Select dropdown option
agent-browser scroll down 500 # Scroll page
agent-browser upload @e1 file.pdf # Upload files
```
### Get information
```bash
agent-browser get text @e1 # Get element text
agent-browser get html @e1 # Get innerHTML
agent-browser get value @e1 # Get input value
agent-browser get attr @e1 href # Get attribute
agent-browser get title # Get page title
agent-browser get url # Get current URL
agent-browser get count ".item" # Count matching elements
```
### Screenshots & PDF
```bash
agent-browser screenshot # Save to temp directory
agent-browser screenshot path.png # Save to specific path
agent-browser screenshot --full # Full page
agent-browser pdf output.pdf # Save as PDF
```
### Wait
```bash
agent-browser wait @e1 # Wait for element
agent-browser wait 2000 # Wait milliseconds
agent-browser wait --text "Success" # Wait for text
agent-browser wait --url "**/dashboard" # Wait for URL pattern
agent-browser wait --load networkidle # Wait for network idle
```
### Semantic locators (alternative to refs)
```bash
agent-browser find role button click --name "Submit"
agent-browser find text "Sign In" click
agent-browser find label "Email" fill "user@test.com"
agent-browser find placeholder "Search" type "query"
```
### Authentication with saved state
```bash
# Login once
agent-browser open https://app.example.com/login
agent-browser snapshot -i
agent-browser fill @e1 "username"
agent-browser fill @e2 "password"
agent-browser click @e3
agent-browser wait --url "**/dashboard"
agent-browser state save auth.json
# Later: load saved state
agent-browser state load auth.json
agent-browser open https://app.example.com/dashboard
```
### Cookies & Storage
```bash
agent-browser cookies # Get all cookies
agent-browser cookies set name value # Set cookie
agent-browser cookies clear # Clear cookies
agent-browser storage local # Get localStorage
agent-browser storage local set k v # Set value
```
### JavaScript
```bash
agent-browser eval "document.title" # Run JavaScript
```
## Example: Form submission
```bash
agent-browser open https://example.com/form
agent-browser snapshot -i
# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
agent-browser fill @e1 "user@example.com"
agent-browser fill @e2 "password123"
agent-browser click @e3
agent-browser wait --load networkidle
agent-browser snapshot -i # Check result
```
## Example: Data extraction
```bash
agent-browser open https://example.com/products
agent-browser snapshot -i
agent-browser get text @e1 # Get product title
agent-browser get attr @e2 href # Get link URL
agent-browser screenshot products.png
```