Add containerized agent execution with Apple Container

- Agents run in isolated Linux VMs via Apple Container
- All groups get Bash access (safe - sandboxed in container)
- Browser automation via agent-browser + Chromium
- Per-group configurable additional directory mounts
- File-based IPC for messages and scheduled tasks
- Container image with Node.js 22, Chromium, agent-browser

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-31 22:55:57 +02:00
parent fa13b14dae
commit 09c0e8142e
14 changed files with 1252 additions and 114 deletions
--- a/container/Dockerfile
+++ b/container/Dockerfile
@@ -0,0 +1,57 @@
+# NanoClaw agent image
+# Bundles the agent runner (Claude Agent SDK) with Chromium and agent-browser
+# so an agent can use browser automation from inside the container.
+
+FROM node:22-slim
+
+# Chromium, the shared libraries and fonts it needs, plus curl and git.
+# The apt package index is deleted in the same layer to keep it out of the image.
+RUN apt-get update && apt-get install -y \
+    chromium \
+    curl \
+    fonts-liberation \
+    fonts-noto-color-emoji \
+    git \
+    libasound2 \
+    libatk-bridge2.0-0 \
+    libcups2 \
+    libdrm2 \
+    libgbm1 \
+    libgtk-3-0 \
+    libnss3 \
+    libpangocairo-1.0-0 \
+    libx11-xcb1 \
+    libxcomposite1 \
+    libxdamage1 \
+    libxrandr2 \
+    libxshmfence1 \
+    && rm -rf /var/lib/apt/lists/*
+
+# Point agent-browser and Playwright at the Chromium binary installed above
+ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/bin/chromium \
+    PLAYWRIGHT_CHROMIUM_EXECUTABLE_PATH=/usr/bin/chromium
+
+# agent-browser CLI, installed globally
+RUN npm install -g agent-browser
+
+# The runner lives in /app
+WORKDIR /app
+
+# Manifests go in before the sources so the dependency layer can be cached
+COPY agent-runner/package*.json ./
+RUN npm install
+
+# Runner sources, compiled to /app/dist by the package's build script
+COPY agent-runner/ ./
+RUN npm run build
+
+# Workspace tree: the agent's working directory plus the IPC message/task folders
+RUN mkdir -p /workspace/group /workspace/global /workspace/extra /workspace/ipc/messages /workspace/ipc/tasks
+
+# The agent starts in the group workspace
+WORKDIR /workspace/group
+
+# NOTE(review): no USER instruction is set, so the runner executes as the base
+# image's default user - confirm the SDK's 'bypassPermissions' mode accepts that.
+
+# The runner takes one JSON request on stdin and prints one JSON result on stdout
+ENTRYPOINT ["node", "/app/dist/index.js"]
--- a/container/agent-runner/package.json
+++ b/container/agent-runner/package.json
@@ -0,0 +1,19 @@
+{
+  "name": "nanoclaw-agent-runner",
+  "version": "1.0.0",
+  "type": "module",
+  "description": "Container-side agent runner for NanoClaw",
+  "main": "dist/index.js",
+  "scripts": {
+    "build": "tsc",
+    "start": "node dist/index.js"
+  },
+  "dependencies": {
+    "@anthropic-ai/claude-agent-sdk": "^0.1.9",
+    "zod": "^3.24.2"
+  },
+  "devDependencies": {
+    "@types/node": "^22.10.7",
+    "typescript": "^5.7.3"
+  }
+}
--- a/container/agent-runner/src/index.ts
+++ b/container/agent-runner/src/index.ts
@@ -0,0 +1,124 @@
+/**
+ * NanoClaw Agent Runner
+ * Runs inside a container, receives config via stdin, outputs result to stdout
+ */
+
+import { query } from '@anthropic-ai/claude-agent-sdk';
+import { createIpcMcp } from './ipc-mcp.js';
+
+interface ContainerInput { // JSON request this runner reads from stdin
+  prompt: string; // prompt handed to the agent query
+  sessionId?: string; // passed as `resume` so an earlier session can be continued
+  groupFolder: string; // group this run belongs to; forwarded to the IPC MCP server
+  chatJid: string; // chat identifier; forwarded to the IPC MCP server
+  isMain: boolean; // whether this run is for the main group; forwarded to the IPC MCP server
+}
+
+interface ContainerOutput { // JSON result this runner writes to stdout
+  status: 'success' | 'error';
+  result: string | null; // last non-empty result seen from the agent; null on error or when none was produced
+  newSessionId?: string; // session id taken from the SDK's init message, if one arrived
+  error?: string; // error message; filled in when status is 'error'
+}
+
+/**
+ * Reads stdin to the end and resolves with everything received, decoded as UTF-8.
+ * Rejects if the stream emits an error.
+ */
+async function readStdin(): Promise<string> {
+  const stdin = process.stdin;
+
+  return new Promise<string>((resolve, reject) => {
+    const parts: string[] = [];
+    stdin.setEncoding('utf8');
+    stdin.on('data', chunk => { parts.push(String(chunk)); });
+    stdin.on('end', () => resolve(parts.join('')));
+    stdin.on('error', reject);
+  });
+}
+
+/** Prints the result as a single line of JSON on stdout, where the host process picks it up. */
+function writeOutput(output: ContainerOutput): void {
+  const serialized = JSON.stringify(output);
+  console.log(serialized);
+}
+
+/** Diagnostic logging; goes to stderr so that stdout carries nothing but the JSON result. */
+function log(message: string): void {
+  const line = '[agent-runner] ' + message;
+  console.error(line);
+}
+
+/**
+ * Parses the raw stdin payload and checks that it has the shape of ContainerInput.
+ *
+ * @param raw JSON text received on stdin.
+ * @returns The validated input.
+ * @throws SyntaxError if `raw` is not valid JSON; Error naming the offending
+ *         field if the JSON does not match the expected shape.
+ */
+function parseInput(raw: string): ContainerInput {
+  const parsed: unknown = JSON.parse(raw);
+  if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
+    throw new Error('input must be a JSON object');
+  }
+
+  const { prompt, sessionId, groupFolder, chatJid, isMain } = parsed as Record<string, unknown>;
+
+  if (typeof prompt !== 'string') throw new Error('"prompt" must be a string');
+  if (typeof groupFolder !== 'string') throw new Error('"groupFolder" must be a string');
+  if (typeof chatJid !== 'string') throw new Error('"chatJid" must be a string');
+  if (typeof isMain !== 'boolean') throw new Error('"isMain" must be a boolean');
+
+  // sessionId is optional: absent or null both mean "start a new session"
+  let resumeId: string | undefined;
+  if (typeof sessionId === 'string') {
+    resumeId = sessionId;
+  } else if (sessionId != null) {
+    throw new Error('"sessionId" must be a string when provided');
+  }
+
+  return { prompt, sessionId: resumeId, groupFolder, chatJid, isMain };
+}
+
+/**
+ * Entry point: reads one request from stdin, runs the agent, and writes one
+ * ContainerOutput JSON line to stdout.
+ *
+ * Exits with code 1 after writing an 'error' output when the input is invalid
+ * or the agent run throws.
+ */
+async function main(): Promise<void> {
+  let input: ContainerInput;
+
+  try {
+    input = parseInput(await readStdin());
+    log(`Received input for group: ${input.groupFolder}`);
+  } catch (err) {
+    writeOutput({
+      status: 'error',
+      result: null,
+      error: `Failed to parse input: ${err instanceof Error ? err.message : String(err)}`
+    });
+    process.exit(1);
+  }
+
+  let result: string | null = null;
+  let newSessionId: string | undefined;
+
+  try {
+    // Create IPC-based MCP for communicating back to host.
+    // Done inside the try so a failure here is also reported as JSON.
+    const ipcMcp = createIpcMcp({
+      chatJid: input.chatJid,
+      groupFolder: input.groupFolder,
+      isMain: input.isMain
+    });
+
+    log('Starting agent...');
+
+    for await (const message of query({
+      prompt: input.prompt,
+      options: {
+        cwd: '/workspace/group',
+        resume: input.sessionId,
+        allowedTools: [
+          'Bash',           // Safe - sandboxed in container!
+          'Read', 'Write', 'Edit', 'Glob', 'Grep',
+          'WebSearch', 'WebFetch',
+          'mcp__nanoclaw__*',
+          'mcp__gmail__*'
+        ],
+        permissionMode: 'bypassPermissions',
+        settingSources: ['project'],
+        mcpServers: {
+          nanoclaw: ipcMcp,
+          gmail: { command: 'npx', args: ['-y', '@gongrzhe/server-gmail-autoauth-mcp'] }
+        }
+      }
+    })) {
+      // Capture session ID from init message
+      if (message.type === 'system' && message.subtype === 'init') {
+        newSessionId = message.session_id;
+        log(`Session initialized: ${newSessionId}`);
+      }
+
+      // Capture final result (the last non-empty one wins)
+      if ('result' in message && message.result) {
+        result = message.result as string;
+      }
+    }
+
+    log('Agent completed successfully');
+    writeOutput({
+      status: 'success',
+      result,
+      newSessionId
+    });
+
+  } catch (err) {
+    log(`Agent error: ${err instanceof Error ? err.message : String(err)}`);
+    writeOutput({
+      status: 'error',
+      result: null,
+      newSessionId,
+      error: err instanceof Error ? err.message : String(err)
+    });
+    process.exit(1);
+  }
+}
+
+// Last-resort handler: anything that escapes main() is still reported as a
+// JSON error instead of surfacing as an unhandled promise rejection.
+main().catch((err: unknown) => {
+  writeOutput({
+    status: 'error',
+    result: null,
+    error: err instanceof Error ? err.message : String(err)
+  });
+  process.exit(1);
+});
--- a/container/agent-runner/src/ipc-mcp.ts
+++ b/container/agent-runner/src/ipc-mcp.ts
@@ -0,0 +1,245 @@
+/**
+ * IPC-based MCP Server for NanoClaw
+ * Writes messages and tasks to files for the host process to pick up
+ */
+
+import { createSdkMcpServer, tool } from '@anthropic-ai/claude-agent-sdk';
+import { z } from 'zod';
+import fs from 'fs';
+import path from 'path';
+
+const IPC_DIR = '/workspace/ipc'; // root of the file-based IPC area inside the container
+const MESSAGES_DIR = path.join(IPC_DIR, 'messages'); // send_message requests are written here
+const TASKS_DIR = path.join(IPC_DIR, 'tasks'); // schedule/pause/resume/cancel requests are written here
+
+export interface IpcMcpContext { // identity of the agent run the MCP tools act on behalf of
+  chatJid: string; // chat id stamped on send_message and schedule_task requests
+  groupFolder: string; // this group's folder; default task target and the list_tasks filter
+  isMain: boolean; // when true, schedule_task may target another group and list_tasks shows all tasks
+}
+
+/**
+ * Serializes `data` as pretty-printed JSON into a new, uniquely named file in `dir`.
+ *
+ * The payload is written to a `.tmp` sibling first and then renamed into place,
+ * so the final `.json` name never refers to a partially written file. If the
+ * write or the rename fails, the temp file is removed (best effort) and the
+ * original error is rethrown.
+ *
+ * @param dir  Directory to write into; created (recursively) if missing.
+ * @param data JSON-serializable payload.
+ * @returns The base name of the created file (not the full path).
+ * @throws The error raised by the underlying fs call.
+ */
+function writeIpcFile(dir: string, data: object): string {
+  // Ensure directory exists
+  fs.mkdirSync(dir, { recursive: true });
+
+  // Use timestamp + random suffix for unique filename
+  const filename = `${Date.now()}-${Math.random().toString(36).slice(2, 8)}.json`;
+  const filepath = path.join(dir, filename);
+  const tempPath = `${filepath}.tmp`;
+
+  try {
+    fs.writeFileSync(tempPath, JSON.stringify(data, null, 2));
+    fs.renameSync(tempPath, filepath);
+  } catch (err) {
+    // Do not leave a stray temp file behind in the IPC directory
+    try {
+      fs.rmSync(tempPath, { force: true });
+    } catch {
+      // Cleanup is best effort; the original failure is the one worth reporting
+    }
+    throw err;
+  }
+
+  return filename;
+}
+
+/** Longest prompt prefix shown per task by list_tasks before it is truncated. */
+const PROMPT_PREVIEW_LENGTH = 50;
+
+/** One entry of current_tasks.json, limited to the fields list_tasks reads. */
+interface TaskSummary {
+  id: string;
+  prompt: string;
+  schedule_type: string;
+  schedule_value: string;
+  status: string;
+  next_run: string;
+  groupFolder: string;
+}
+
+/**
+ * Checks a schedule value against its schedule type.
+ *
+ * Only cheap checks are done here: intervals must parse to a positive integer,
+ * one-time schedules must parse as a date, and cron expressions must be
+ * non-empty (cron syntax itself is not validated in this module).
+ *
+ * @returns An error message for the agent, or null when the value is acceptable.
+ */
+function validateSchedule(type: 'cron' | 'interval' | 'once', value: string): string | null {
+  if (type === 'interval') {
+    const ms = Number.parseInt(value, 10);
+    if (Number.isNaN(ms) || ms <= 0) {
+      return `Invalid interval: "${value}". Must be a positive number of milliseconds (e.g. "300000" for 5 minutes).`;
+    }
+  } else if (type === 'once') {
+    if (Number.isNaN(new Date(value).getTime())) {
+      return `Invalid timestamp: "${value}". Use an ISO 8601 timestamp such as "2026-02-01T15:30:00Z".`;
+    }
+  } else if (value.trim() === '') {
+    return 'Invalid cron expression: value must not be empty.';
+  }
+  return null;
+}
+
+/**
+ * Builds the in-process "nanoclaw" MCP server for one agent run.
+ *
+ * The tools never talk to the host directly: send_message, schedule_task and
+ * the pause/resume/cancel tools each drop a JSON request file into the IPC
+ * directories, and list_tasks reads current_tasks.json from the IPC root.
+ *
+ * @param ctx Chat id, group folder and main-group flag of the current run.
+ * @returns The server object produced by createSdkMcpServer.
+ */
+export function createIpcMcp(ctx: IpcMcpContext) {
+  const { chatJid, groupFolder, isMain } = ctx;
+
+  // Wraps plain text in the MCP tool-result shape; isError flags a failed call
+  const textResult = (text: string, isError = false) => ({
+    content: [{ type: 'text' as const, text }],
+    ...(isError ? { isError: true } : {})
+  });
+
+  // pause/resume/cancel differ only in name and wording, so they share one builder
+  const taskControlTool = (
+    name: 'pause_task' | 'resume_task' | 'cancel_task',
+    description: string,
+    idDescription: string,
+    confirmation: (taskId: string) => string
+  ) =>
+    tool(
+      name,
+      description,
+      {
+        task_id: z.string().describe(idDescription)
+      },
+      async (args) => {
+        writeIpcFile(TASKS_DIR, {
+          type: name,
+          taskId: args.task_id,
+          groupFolder,
+          isMain,
+          timestamp: new Date().toISOString()
+        });
+
+        return textResult(confirmation(args.task_id));
+      }
+    );
+
+  return createSdkMcpServer({
+    name: 'nanoclaw',
+    version: '1.0.0',
+    tools: [
+      // Send a message to the WhatsApp group
+      tool(
+        'send_message',
+        'Send a message to the current WhatsApp group. Use this to proactively share information or updates.',
+        {
+          text: z.string().describe('The message text to send')
+        },
+        async (args) => {
+          const filename = writeIpcFile(MESSAGES_DIR, {
+            type: 'message',
+            chatJid,
+            text: args.text,
+            groupFolder,
+            timestamp: new Date().toISOString()
+          });
+
+          return textResult(`Message queued for delivery (${filename})`);
+        }
+      ),
+
+      // Schedule a new task
+      tool(
+        'schedule_task',
+        'Schedule a recurring or one-time task. The task will run as a full agent with access to all tools.',
+        {
+          prompt: z.string().describe('What the agent should do when the task runs'),
+          schedule_type: z.enum(['cron', 'interval', 'once']).describe('Type of schedule'),
+          schedule_value: z.string().describe('Cron expression, interval in ms, or ISO timestamp'),
+          target_group: z.string().optional().describe('Target group folder (main only, defaults to current group)')
+        },
+        async (args) => {
+          // Reject values that cannot work before queuing them, so the agent
+          // gets immediate feedback and can retry with a corrected value
+          const problem = validateSchedule(args.schedule_type, args.schedule_value);
+          if (problem !== null) {
+            return textResult(problem, true);
+          }
+
+          // Non-main groups can only schedule for themselves
+          const targetGroup = isMain && args.target_group ? args.target_group : groupFolder;
+
+          const filename = writeIpcFile(TASKS_DIR, {
+            type: 'schedule_task',
+            prompt: args.prompt,
+            schedule_type: args.schedule_type,
+            schedule_value: args.schedule_value,
+            groupFolder: targetGroup,
+            chatJid,
+            createdBy: groupFolder,
+            timestamp: new Date().toISOString()
+          });
+
+          return textResult(`Task scheduled (${filename}): ${args.schedule_type} - ${args.schedule_value}`);
+        }
+      ),
+
+      // List tasks (reads from a mounted file that host keeps updated)
+      tool(
+        'list_tasks',
+        'List all scheduled tasks. From main: shows all tasks. From other groups: shows only that group\'s tasks.',
+        {},
+        async () => {
+          // Host process writes current tasks to this file
+          const tasksFile = path.join(IPC_DIR, 'current_tasks.json');
+
+          try {
+            if (!fs.existsSync(tasksFile)) {
+              return textResult('No scheduled tasks found.');
+            }
+
+            const parsed: unknown = JSON.parse(fs.readFileSync(tasksFile, 'utf-8'));
+            if (!Array.isArray(parsed)) {
+              return textResult('Error reading tasks: current_tasks.json does not contain a task list', true);
+            }
+            const allTasks = parsed as TaskSummary[];
+
+            // Filter to current group unless main
+            const tasks = isMain
+              ? allTasks
+              : allTasks.filter(t => t.groupFolder === groupFolder);
+
+            if (tasks.length === 0) {
+              return textResult('No scheduled tasks found.');
+            }
+
+            const formatted = tasks.map(t => {
+              // Add the ellipsis only when the prompt was actually cut short
+              const preview = t.prompt.length > PROMPT_PREVIEW_LENGTH
+                ? `${t.prompt.slice(0, PROMPT_PREVIEW_LENGTH)}...`
+                : t.prompt;
+              return `- [${t.id}] ${preview} (${t.schedule_type}: ${t.schedule_value}) - ${t.status}, next: ${t.next_run || 'N/A'}`;
+            }).join('\n');
+
+            return textResult(`Scheduled tasks:\n${formatted}`);
+          } catch (err) {
+            return textResult(
+              `Error reading tasks: ${err instanceof Error ? err.message : String(err)}`,
+              true
+            );
+          }
+        }
+      ),
+
+      // Pause a task
+      taskControlTool(
+        'pause_task',
+        'Pause a scheduled task. It will not run until resumed.',
+        'The task ID to pause',
+        taskId => `Task ${taskId} pause requested.`
+      ),
+
+      // Resume a task
+      taskControlTool(
+        'resume_task',
+        'Resume a paused task.',
+        'The task ID to resume',
+        taskId => `Task ${taskId} resume requested.`
+      ),
+
+      // Cancel a task
+      taskControlTool(
+        'cancel_task',
+        'Cancel and delete a scheduled task.',
+        'The task ID to cancel',
+        taskId => `Task ${taskId} cancellation requested.`
+      )
+    ]
+  });
+}
--- a/container/agent-runner/tsconfig.json
+++ b/container/agent-runner/tsconfig.json
@@ -0,0 +1,15 @@
+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "NodeNext",
+    "moduleResolution": "NodeNext",
+    "outDir": "./dist",
+    "rootDir": "./src",
+    "strict": true,
+    "esModuleInterop": true,
+    "skipLibCheck": true,
+    "declaration": true
+  },
+  "include": ["src/**/*"],
+  "exclude": ["node_modules", "dist"]
+}
--- a/container/build.sh
+++ b/container/build.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Build the NanoClaw agent container image
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+cd "$SCRIPT_DIR"
+
+IMAGE_NAME="nanoclaw-agent"
+TAG="${1:-latest}"
+
+echo "Building NanoClaw agent container image..."
+echo "Image: ${IMAGE_NAME}:${TAG}"
+
+# Build with Apple Container
+container build -t "${IMAGE_NAME}:${TAG}" .
+
+echo ""
+echo "Build complete!"
+echo "Image: ${IMAGE_NAME}:${TAG}"
+echo ""
+echo "Test with:"
+echo "  echo '{\"prompt\":\"What is 2+2?\",\"groupFolder\":\"test\",\"chatJid\":\"test@g.us\",\"isMain\":false}' | container run -i ${IMAGE_NAME}:${TAG}"
--- a/container/skills/agent-browser.md
+++ b/container/skills/agent-browser.md
@@ -0,0 +1,159 @@
+---
+name: agent-browser
+description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
+allowed-tools: Bash(agent-browser:*)
+---
+
+# Browser Automation with agent-browser
+
+## Quick start
+
+```bash
+agent-browser open <url>        # Navigate to page
+agent-browser snapshot -i       # Get interactive elements with refs
+agent-browser click @e1         # Click element by ref
+agent-browser fill @e2 "text"   # Fill input by ref
+agent-browser close             # Close browser
+```
+
+## Core workflow
+
+1. Navigate: `agent-browser open <url>`
+2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
+3. Interact using refs from the snapshot
+4. Re-snapshot after navigation or significant DOM changes
+
+## Commands
+
+### Navigation
+
+```bash
+agent-browser open <url>      # Navigate to URL
+agent-browser back            # Go back
+agent-browser forward         # Go forward
+agent-browser reload          # Reload page
+agent-browser close           # Close browser
+```
+
+### Snapshot (page analysis)
+
+```bash
+agent-browser snapshot            # Full accessibility tree
+agent-browser snapshot -i         # Interactive elements only (recommended)
+agent-browser snapshot -c         # Compact output
+agent-browser snapshot -d 3       # Limit depth to 3
+agent-browser snapshot -s "#main" # Scope to CSS selector
+```
+
+### Interactions (use @refs from snapshot)
+
+```bash
+agent-browser click @e1           # Click
+agent-browser dblclick @e1        # Double-click
+agent-browser fill @e2 "text"     # Clear and type
+agent-browser type @e2 "text"     # Type without clearing
+agent-browser press Enter         # Press key
+agent-browser hover @e1           # Hover
+agent-browser check @e1           # Check checkbox
+agent-browser uncheck @e1         # Uncheck checkbox
+agent-browser select @e1 "value"  # Select dropdown option
+agent-browser scroll down 500     # Scroll page
+agent-browser upload @e1 file.pdf # Upload files
+```
+
+### Get information
+
+```bash
+agent-browser get text @e1        # Get element text
+agent-browser get html @e1        # Get innerHTML
+agent-browser get value @e1       # Get input value
+agent-browser get attr @e1 href   # Get attribute
+agent-browser get title           # Get page title
+agent-browser get url             # Get current URL
+agent-browser get count ".item"   # Count matching elements
+```
+
+### Screenshots & PDF
+
+```bash
+agent-browser screenshot          # Save to temp directory
+agent-browser screenshot path.png # Save to specific path
+agent-browser screenshot --full   # Full page
+agent-browser pdf output.pdf      # Save as PDF
+```
+
+### Wait
+
+```bash
+agent-browser wait @e1                     # Wait for element
+agent-browser wait 2000                    # Wait milliseconds
+agent-browser wait --text "Success"        # Wait for text
+agent-browser wait --url "**/dashboard"    # Wait for URL pattern
+agent-browser wait --load networkidle      # Wait for network idle
+```
+
+### Semantic locators (alternative to refs)
+
+```bash
+agent-browser find role button click --name "Submit"
+agent-browser find text "Sign In" click
+agent-browser find label "Email" fill "user@test.com"
+agent-browser find placeholder "Search" type "query"
+```
+
+### Authentication with saved state
+
+```bash
+# Login once
+agent-browser open https://app.example.com/login
+agent-browser snapshot -i
+agent-browser fill @e1 "username"
+agent-browser fill @e2 "password"
+agent-browser click @e3
+agent-browser wait --url "**/dashboard"
+agent-browser state save auth.json
+
+# Later: load saved state
+agent-browser state load auth.json
+agent-browser open https://app.example.com/dashboard
+```
+
+### Cookies & Storage
+
+```bash
+agent-browser cookies                     # Get all cookies
+agent-browser cookies set name value      # Set cookie
+agent-browser cookies clear               # Clear cookies
+agent-browser storage local               # Get localStorage
+agent-browser storage local set k v       # Set localStorage value
+```
+
+### JavaScript
+
+```bash
+agent-browser eval "document.title"   # Run JavaScript
+```
+
+## Example: Form submission
+
+```bash
+agent-browser open https://example.com/form
+agent-browser snapshot -i
+# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
+
+agent-browser fill @e1 "user@example.com"
+agent-browser fill @e2 "password123"
+agent-browser click @e3
+agent-browser wait --load networkidle
+agent-browser snapshot -i  # Check result
+```
+
+## Example: Data extraction
+
+```bash
+agent-browser open https://example.com/products
+agent-browser snapshot -i
+agent-browser get text @e1  # Get product title
+agent-browser get attr @e2 href  # Get link URL
+agent-browser screenshot products.png
+```