From f26468c9b07effaf15fb9e455cb0e6ffa488bcc6 Mon Sep 17 00:00:00 2001 From: gavrielc Date: Sat, 7 Feb 2026 01:39:31 +0200 Subject: [PATCH] fix: setup skill reliability, requiresTrigger option, agent-browser visibility Setup skill fixes: - Run QR auth in foreground with long timeout, not background - Replace fragile message-based registration with DB group sync lookup - Personal chats: ask for phone number instead of querying empty DB - Consolidate trigger word + security model + channel selection into one step - Remove `timeout` shell command (unavailable on macOS), use Bash tool timeout - Query 40 groups, display 10 at a time, support name lookup requiresTrigger support: - Add requiresTrigger field to RegisteredGroup type and DB schema - Skip trigger check when requiresTrigger is false (for solo/personal chats) - Main group still always processes all messages (unchanged) Agent-browser visibility: - Append global CLAUDE.md to non-main agent system prompts via SDK - Add browser tool docs to global and main CLAUDE.md - Update skill description to be broader (not just "web testing") - Reference agent-browser.md in root CLAUDE.md key files Co-Authored-By: Claude Opus 4.6 --- .claude/skills/setup/SKILL.md | 118 +++++++++++++++++----------- CLAUDE.md | 1 + container/agent-runner/src/index.ts | 10 +++ container/skills/agent-browser.md | 2 +- groups/global/CLAUDE.md | 1 + groups/main/CLAUDE.md | 8 ++ src/db.ts | 21 ++++- src/index.ts | 4 +- src/types.ts | 1 + 9 files changed, 114 insertions(+), 52 deletions(-) diff --git a/.claude/skills/setup/SKILL.md b/.claude/skills/setup/SKILL.md index a92520e..33ae309 100644 --- a/.claude/skills/setup/SKILL.md +++ b/.claude/skills/setup/SKILL.md @@ -141,39 +141,39 @@ fi **USER ACTION REQUIRED** -Run the authentication script: +**IMPORTANT:** Run this command in the **foreground**. The QR code is multi-line ASCII art that must be displayed in full. Do NOT run in background or truncate the output. 
+ +Tell the user: +> A QR code will appear below. On your phone: +> 1. Open WhatsApp +> 2. Tap **Settings → Linked Devices → Link a Device** +> 3. Scan the QR code + +Run with a long Bash tool timeout (120000ms) so the user has time to scan. Do NOT use the `timeout` shell command (it's not available on macOS). ```bash npm run auth ``` -Tell the user: -> A QR code will appear. On your phone: -> 1. Open WhatsApp -> 2. Tap **Settings → Linked Devices → Link a Device** -> 3. Scan the QR code - Wait for the script to output "Successfully authenticated" then continue. If it says "Already authenticated", skip to the next step. -## 6. Configure Assistant Name +## 6. Configure Assistant Name and Main Channel + +This step configures three things at once: the trigger word, the main channel type, and the main channel selection. + +### 6a. Ask for trigger word Ask the user: > What trigger word do you want to use? (default: `Andy`) > -> Messages starting with `@TriggerWord` will be sent to Claude. +> In group chats, messages starting with `@TriggerWord` will be sent to Claude. +> In your main channel (and optionally solo chats), no prefix is needed — all messages are processed. -If they choose something other than `Andy`, update it in these places: -1. `groups/CLAUDE.md` - Change "# Andy" and "You are Andy" to the new name -2. `groups/main/CLAUDE.md` - Same changes at the top -3. `data/registered_groups.json` - Use `@NewName` as the trigger when registering groups +Store their choice for use in the steps below. -Store their choice - you'll use it when creating the registered_groups.json and when telling them how to test. - -## 7. Understand the Security Model - -Before registering your main channel, you need to understand an important security concept. +### 6b. Explain security model and ask about main channel type **Use the AskUserQuestion tool** to present this: @@ -207,51 +207,73 @@ If they choose option 3, ask a follow-up: > 1. Yes, I understand and want to proceed > 2. 
No, let me use a personal chat or solo group instead -## 8. Register Main Channel +### 6c. Register the main channel -Ask the user: -> Do you want to use your **personal chat** (message yourself) or a **WhatsApp group** as your main control channel? - -For personal chat: -> Send any message to yourself in WhatsApp (the "Message Yourself" chat). Tell me when done. - -For group: -> Send any message in the WhatsApp group you want to use as your main channel. Tell me when done. - -After user confirms, start the app briefly to capture the message: +First build, then start the app briefly to connect to WhatsApp and sync group metadata. Use the Bash tool's timeout parameter (15000ms) — do NOT use the `timeout` shell command (it's not available on macOS). The app will be killed when the timeout fires, which is expected. ```bash -timeout 10 npm run dev || true +npm run build ``` -Then find the JID from the database: - +Then run briefly (set Bash tool timeout to 15000ms): ```bash -# For personal chat (ends with @s.whatsapp.net) -sqlite3 store/messages.db "SELECT DISTINCT chat_jid FROM messages WHERE chat_jid LIKE '%@s.whatsapp.net' ORDER BY timestamp DESC LIMIT 5" - -# For group (ends with @g.us) -sqlite3 store/messages.db "SELECT DISTINCT chat_jid FROM messages WHERE chat_jid LIKE '%@g.us' ORDER BY timestamp DESC LIMIT 5" +npm run dev ``` -Create/update `data/registered_groups.json` using the JID from above and the assistant name from step 5: +**For personal chat** (they chose option 1): + +Personal chats are NOT synced to the database on startup — only groups are. Instead, ask the user for their phone number (with country code, no + or spaces, e.g. `14155551234`), then construct the JID as `{number}@s.whatsapp.net`. + +**For group** (they chose option 2 or 3): + +Groups are synced on startup via `groupFetchAllParticipating`. 
Query the database for recent groups: +```bash +sqlite3 store/messages.db "SELECT jid, name FROM chats WHERE jid LIKE '%@g.us' AND jid != '__group_sync__' ORDER BY last_message_time DESC LIMIT 40" +``` + +Show only the **10 most recent** group names to the user and ask them to pick one. If they say their group isn't in the list, show the next batch from the results you already have. If they tell you the group name directly, look it up: +```bash +sqlite3 store/messages.db "SELECT jid, name FROM chats WHERE name LIKE '%GROUP_NAME%' AND jid LIKE '%@g.us'" +``` + +### 6d. Write the configuration + +Once you have the JID, configure it. Use the assistant name from step 6a. + +For personal chats (solo, no prefix needed), set `requiresTrigger` to `false`: + ```json { "JID_HERE": { "name": "main", "folder": "main", "trigger": "@ASSISTANT_NAME", - "added_at": "CURRENT_ISO_TIMESTAMP" + "added_at": "CURRENT_ISO_TIMESTAMP", + "requiresTrigger": false } } ``` +For groups, keep `requiresTrigger` as `true` (default). + +Write to the database directly by creating a temporary registration script, or write `data/registered_groups.json` which will be auto-migrated on first run: + +```bash +mkdir -p data +``` + +Then write `data/registered_groups.json` with the correct JID, trigger, and timestamp. + +If the user chose a name other than `Andy`, also update: +1. `groups/global/CLAUDE.md` - Change "# Andy" and "You are Andy" to the new name +2. `groups/main/CLAUDE.md` - Same changes at the top + Ensure the groups folder exists: ```bash mkdir -p groups/main/logs ``` -## 9. Configure External Directory Access (Mount Allowlist) +## 7. Configure External Directory Access (Mount Allowlist) Ask the user: > Do you want the agent to be able to access any directories **outside** the NanoClaw project? @@ -278,7 +300,7 @@ Skip to the next step. If **yes**, ask follow-up questions: -### 9a. Collect Directory Paths +### 7a. 
Collect Directory Paths Ask the user: > Which directories do you want to allow access to? @@ -295,14 +317,14 @@ For each directory they provide, ask: > Read-write is needed for: code changes, creating files, git commits > Read-only is safer for: reference docs, config examples, templates -### 9b. Configure Non-Main Group Access +### 7b. Configure Non-Main Group Access Ask the user: > Should **non-main groups** (other WhatsApp chats you add later) be restricted to **read-only** access even if read-write is allowed for the directory? > > Recommended: **Yes** - this prevents other groups from modifying files even if you grant them access to a directory. -### 9c. Create the Allowlist +### 7c. Create the Allowlist Create the allowlist file based on their answers: @@ -358,7 +380,7 @@ Tell the user: > } > ``` -## 10. Configure launchd Service +## 8. Configure launchd Service Generate the plist file with correct paths automatically: @@ -418,10 +440,12 @@ Verify it's running: launchctl list | grep nanoclaw ``` -## 11. Test +## 9. Test Tell the user (using the assistant name they configured): > Send `@ASSISTANT_NAME hello` in your registered chat. +> +> **Tip:** In your main channel, you don't need the `@` prefix — just send `hello` and the agent will respond. Check the logs: ```bash @@ -442,7 +466,9 @@ The user should receive a response in WhatsApp. 
**No response to messages**: - Verify the trigger pattern matches (e.g., `@AssistantName` at start of message) -- Check that the chat JID is in `data/registered_groups.json` +- Main channel doesn't require a prefix — all messages are processed +- Personal/solo chats with `requiresTrigger: false` also don't need a prefix +- Check that the chat JID is in the database: `sqlite3 store/messages.db "SELECT * FROM registered_groups"` - Check `logs/nanoclaw.log` for errors **WhatsApp disconnected**: diff --git a/CLAUDE.md b/CLAUDE.md index 1fd6a78..5de8fe0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -16,6 +16,7 @@ Single Node.js process that connects to WhatsApp, routes messages to Claude Agen | `src/task-scheduler.ts` | Runs scheduled tasks | | `src/db.ts` | SQLite operations | | `groups/{name}/CLAUDE.md` | Per-group memory (isolated) | +| `container/skills/agent-browser.md` | Browser automation tool (available to all agents via Bash) | ## Skills diff --git a/container/agent-runner/src/index.ts b/container/agent-runner/src/index.ts index fd57e7e..92aed6d 100644 --- a/container/agent-runner/src/index.ts +++ b/container/agent-runner/src/index.ts @@ -257,6 +257,13 @@ async function main(): Promise { prompt = `[SCHEDULED TASK - The following message was sent automatically and is not coming directly from the user or group.]\n\n${input.prompt}`; } + // Load global CLAUDE.md as additional system context (shared across all groups) + const globalClaudeMdPath = '/workspace/global/CLAUDE.md'; + let globalClaudeMd: string | undefined; + if (!input.isMain && fs.existsSync(globalClaudeMdPath)) { + globalClaudeMd = fs.readFileSync(globalClaudeMdPath, 'utf-8'); + } + try { log('Starting agent...'); @@ -265,6 +272,9 @@ async function main(): Promise { options: { cwd: '/workspace/group', resume: input.sessionId, + systemPrompt: globalClaudeMd + ? 
{ type: 'preset' as const, preset: 'claude_code' as const, append: globalClaudeMd } + : undefined, allowedTools: [ 'Bash', 'Read', 'Write', 'Edit', 'Glob', 'Grep', diff --git a/container/skills/agent-browser.md b/container/skills/agent-browser.md index f82ca68..dd6c6bf 100644 --- a/container/skills/agent-browser.md +++ b/container/skills/agent-browser.md @@ -1,6 +1,6 @@ --- name: agent-browser -description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages. +description: Browse the web for any task — research topics, read articles, interact with web apps, fill forms, take screenshots, extract data, and test web pages. Use whenever a browser would be useful, not just when the user explicitly asks. allowed-tools: Bash(agent-browser:*) --- diff --git a/groups/global/CLAUDE.md b/groups/global/CLAUDE.md index bbe7fe9..db8bc00 100644 --- a/groups/global/CLAUDE.md +++ b/groups/global/CLAUDE.md @@ -6,6 +6,7 @@ You are Andy, a personal assistant. You help with tasks, answer questions, and c - Answer questions and have conversations - Search the web and fetch content from URLs +- **Browse the web** with `agent-browser` — open pages, click, fill forms, take screenshots, extract data (run `agent-browser open <url>` to start, then `agent-browser snapshot -i` to see interactive elements) - Read and write files in your workspace - Run bash commands in your sandbox - Schedule tasks to run later or on a recurring basis diff --git a/groups/main/CLAUDE.md b/groups/main/CLAUDE.md index 6952dd1..464a448 100644 --- a/groups/main/CLAUDE.md +++ b/groups/main/CLAUDE.md @@ -6,6 +6,7 @@ You are Andy, a personal assistant. 
You help with tasks, answer questions, and c - Answer questions and have conversations - Search the web and fetch content from URLs +- **Browse the web** with `agent-browser` — open pages, click, fill forms, take screenshots, extract data (run `agent-browser open <url>` to start, then `agent-browser snapshot -i` to see interactive elements) - Read and write files in your workspace - Run bash commands in your sandbox - Schedule tasks to run later or on a recurring basis @@ -126,8 +127,15 @@ Fields: - **name**: Display name for the group - **folder**: Folder name under `groups/` for this group's files and memory - **trigger**: The trigger word (usually same as global, but could differ) +- **requiresTrigger**: Whether `@trigger` prefix is needed (default: `true`). Set to `false` for solo/personal chats where all messages should be processed - **added_at**: ISO timestamp when registered +### Trigger Behavior + +- **Main group**: No trigger needed — all messages are processed automatically +- **Groups with `requiresTrigger: false`**: No trigger needed — all messages processed (use for 1-on-1 or solo chats) +- **Other groups** (default): Messages must start with `@AssistantName` to be processed + ### Adding a Group 1. 
Query the database to find the group's JID diff --git a/src/db.ts b/src/db.ts index a5bd049..949677c 100644 --- a/src/db.ts +++ b/src/db.ts @@ -78,6 +78,15 @@ export function initDatabase(): void { /* column already exists */ } + // Add requires_trigger column if it doesn't exist (migration for existing DBs) + try { + db.exec( + `ALTER TABLE registered_groups ADD COLUMN requires_trigger INTEGER DEFAULT 1`, + ); + } catch { + /* column already exists */ + } + // State tables (replacing JSON files) db.exec(` CREATE TABLE IF NOT EXISTS router_state ( @@ -94,7 +103,8 @@ export function initDatabase(): void { folder TEXT NOT NULL UNIQUE, trigger_pattern TEXT NOT NULL, added_at TEXT NOT NULL, - container_config TEXT + container_config TEXT, + requires_trigger INTEGER DEFAULT 1 ); `); @@ -460,6 +470,7 @@ export function getRegisteredGroup( trigger_pattern: string; added_at: string; container_config: string | null; + requires_trigger: number | null; } | undefined; if (!row) return undefined; @@ -472,6 +483,7 @@ export function getRegisteredGroup( containerConfig: row.container_config ? JSON.parse(row.container_config) : undefined, + requiresTrigger: row.requires_trigger === null ? undefined : row.requires_trigger === 1, }; } @@ -480,8 +492,8 @@ export function setRegisteredGroup( group: RegisteredGroup, ): void { db.prepare( - `INSERT OR REPLACE INTO registered_groups (jid, name, folder, trigger_pattern, added_at, container_config) - VALUES (?, ?, ?, ?, ?, ?)`, + `INSERT OR REPLACE INTO registered_groups (jid, name, folder, trigger_pattern, added_at, container_config, requires_trigger) + VALUES (?, ?, ?, ?, ?, ?, ?)`, ).run( jid, group.name, @@ -489,6 +501,7 @@ export function setRegisteredGroup( group.trigger, group.added_at, group.containerConfig ? JSON.stringify(group.containerConfig) : null, + group.requiresTrigger === undefined ? 1 : group.requiresTrigger ? 
1 : 0, ); } @@ -502,6 +515,7 @@ export function getAllRegisteredGroups(): Record<string, RegisteredGroup> { trigger_pattern: string; added_at: string; container_config: string | null; + requires_trigger: number | null; }>; const result: Record<string, RegisteredGroup> = {}; for (const row of rows) { @@ -513,6 +527,7 @@ export function getAllRegisteredGroups(): Record<string, RegisteredGroup> { containerConfig: row.container_config ? JSON.parse(row.container_config) : undefined, + requiresTrigger: row.requires_trigger === null ? undefined : row.requires_trigger === 1, }; } return result; diff --git a/src/index.ts b/src/index.ts index c13a488..0a33dc3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -209,8 +209,8 @@ async function processGroupMessages(chatJid: string): Promise<boolean> { if (missedMessages.length === 0) return true; - // For non-main groups, check if any message has the trigger - if (!isMainGroup) { + // For non-main groups, check if trigger is required and present + if (!isMainGroup && group.requiresTrigger !== false) { const hasTrigger = missedMessages.some((m) => TRIGGER_PATTERN.test(m.content.trim()), ); diff --git a/src/types.ts b/src/types.ts index 53027b4..9fc7bda 100644 --- a/src/types.ts +++ b/src/types.ts @@ -38,6 +38,7 @@ export interface RegisteredGroup { trigger: string; added_at: string; containerConfig?: ContainerConfig; + requiresTrigger?: boolean; // Default: true for groups, false for solo chats } export interface NewMessage {