feat: config-driven architecture, install wizard, live runtime switching, usage tracking, auto-failover

Major changes:
- Config-driven adapters: all channels (Slack, Discord, Telegram, WebChat, Webhooks) controlled via config.json with enabled flags and token auto-detection, no CLI flags required
- Runtime engine field: runtime.engine selects opencode/claude from config
- Interactive install script: 8-phase setup wizard with AI runtime detection/installation, token setup, identity file personalization (personality presets), aetheel CLI command, background service (launchd/systemd)
- Live runtime switching: /engine, /model, /provider commands hot-swap the AI runtime from chat without restart, changes persisted to config.json
- Usage tracking: per-request cost extraction from Claude Code JSON output, cumulative stats via /usage command
- Auto-failover: rate limit detection on both runtimes, automatic switch to other engine on quota errors with user notification
- Chat commands work without / prefix (Slack intercepts / in channels), commands: engine, model, provider, config, usage, reload, cron, subagents, status, help
- /config set for editing config.json from chat with dotted key notation
- Security audit saved to docs/security-audit.md
- Full command reference in docs/commands.md
- Future changes doc with NanoClaw agent teams analysis
- Logo added to README and WebChat UI
- README fully rewritten with all features documented
2026-02-18 01:07:12 -05:00
parent 41b2f9a593
commit 6d73f74e0b
41 changed files with 11363 additions and 437 deletions
--- a/agent/claude_runtime.py
+++ b/agent/claude_runtime.py
@@ -27,6 +27,32 @@ from agent.opencode_runtime import AgentResponse, SessionStore
 logger = logging.getLogger("aetheel.agent.claude")


+# ---------------------------------------------------------------------------
+# Rate Limit Detection
+# ---------------------------------------------------------------------------
+
+_RATE_LIMIT_PATTERNS = [  # lowercase substrings; _is_rate_limited() tests each against lower-cased text
+    "rate limit",
+    "rate_limit",
+    "too many requests",
+    "429",  # NOTE(review): bare substring, so any text containing "429" matches; presumably the HTTP status code, confirm
+    "quota exceeded",
+    "usage limit",
+    "capacity",
+    "overloaded",
+    "credit balance",
+    "billing",
+    "exceeded your",
+    "max usage",
+]
+
+
+def _is_rate_limited(text: str) -> bool:
+    """Return True if *text* contains any _RATE_LIMIT_PATTERNS entry (case-insensitive substring match)."""
+    lower = text.lower()  # patterns are stored lowercase, so compare against a lower-cased copy
+    return any(pattern in lower for pattern in _RATE_LIMIT_PATTERNS)
+
+
 # ---------------------------------------------------------------------------
 # CLI Resolution
 # ---------------------------------------------------------------------------
@@ -85,9 +111,14 @@ class ClaudeCodeConfig:
    # claude -p flags
    output_format: str = "json"  # "json", "text", or "stream-json"
    # Permission settings
-    allowed_tools: list[str] = field(default_factory=list)
+    allowed_tools: list[str] = field(default_factory=lambda: [
+        "Bash", "Read", "Write", "Edit", "Glob", "Grep",
+        "WebSearch", "WebFetch",
+        "Task", "TaskOutput", "TaskStop", "Skill",
+        "TeamCreate", "TeamDelete", "SendMessage",
+    ])
    # Whether to disable all tool use (pure conversation mode)
-    no_tools: bool = True  # Default: no tools for chat responses
+    no_tools: bool = False  # Default: tools enabled

    @classmethod
    def from_env(cls) -> "ClaudeCodeConfig":
@@ -99,7 +130,7 @@ class ClaudeCodeConfig:
            max_turns=int(os.environ.get("CLAUDE_MAX_TURNS", "3")),
            workspace_dir=os.environ.get("CLAUDE_WORKSPACE"),
            system_prompt=os.environ.get("CLAUDE_SYSTEM_PROMPT"),
-            no_tools=os.environ.get("CLAUDE_NO_TOOLS", "true").lower() == "true",
+            no_tools=os.environ.get("CLAUDE_NO_TOOLS", "false").lower() == "true",
        )


@@ -222,10 +253,11 @@ class ClaudeCodeRuntime:
                return AgentResponse(
                    text="",
                    error=f"Claude Code error: {error_text[:500]}",
+                    rate_limited=_is_rate_limited(error_text),
                )

            # Parse the output
-            response_text, session_id = self._parse_output(stdout)
+            response_text, session_id, usage = self._parse_output(stdout)

            if not response_text:
                response_text = stdout  # Fallback to raw output
@@ -234,11 +266,22 @@ class ClaudeCodeRuntime:
            if session_id and conversation_id:
                self._sessions.set(conversation_id, session_id)

+            # Detect rate limiting from error text
+            rate_limited = False
+            if not response_text and stderr:
+                rate_limited = _is_rate_limited(stderr)
+            if usage and usage.get("is_error"):
+                rate_limited = rate_limited or _is_rate_limited(
+                    usage.get("error_text", "")
+                )
+
            return AgentResponse(
                text=response_text,
                session_id=session_id,
                model=self._config.model,
                provider="anthropic",
+                usage=usage,
+                rate_limited=rate_limited,
            )

        except subprocess.TimeoutExpired:
@@ -314,7 +357,7 @@ class ClaudeCodeRuntime:
        env = os.environ.copy()
        return env

-    def _parse_output(self, stdout: str) -> tuple[str, str | None]:
+    def _parse_output(self, stdout: str) -> tuple[str, str | None, dict | None]:
        """
        Parse claude CLI output.

@@ -334,7 +377,7 @@ class ClaudeCodeRuntime:
        With --output-format text, it returns plain text.
        """
        if not stdout.strip():
-            return "", None
+            return "", None, None

        # Try JSON format first
        try:
@@ -345,12 +388,23 @@ class ClaudeCodeRuntime:
                text = data.get("result", "")
                session_id = data.get("session_id")

+                # Extract usage stats
+                usage = {
+                    "cost_usd": data.get("cost_usd", 0),
+                    "num_turns": data.get("num_turns", 0),
+                    "duration_ms": data.get("duration_ms", 0),
+                    "duration_api_ms": data.get("duration_api_ms", 0),
+                    "is_error": data.get("is_error", False),
+                    "subtype": data.get("subtype", ""),
+                }
+
                if data.get("is_error"):
                    error_msg = text or data.get("error", "Unknown error")
+                    usage["error_text"] = error_msg
                    logger.warning(f"Claude returned error: {error_msg[:200]}")
-                    return f"⚠️ {error_msg}", session_id
+                    return f"⚠️ {error_msg}", session_id, usage

-                return text, session_id
+                return text, session_id, usage

        except json.JSONDecodeError:
            pass
@@ -358,6 +412,7 @@ class ClaudeCodeRuntime:
        # Try JSONL (stream-json) format
        text_parts = []
        session_id = None
+        usage = None
        for line in stdout.splitlines():
            line = line.strip()
            if not line:
@@ -368,6 +423,13 @@ class ClaudeCodeRuntime:
                    if event.get("type") == "result":
                        text_parts.append(event.get("result", ""))
                        session_id = event.get("session_id", session_id)
+                        usage = {
+                            "cost_usd": event.get("cost_usd", 0),
+                            "num_turns": event.get("num_turns", 0),
+                            "duration_ms": event.get("duration_ms", 0),
+                            "duration_api_ms": event.get("duration_api_ms", 0),
+                            "is_error": event.get("is_error", False),
+                        }
                    elif event.get("type") == "assistant" and "message" in event:
                        # Extract text from content blocks
                        msg = event["message"]
@@ -380,10 +442,10 @@ class ClaudeCodeRuntime:
                continue

        if text_parts:
-            return "\n".join(text_parts), session_id
+            return "\n".join(text_parts), session_id, usage

        # Fallback: treat as plain text
-        return stdout, None
+        return stdout, None, None

    # -------------------------------------------------------------------
    # Validation