Files
Aetheel/agent/subagent.py
tanmay11k 6d73f74e0b feat: config-driven architecture, install wizard, live runtime switching, usage tracking, auto-failover
Major changes:
- Config-driven adapters: all channels (Slack, Discord, Telegram, WebChat, Webhooks) controlled via config.json with enabled flags and token auto-detection, no CLI flags required
- Runtime engine field: runtime.engine selects opencode/claude from config
- Interactive install script: 8-phase setup wizard with AI runtime detection/installation, token setup, identity file personalization (personality presets), aetheel CLI command, background service (launchd/systemd)
- Live runtime switching: /engine, /model, /provider commands hot-swap the AI runtime from chat without restart, changes persisted to config.json
- Usage tracking: per-request cost extraction from Claude Code JSON output, cumulative stats via /usage command
- Auto-failover: rate limit detection on both runtimes, automatic switch to other engine on quota errors with user notification
- Chat commands work without / prefix (Slack intercepts / in channels), commands: engine, model, provider, config, usage, reload, cron, subagents, status, help
- /config set for editing config.json from chat with dotted key notation
- Security audit saved to docs/security-audit.md
- Full command reference in docs/commands.md
- Future changes doc with NanoClaw agent teams analysis
- Logo added to README and WebChat UI
- README fully rewritten with all features documented
2026-02-18 01:07:12 -05:00

334 lines
11 KiB
Python

"""
Aetheel Subagent Manager
=========================

Spawns background AI agent sessions for long-running tasks.

The main agent can "spawn" a subagent by including an action tag in its
response. The subagent runs in a background thread with its own runtime
session and sends results back to the originating channel when done.

Usage:

    from agent.subagent import SubagentManager

    manager = SubagentManager(runtime_factory=make_runtime, send_fn=send_message)
    manager.spawn(
        task="Research Python 3.14 features",
        channel_id="C123",
        channel_type="slack",
    )
"""
import logging
import threading
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Callable
logger = logging.getLogger("aetheel.subagent")
# ---------------------------------------------------------------------------
# Types
# ---------------------------------------------------------------------------
@dataclass
class SubagentTask:
"""A running or completed subagent task."""
id: str
task: str # The task/prompt given to the subagent
channel_id: str
channel_type: str # "slack", "telegram", etc.
thread_id: str | None = None
user_name: str | None = None
status: str = "pending" # pending, running, done, failed
result: str | None = None
error: str | None = None
created_at: str = field(
default_factory=lambda: datetime.now(timezone.utc).isoformat()
)
duration_ms: int = 0
# Type aliases
# Zero-argument callable. SubagentManager calls it once per task and then
# invokes ``chat(...)`` on whatever object it returns.
RuntimeFactory = Callable[[], Any] # Creates a fresh runtime instance
# Delivery callback used to post a task's outcome back to its channel.
# thread_id may be None; the callback's return value is ignored.
SendFunction = Callable[[str, str, str | None, str], None]
# send_fn(channel_id, text, thread_id, channel_type)
# ---------------------------------------------------------------------------
# Subagent Bus (pub/sub for inter-subagent communication)
# ---------------------------------------------------------------------------
class SubagentBus:
    """Lock-guarded publish/subscribe hub that lets subagents message each other."""

    def __init__(self):
        self._lock = threading.Lock()
        # channel name -> callbacks in subscription order
        self._channels: dict[str, list[Callable]] = {}

    def subscribe(self, channel: str, callback: Callable[[str, str], None]) -> None:
        """Attach *callback* to *channel*; it is later called as callback(message, sender)."""
        with self._lock:
            listeners = self._channels.get(channel)
            if listeners is None:
                listeners = []
                self._channels[channel] = listeners
            listeners.append(callback)

    def publish(self, channel: str, message: str, sender: str) -> None:
        """Deliver *message* from *sender* to every callback subscribed to *channel*."""
        # Snapshot under the lock, then invoke outside it so a callback may
        # itself call back into the bus.
        with self._lock:
            snapshot = tuple(self._channels.get(channel, ()))
        for listener in snapshot:
            try:
                listener(message, sender)
            except Exception as e:
                # One failing subscriber must not stop delivery to the rest.
                logger.error(f"SubagentBus callback error: {e}")

    def unsubscribe_all(self, channel: str) -> None:
        """Drop every subscriber of *channel*; unknown channels are ignored."""
        with self._lock:
            if channel in self._channels:
                del self._channels[channel]
# ---------------------------------------------------------------------------
# Subagent Manager
# ---------------------------------------------------------------------------
class SubagentManager:
    """
    Manages background subagent tasks.

    Each subagent runs in its own daemon thread with a fresh runtime instance
    obtained from ``runtime_factory``. When the runtime call returns, the
    outcome is posted to the originating channel via ``send_fn`` unless the
    task has been cancelled in the meantime.

    All access to the task table goes through ``self._lock``.
    """

    # Statuses that occupy one of the ``max_concurrent`` slots.
    _ACTIVE_STATUSES = ("pending", "running")

    def __init__(
        self,
        runtime_factory: RuntimeFactory,
        send_fn: SendFunction,
        max_concurrent: int = 3,
    ):
        """
        Args:
            runtime_factory: Zero-argument callable returning a new runtime
                object; called once per task inside the worker thread.
            send_fn: Called as ``send_fn(channel_id, text, thread_id,
                channel_type)`` to deliver a task's outcome.
            max_concurrent: Maximum number of pending/running tasks allowed
                at any one time.
        """
        self._runtime_factory = runtime_factory
        self._send_fn = send_fn
        self._max_concurrent = max_concurrent
        self._tasks: dict[str, SubagentTask] = {}
        self._lock = threading.Lock()
        self._bus = SubagentBus()

    @property
    def bus(self) -> SubagentBus:
        """The pub/sub message bus for inter-subagent communication."""
        return self._bus

    def spawn(
        self,
        *,
        task: str,
        channel_id: str,
        channel_type: str = "slack",
        thread_id: str | None = None,
        user_name: str | None = None,
        context: str | None = None,
    ) -> str:
        """
        Spawn a background subagent to work on a task.

        Args:
            task: Prompt for the subagent.
            channel_id: Channel the outcome is sent to.
            channel_type: Adapter name passed through to ``send_fn``.
            thread_id: Optional thread to reply in.
            user_name: Optional user name forwarded to the system prompt.
            context: Optional extra text appended to the subagent's
                system-prompt context.

        Returns:
            The subagent ID. The work itself happens in a background thread,
            which sends the result back when done.

        Raises:
            RuntimeError: If ``max_concurrent`` tasks are already active, or
                if the worker thread cannot be started.
        """
        # Check the limit and register the task under ONE lock acquisition;
        # otherwise two concurrent spawn() calls could both pass the check
        # and exceed max_concurrent.
        subagent_task = None
        with self._lock:
            active = self._count_active_unlocked()
            if active < self._max_concurrent:
                task_id = uuid.uuid4().hex[:8]
                subagent_task = SubagentTask(
                    id=task_id,
                    task=task,
                    channel_id=channel_id,
                    channel_type=channel_type,
                    thread_id=thread_id,
                    user_name=user_name,
                )
                self._tasks[task_id] = subagent_task

        if subagent_task is None:
            logger.warning(
                f"Max concurrent subagents reached ({self._max_concurrent}). "
                f"Rejecting task: {task[:50]}"
            )
            raise RuntimeError(
                f"Too many active subagents ({active}/{self._max_concurrent}). "
                "Wait for one to finish."
            )

        # Launch in background thread
        thread = threading.Thread(
            target=self._run_subagent,
            args=(task_id, context),
            daemon=True,
            name=f"subagent-{task_id}",
        )
        try:
            thread.start()
        except RuntimeError as e:
            # The worker never ran; mark the task failed so it does not hold
            # a concurrency slot forever.
            with self._lock:
                subagent_task.status = "failed"
                subagent_task.error = f"Could not start subagent thread: {e}"
            raise

        logger.info(
            f"🚀 Subagent spawned: {task_id}'{task[:50]}' "
            f"(channel={channel_type}/{channel_id})"
        )
        return task_id

    def list_active(self) -> list[SubagentTask]:
        """List all active (running/pending) subagent tasks."""
        with self._lock:
            return [
                t
                for t in self._tasks.values()
                if t.status in self._ACTIVE_STATUSES
            ]

    def list_all(self) -> list[SubagentTask]:
        """List all subagent tasks (including completed)."""
        with self._lock:
            return list(self._tasks.values())

    def cancel(self, task_id: str) -> bool:
        """
        Mark a subagent task as cancelled.

        Note: This doesn't kill the thread (subprocess may still finish),
        but prevents the result from being sent back.

        Returns:
            True if the task was pending or running and is now cancelled,
            False if it is unknown or already finished.
        """
        with self._lock:
            task = self._tasks.get(task_id)
            if task and task.status in self._ACTIVE_STATUSES:
                task.status = "cancelled"
                logger.info(f"Subagent cancelled: {task_id}")
                return True
            return False

    # -------------------------------------------------------------------
    # Internal
    # -------------------------------------------------------------------

    def _count_active_unlocked(self) -> int:
        """Count pending/running tasks. Caller must already hold ``self._lock``."""
        return sum(
            1
            for t in self._tasks.values()
            if t.status in self._ACTIVE_STATUSES
        )

    def _count_active(self) -> int:
        """Count pending/running tasks, taking the lock itself."""
        with self._lock:
            return self._count_active_unlocked()

    def _deliver(self, task: SubagentTask, text: str, what: str) -> bool:
        """
        Send *text* to the task's originating channel.

        Delivery is best-effort: a failing ``send_fn`` is logged (with
        traceback) rather than raised. Returns True when the send succeeded.
        """
        try:
            self._send_fn(
                task.channel_id,
                text,
                task.thread_id,
                task.channel_type,
            )
        except Exception as e:
            logger.error(
                f"Failed to send subagent {what}: {e}", exc_info=True
            )
            return False
        return True

    def _run_subagent(self, task_id: str, context: str | None) -> None:
        """Background thread that runs a subagent session."""
        with self._lock:
            task = self._tasks.get(task_id)
            if task is None:
                return
            # Honour a cancel() that arrived before this thread was scheduled
            # instead of silently reviving the task.
            if task.status == "cancelled":
                return
            task.status = "running"

        # Monotonic clock: immune to wall-clock adjustments while running.
        started = time.monotonic()
        try:
            # Lazy import to avoid circular dependency
            from agent.opencode_runtime import build_aetheel_system_prompt

            # Create a fresh runtime instance
            runtime = self._runtime_factory()

            # Build system prompt for the subagent
            system_prompt = build_aetheel_system_prompt(
                user_name=task.user_name,
                extra_context=(
                    f"# Subagent Context\n\n"
                    f"You are a background subagent running task: {task.task}\n"
                    f"Complete the task and provide your findings.\n"
                    + (f"\n{context}" if context else "")
                ),
            )

            # Run the task through the runtime
            response = runtime.chat(
                message=task.task,
                conversation_id=f"subagent-{task_id}",
                system_prompt=system_prompt,
            )
            duration_ms = int((time.monotonic() - started) * 1000)

            # Check for cancellation and record the outcome in one critical
            # section, so a cancel() cannot slip in between the two steps.
            with self._lock:
                current = self._tasks.get(task_id)
                if not current or current.status == "cancelled":
                    return
                current.duration_ms = duration_ms
                if response.ok:
                    current.status = "done"
                    current.result = response.text
                else:
                    current.status = "failed"
                    current.error = response.error

            if response.ok:
                # Send result back to the originating channel
                result_msg = (
                    f"🤖 *Subagent Complete* (task `{task_id}`)\n\n"
                    f"**Task:** {task.task[:200]}\n\n"
                    f"{response.text}"
                )
                if self._deliver(task, result_msg, "result"):
                    logger.info(
                        f"✅ Subagent {task_id} complete ({duration_ms}ms)"
                    )
            else:
                # Notify of failure
                error_msg = (
                    f"⚠️ *Subagent Failed* (task `{task_id}`)\n\n"
                    f"**Task:** {task.task[:200]}\n\n"
                    f"Error: {response.error or 'Unknown error'}"
                )
                self._deliver(task, error_msg, "failure notice")
                logger.warning(
                    f"❌ Subagent {task_id} failed: {response.error}"
                )
        except Exception as e:
            # Top-level boundary of the worker thread: record and log, never
            # let the exception escape.
            duration_ms = int((time.monotonic() - started) * 1000)
            with self._lock:
                current = self._tasks.get(task_id)
                if current:
                    current.duration_ms = duration_ms
                    # Keep an explicit cancellation visible rather than
                    # overwriting it with "failed".
                    if current.status != "cancelled":
                        current.status = "failed"
                        current.error = str(e)
            logger.error(
                f"❌ Subagent {task_id} crashed: {e}", exc_info=True
            )