# Aetheel/agent/subagent.py

"""
Aetheel Subagent Manager
=========================
Spawns background AI agent sessions for long-running tasks.

The main agent can "spawn" a subagent by including an action tag in its
response. The subagent runs in a background thread with its own runtime
session and sends results back to the originating channel when done.

Usage:
    from agent.subagent import SubagentManager

    manager = SubagentManager(runtime_factory=make_runtime, send_fn=send_message)
    manager.spawn(
        task="Research Python 3.14 features",
        channel_id="C123",
        channel_type="slack",
    )
"""

import logging
import threading
import time
import uuid
from dataclasses import dataclass, field
from datetime import datetime, timezone
from typing import Any, Callable

# Module-level logger: every message emitted by this file goes through the
# "aetheel.subagent" logger.
logger = logging.getLogger("aetheel.subagent")


# ---------------------------------------------------------------------------
# Types
# ---------------------------------------------------------------------------


@dataclass
class SubagentTask:
    """
    Record describing one subagent run, from creation to completion.

    Instances are created by ``SubagentManager.spawn`` and mutated in place
    by the manager as the run progresses (status, result, error, duration).
    """

    id: str  # Short identifier (first 8 hex chars of a uuid4) assigned by spawn()
    task: str  # The task/prompt given to the subagent
    channel_id: str  # Channel the outcome is sent back to
    channel_type: str  # "slack", "telegram", etc.
    thread_id: str | None = None  # Passed through to the send function as-is
    user_name: str | None = None  # Forwarded to the system-prompt builder
    # Lifecycle: "pending" -> "running" -> "done" | "failed"; cancel() sets
    # "cancelled" on a task that is still pending or running.
    status: str = "pending"  # pending, running, done, failed, cancelled
    result: str | None = None  # Response text, set when the run succeeds
    error: str | None = None  # Error description, set when the run fails
    # Creation time as an ISO-8601 string in UTC.
    created_at: str = field(
        default_factory=lambda: datetime.now(timezone.utc).isoformat()
    )
    duration_ms: int = 0  # Elapsed run time in milliseconds; 0 until recorded


# Type aliases
#
# RuntimeFactory: zero-argument callable returning a fresh runtime instance.
# The manager calls ``runtime.chat(message=..., conversation_id=...,
# system_prompt=...)`` on it and reads ``.ok``, ``.text`` and ``.error`` from
# the returned response.
RuntimeFactory = Callable[[], Any]  # Creates a fresh runtime instance
# SendFunction: delivers a text message to a channel. Arguments are passed
# positionally in the order shown below; the return value is ignored.
SendFunction = Callable[[str, str, str | None, str], None]
# send_fn(channel_id, text, thread_id, channel_type)


# ---------------------------------------------------------------------------
# Subagent Bus (pub/sub for inter-subagent communication)
# ---------------------------------------------------------------------------


class SubagentBus:
    """
    Minimal lock-guarded publish/subscribe hub for subagents.

    Callbacks are registered per named channel. Publishing calls every
    callback registered on that channel with ``(message, sender)``.
    """

    def __init__(self):
        self._lock = threading.Lock()
        self._channels: dict[str, list[Callable]] = {}

    def subscribe(self, channel: str, callback: Callable[[str, str], None]) -> None:
        """Attach *callback* to *channel*, creating the channel on first use."""
        with self._lock:
            listeners = self._channels.get(channel)
            if listeners is None:
                listeners = []
                self._channels[channel] = listeners
            listeners.append(callback)

    def publish(self, channel: str, message: str, sender: str) -> None:
        """
        Deliver *message* from *sender* to each subscriber of *channel*.

        Callbacks run in subscription order, outside the lock, against a
        snapshot taken at publish time. An exception raised by one callback
        is logged and does not stop delivery to the remaining ones.
        """
        with self._lock:
            snapshot = tuple(self._channels.get(channel, ()))

        for listener in snapshot:
            try:
                listener(message, sender)
            except Exception as e:
                logger.error(f"SubagentBus callback error: {e}")

    def unsubscribe_all(self, channel: str) -> None:
        """Drop every subscriber of *channel*; unknown channels are ignored."""
        with self._lock:
            if channel in self._channels:
                del self._channels[channel]


# ---------------------------------------------------------------------------
# Subagent Manager
# ---------------------------------------------------------------------------


class SubagentManager:
    """
    Manages background subagent tasks.

    Each subagent runs in its own daemon thread with a fresh runtime instance
    obtained from the runtime factory. When the run finishes, the outcome is
    sent back to the originating channel via the send function.

    Task records live in an in-memory registry; every read or write of the
    registry and of a task's mutable fields happens under ``self._lock``.
    """

    # Statuses that occupy a concurrency slot and can still be cancelled.
    _ACTIVE_STATUSES = ("pending", "running")

    def __init__(
        self,
        runtime_factory: RuntimeFactory,
        send_fn: SendFunction,
        max_concurrent: int = 3,
    ):
        """
        Args:
            runtime_factory: Zero-argument callable returning a fresh runtime
                for each subagent run.
            send_fn: Called as ``send_fn(channel_id, text, thread_id,
                channel_type)`` to report the outcome of a run.
            max_concurrent: Maximum number of pending/running subagents
                accepted at any one time.
        """
        self._runtime_factory = runtime_factory
        self._send_fn = send_fn
        self._max_concurrent = max_concurrent
        self._tasks: dict[str, SubagentTask] = {}
        self._lock = threading.Lock()
        self._bus = SubagentBus()

    @property
    def bus(self) -> SubagentBus:
        """The pub/sub message bus for inter-subagent communication."""
        return self._bus

    def spawn(
        self,
        *,
        task: str,
        channel_id: str,
        channel_type: str = "slack",
        thread_id: str | None = None,
        user_name: str | None = None,
        context: str | None = None,
    ) -> str:
        """
        Spawn a background subagent to work on a task.

        Returns the subagent ID immediately. The subagent runs in a
        background thread and sends results back when done.

        Args:
            task: Prompt given to the subagent.
            channel_id: Channel the outcome is sent to.
            channel_type: Channel kind, passed through to the send function.
            thread_id: Optional thread identifier, passed through as-is.
            user_name: Optional user name forwarded to the system prompt.
            context: Optional extra text appended to the subagent context.

        Raises:
            RuntimeError: If ``max_concurrent`` subagents are already active,
                or if the background thread cannot be started.
        """
        # The limit check and the registration must share one critical
        # section; otherwise concurrent callers can all pass the check and
        # exceed the limit together.
        with self._lock:
            active = len(self._active_locked())
            accepted = active < self._max_concurrent
            if accepted:
                task_id = uuid.uuid4().hex[:8]
                # Only 32 bits of the uuid are kept, so guard against
                # overwriting an existing record on a collision.
                while task_id in self._tasks:
                    task_id = uuid.uuid4().hex[:8]
                subagent_task = SubagentTask(
                    id=task_id,
                    task=task,
                    channel_id=channel_id,
                    channel_type=channel_type,
                    thread_id=thread_id,
                    user_name=user_name,
                )
                self._tasks[task_id] = subagent_task

        if not accepted:
            logger.warning(
                f"Max concurrent subagents reached ({self._max_concurrent}). "
                f"Rejecting task: {task[:50]}"
            )
            raise RuntimeError(
                f"Too many active subagents ({active}/{self._max_concurrent}). "
                "Wait for one to finish."
            )

        # Launch in background thread
        thread = threading.Thread(
            target=self._run_subagent,
            args=(task_id, context),
            daemon=True,
            name=f"subagent-{task_id}",
        )
        try:
            thread.start()
        except RuntimeError as e:
            # Without this the record would stay "pending" forever and
            # permanently consume a concurrency slot.
            with self._lock:
                subagent_task.status = "failed"
                subagent_task.error = f"Could not start subagent thread: {e}"
            raise

        logger.info(
            f"🚀 Subagent spawned: {task_id} — '{task[:50]}' "
            f"(channel={channel_type}/{channel_id})"
        )
        return task_id

    def list_active(self) -> list[SubagentTask]:
        """List all active (running/pending) subagent tasks."""
        with self._lock:
            return self._active_locked()

    def list_all(self) -> list[SubagentTask]:
        """List all subagent tasks (including completed)."""
        with self._lock:
            return list(self._tasks.values())

    def cancel(self, task_id: str) -> bool:
        """
        Mark a subagent task as cancelled.

        Note: This doesn't kill the thread (subprocess may still finish),
        but prevents the result from being sent back. A task cancelled
        before its thread begins work is never run at all.

        Returns:
            True if the task was pending or running and is now cancelled,
            False if it is unknown or already finished.
        """
        with self._lock:
            task = self._tasks.get(task_id)
            if task and task.status in self._ACTIVE_STATUSES:
                task.status = "cancelled"
                logger.info(f"Subagent cancelled: {task_id}")
                return True
        return False

    # -------------------------------------------------------------------
    # Internal
    # -------------------------------------------------------------------

    def _active_locked(self) -> list[SubagentTask]:
        """Return pending/running tasks. Caller must hold ``self._lock``."""
        return [
            t for t in self._tasks.values() if t.status in self._ACTIVE_STATUSES
        ]

    def _count_active(self) -> int:
        """Return the number of pending/running tasks."""
        with self._lock:
            return len(self._active_locked())

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Milliseconds elapsed since *started* (a ``time.monotonic()`` value)."""
        return int((time.monotonic() - started) * 1000)

    def _finalize(
        self,
        task_id: str,
        *,
        status: str,
        duration_ms: int,
        result: str | None = None,
        error: str | None = None,
    ) -> bool:
        """
        Record the terminal state of a task in a single critical section.

        Returns False, leaving the record untouched, when the task is no
        longer registered or has been cancelled, so a cancellation is never
        overwritten by a late "done"/"failed".
        """
        with self._lock:
            current = self._tasks.get(task_id)
            if current is None or current.status == "cancelled":
                return False
            current.status = status
            current.result = result
            current.error = error
            current.duration_ms = duration_ms
            return True

    def _deliver(self, task: SubagentTask, text: str, *, what: str) -> bool:
        """
        Send *text* to the task's originating channel.

        Delivery is best-effort: a failing send function is logged (never
        raised) and reported through the return value.
        """
        try:
            self._send_fn(
                task.channel_id,
                text,
                task.thread_id,
                task.channel_type,
            )
        except Exception as e:
            logger.error(f"Failed to send subagent {what}: {e}", exc_info=True)
            return False
        return True

    def _run_subagent(self, task_id: str, context: str | None) -> None:
        """
        Background thread that runs a subagent session.

        Builds the system prompt, runs the task through a fresh runtime and
        reports success or failure to the originating channel. A crash
        (any exception outside the send step) is recorded on the task and
        logged, but not reported to the channel.
        """
        with self._lock:
            task = self._tasks.get(task_id)
            # Only a still-pending task may start: one cancelled between
            # spawn() and thread start-up must stay cancelled.
            if task is None or task.status != "pending":
                return
            task.status = "running"

        # Monotonic clock: the duration must not jump with wall-clock changes.
        started = time.monotonic()

        try:
            # Lazy import to avoid circular dependency
            from agent.opencode_runtime import build_aetheel_system_prompt

            # Create a fresh runtime instance
            runtime = self._runtime_factory()

            # Build system prompt for the subagent
            system_prompt = build_aetheel_system_prompt(
                user_name=task.user_name,
                extra_context=(
                    f"# Subagent Context\n\n"
                    f"You are a background subagent running task: {task.task}\n"
                    f"Complete the task and provide your findings.\n"
                    + (f"\n{context}" if context else "")
                ),
            )

            # Run the task through the runtime
            response = runtime.chat(
                message=task.task,
                conversation_id=f"subagent-{task_id}",
                system_prompt=system_prompt,
            )

            duration_ms = self._elapsed_ms(started)

            if response.ok:
                if not self._finalize(
                    task_id,
                    status="done",
                    duration_ms=duration_ms,
                    result=response.text,
                ):
                    return  # cancelled (or removed) while running: stay silent

                # Send result back to the originating channel
                result_msg = (
                    f"🤖 *Subagent Complete* (task `{task_id}`)\n\n"
                    f"**Task:** {task.task[:200]}\n\n"
                    f"{response.text}"
                )
                if self._deliver(task, result_msg, what="result"):
                    logger.info(
                        f"✅ Subagent {task_id} complete ({duration_ms}ms)"
                    )
            else:
                if not self._finalize(
                    task_id,
                    status="failed",
                    duration_ms=duration_ms,
                    error=response.error,
                ):
                    return  # cancelled (or removed) while running: stay silent

                # Notify of failure
                error_msg = (
                    f"⚠️ *Subagent Failed* (task `{task_id}`)\n\n"
                    f"**Task:** {task.task[:200]}\n\n"
                    f"Error: {response.error or 'Unknown error'}"
                )
                self._deliver(task, error_msg, what="failure notice")

                logger.warning(
                    f"❌ Subagent {task_id} failed: {response.error}"
                )

        except Exception as e:
            # _finalize leaves a cancelled task untouched, so a crash after
            # cancellation does not flip the status to "failed".
            self._finalize(
                task_id,
                status="failed",
                duration_ms=self._elapsed_ms(started),
                error=str(e),
            )

            logger.error(
                f"❌ Subagent {task_id} crashed: {e}", exc_info=True
            )