fix: prevent JSON event leaking to users and reload skills after AI creation

- OpenCode runtime: stop calling _collect_text fallback on non-text events (step_start, step_finish, etc.)
- Both runtimes: guard raw stdout fallback to only apply for non-JSON output
- main.py: reload skills after AI responses containing skill-related keywords
- main.py: return friendly message instead of empty string for tool-only responses
2026-02-18 23:40:15 -05:00
parent 34dea65a07
commit 4e31e77286
3 changed files with 93 additions and 16 deletions
--- a/agent/claude_runtime.py
+++ b/agent/claude_runtime.py
@@ -259,8 +259,11 @@ class ClaudeCodeRuntime:
            # Parse the output
            response_text, session_id, usage = self._parse_output(stdout)
-            if not response_text:
+            if not response_text and stdout.strip():
-                response_text = stdout  # Fallback to raw output
+                # Only fall back to raw output if it doesn't look like JSON events
                # (which would leak internal lifecycle data to the user)
                if not stdout.strip().startswith("{"):
                    response_text = stdout
            # Store session mapping
            if session_id and conversation_id:
@@ -420,7 +423,8 @@ class ClaudeCodeRuntime:
            try:
                event = json.loads(line)
                if isinstance(event, dict):
-                    if event.get("type") == "result":
+                    event_type = event.get("type", "")
                    if event_type == "result":
                        text_parts.append(event.get("result", ""))
                        session_id = event.get("session_id", session_id)
                        usage = {
@@ -430,7 +434,7 @@ class ClaudeCodeRuntime:
                            "duration_api_ms": event.get("duration_api_ms", 0),
                            "is_error": event.get("is_error", False),
                        }
-                    elif event.get("type") == "assistant" and "message" in event:
+                    elif event_type == "assistant" and "message" in event:
                        # Extract text from content blocks
                        msg = event["message"]
                        if "content" in msg:
@@ -438,14 +442,33 @@ class ClaudeCodeRuntime:
                                if block.get("type") == "text":
                                    text_parts.append(block.get("text", ""))
                        session_id = event.get("session_id", session_id)
                    # Silently skip non-content events (step_start, step_finish,
                    # system, tool_use, tool_result, etc.) — these are internal
                    # lifecycle events that should never reach the user.
            except json.JSONDecodeError:
                # Not JSON — could be plain text mixed in; only include if
                # it doesn't look like a truncated JSON blob.
                if not line.startswith("{") and not line.startswith("["):
                    text_parts.append(line)
                continue
        if text_parts:
            return "\n".join(text_parts), session_id, usage
-        # Fallback: treat as plain text
+        # Fallback: treat as plain text, but strip any JSON-like lines
-        return stdout, None, None
+        # to prevent raw event objects from leaking to the user.
        plain_lines = []
        for line in stdout.splitlines():
            stripped = line.strip()
            if stripped and not stripped.startswith("{"):
                plain_lines.append(line)
        if plain_lines:
            return "\n".join(plain_lines), None, None
        # Everything was JSON events with no extractable text
        logger.warning("Claude output contained only non-content JSON events")
        return "", None, None
    # -------------------------------------------------------------------
    # Validation
--- a/agent/opencode_runtime.py
+++ b/agent/opencode_runtime.py
@@ -695,8 +695,11 @@ class OpenCodeRuntime:
            # Parse the output — mirrors OpenClaw's parseCliJson/parseCliJsonl
            response_text = self._parse_cli_output(stdout)
-            if not response_text:
+            if not response_text and stdout.strip():
-                response_text = stdout  # fallback to raw output
+                # Only fall back to raw output if it doesn't look like JSON events
                # (which would leak internal lifecycle data to the user)
                if not stdout.strip().startswith("{"):
                    response_text = stdout
            # Extract session ID if returned
            session_id = self._extract_session_id(stdout)
@@ -792,12 +795,17 @@ class OpenCodeRuntime:
          {"type":"text",       "sessionID":"ses_...", "part":{"type":"text","text":"Hello!"}}
          {"type":"step_finish","sessionID":"ses_...", "part":{"type":"step-finish",...}}
-        We extract text from events where type == "text" and part.text exists.
+        We extract text ONLY from "text" type events. All other event types
        (step_start, step_finish, tool_use, tool_result, etc.) are internal
        lifecycle events and must never be shown to the user.
        """
        if not stdout.strip():
            return ""
-        # Parse JSONL lines — collect text from "text" type events
+        # Track whether we found any JSON at all (to distinguish JSONL from plain text)
        found_json = False
        # Parse JSONL lines — collect text from "text" type events only
        lines = stdout.strip().split("\n")
        texts = []
        for line in lines:
@@ -806,6 +814,7 @@ class OpenCodeRuntime:
                continue
            try:
                event = json.loads(line)
                found_json = True
                if not isinstance(event, dict):
                    continue
@@ -819,19 +828,47 @@ class OpenCodeRuntime:
                        texts.append(text)
                        continue
-                # Fallback: try generic text extraction (for non-OpenCode formats)
+                # Also handle "result" type events (Claude JSON format)
-                text = self._collect_text(event)
+                if event_type == "result":
-                if text:
+                    text = event.get("result", "")
-                    texts.append(text)
+                    if text:
                        texts.append(text)
                        continue
                # Also handle "assistant" type events (Claude stream-json)
                if event_type == "assistant" and "message" in event:
                    msg = event["message"]
                    if "content" in msg:
                        for block in msg["content"]:
                            if block.get("type") == "text":
                                t = block.get("text", "")
                                if t:
                                    texts.append(t)
                    continue
                # Skip all other event types silently (step_start, step_finish,
                # tool_use, tool_result, system, etc.)
            except json.JSONDecodeError:
                # Not JSON — might be plain text output (--format default)
-                texts.append(line)
+                # Only include if we haven't seen JSON yet (pure plain text mode)
                if not found_json:
                    texts.append(line)
        if texts:
            return "\n".join(texts)
-        # Final fallback to raw text
+        # If we parsed JSON events but found no text, the response was
        # purely tool-use with no user-facing text. Return empty rather
        # than leaking raw JSON events.
        if found_json:
            logger.warning(
                "OpenCode output contained only non-text JSON events "
                "(no user-facing text found)"
            )
            return ""
        # Final fallback for non-JSON output
        return stdout.strip()
    def _collect_text(self, value: Any) -> str:
--- a/main.py
+++ b/main.py
@@ -299,6 +299,18 @@ def ai_handler(msg: IncomingMessage) -> str:
    # Parse and execute action tags (reminders, cron, spawn)
    reply_text = _process_action_tags(response.text, msg)
    # If the AI may have created/modified skills (via file tools), reload them
    # so that `skill list` reflects the changes immediately.
    if _skills and any(
        kw in text_lower
        for kw in ("skill", "create a skill", "new skill", "add a skill", "make a skill")
    ):
        try:
            _skills.reload()
            logger.info("Skills reloaded after potential skill modification")
        except Exception as e:
            logger.debug(f"Skills reload after AI response failed: {e}")
    # Log conversation to memory session log
    if _memory:
        try:
@@ -311,6 +323,11 @@ def ai_handler(msg: IncomingMessage) -> str:
        except Exception as e:
            logger.debug(f"Session logging failed: {e}")
    # Guard against empty responses (e.g. when AI did tool-only work
    # and the parser correctly stripped all non-text events)
    if not reply_text or not reply_text.strip():
        return "✅ Done — I processed that but had no text to show."
    return reply_text