first commit

This commit is contained in:
Tanmay Karande
2026-02-13 23:56:09 -05:00
commit ec8bd80a3d
27 changed files with 6725 additions and 0 deletions

839
memory/manager.py Normal file
View File

@@ -0,0 +1,839 @@
"""
MemoryManager — the main memory system orchestrator.
Port of OpenClaw's MemoryIndexManager (src/memory/manager.ts, 2,300 LOC).
Lifecycle: sync → chunk → embed → store → search
Key features:
• Incremental sync — only re-indexes changed files (hash-based)
• Hybrid search — vector (0.7) + BM25 keyword (0.3)
• File watching — auto re-index on workspace changes (via watchdog)
• Embedding cache — avoids re-computing embeddings for unchanged chunks
• Session log indexing — indexes daily/ conversation transcripts
"""
import json
import logging
import os
import sqlite3
import threading
import time
import uuid
from pathlib import Path
from memory.embeddings import embed_batch, embed_query, get_embedding_dims
from memory.hybrid import bm25_rank_to_score, build_fts_query, merge_hybrid_results
from memory.internal import (
build_file_entry,
chunk_markdown,
hash_text,
list_memory_files,
)
from memory.schema import ensure_schema
from memory.types import (
MemoryConfig,
MemorySearchResult,
MemorySource,
)
# Module-level logger for the memory subsystem.
logger = logging.getLogger("aetheel.memory")
# Hard cap on snippet length (in characters) returned with search results.
SNIPPET_MAX_CHARS = 700
class MemoryManager:
"""
Main memory system — manages the full lifecycle:
sync → chunk → embed → store → search
Inspired by OpenClaw's MemoryIndexManager.
"""
def __init__(self, config: MemoryConfig | None = None):
    """
    Initialize the manager: resolve paths, open the SQLite index,
    ensure the schema, and bootstrap default identity files.

    Args:
        config: Optional MemoryConfig; a default instance is used when omitted.
    """
    self._config = config or MemoryConfig()
    # Resolve configured paths to absolute, user-expanded locations.
    self._workspace_dir = str(
        Path(self._config.workspace_dir).expanduser().resolve()
    )
    self._db_path = str(Path(self._config.db_path).expanduser().resolve())
    # Sessions default to <workspace>/daily when not configured explicitly.
    self._sessions_dir = (
        str(Path(self._config.sessions_dir).expanduser().resolve())
        if self._config.sessions_dir
        else os.path.join(self._workspace_dir, "daily")
    )
    # Ensure directories exist
    os.makedirs(self._workspace_dir, exist_ok=True)
    os.makedirs(self._sessions_dir, exist_ok=True)
    os.makedirs(os.path.dirname(self._db_path), exist_ok=True)
    # Open database and ensure schema.
    # check_same_thread=False allows use from other threads (e.g. the
    # watchdog callback). NOTE(review): assumes callers serialize DB
    # writes (sync holds _sync_lock) — confirm for other write paths.
    self._db = sqlite3.connect(self._db_path, check_same_thread=False)
    self._db.row_factory = sqlite3.Row
    schema_result = ensure_schema(self._db, fts_enabled=True)
    # FTS5 may be missing from some SQLite builds; keyword search is
    # skipped in that case (_search_keyword returns []).
    self._fts_available = schema_result.get("fts_available", False)
    # State
    self._dirty = True        # index needs a sync before it is trusted
    self._syncing = False     # best-effort re-entrancy guard for sync()
    self._sync_lock = threading.Lock()
    self._closed = False
    self._watcher = None      # watchdog Observer, set by start_watching()
    # Create default identity files if they don't exist
    self._ensure_identity_files()
    logger.info(
        f"MemoryManager initialized: workspace={self._workspace_dir}, "
        f"db={self._db_path}, fts={self._fts_available}"
    )
# ── Identity File Bootstrap ──────────────────────────────────────
def _ensure_identity_files(self) -> None:
    """Create default SOUL.md, USER.md, MEMORY.md if they don't exist.

    Existing files are never overwritten — these defaults only seed a
    brand-new workspace.
    """
    defaults = {
        "SOUL.md": (
            "# SOUL.md — Who You Are\n\n"
            "_You're not a chatbot. You're becoming someone._\n\n"
            "## Core Truths\n\n"
            "**Be genuinely helpful, not performatively helpful.** "
            "Skip the filler — just help.\n\n"
            "**Have opinions.** You're allowed to disagree, prefer things, "
            "find stuff amusing or boring.\n\n"
            "**Be resourceful before asking.** Try to figure it out first. "
            "Then ask if you're stuck.\n\n"
            "**Earn trust through competence.** Be careful with external actions. "
            "Be bold with internal ones.\n\n"
            "## Boundaries\n\n"
            "- Private things stay private. Period.\n"
            "- When in doubt, ask before acting externally.\n"
            "- Never send half-baked replies.\n\n"
            "## Continuity\n\n"
            "Each session, you wake up fresh. These files _are_ your memory. "
            "Read them. Update them. They're how you persist.\n\n"
            "---\n\n"
            "_This file is yours to evolve. As you learn who you are, update it._\n"
        ),
        "USER.md": (
            "# USER.md — Who I Am\n\n"
            "## About Me\n\n"
            "<!-- Fill in your details -->\n"
            "- **Name:** \n"
            "- **Role:** \n"
            "- **Location:** \n"
            "- **Timezone:** \n\n"
            "## Preferences\n\n"
            "<!-- How you like to communicate -->\n"
            "- **Communication style:** \n"
            "- **Response length:** \n"
            "- **Technical level:** \n\n"
            "## Current Focus\n\n"
            "<!-- What you're working on -->\n\n"
            "## Tools & Services\n\n"
            "<!-- Services you use regularly -->\n\n"
            "---\n\n"
            "_Update this file as your preferences evolve._\n"
        ),
        "MEMORY.md": (
            "# MEMORY.md — Long-Term Memory\n\n"
            "## Decisions & Lessons\n\n"
            "<!-- Record important decisions and lessons learned -->\n\n"
            "## Context\n\n"
            "<!-- Persistent context that should carry across sessions -->\n\n"
            "## Notes\n\n"
            "<!-- Anything worth remembering -->\n\n"
            "---\n\n"
            "_This file persists across sessions. "
            "Update it when you learn something important._\n"
        ),
    }
    for filename, content in defaults.items():
        filepath = os.path.join(self._workspace_dir, filename)
        # Only seed missing files; user edits are authoritative.
        if not os.path.exists(filepath):
            with open(filepath, "w", encoding="utf-8") as f:
                f.write(content)
            logger.info(f"Created default identity file: {filepath}")
# ── Search ───────────────────────────────────────────────────────
async def search(
    self,
    query: str,
    *,
    max_results: int | None = None,
    min_score: float | None = None,
) -> list[MemorySearchResult]:
    """
    Search memory using hybrid vector + keyword search.
    Port of OpenClaw's MemoryIndexManager.search().

    Args:
        query: Free-text query; blank/whitespace-only queries return [].
        max_results: Cap on returned results (default: config.max_results).
        min_score: Minimum merged score (default: config.min_score).

    Steps:
        1. (Optional) Trigger sync if dirty
        2. Run FTS5 keyword search → BM25 scored
        3. Generate query embedding → vector search
        4. Merge results with weighted scoring (0.7v + 0.3k)
        5. Filter by min_score and return top-N results
    """
    # Auto-sync if dirty
    if self._config.sync_on_search and self._dirty:
        await self.sync()
    cleaned = query.strip()
    if not cleaned:
        return []
    max_r = max_results or self._config.max_results
    min_s = min_score if min_score is not None else self._config.min_score
    # Over-fetch (3x, capped at 200) so the merge step has enough
    # candidates from each ranker before the final cut.
    candidates = min(200, max(1, max_r * 3))
    # Keyword search (BM25)
    keyword_results = self._search_keyword(cleaned, candidates)
    # Vector search — embedding failures degrade to keyword-only search
    try:
        query_vec = embed_query(cleaned, self._config.embedding_model)
        # An all-zero vector means the embedder produced nothing useful.
        has_vector = any(v != 0 for v in query_vec)
    except Exception as e:
        logger.warning(f"Embedding failed, falling back to keyword-only: {e}")
        query_vec = []
        has_vector = False
    vector_results = (
        self._search_vector(query_vec, candidates) if has_vector else []
    )
    # If no keyword results, return vector-only
    if not keyword_results:
        return [
            r for r in self._vector_to_search_results(vector_results)
            if r.score >= min_s
        ][:max_r]
    # Merge hybrid results (weighted combination of both rankers)
    merged = merge_hybrid_results(
        vector=vector_results,
        keyword=keyword_results,
        vector_weight=self._config.vector_weight,
        text_weight=self._config.text_weight,
    )
    return [r for r in merged if r.score >= min_s][:max_r]
def _search_vector(
    self, query_vec: list[float], limit: int
) -> list[dict]:
    """
    Rank all stored chunks by cosine similarity against *query_vec*.

    Embeddings are stored as JSON arrays in the chunks table; rows with
    missing or malformed embeddings are silently skipped. Returns at
    most *limit* dicts, best match first.
    """
    if not query_vec:
        return []
    try:
        rows = self._db.execute(
            "SELECT id, path, start_line, end_line, source, text, embedding "
            "FROM chunks ORDER BY rowid"
        ).fetchall()
    except Exception as e:
        logger.warning(f"Vector search failed: {e}")
        return []
    from memory.internal import cosine_similarity
    scored: list[dict] = []
    for record in rows:
        try:
            embedding = json.loads(record["embedding"])
            if not embedding:
                continue
            similarity = cosine_similarity(query_vec, embedding)
        except (json.JSONDecodeError, TypeError):
            # Unparseable or NULL embedding — skip this chunk.
            continue
        scored.append({
            "id": record["id"],
            "path": record["path"],
            "start_line": record["start_line"],
            "end_line": record["end_line"],
            "source": record["source"],
            "snippet": record["text"][:SNIPPET_MAX_CHARS],
            # Negative similarities are clamped so scores stay in [0, 1].
            "vector_score": max(0.0, similarity),
        })
    scored.sort(key=lambda item: item["vector_score"], reverse=True)
    return scored[:limit]
def _search_keyword(self, query: str, limit: int) -> list[dict]:
    """
    Search chunks using FTS5 full-text search with BM25 ranking.
    Port of OpenClaw's searchKeyword().

    Args:
        query: Raw user query; converted to an FTS5 query string first.
        limit: Maximum number of rows pulled from the FTS index.

    Returns:
        Result dicts carrying a normalized `text_score`; [] when FTS is
        unavailable, the query is empty, or the FTS query errors out.
    """
    if not self._fts_available:
        return []
    fts_query = build_fts_query(query)
    if not fts_query:
        return []
    try:
        rows = self._db.execute(
            "SELECT id, path, start_line, end_line, source, text, "
            "rank AS bm25_rank "
            "FROM chunks_fts "
            "WHERE chunks_fts MATCH ? "
            "ORDER BY rank "
            "LIMIT ?",
            (fts_query, limit),
        ).fetchall()
    except Exception as e:
        # Malformed FTS syntax is an expected occasional failure; fail soft.
        logger.debug(f"FTS search failed for query '{fts_query}': {e}")
        return []
    results = []
    for row in rows:
        # FTS5 rank is negative (lower = better); convert to a 0-1 score.
        # Fix: compare against None explicitly — the old truthiness test
        # (`if row["bm25_rank"]`) also treated a legitimate rank of
        # exactly 0/0.0 as missing and penalized it with the 999.0
        # sentinel reserved for NULL ranks.
        raw_rank = row["bm25_rank"]
        bm25_rank = abs(raw_rank) if raw_rank is not None else 999.0
        text_score = bm25_rank_to_score(bm25_rank)
        snippet = row["text"][:SNIPPET_MAX_CHARS]
        results.append({
            "id": row["id"],
            "path": row["path"],
            "start_line": row["start_line"],
            "end_line": row["end_line"],
            "source": row["source"],
            "snippet": snippet,
            "text_score": text_score,
        })
    return results
def _vector_to_search_results(
    self, vector_results: list[dict]
) -> list[MemorySearchResult]:
    """Convert raw vector-search dicts into MemorySearchResult objects."""
    converted: list[MemorySearchResult] = []
    for item in vector_results:
        converted.append(
            MemorySearchResult(
                path=item["path"],
                start_line=item["start_line"],
                end_line=item["end_line"],
                score=item["vector_score"],
                snippet=item["snippet"],
                source=MemorySource(item["source"]),
            )
        )
    return converted
# ── Sync ─────────────────────────────────────────────────────────
async def sync(self, *, force: bool = False) -> dict:
    """
    Synchronize workspace markdown files into the index.
    Port of OpenClaw's MemoryIndexManager.sync().

    Steps:
        1. List all memory files (SOUL.md, USER.md, MEMORY.md, memory/*)
        2. For each file, check if content hash has changed
        3. If changed: chunk → embed → store in DB
        4. Remove stale entries for deleted files
        5. Optionally sync session logs from daily/

    Args:
        force: Re-index everything and bypass the "already syncing" guard.

    Returns:
        A summary dict with counts, or {"skipped": True} when another
        sync is already in flight.
    """
    # Best-effort re-entrancy guard; _sync_lock below provides the
    # actual mutual exclusion.
    if self._syncing and not force:
        logger.debug("Sync already in progress, skipping")
        return {"skipped": True}
    with self._sync_lock:
        self._syncing = True
        try:
            result = self._run_sync(force=force)
            # Fix: only clear the dirty flag on success. It was previously
            # cleared in `finally`, so a sync that raised would mark the
            # index clean and suppress the retry on the next search.
            self._dirty = False
            return result
        finally:
            self._syncing = False
def _run_sync(self, *, force: bool = False) -> dict:
    """Execute the actual sync logic (called by sync() under _sync_lock).

    Args:
        force: Re-index files even when their content hash is unchanged.

    Returns:
        Counter dict. Note "chunks_created" is the TOTAL chunk count in
        the index after this sync, not only the chunks created this run.
    """
    stats = {
        "files_found": 0,
        "files_indexed": 0,
        "files_skipped": 0,
        "chunks_created": 0,
        "stale_removed": 0,
        "sessions_indexed": 0,
    }
    # ── Memory files ──
    if "memory" in self._config.sources:
        files = list_memory_files(self._workspace_dir)
        stats["files_found"] = len(files)
        active_paths: set[str] = set()
        for abs_path in files:
            entry = build_file_entry(abs_path, self._workspace_dir)
            active_paths.add(entry.path)
            # Check if file has changed (content-hash comparison)
            row = self._db.execute(
                "SELECT hash FROM files WHERE path = ? AND source = ?",
                (entry.path, MemorySource.MEMORY.value),
            ).fetchone()
            if not force and row and row["hash"] == entry.hash:
                stats["files_skipped"] += 1
                continue
            # File is new or changed — re-index it
            self._index_file(entry, MemorySource.MEMORY)
            stats["files_indexed"] += 1
        # Remove stale entries for files deleted from the workspace
        stale_rows = self._db.execute(
            "SELECT path FROM files WHERE source = ?",
            (MemorySource.MEMORY.value,),
        ).fetchall()
        for stale in stale_rows:
            if stale["path"] not in active_paths:
                self._remove_file(stale["path"], MemorySource.MEMORY)
                stats["stale_removed"] += 1
    # ── Session files ──
    if "sessions" in self._config.sources:
        session_count = self._sync_session_files(force=force)
        stats["sessions_indexed"] = session_count
    # Count total chunks now in the index (see docstring caveat)
    row = self._db.execute("SELECT COUNT(*) as c FROM chunks").fetchone()
    stats["chunks_created"] = row["c"] if row else 0
    # Single commit covers all file/chunk/FTS/cache writes above.
    self._db.commit()
    logger.info(
        f"Sync complete: {stats['files_indexed']} indexed, "
        f"{stats['files_skipped']} unchanged, "
        f"{stats['stale_removed']} removed, "
        f"{stats['chunks_created']} total chunks"
    )
    return stats
def _index_file(self, entry, source: MemorySource) -> None:
    """
    Index a single file: read → chunk → embed → store.
    Port of OpenClaw's indexFile method.

    Args:
        entry: File entry (abs_path, path, hash, mtime_ms, size) as
            produced by build_file_entry.
        source: Index partition (memory vs. sessions) the file belongs to.
    """
    try:
        with open(entry.abs_path, "r", encoding="utf-8") as f:
            content = f.read()
    except Exception as e:
        # Unreadable file: log and keep whatever was indexed previously.
        logger.warning(f"Failed to read {entry.abs_path}: {e}")
        return
    # Empty/whitespace-only files produce no chunks — nothing to index.
    if not content.strip():
        return
    # Chunk the content
    chunks = chunk_markdown(
        content,
        chunk_tokens=self._config.chunk_tokens,
        chunk_overlap=self._config.chunk_overlap,
    )
    if not chunks:
        return
    # Check embedding cache and compute new embeddings
    texts_to_embed = []
    chunk_hashes = []
    cached_embeddings: dict[str, list[float]] = {}
    for chunk in chunks:
        # Check cache first (keyed by model + chunk content hash)
        cache_row = self._db.execute(
            "SELECT embedding FROM embedding_cache WHERE model = ? AND hash = ?",
            (self._config.embedding_model, chunk.hash),
        ).fetchone()
        if cache_row:
            cached_embeddings[chunk.hash] = json.loads(cache_row["embedding"])
        else:
            # NOTE(review): duplicate chunk hashes within one file are
            # embedded once per occurrence — harmless, but redundant work.
            texts_to_embed.append(chunk.text)
            chunk_hashes.append(chunk.hash)
    # Batch embed uncached chunks
    new_embeddings: dict[str, list[float]] = {}
    if texts_to_embed:
        try:
            vectors = embed_batch(texts_to_embed, self._config.embedding_model)
            now = int(time.time())
            for i, chunk_hash in enumerate(chunk_hashes):
                # Guard against the embedder returning fewer vectors
                # than requested.
                vec = vectors[i] if i < len(vectors) else []
                new_embeddings[chunk_hash] = vec
                # Store in cache for future syncs
                self._db.execute(
                    "INSERT OR REPLACE INTO embedding_cache "
                    "(model, hash, embedding, dims, updated_at) "
                    "VALUES (?, ?, ?, ?, ?)",
                    (
                        self._config.embedding_model,
                        chunk_hash,
                        json.dumps(vec),
                        len(vec),
                        now,
                    ),
                )
        except Exception as e:
            logger.warning(f"Embedding batch failed for {entry.path}: {e}")
            # Fall back to empty embeddings — chunks remain keyword-
            # searchable via FTS even without vectors.
            for chunk_hash in chunk_hashes:
                new_embeddings[chunk_hash] = []
    # Remove old chunks for this file before inserting replacements
    self._remove_file_chunks(entry.path, source)
    # Insert new chunks
    now = int(time.time())
    for chunk in chunks:
        chunk_id = str(uuid.uuid4())
        embedding = cached_embeddings.get(chunk.hash) or new_embeddings.get(
            chunk.hash, []
        )
        self._db.execute(
            "INSERT INTO chunks "
            "(id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) "
            "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (
                chunk_id,
                entry.path,
                source.value,
                chunk.start_line,
                chunk.end_line,
                chunk.hash,
                self._config.embedding_model,
                chunk.text,
                json.dumps(embedding),
                now,
            ),
        )
        # Insert into FTS index (mirror row keyed by the same chunk id)
        if self._fts_available:
            try:
                self._db.execute(
                    "INSERT INTO chunks_fts "
                    "(text, id, path, source, model, start_line, end_line) "
                    "VALUES (?, ?, ?, ?, ?, ?, ?)",
                    (
                        chunk.text,
                        chunk_id,
                        entry.path,
                        source.value,
                        self._config.embedding_model,
                        chunk.start_line,
                        chunk.end_line,
                    ),
                )
            except Exception as e:
                # FTS is best-effort; vector search still covers the chunk.
                logger.debug(f"FTS insert failed for chunk {chunk_id}: {e}")
    # Update files table (hash/mtime/size drive change detection next sync)
    self._db.execute(
        "INSERT OR REPLACE INTO files (path, source, hash, mtime, size) "
        "VALUES (?, ?, ?, ?, ?)",
        (
            entry.path,
            source.value,
            entry.hash,
            int(entry.mtime_ms),
            entry.size,
        ),
    )
def _remove_file_chunks(self, path: str, source: MemorySource) -> None:
    """Delete every chunk (and its FTS mirror row) belonging to one file."""
    if self._fts_available:
        # FTS rows are keyed by chunk id, so collect the ids first.
        id_rows = self._db.execute(
            "SELECT id FROM chunks WHERE path = ? AND source = ?",
            (path, source.value),
        ).fetchall()
        for id_row in id_rows:
            try:
                self._db.execute(
                    "DELETE FROM chunks_fts WHERE id = ?", (id_row["id"],)
                )
            except Exception:
                # Best-effort cleanup: a missing FTS row is not fatal.
                pass
    self._db.execute(
        "DELETE FROM chunks WHERE path = ? AND source = ?",
        (path, source.value),
    )
def _remove_file(self, path: str, source: MemorySource) -> None:
    """Remove a file and all its chunks from the index.

    Deletes the chunk rows (plus FTS mirrors) first, then the file's
    bookkeeping row. No commit here — the caller commits.
    """
    self._remove_file_chunks(path, source)
    self._db.execute(
        "DELETE FROM files WHERE path = ? AND source = ?",
        (path, source.value),
    )
# ── Session Logs ─────────────────────────────────────────────────
def _sync_session_files(self, *, force: bool = False) -> int:
    """
    Sync session log files from the daily/ directory.

    Only top-level *.md regular files are considered; symlinks are
    skipped.

    Args:
        force: Re-index files even when their content hash is unchanged.

    Returns:
        The number of session files (re-)indexed this run.
    """
    sessions_dir = Path(self._sessions_dir)
    if not sessions_dir.is_dir():
        return 0
    indexed = 0
    active_paths: set[str] = set()
    for md_file in sorted(sessions_dir.glob("*.md")):
        # Skip symlinks and anything that is not a regular file.
        if md_file.is_symlink() or not md_file.is_file():
            continue
        entry = build_file_entry(str(md_file), self._workspace_dir)
        active_paths.add(entry.path)
        # Check if changed (content-hash comparison, as in _run_sync)
        row = self._db.execute(
            "SELECT hash FROM files WHERE path = ? AND source = ?",
            (entry.path, MemorySource.SESSIONS.value),
        ).fetchone()
        if not force and row and row["hash"] == entry.hash:
            continue
        self._index_file(entry, MemorySource.SESSIONS)
        indexed += 1
    # Clean stale session entries (logs deleted since the last sync)
    stale_rows = self._db.execute(
        "SELECT path FROM files WHERE source = ?",
        (MemorySource.SESSIONS.value,),
    ).fetchall()
    for stale in stale_rows:
        if stale["path"] not in active_paths:
            self._remove_file(stale["path"], MemorySource.SESSIONS)
    return indexed
def log_session(
self,
content: str,
*,
date: str | None = None,
channel: str = "slack",
) -> str:
"""
Append to today's session log in daily/.
Args:
content: The text to log (e.g., a user message or AI response).
date: Optional date string (YYYY-MM-DD). Defaults to today.
channel: Channel the conversation came from.
Returns:
Path to the session log file.
"""
if date is None:
date = time.strftime("%Y-%m-%d")
log_path = os.path.join(self._sessions_dir, f"{date}.md")
# Create file with header if it doesn't exist
if not os.path.exists(log_path):
header = f"# Session Log — {date}\n\n"
with open(log_path, "w", encoding="utf-8") as f:
f.write(header)
# Append the content
timestamp = time.strftime("%H:%M:%S")
with open(log_path, "a", encoding="utf-8") as f:
f.write(f"\n---\n\n**[{timestamp}] ({channel})**\n\n{content}\n")
# Mark as dirty for next sync
self._dirty = True
return log_path
# ── Identity File Access ─────────────────────────────────────────
def read_identity_file(self, name: str) -> str | None:
"""Read an identity file (SOUL.md, USER.md, MEMORY.md)."""
filepath = os.path.join(self._workspace_dir, name)
if not os.path.isfile(filepath):
return None
with open(filepath, "r", encoding="utf-8") as f:
return f.read()
def update_identity_file(self, name: str, content: str) -> None:
    """Overwrite an identity file and flag the index for re-sync."""
    target = os.path.join(self._workspace_dir, name)
    with open(target, "w", encoding="utf-8") as f:
        f.write(content)
    # Changed content must be re-indexed on the next sync.
    self._dirty = True
    logger.info(f"Updated identity file: {name}")
def read_soul(self) -> str | None:
return self.read_identity_file("SOUL.md")
def read_user(self) -> str | None:
return self.read_identity_file("USER.md")
def read_long_term_memory(self) -> str | None:
return self.read_identity_file("MEMORY.md")
def append_to_memory(self, entry: str) -> None:
    """Append a timestamped entry to MEMORY.md and mark the index dirty."""
    memory_path = os.path.join(self._workspace_dir, "MEMORY.md")
    stamp = time.strftime("%Y-%m-%d %H:%M")
    with open(memory_path, "a", encoding="utf-8") as f:
        f.write(f"\n### [{stamp}]\n\n{entry}\n")
    # New content is picked up by the next sync.
    self._dirty = True
    logger.info("Appended to MEMORY.md")
# ── File Reading ─────────────────────────────────────────────────
def read_file(
self,
rel_path: str,
*,
from_line: int | None = None,
num_lines: int | None = None,
) -> dict:
"""
Read a memory file by relative path.
Port of OpenClaw's readFile().
"""
raw = rel_path.strip()
if not raw:
raise ValueError("path required")
if os.path.isabs(raw):
abs_path = os.path.realpath(raw)
else:
abs_path = os.path.realpath(
os.path.join(self._workspace_dir, raw)
)
if not abs_path.endswith(".md"):
raise ValueError("Only .md files are supported")
if not os.path.isfile(abs_path):
raise FileNotFoundError(f"File not found: {abs_path}")
with open(abs_path, "r", encoding="utf-8") as f:
content = f.read()
if from_line is None and num_lines is None:
return {"text": content, "path": rel_path}
lines = content.split("\n")
start = max(1, from_line or 1)
count = max(1, num_lines or len(lines))
sliced = lines[start - 1 : start - 1 + count]
return {"text": "\n".join(sliced), "path": rel_path}
# ── Status ───────────────────────────────────────────────────────
def status(self) -> dict:
    """Return a snapshot of index state: row counts, paths, and the
    active search configuration."""
    def _single_count(sql: str) -> int:
        # Each query returns exactly one row with a single count column.
        row = self._db.execute(sql).fetchone()
        return row["c"] if row else 0

    file_count = _single_count("SELECT COUNT(*) as c FROM files")
    chunk_count = _single_count("SELECT COUNT(*) as c FROM chunks")
    cache_count = _single_count("SELECT COUNT(*) as c FROM embedding_cache")
    return {
        "workspace_dir": self._workspace_dir,
        "db_path": self._db_path,
        "sessions_dir": self._sessions_dir,
        "files": file_count,
        "chunks": chunk_count,
        "cached_embeddings": cache_count,
        "fts_available": self._fts_available,
        "dirty": self._dirty,
        "embedding_model": self._config.embedding_model,
        "embedding_dims": get_embedding_dims(self._config.embedding_model),
        "vector_weight": self._config.vector_weight,
        "text_weight": self._config.text_weight,
    }
# ── File Watching ────────────────────────────────────────────────
def start_watching(self) -> None:
    """
    Begin watching the workspace for markdown changes.

    Uses watchdog (optional dependency) for cross-platform file system
    events; on any change to a .md file the index is marked dirty so the
    next search triggers a re-sync. No-op when already watching or when
    watching is disabled in the config.
    """
    if self._watcher or not self._config.watch:
        return
    try:
        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer
    except ImportError:
        logger.warning(
            "watchdog not installed — file watching disabled. "
            "Install with: uv add watchdog"
        )
        return

    outer = self

    class _WorkspaceHandler(FileSystemEventHandler):
        """Marks the index dirty whenever a markdown file changes."""

        def on_any_event(self, event):
            if event.is_directory:
                return
            changed = getattr(event, "src_path", "")
            if changed.endswith(".md"):
                outer._dirty = True
                logger.debug(f"Workspace change detected: {changed}")

    watcher = Observer()
    watcher.schedule(_WorkspaceHandler(), self._workspace_dir, recursive=True)
    watcher.start()
    self._watcher = watcher
    logger.info(f"File watching started: {self._workspace_dir}")
def stop_watching(self) -> None:
    """Stop and join the file watcher, if one is running."""
    watcher = self._watcher
    if not watcher:
        return
    watcher.stop()
    watcher.join()
    self._watcher = None
    logger.info("File watching stopped")
# ── Lifecycle ────────────────────────────────────────────────────
def close(self) -> None:
    """Shut down the manager: stop watching and close the database.

    Idempotent — subsequent calls are no-ops.
    """
    if self._closed:
        return
    # Flip the flag first so re-entrant calls return immediately.
    self._closed = True
    self.stop_watching()
    self._db.close()
    logger.info("MemoryManager closed")
def __enter__(self):
return self
def __exit__(self, *args):
self.close()
def __del__(self):
if not self._closed:
try:
self.close()
except Exception:
pass