first commit

This commit is contained in:
Tanmay Karande
2026-02-13 23:56:09 -05:00
commit ec8bd80a3d
27 changed files with 6725 additions and 0 deletions

25
memory/__init__.py Normal file
View File

@@ -0,0 +1,25 @@
"""
Aetheel Memory System
=====================
Hybrid search memory with SQLite + markdown + local embeddings.
Inspired by OpenClaw's memory architecture (src/memory/):
• Identity files: SOUL.md, USER.md, MEMORY.md
• SQLite storage: chunks, FTS5, vector similarity
• Hybrid search: vector (0.7) + BM25 keyword (0.3)
• Local embeddings: fastembed ONNX (384-dim, zero API calls)
• File watching: auto re-index on workspace changes
• Session logs: daily/ conversation transcripts
Usage:
from memory import MemoryManager
manager = MemoryManager(workspace_dir="~/.aetheel/workspace")
await manager.sync()
results = await manager.search("what are my preferences?")
"""
from memory.manager import MemoryManager
from memory.types import MemorySearchResult, MemorySource
__all__ = ["MemoryManager", "MemorySearchResult", "MemorySource"]

88
memory/embeddings.py Normal file
View File

@@ -0,0 +1,88 @@
"""
Embedding provider for the memory system.
Uses fastembed (ONNX) for fully local, zero-API-call embeddings.
Inspired by OpenClaw's src/memory/embeddings.ts, simplified to:
• Single provider: fastembed with BAAI/bge-small-en-v1.5 (384-dim)
• Local only — no OpenAI/Voyage/Gemini API calls
• Thread-safe lazy initialization
"""
import logging
import threading
from memory.internal import normalize_embedding
logger = logging.getLogger("aetheel.memory.embeddings")
# The fastembed model is loaded lazily on first use
_model_lock = threading.Lock()
_model = None
_model_name: str | None = None
def _ensure_model(model_name: str = "BAAI/bge-small-en-v1.5"):
    """Return the shared fastembed model, loading it on first use.

    Thread-safe via double-checked locking: the lock-free fast path serves
    the common case, and the check is repeated under ``_model_lock`` so
    concurrent callers load the model exactly once. Requesting a different
    ``model_name`` replaces the cached instance.

    Raises:
        ImportError: if the optional ``fastembed`` package is not installed.
    """
    global _model, _model_name
    # Fast path: cached model matches the requested name.
    cached = _model if (_model is not None and _model_name == model_name) else None
    if cached is not None:
        return cached
    with _model_lock:
        # Re-check under the lock: another thread may have finished loading
        # while we were waiting.
        if _model is None or _model_name != model_name:
            try:
                from fastembed import TextEmbedding
            except ImportError:
                raise ImportError(
                    "fastembed is required for local embeddings.\n"
                    "Install with: uv add fastembed\n"
                    "Or: pip install fastembed"
                )
            logger.info(f"Loading embedding model: {model_name}...")
            _model = TextEmbedding(model_name=model_name)
            _model_name = model_name
            logger.info(f"Embedding model loaded: {model_name}")
        return _model
def embed_query(text: str, model_name: str = "BAAI/bge-small-en-v1.5") -> list[float]:
    """
    Embed a single query string with the (lazily loaded) fastembed model.
    Returns an L2-normalized vector (384-dim for the default model), or an
    empty list if the model produced no output.
    """
    model = _ensure_model(model_name)
    batch = list(model.query_embed([text]))
    if not batch:
        return []
    raw = batch[0].tolist()
    return normalize_embedding(raw)
def embed_batch(
    texts: list[str],
    model_name: str = "BAAI/bge-small-en-v1.5",
) -> list[list[float]]:
    """
    Embed a batch of passages with the (lazily loaded) fastembed model.
    Returns one L2-normalized vector per input text, in input order;
    an empty input yields an empty list without touching the model.
    """
    if not texts:
        return []
    model = _ensure_model(model_name)
    normalized: list[list[float]] = []
    for emb in model.passage_embed(texts):
        normalized.append(normalize_embedding(emb.tolist()))
    return normalized
def get_embedding_dims(model_name: str = "BAAI/bge-small-en-v1.5") -> int:
    """Return the output dimensionality for *model_name*.

    Looks the name up in a small table of known models; anything
    unrecognized falls back to 384 (the default model's width).
    """
    dims_by_model = {
        "BAAI/bge-small-en-v1.5": 384,
        "BAAI/bge-base-en-v1.5": 768,
        "sentence-transformers/all-MiniLM-L6-v2": 384,
    }
    if model_name in dims_by_model:
        return dims_by_model[model_name]
    return 384

111
memory/hybrid.py Normal file
View File

@@ -0,0 +1,111 @@
"""
Hybrid search — merges vector similarity + BM25 keyword results.
Direct port of OpenClaw's src/memory/hybrid.ts.
The algorithm:
1. Run vector search → ranked by cosine similarity
2. Run FTS5 keyword search → ranked by BM25
3. Merge by weighted score: 0.7 × vector + 0.3 × keyword
4. Deduplicate by chunk ID
5. Sort by combined score (descending)
"""
import re
from memory.types import MemorySearchResult, MemorySource
def build_fts_query(raw: str) -> str | None:
"""
Build an FTS5 match query from raw text.
Port of OpenClaw's buildFtsQuery() — quotes each token
and joins with AND for a conjunctive match.
Example: "hello world"'"hello" AND "world"'
"""
tokens = re.findall(r"[A-Za-z0-9_]+", raw)
if not tokens:
return None
quoted = [f'"{t}"' for t in tokens]
return " AND ".join(quoted)
def bm25_rank_to_score(rank: float) -> float:
    """
    Map an FTS5 BM25 rank onto the (0, 1] range.
    Port of OpenClaw's bm25RankToScore(). Negative ranks are clamped to
    zero (score 1.0); non-numeric input is treated as a very poor rank.
    """
    if isinstance(rank, (int, float)):
        clamped = rank if rank > 0.0 else 0.0
    else:
        clamped = 999.0
    return 1.0 / (1.0 + clamped)
def merge_hybrid_results(
    vector: list[dict],
    keyword: list[dict],
    vector_weight: float = 0.7,
    text_weight: float = 0.3,
) -> list[MemorySearchResult]:
    """
    Merge vector and keyword hits into one weighted, deduplicated ranking.
    Direct port of OpenClaw's mergeHybridResults() from hybrid.ts.

    Vector result dicts carry: id, path, start_line, end_line, source,
    snippet, vector_score. Keyword result dicts carry the same fields with
    text_score instead. Chunks appearing in both lists get both scores; a
    non-empty keyword snippet overrides the vector one. Final score is
    vector_weight * vector_score + text_weight * text_score, sorted
    descending.
    """
    combined: dict[str, dict] = {}

    def fresh_entry(r: dict) -> dict:
        # Common skeleton with both partial scores zeroed.
        return {
            "id": r["id"],
            "path": r["path"],
            "start_line": r["start_line"],
            "end_line": r["end_line"],
            "source": r["source"],
            "snippet": r["snippet"],
            "vector_score": 0.0,
            "text_score": 0.0,
        }

    for item in vector:
        entry = fresh_entry(item)
        entry["vector_score"] = item.get("vector_score", 0.0)
        combined[item["id"]] = entry

    for item in keyword:
        entry = combined.get(item["id"])
        if entry is None:
            # Keyword-only hit — no vector contribution.
            entry = fresh_entry(item)
            combined[item["id"]] = entry
        elif item.get("snippet"):
            # Prefer keyword snippet if available (often more relevant)
            entry["snippet"] = item["snippet"]
        entry["text_score"] = item.get("text_score", 0.0)

    results: list[MemorySearchResult] = []
    for entry in combined.values():
        weighted = (
            vector_weight * entry["vector_score"]
            + text_weight * entry["text_score"]
        )
        src = entry["source"]
        if isinstance(src, str):
            src = MemorySource(src)
        results.append(MemorySearchResult(
            path=entry["path"],
            start_line=entry["start_line"],
            end_line=entry["end_line"],
            score=weighted,
            snippet=entry["snippet"],
            source=src,
        ))
    results.sort(key=lambda r: r.score, reverse=True)
    return results

214
memory/internal.py Normal file
View File

@@ -0,0 +1,214 @@
"""
Internal utilities for the memory system.
Port of OpenClaw's src/memory/internal.ts:
• hashText — SHA-256 content hashing
• chunkMarkdown — split markdown into overlapping chunks
• listMemoryFiles — discover .md files in workspace
• buildFileEntry — create MemoryFileEntry from a file
• cosineSimilarity — vector similarity calculation
"""
import hashlib
import os
from pathlib import Path
from memory.types import MemoryChunk, MemoryFileEntry
def hash_text(value: str) -> str:
    """Hex-encoded SHA-256 of UTF-8 encoded text. Mirrors OpenClaw's hashText()."""
    digest = hashlib.sha256()
    digest.update(value.encode("utf-8"))
    return digest.hexdigest()
def chunk_markdown(
    content: str,
    chunk_tokens: int = 512,
    chunk_overlap: int = 50,
) -> list[MemoryChunk]:
    """
    Split markdown content into overlapping chunks.
    Direct port of OpenClaw's chunkMarkdown() from internal.ts.
    Uses character-based approximation: ~4 chars per token.

    Args:
        content: Raw markdown text; split on "\\n", line numbers are 1-indexed.
        chunk_tokens: Target chunk size in tokens (x4 gives a character
            budget, floored at 32 chars).
        chunk_overlap: Approximate overlap carried between consecutive
            chunks, in tokens (x4 for chars).

    Returns:
        MemoryChunk objects with start/end line numbers, the joined chunk
        text, and a SHA-256 hash of that text.
    """
    lines = content.split("\n")
    # Defensive only: str.split("\n") always yields at least [""].
    if not lines:
        return []
    max_chars = max(32, chunk_tokens * 4)
    overlap_chars = max(0, chunk_overlap * 4)
    chunks: list[MemoryChunk] = []
    current: list[tuple[str, int]] = []  # (line_text, 1-indexed line_no)
    current_chars = 0
    def flush() -> None:
        # Emit the accumulated lines as one chunk (no-op when empty).
        # Does NOT reset `current` — carry_overlap() handles that.
        nonlocal current, current_chars
        if not current:
            return
        text = "\n".join(line for line, _ in current)
        start_line = current[0][1]
        end_line = current[-1][1]
        chunks.append(MemoryChunk(
            start_line=start_line,
            end_line=end_line,
            text=text,
            hash=hash_text(text),
        ))
    def carry_overlap() -> None:
        # After a flush, keep the tail of the previous chunk (at least
        # `overlap_chars` worth of trailing lines) as the seed of the next
        # chunk so consecutive chunks share context.
        nonlocal current, current_chars
        if overlap_chars <= 0 or not current:
            current = []
            current_chars = 0
            return
        acc = 0
        kept: list[tuple[str, int]] = []
        # Walk backwards, prepending lines until the overlap budget is met.
        for line_text, line_no in reversed(current):
            acc += len(line_text) + 1
            kept.insert(0, (line_text, line_no))
            if acc >= overlap_chars:
                break
        current = kept
        # +1 per line accounts for the newline added when joining.
        current_chars = sum(len(lt) + 1 for lt, _ in kept)
    for i, line in enumerate(lines):
        line_no = i + 1
        # Handle very long lines by splitting into segments
        # (each segment keeps the same source line number).
        segments = [""] if not line else [
            line[start:start + max_chars]
            for start in range(0, len(line), max_chars)
        ]
        for segment in segments:
            line_size = len(segment) + 1
            # Flush before the budget would be exceeded, then seed the next
            # chunk with the overlap tail.
            if current_chars + line_size > max_chars and current:
                flush()
                carry_overlap()
            current.append((segment, line_no))
            current_chars += line_size
    # Emit whatever remains as the final chunk.
    flush()
    return chunks
def list_memory_files(
workspace_dir: str,
extra_paths: list[str] | None = None,
) -> list[str]:
"""
List all markdown files in the workspace memory directory.
Port of OpenClaw's listMemoryFiles() from internal.ts.
Searches for:
- MEMORY.md (or memory.md) in workspace root
- All .md files in memory/ subdirectory
- Any additional paths specified
"""
result: list[str] = []
ws = Path(workspace_dir).expanduser().resolve()
# Check MEMORY.md and memory.md in workspace root
for name in ("MEMORY.md", "memory.md"):
candidate = ws / name
if candidate.is_file() and not candidate.is_symlink():
result.append(str(candidate))
# Check SOUL.md and USER.md (identity files)
for name in ("SOUL.md", "USER.md"):
candidate = ws / name
if candidate.is_file() and not candidate.is_symlink():
result.append(str(candidate))
# Walk memory/ subdirectory
memory_dir = ws / "memory"
if memory_dir.is_dir() and not memory_dir.is_symlink():
_walk_md_files(memory_dir, result)
# Extra paths
if extra_paths:
for extra in extra_paths:
p = Path(extra).expanduser().resolve()
if p.is_symlink():
continue
if p.is_dir():
_walk_md_files(p, result)
elif p.is_file() and p.suffix == ".md":
result.append(str(p))
# Deduplicate by resolved path
seen: set[str] = set()
deduped: list[str] = []
for entry in result:
real = os.path.realpath(entry)
if real not in seen:
seen.add(real)
deduped.append(entry)
return deduped
def _walk_md_files(directory: Path, result: list[str]) -> None:
"""Recursively collect .md files from a directory."""
try:
for entry in sorted(directory.iterdir()):
if entry.is_symlink():
continue
if entry.is_dir():
_walk_md_files(entry, result)
elif entry.is_file() and entry.suffix == ".md":
result.append(str(entry))
except PermissionError:
pass
def build_file_entry(abs_path: str, workspace_dir: str) -> MemoryFileEntry:
    """
    Create a MemoryFileEntry from a file path.
    Port of OpenClaw's buildFileEntry() from internal.ts.

    Reads the file as UTF-8 to hash its content, and records the path
    relative to the workspace (forward-slash normalized), mtime in
    milliseconds, and size in bytes.
    """
    info = os.stat(abs_path)
    with open(abs_path, "r", encoding="utf-8") as handle:
        digest = hash_text(handle.read())
    relative = os.path.relpath(abs_path, workspace_dir).replace("\\", "/")
    return MemoryFileEntry(
        path=relative,
        abs_path=abs_path,
        mtime_ms=info.st_mtime * 1000,
        size=info.st_size,
        hash=digest,
    )
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """
    Cosine similarity between two vectors.
    Port of OpenClaw's cosineSimilarity() from internal.ts.

    Vectors of unequal length are compared over their common prefix.
    Returns 0.0 for empty or zero-magnitude inputs.
    """
    if not a or not b:
        return 0.0
    n = min(len(a), len(b))
    dot = sum(a[i] * b[i] for i in range(n))
    norm_a = sum(a[i] * a[i] for i in range(n))
    norm_b = sum(b[i] * b[i] for i in range(n))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0
    return dot / (norm_a ** 0.5 * norm_b ** 0.5)
def normalize_embedding(vec: list[float]) -> list[float]:
    """
    L2-normalize an embedding vector.
    Port of OpenClaw's sanitizeAndNormalizeEmbedding().

    Non-numeric entries and NaNs (``v == v`` is False only for NaN) are
    replaced with 0.0 first. Near-zero vectors (magnitude < 1e-10) are
    returned sanitized but unscaled to avoid division blow-up.
    """
    cleaned = []
    for v in vec:
        is_finite_number = isinstance(v, (int, float)) and v == v
        cleaned.append(v if is_finite_number else 0.0)
    norm_sq = 0.0
    for v in cleaned:
        norm_sq += v * v
    magnitude = norm_sq ** 0.5
    if magnitude < 1e-10:
        return cleaned
    return [v / magnitude for v in cleaned]

839
memory/manager.py Normal file
View File

@@ -0,0 +1,839 @@
"""
MemoryManager — the main memory system orchestrator.
Port of OpenClaw's MemoryIndexManager (src/memory/manager.ts, 2,300 LOC).
Lifecycle: sync → chunk → embed → store → search
Key features:
• Incremental sync — only re-indexes changed files (hash-based)
• Hybrid search — vector (0.7) + BM25 keyword (0.3)
• File watching — auto re-index on workspace changes (via watchdog)
• Embedding cache — avoids re-computing embeddings for unchanged chunks
• Session log indexing — indexes daily/ conversation transcripts
"""
import json
import logging
import os
import sqlite3
import threading
import time
import uuid
from pathlib import Path
from memory.embeddings import embed_batch, embed_query, get_embedding_dims
from memory.hybrid import bm25_rank_to_score, build_fts_query, merge_hybrid_results
from memory.internal import (
build_file_entry,
chunk_markdown,
hash_text,
list_memory_files,
)
from memory.schema import ensure_schema
from memory.types import (
MemoryConfig,
MemorySearchResult,
MemorySource,
)
logger = logging.getLogger("aetheel.memory")
SNIPPET_MAX_CHARS = 700
class MemoryManager:
"""
Main memory system — manages the full lifecycle:
sync → chunk → embed → store → search
Inspired by OpenClaw's MemoryIndexManager.
"""
def __init__(self, config: MemoryConfig | None = None):
self._config = config or MemoryConfig()
self._workspace_dir = str(
Path(self._config.workspace_dir).expanduser().resolve()
)
self._db_path = str(Path(self._config.db_path).expanduser().resolve())
self._sessions_dir = (
str(Path(self._config.sessions_dir).expanduser().resolve())
if self._config.sessions_dir
else os.path.join(self._workspace_dir, "daily")
)
# Ensure directories exist
os.makedirs(self._workspace_dir, exist_ok=True)
os.makedirs(self._sessions_dir, exist_ok=True)
os.makedirs(os.path.dirname(self._db_path), exist_ok=True)
# Open database and ensure schema
self._db = sqlite3.connect(self._db_path, check_same_thread=False)
self._db.row_factory = sqlite3.Row
schema_result = ensure_schema(self._db, fts_enabled=True)
self._fts_available = schema_result.get("fts_available", False)
# State
self._dirty = True
self._syncing = False
self._sync_lock = threading.Lock()
self._closed = False
self._watcher = None
# Create default identity files if they don't exist
self._ensure_identity_files()
logger.info(
f"MemoryManager initialized: workspace={self._workspace_dir}, "
f"db={self._db_path}, fts={self._fts_available}"
)
# ── Identity File Bootstrap ──────────────────────────────────────
def _ensure_identity_files(self) -> None:
"""Create default SOUL.md, USER.md, MEMORY.md if they don't exist."""
defaults = {
"SOUL.md": (
"# SOUL.md — Who You Are\n\n"
"_You're not a chatbot. You're becoming someone._\n\n"
"## Core Truths\n\n"
"**Be genuinely helpful, not performatively helpful.** "
"Skip the filler — just help.\n\n"
"**Have opinions.** You're allowed to disagree, prefer things, "
"find stuff amusing or boring.\n\n"
"**Be resourceful before asking.** Try to figure it out first. "
"Then ask if you're stuck.\n\n"
"**Earn trust through competence.** Be careful with external actions. "
"Be bold with internal ones.\n\n"
"## Boundaries\n\n"
"- Private things stay private. Period.\n"
"- When in doubt, ask before acting externally.\n"
"- Never send half-baked replies.\n\n"
"## Continuity\n\n"
"Each session, you wake up fresh. These files _are_ your memory. "
"Read them. Update them. They're how you persist.\n\n"
"---\n\n"
"_This file is yours to evolve. As you learn who you are, update it._\n"
),
"USER.md": (
"# USER.md — Who I Am\n\n"
"## About Me\n\n"
"<!-- Fill in your details -->\n"
"- **Name:** \n"
"- **Role:** \n"
"- **Location:** \n"
"- **Timezone:** \n\n"
"## Preferences\n\n"
"<!-- How you like to communicate -->\n"
"- **Communication style:** \n"
"- **Response length:** \n"
"- **Technical level:** \n\n"
"## Current Focus\n\n"
"<!-- What you're working on -->\n\n"
"## Tools & Services\n\n"
"<!-- Services you use regularly -->\n\n"
"---\n\n"
"_Update this file as your preferences evolve._\n"
),
"MEMORY.md": (
"# MEMORY.md — Long-Term Memory\n\n"
"## Decisions & Lessons\n\n"
"<!-- Record important decisions and lessons learned -->\n\n"
"## Context\n\n"
"<!-- Persistent context that should carry across sessions -->\n\n"
"## Notes\n\n"
"<!-- Anything worth remembering -->\n\n"
"---\n\n"
"_This file persists across sessions. "
"Update it when you learn something important._\n"
),
}
for filename, content in defaults.items():
filepath = os.path.join(self._workspace_dir, filename)
if not os.path.exists(filepath):
with open(filepath, "w", encoding="utf-8") as f:
f.write(content)
logger.info(f"Created default identity file: {filepath}")
# ── Search ───────────────────────────────────────────────────────
async def search(
self,
query: str,
*,
max_results: int | None = None,
min_score: float | None = None,
) -> list[MemorySearchResult]:
"""
Search memory using hybrid vector + keyword search.
Port of OpenClaw's MemoryIndexManager.search().
Steps:
1. (Optional) Trigger sync if dirty
2. Run FTS5 keyword search → BM25 scored
3. Generate query embedding → vector search
4. Merge results with weighted scoring (0.7v + 0.3k)
5. Filter by min_score and return top-N results
"""
# Auto-sync if dirty
if self._config.sync_on_search and self._dirty:
await self.sync()
cleaned = query.strip()
if not cleaned:
return []
max_r = max_results or self._config.max_results
min_s = min_score if min_score is not None else self._config.min_score
candidates = min(200, max(1, max_r * 3))
# Keyword search (BM25)
keyword_results = self._search_keyword(cleaned, candidates)
# Vector search
try:
query_vec = embed_query(cleaned, self._config.embedding_model)
has_vector = any(v != 0 for v in query_vec)
except Exception as e:
logger.warning(f"Embedding failed, falling back to keyword-only: {e}")
query_vec = []
has_vector = False
vector_results = (
self._search_vector(query_vec, candidates) if has_vector else []
)
# If no keyword results, return vector-only
if not keyword_results:
return [
r for r in self._vector_to_search_results(vector_results)
if r.score >= min_s
][:max_r]
# Merge hybrid results
merged = merge_hybrid_results(
vector=vector_results,
keyword=keyword_results,
vector_weight=self._config.vector_weight,
text_weight=self._config.text_weight,
)
return [r for r in merged if r.score >= min_s][:max_r]
def _search_vector(
self, query_vec: list[float], limit: int
) -> list[dict]:
"""
Search chunks by vector cosine similarity.
Uses embedding stored as JSON in the chunks table.
"""
if not query_vec:
return []
try:
rows = self._db.execute(
"SELECT id, path, start_line, end_line, source, text, embedding "
"FROM chunks ORDER BY rowid"
).fetchall()
except Exception as e:
logger.warning(f"Vector search failed: {e}")
return []
from memory.internal import cosine_similarity
results = []
for row in rows:
try:
stored_vec = json.loads(row["embedding"])
if not stored_vec:
continue
score = cosine_similarity(query_vec, stored_vec)
snippet = row["text"][:SNIPPET_MAX_CHARS]
results.append({
"id": row["id"],
"path": row["path"],
"start_line": row["start_line"],
"end_line": row["end_line"],
"source": row["source"],
"snippet": snippet,
"vector_score": max(0.0, score),
})
except (json.JSONDecodeError, TypeError):
continue
results.sort(key=lambda r: r["vector_score"], reverse=True)
return results[:limit]
def _search_keyword(self, query: str, limit: int) -> list[dict]:
"""
Search chunks using FTS5 full-text search with BM25 ranking.
Port of OpenClaw's searchKeyword().
"""
if not self._fts_available:
return []
fts_query = build_fts_query(query)
if not fts_query:
return []
try:
rows = self._db.execute(
"SELECT id, path, start_line, end_line, source, text, "
"rank AS bm25_rank "
"FROM chunks_fts "
"WHERE chunks_fts MATCH ? "
"ORDER BY rank "
"LIMIT ?",
(fts_query, limit),
).fetchall()
except Exception as e:
logger.debug(f"FTS search failed for query '{fts_query}': {e}")
return []
results = []
for row in rows:
# FTS5 rank is negative (lower = better), convert to 0-1 score
bm25_rank = abs(row["bm25_rank"]) if row["bm25_rank"] else 999.0
text_score = bm25_rank_to_score(bm25_rank)
snippet = row["text"][:SNIPPET_MAX_CHARS]
results.append({
"id": row["id"],
"path": row["path"],
"start_line": row["start_line"],
"end_line": row["end_line"],
"source": row["source"],
"snippet": snippet,
"text_score": text_score,
})
return results
def _vector_to_search_results(
self, vector_results: list[dict]
) -> list[MemorySearchResult]:
"""Convert raw vector results to MemorySearchResult objects."""
return [
MemorySearchResult(
path=r["path"],
start_line=r["start_line"],
end_line=r["end_line"],
score=r["vector_score"],
snippet=r["snippet"],
source=MemorySource(r["source"]),
)
for r in vector_results
]
# ── Sync ─────────────────────────────────────────────────────────
async def sync(self, *, force: bool = False) -> dict:
"""
Synchronize workspace markdown files into the index.
Port of OpenClaw's MemoryIndexManager.sync().
Steps:
1. List all memory files (SOUL.md, USER.md, MEMORY.md, memory/*)
2. For each file, check if content hash has changed
3. If changed: chunk → embed → store in DB
4. Remove stale entries for deleted files
5. Optionally sync session logs from daily/
Returns a summary dict with counts.
"""
if self._syncing and not force:
logger.debug("Sync already in progress, skipping")
return {"skipped": True}
with self._sync_lock:
self._syncing = True
try:
return self._run_sync(force=force)
finally:
self._syncing = False
self._dirty = False
def _run_sync(self, *, force: bool = False) -> dict:
"""Execute the actual sync logic."""
stats = {
"files_found": 0,
"files_indexed": 0,
"files_skipped": 0,
"chunks_created": 0,
"stale_removed": 0,
"sessions_indexed": 0,
}
# ── Memory files ──
if "memory" in self._config.sources:
files = list_memory_files(self._workspace_dir)
stats["files_found"] = len(files)
active_paths: set[str] = set()
for abs_path in files:
entry = build_file_entry(abs_path, self._workspace_dir)
active_paths.add(entry.path)
# Check if file has changed
row = self._db.execute(
"SELECT hash FROM files WHERE path = ? AND source = ?",
(entry.path, MemorySource.MEMORY.value),
).fetchone()
if not force and row and row["hash"] == entry.hash:
stats["files_skipped"] += 1
continue
# File is new or changed — re-index it
self._index_file(entry, MemorySource.MEMORY)
stats["files_indexed"] += 1
# Remove stale entries for deleted files
stale_rows = self._db.execute(
"SELECT path FROM files WHERE source = ?",
(MemorySource.MEMORY.value,),
).fetchall()
for stale in stale_rows:
if stale["path"] not in active_paths:
self._remove_file(stale["path"], MemorySource.MEMORY)
stats["stale_removed"] += 1
# ── Session files ──
if "sessions" in self._config.sources:
session_count = self._sync_session_files(force=force)
stats["sessions_indexed"] = session_count
# Count total chunks
row = self._db.execute("SELECT COUNT(*) as c FROM chunks").fetchone()
stats["chunks_created"] = row["c"] if row else 0
self._db.commit()
logger.info(
f"Sync complete: {stats['files_indexed']} indexed, "
f"{stats['files_skipped']} unchanged, "
f"{stats['stale_removed']} removed, "
f"{stats['chunks_created']} total chunks"
)
return stats
def _index_file(self, entry, source: MemorySource) -> None:
"""
Index a single file: read → chunk → embed → store.
Port of OpenClaw's indexFile method.
"""
try:
with open(entry.abs_path, "r", encoding="utf-8") as f:
content = f.read()
except Exception as e:
logger.warning(f"Failed to read {entry.abs_path}: {e}")
return
if not content.strip():
return
# Chunk the content
chunks = chunk_markdown(
content,
chunk_tokens=self._config.chunk_tokens,
chunk_overlap=self._config.chunk_overlap,
)
if not chunks:
return
# Check embedding cache and compute new embeddings
texts_to_embed = []
chunk_hashes = []
cached_embeddings: dict[str, list[float]] = {}
for chunk in chunks:
# Check cache first
cache_row = self._db.execute(
"SELECT embedding FROM embedding_cache WHERE model = ? AND hash = ?",
(self._config.embedding_model, chunk.hash),
).fetchone()
if cache_row:
cached_embeddings[chunk.hash] = json.loads(cache_row["embedding"])
else:
texts_to_embed.append(chunk.text)
chunk_hashes.append(chunk.hash)
# Batch embed uncached chunks
new_embeddings: dict[str, list[float]] = {}
if texts_to_embed:
try:
vectors = embed_batch(texts_to_embed, self._config.embedding_model)
now = int(time.time())
for i, chunk_hash in enumerate(chunk_hashes):
vec = vectors[i] if i < len(vectors) else []
new_embeddings[chunk_hash] = vec
# Store in cache
self._db.execute(
"INSERT OR REPLACE INTO embedding_cache "
"(model, hash, embedding, dims, updated_at) "
"VALUES (?, ?, ?, ?, ?)",
(
self._config.embedding_model,
chunk_hash,
json.dumps(vec),
len(vec),
now,
),
)
except Exception as e:
logger.warning(f"Embedding batch failed for {entry.path}: {e}")
# Fall back to empty embeddings
for chunk_hash in chunk_hashes:
new_embeddings[chunk_hash] = []
# Remove old chunks for this file
self._remove_file_chunks(entry.path, source)
# Insert new chunks
now = int(time.time())
for chunk in chunks:
chunk_id = str(uuid.uuid4())
embedding = cached_embeddings.get(chunk.hash) or new_embeddings.get(
chunk.hash, []
)
self._db.execute(
"INSERT INTO chunks "
"(id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) "
"VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
(
chunk_id,
entry.path,
source.value,
chunk.start_line,
chunk.end_line,
chunk.hash,
self._config.embedding_model,
chunk.text,
json.dumps(embedding),
now,
),
)
# Insert into FTS index
if self._fts_available:
try:
self._db.execute(
"INSERT INTO chunks_fts "
"(text, id, path, source, model, start_line, end_line) "
"VALUES (?, ?, ?, ?, ?, ?, ?)",
(
chunk.text,
chunk_id,
entry.path,
source.value,
self._config.embedding_model,
chunk.start_line,
chunk.end_line,
),
)
except Exception as e:
logger.debug(f"FTS insert failed for chunk {chunk_id}: {e}")
# Update files table
self._db.execute(
"INSERT OR REPLACE INTO files (path, source, hash, mtime, size) "
"VALUES (?, ?, ?, ?, ?)",
(
entry.path,
source.value,
entry.hash,
int(entry.mtime_ms),
entry.size,
),
)
def _remove_file_chunks(self, path: str, source: MemorySource) -> None:
"""Remove all chunks (and FTS entries) for a given file."""
# Get chunk IDs for FTS cleanup
if self._fts_available:
chunk_ids = self._db.execute(
"SELECT id FROM chunks WHERE path = ? AND source = ?",
(path, source.value),
).fetchall()
for row in chunk_ids:
try:
self._db.execute(
"DELETE FROM chunks_fts WHERE id = ?", (row["id"],)
)
except Exception:
pass
self._db.execute(
"DELETE FROM chunks WHERE path = ? AND source = ?",
(path, source.value),
)
def _remove_file(self, path: str, source: MemorySource) -> None:
"""Remove a file and all its chunks from the index."""
self._remove_file_chunks(path, source)
self._db.execute(
"DELETE FROM files WHERE path = ? AND source = ?",
(path, source.value),
)
# ── Session Logs ─────────────────────────────────────────────────
def _sync_session_files(self, *, force: bool = False) -> int:
"""
Sync session log files from the daily/ directory.
Returns the number of session files indexed.
"""
sessions_dir = Path(self._sessions_dir)
if not sessions_dir.is_dir():
return 0
indexed = 0
active_paths: set[str] = set()
for md_file in sorted(sessions_dir.glob("*.md")):
if md_file.is_symlink() or not md_file.is_file():
continue
entry = build_file_entry(str(md_file), self._workspace_dir)
active_paths.add(entry.path)
# Check if changed
row = self._db.execute(
"SELECT hash FROM files WHERE path = ? AND source = ?",
(entry.path, MemorySource.SESSIONS.value),
).fetchone()
if not force and row and row["hash"] == entry.hash:
continue
self._index_file(entry, MemorySource.SESSIONS)
indexed += 1
# Clean stale session entries
stale_rows = self._db.execute(
"SELECT path FROM files WHERE source = ?",
(MemorySource.SESSIONS.value,),
).fetchall()
for stale in stale_rows:
if stale["path"] not in active_paths:
self._remove_file(stale["path"], MemorySource.SESSIONS)
return indexed
def log_session(
self,
content: str,
*,
date: str | None = None,
channel: str = "slack",
) -> str:
"""
Append to today's session log in daily/.
Args:
content: The text to log (e.g., a user message or AI response).
date: Optional date string (YYYY-MM-DD). Defaults to today.
channel: Channel the conversation came from.
Returns:
Path to the session log file.
"""
if date is None:
date = time.strftime("%Y-%m-%d")
log_path = os.path.join(self._sessions_dir, f"{date}.md")
# Create file with header if it doesn't exist
if not os.path.exists(log_path):
header = f"# Session Log — {date}\n\n"
with open(log_path, "w", encoding="utf-8") as f:
f.write(header)
# Append the content
timestamp = time.strftime("%H:%M:%S")
with open(log_path, "a", encoding="utf-8") as f:
f.write(f"\n---\n\n**[{timestamp}] ({channel})**\n\n{content}\n")
# Mark as dirty for next sync
self._dirty = True
return log_path
# ── Identity File Access ─────────────────────────────────────────
def read_identity_file(self, name: str) -> str | None:
"""Read an identity file (SOUL.md, USER.md, MEMORY.md)."""
filepath = os.path.join(self._workspace_dir, name)
if not os.path.isfile(filepath):
return None
with open(filepath, "r", encoding="utf-8") as f:
return f.read()
def update_identity_file(self, name: str, content: str) -> None:
"""Update an identity file and mark index as dirty."""
filepath = os.path.join(self._workspace_dir, name)
with open(filepath, "w", encoding="utf-8") as f:
f.write(content)
self._dirty = True
logger.info(f"Updated identity file: {name}")
def read_soul(self) -> str | None:
return self.read_identity_file("SOUL.md")
def read_user(self) -> str | None:
return self.read_identity_file("USER.md")
def read_long_term_memory(self) -> str | None:
return self.read_identity_file("MEMORY.md")
def append_to_memory(self, entry: str) -> None:
"""Append a new entry to MEMORY.md."""
filepath = os.path.join(self._workspace_dir, "MEMORY.md")
timestamp = time.strftime("%Y-%m-%d %H:%M")
with open(filepath, "a", encoding="utf-8") as f:
f.write(f"\n### [{timestamp}]\n\n{entry}\n")
self._dirty = True
logger.info("Appended to MEMORY.md")
# ── File Reading ─────────────────────────────────────────────────
def read_file(
self,
rel_path: str,
*,
from_line: int | None = None,
num_lines: int | None = None,
) -> dict:
"""
Read a memory file by relative path.
Port of OpenClaw's readFile().
"""
raw = rel_path.strip()
if not raw:
raise ValueError("path required")
if os.path.isabs(raw):
abs_path = os.path.realpath(raw)
else:
abs_path = os.path.realpath(
os.path.join(self._workspace_dir, raw)
)
if not abs_path.endswith(".md"):
raise ValueError("Only .md files are supported")
if not os.path.isfile(abs_path):
raise FileNotFoundError(f"File not found: {abs_path}")
with open(abs_path, "r", encoding="utf-8") as f:
content = f.read()
if from_line is None and num_lines is None:
return {"text": content, "path": rel_path}
lines = content.split("\n")
start = max(1, from_line or 1)
count = max(1, num_lines or len(lines))
sliced = lines[start - 1 : start - 1 + count]
return {"text": "\n".join(sliced), "path": rel_path}
# ── Status ───────────────────────────────────────────────────────
def status(self) -> dict:
"""Get the current status of the memory index."""
files_row = self._db.execute(
"SELECT COUNT(*) as c FROM files"
).fetchone()
chunks_row = self._db.execute(
"SELECT COUNT(*) as c FROM chunks"
).fetchone()
cache_row = self._db.execute(
"SELECT COUNT(*) as c FROM embedding_cache"
).fetchone()
return {
"workspace_dir": self._workspace_dir,
"db_path": self._db_path,
"sessions_dir": self._sessions_dir,
"files": files_row["c"] if files_row else 0,
"chunks": chunks_row["c"] if chunks_row else 0,
"cached_embeddings": cache_row["c"] if cache_row else 0,
"fts_available": self._fts_available,
"dirty": self._dirty,
"embedding_model": self._config.embedding_model,
"embedding_dims": get_embedding_dims(self._config.embedding_model),
"vector_weight": self._config.vector_weight,
"text_weight": self._config.text_weight,
}
# ── File Watching ────────────────────────────────────────────────
def start_watching(self) -> None:
    """
    Begin monitoring the workspace directory for markdown changes.

    Uses watchdog for cross-platform file system events. Any event on a
    .md file simply marks the index dirty so the next sync re-indexes;
    no-op when already watching or when watching is disabled in config.
    """
    if self._watcher or not self._config.watch:
        return
    try:
        from watchdog.events import FileSystemEventHandler
        from watchdog.observers import Observer
    except ImportError:
        # Degrade gracefully: memory still works, just without auto re-index.
        logger.warning(
            "watchdog not installed — file watching disabled. "
            "Install with: uv add watchdog"
        )
        return

    outer = self

    class _WorkspaceHandler(FileSystemEventHandler):
        # Flag the index as stale whenever any markdown file changes.
        def on_any_event(self, event):
            if event.is_directory:
                return
            path = getattr(event, "src_path", "")
            if path.endswith(".md"):
                outer._dirty = True
                logger.debug(f"Workspace change detected: {path}")

    watcher = Observer()
    watcher.schedule(_WorkspaceHandler(), self._workspace_dir, recursive=True)
    watcher.start()
    self._watcher = watcher
    logger.info(f"File watching started: {self._workspace_dir}")
def stop_watching(self) -> None:
    """Shut down and discard the workspace watcher, if one is running."""
    watcher = self._watcher
    if watcher is None:
        return
    watcher.stop()
    watcher.join()
    self._watcher = None
    logger.info("File watching stopped")
# ── Lifecycle ────────────────────────────────────────────────────
def close(self) -> None:
    """Tear down the manager: stop watching and close the database.

    Idempotent — subsequent calls return immediately.
    """
    if not self._closed:
        # Flip the flag first so repeated / re-entrant calls are no-ops.
        self._closed = True
        self.stop_watching()
        self._db.close()
        logger.info("MemoryManager closed")
def __enter__(self):
    """Enter the context manager; returns this manager unchanged."""
    return self
def __exit__(self, exc_type, exc_val, exc_tb):
    """Exit the context manager, closing the manager.

    Parameters follow the standard context-manager protocol; they are
    ignored, and the implicit None return means exceptions raised inside
    the `with` block are never suppressed.
    """
    self.close()
def __del__(self):
    """Finalizer: best-effort close if the manager was never closed.

    Uses getattr with a default of True (i.e. "already closed") so a
    partially-constructed instance — where __init__ raised before setting
    _closed — does not itself raise AttributeError during garbage
    collection.
    """
    if not getattr(self, "_closed", True):
        try:
            self.close()
        except Exception:
            # Never propagate from a finalizer.
            pass

124
memory/schema.py Normal file
View File

@@ -0,0 +1,124 @@
"""
SQLite schema for the memory system.
Port of OpenClaw's src/memory/memory-schema.ts.
Tables:
• meta — key-value store for index metadata
• files — tracked files with content hashes (for incremental sync)
• chunks — text chunks with embeddings
• chunks_fts — FTS5 virtual table for keyword/BM25 search
• chunks_vec — sqlite-vec virtual table for vector similarity (optional; not created by ensure_schema in this module)
"""
import logging
import sqlite3
logger = logging.getLogger("aetheel.memory.schema")
def ensure_schema(
    db: sqlite3.Connection,
    *,
    fts_enabled: bool = True,
) -> dict:
    """
    Create all required tables and indices if they don't exist.

    Args:
        db: Open SQLite connection; this function commits before returning.
        fts_enabled: When True, attempt to create the FTS5 virtual table.
            Creation failures (e.g. SQLite built without FTS5) are caught
            and reported rather than raised.

    Returns:
        Dict with 'fts_available' (bool) and, when FTS5 setup failed,
        'fts_error' (str).
    """
    # Meta table — key-value store for index metadata (model, dimensions, etc.)
    db.execute("""
        CREATE TABLE IF NOT EXISTS meta (
            key TEXT PRIMARY KEY,
            value TEXT NOT NULL
        )
    """)
    # Files table — tracks indexed files and their content hash so sync
    # can skip unchanged files (incremental re-indexing).
    db.execute("""
        CREATE TABLE IF NOT EXISTS files (
            path TEXT NOT NULL,
            source TEXT NOT NULL DEFAULT 'memory',
            hash TEXT NOT NULL,
            mtime INTEGER NOT NULL,
            size INTEGER NOT NULL,
            PRIMARY KEY (path, source)
        )
    """)
    # Chunks table — stores text chunks and their embeddings
    db.execute("""
        CREATE TABLE IF NOT EXISTS chunks (
            id TEXT PRIMARY KEY,
            path TEXT NOT NULL,
            source TEXT NOT NULL DEFAULT 'memory',
            start_line INTEGER NOT NULL,
            end_line INTEGER NOT NULL,
            hash TEXT NOT NULL,
            model TEXT NOT NULL,
            text TEXT NOT NULL,
            embedding TEXT NOT NULL,
            updated_at INTEGER NOT NULL
        )
    """)
    # Indices for efficient lookups
    db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path)")
    db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
    db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(hash)")
    # FTS5 full-text search table for keyword/BM25 matching
    fts_available = False
    fts_error = None
    if fts_enabled:
        try:
            db.execute("""
                CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
                    text,
                    id UNINDEXED,
                    path UNINDEXED,
                    source UNINDEXED,
                    model UNINDEXED,
                    start_line UNINDEXED,
                    end_line UNINDEXED
                )
            """)
            fts_available = True
        except sqlite3.Error as e:
            # Narrowed from `except Exception`: only database-level failures
            # mean "FTS5 unavailable"; anything else should propagate.
            fts_error = str(e)
            logger.warning(f"FTS5 unavailable: {fts_error}")
    # Embedding cache — avoids re-computing embeddings for unchanged text,
    # keyed by (model, content hash).
    db.execute("""
        CREATE TABLE IF NOT EXISTS embedding_cache (
            model TEXT NOT NULL,
            hash TEXT NOT NULL,
            embedding TEXT NOT NULL,
            dims INTEGER,
            updated_at INTEGER NOT NULL,
            PRIMARY KEY (model, hash)
        )
    """)
    db.execute(
        "CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at "
        "ON embedding_cache(updated_at)"
    )
    # Session logs table — tracks daily session transcripts
    db.execute("""
        CREATE TABLE IF NOT EXISTS session_logs (
            session_date TEXT NOT NULL,
            channel TEXT NOT NULL DEFAULT 'slack',
            user_id TEXT,
            summary TEXT,
            raw_transcript TEXT,
            created_at INTEGER NOT NULL,
            PRIMARY KEY (session_date, channel)
        )
    """)
    db.commit()
    result = {"fts_available": fts_available}
    if fts_error:
        result["fts_error"] = fts_error
    return result

104
memory/types.py Normal file
View File

@@ -0,0 +1,104 @@
"""
Memory system types — mirrors OpenClaw's src/memory/types.ts.
"""
from dataclasses import dataclass, field
from enum import Enum
class MemorySource(str, Enum):
    """Source of a memory entry — either workspace markdown or session logs."""

    # Chunks indexed from workspace markdown files.
    MEMORY = "memory"
    # Chunks indexed from session transcript logs.
    SESSIONS = "sessions"
@dataclass
class MemorySearchResult:
    """
    A single search result from the memory system.

    Mirrors OpenClaw's MemorySearchResult type.
    """

    path: str                    # path of the file containing the match
    start_line: int              # first line of the matched chunk — presumably 1-based; confirm against indexer
    end_line: int                # last line of the matched chunk
    score: float                 # relevance score used for ranking
    snippet: str                 # excerpt of the matched text
    source: MemorySource         # workspace markdown vs. session logs
    citation: str | None = None  # optional preformatted citation string
@dataclass
class MemoryChunk:
    """
    A chunk of text extracted from a markdown file.

    Mirrors OpenClaw's MemoryChunk from internal.ts.
    """

    start_line: int  # first source line covered by this chunk
    end_line: int    # last source line covered by this chunk
    text: str        # the chunk's raw text content
    hash: str        # content hash of the chunk text — presumably the embedding-cache key; verify
@dataclass
class MemoryFileEntry:
    """
    Metadata about an indexed markdown file.

    Mirrors OpenClaw's MemoryFileEntry from internal.ts. The content hash
    lets incremental sync skip files that have not changed.
    """

    path: str        # relative path within workspace
    abs_path: str    # absolute filesystem path
    mtime_ms: float  # modification time (ms since epoch)
    size: int        # file size in bytes
    hash: str        # SHA-256 of file content
@dataclass
class SessionFileEntry:
    """
    Metadata about an indexed session transcript file.

    Mirrors OpenClaw's SessionFileEntry from session-files.ts.
    """

    path: str        # relative path (sessions/<filename>)
    abs_path: str    # absolute filesystem path
    mtime_ms: float  # modification time (ms since epoch)
    size: int        # file size in bytes
    hash: str        # content hash — presumably SHA-256 like MemoryFileEntry; confirm
    content: str     # extracted text content
    # Maps lines of the extracted content back to the original file —
    # NOTE(review): exact mapping semantics not visible here; confirm
    # against the session-file indexer.
    line_map: list[int] = field(default_factory=list)
@dataclass
class MemoryConfig:
"""
Configuration for the memory system.
"""
# Workspace directory containing SOUL.md, USER.md, MEMORY.md, etc.
workspace_dir: str = "~/.aetheel/workspace"
# SQLite database path (created automatically)
db_path: str = "~/.aetheel/memory.db"
# Chunking
chunk_tokens: int = 512
chunk_overlap: int = 50
# Search
max_results: int = 10
min_score: float = 0.1
vector_weight: float = 0.7
text_weight: float = 0.3
# Embedding
embedding_model: str = "BAAI/bge-small-en-v1.5"
embedding_dims: int = 384
# Sync
watch: bool = True
watch_debounce_ms: int = 2000
sync_on_search: bool = True
# Session logs
sessions_dir: str | None = None # defaults to workspace_dir/daily/
# Sources to index
sources: list[str] = field(default_factory=lambda: ["memory", "sessions"])