""" SQLite schema for the memory system. Port of OpenClaw's src/memory/memory-schema.ts. Tables: • meta — key-value store for index metadata • files — tracked files with content hashes (for incremental sync) • chunks — text chunks with embeddings • chunks_fts — FTS5 virtual table for keyword/BM25 search • chunks_vec — sqlite-vec virtual table for vector similarity (optional) """ import logging import sqlite3 logger = logging.getLogger("aetheel.memory.schema") def ensure_schema( db: sqlite3.Connection, *, fts_enabled: bool = True, ) -> dict: """ Create all required tables if they don't exist. Returns a dict with 'fts_available' and optionally 'fts_error'. """ # Meta table — stores index config (model, dimensions, etc.) db.execute(""" CREATE TABLE IF NOT EXISTS meta ( key TEXT PRIMARY KEY, value TEXT NOT NULL ) """) # Files table — tracks which files have been indexed and their content hash db.execute(""" CREATE TABLE IF NOT EXISTS files ( path TEXT NOT NULL, source TEXT NOT NULL DEFAULT 'memory', hash TEXT NOT NULL, mtime INTEGER NOT NULL, size INTEGER NOT NULL, PRIMARY KEY (path, source) ) """) # Chunks table — stores text chunks and their embeddings db.execute(""" CREATE TABLE IF NOT EXISTS chunks ( id TEXT PRIMARY KEY, path TEXT NOT NULL, source TEXT NOT NULL DEFAULT 'memory', start_line INTEGER NOT NULL, end_line INTEGER NOT NULL, hash TEXT NOT NULL, model TEXT NOT NULL, text TEXT NOT NULL, embedding TEXT NOT NULL, updated_at INTEGER NOT NULL ) """) # Indices for efficient lookups db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path)") db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)") db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(hash)") # FTS5 full-text search table for keyword/BM25 matching fts_available = False fts_error = None if fts_enabled: try: db.execute(""" CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5( text, id UNINDEXED, path UNINDEXED, source UNINDEXED, model UNINDEXED, start_line UNINDEXED, end_line UNINDEXED ) """) fts_available = True except Exception as e: fts_error = str(e) logger.warning(f"FTS5 unavailable: {fts_error}") # Embedding cache table — avoids re-computing embeddings db.execute(""" CREATE TABLE IF NOT EXISTS embedding_cache ( model TEXT NOT NULL, hash TEXT NOT NULL, embedding TEXT NOT NULL, dims INTEGER, updated_at INTEGER NOT NULL, PRIMARY KEY (model, hash) ) """) db.execute( "CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at " "ON embedding_cache(updated_at)" ) # Session logs table — tracks daily session transcripts db.execute(""" CREATE TABLE IF NOT EXISTS session_logs ( session_date TEXT NOT NULL, channel TEXT NOT NULL DEFAULT 'slack', user_id TEXT, summary TEXT, raw_transcript TEXT, created_at INTEGER NOT NULL, PRIMARY KEY (session_date, channel) ) """) db.commit() result = {"fts_available": fts_available} if fts_error: result["fts_error"] = fts_error return result