125 lines
3.8 KiB
Python
125 lines
3.8 KiB
Python
"""
|
|
SQLite schema for the memory system.
|
|
Port of OpenClaw's src/memory/memory-schema.ts.
|
|
|
|
Tables:
|
|
• meta — key-value store for index metadata
|
|
• files — tracked files with content hashes (for incremental sync)
|
|
• chunks — text chunks with embeddings
|
|
• chunks_fts — FTS5 virtual table for keyword/BM25 search
|
|
• chunks_vec — sqlite-vec virtual table for vector similarity (optional)
|
|
"""
|
|
|
|
import logging
|
|
import sqlite3
|
|
|
|
logger = logging.getLogger("aetheel.memory.schema")
|
|
|
|
|
|
def ensure_schema(
|
|
db: sqlite3.Connection,
|
|
*,
|
|
fts_enabled: bool = True,
|
|
) -> dict:
|
|
"""
|
|
Create all required tables if they don't exist.
|
|
Returns a dict with 'fts_available' and optionally 'fts_error'.
|
|
"""
|
|
# Meta table — stores index config (model, dimensions, etc.)
|
|
db.execute("""
|
|
CREATE TABLE IF NOT EXISTS meta (
|
|
key TEXT PRIMARY KEY,
|
|
value TEXT NOT NULL
|
|
)
|
|
""")
|
|
|
|
# Files table — tracks which files have been indexed and their content hash
|
|
db.execute("""
|
|
CREATE TABLE IF NOT EXISTS files (
|
|
path TEXT NOT NULL,
|
|
source TEXT NOT NULL DEFAULT 'memory',
|
|
hash TEXT NOT NULL,
|
|
mtime INTEGER NOT NULL,
|
|
size INTEGER NOT NULL,
|
|
PRIMARY KEY (path, source)
|
|
)
|
|
""")
|
|
|
|
# Chunks table — stores text chunks and their embeddings
|
|
db.execute("""
|
|
CREATE TABLE IF NOT EXISTS chunks (
|
|
id TEXT PRIMARY KEY,
|
|
path TEXT NOT NULL,
|
|
source TEXT NOT NULL DEFAULT 'memory',
|
|
start_line INTEGER NOT NULL,
|
|
end_line INTEGER NOT NULL,
|
|
hash TEXT NOT NULL,
|
|
model TEXT NOT NULL,
|
|
text TEXT NOT NULL,
|
|
embedding TEXT NOT NULL,
|
|
updated_at INTEGER NOT NULL
|
|
)
|
|
""")
|
|
|
|
# Indices for efficient lookups
|
|
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path)")
|
|
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
|
|
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(hash)")
|
|
|
|
# FTS5 full-text search table for keyword/BM25 matching
|
|
fts_available = False
|
|
fts_error = None
|
|
if fts_enabled:
|
|
try:
|
|
db.execute("""
|
|
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
|
text,
|
|
id UNINDEXED,
|
|
path UNINDEXED,
|
|
source UNINDEXED,
|
|
model UNINDEXED,
|
|
start_line UNINDEXED,
|
|
end_line UNINDEXED
|
|
)
|
|
""")
|
|
fts_available = True
|
|
except Exception as e:
|
|
fts_error = str(e)
|
|
logger.warning(f"FTS5 unavailable: {fts_error}")
|
|
|
|
# Embedding cache table — avoids re-computing embeddings
|
|
db.execute("""
|
|
CREATE TABLE IF NOT EXISTS embedding_cache (
|
|
model TEXT NOT NULL,
|
|
hash TEXT NOT NULL,
|
|
embedding TEXT NOT NULL,
|
|
dims INTEGER,
|
|
updated_at INTEGER NOT NULL,
|
|
PRIMARY KEY (model, hash)
|
|
)
|
|
""")
|
|
db.execute(
|
|
"CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at "
|
|
"ON embedding_cache(updated_at)"
|
|
)
|
|
|
|
# Session logs table — tracks daily session transcripts
|
|
db.execute("""
|
|
CREATE TABLE IF NOT EXISTS session_logs (
|
|
session_date TEXT NOT NULL,
|
|
channel TEXT NOT NULL DEFAULT 'slack',
|
|
user_id TEXT,
|
|
summary TEXT,
|
|
raw_transcript TEXT,
|
|
created_at INTEGER NOT NULL,
|
|
PRIMARY KEY (session_date, channel)
|
|
)
|
|
""")
|
|
|
|
db.commit()
|
|
|
|
result = {"fts_available": fts_available}
|
|
if fts_error:
|
|
result["fts_error"] = fts_error
|
|
return result
|