first commit
This commit is contained in:
124
memory/schema.py
Normal file
124
memory/schema.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
SQLite schema for the memory system.
|
||||
Port of OpenClaw's src/memory/memory-schema.ts.
|
||||
|
||||
Tables:
|
||||
• meta — key-value store for index metadata
|
||||
• files — tracked files with content hashes (for incremental sync)
|
||||
• chunks — text chunks with embeddings
|
||||
• chunks_fts — FTS5 virtual table for keyword/BM25 search
|
||||
• chunks_vec — sqlite-vec virtual table for vector similarity (optional)
|
||||
"""
|
||||
|
||||
import logging
|
||||
import sqlite3
|
||||
|
||||
logger = logging.getLogger("aetheel.memory.schema")
|
||||
|
||||
|
||||
def ensure_schema(
|
||||
db: sqlite3.Connection,
|
||||
*,
|
||||
fts_enabled: bool = True,
|
||||
) -> dict:
|
||||
"""
|
||||
Create all required tables if they don't exist.
|
||||
Returns a dict with 'fts_available' and optionally 'fts_error'.
|
||||
"""
|
||||
# Meta table — stores index config (model, dimensions, etc.)
|
||||
db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS meta (
|
||||
key TEXT PRIMARY KEY,
|
||||
value TEXT NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
# Files table — tracks which files have been indexed and their content hash
|
||||
db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
path TEXT NOT NULL,
|
||||
source TEXT NOT NULL DEFAULT 'memory',
|
||||
hash TEXT NOT NULL,
|
||||
mtime INTEGER NOT NULL,
|
||||
size INTEGER NOT NULL,
|
||||
PRIMARY KEY (path, source)
|
||||
)
|
||||
""")
|
||||
|
||||
# Chunks table — stores text chunks and their embeddings
|
||||
db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
id TEXT PRIMARY KEY,
|
||||
path TEXT NOT NULL,
|
||||
source TEXT NOT NULL DEFAULT 'memory',
|
||||
start_line INTEGER NOT NULL,
|
||||
end_line INTEGER NOT NULL,
|
||||
hash TEXT NOT NULL,
|
||||
model TEXT NOT NULL,
|
||||
text TEXT NOT NULL,
|
||||
embedding TEXT NOT NULL,
|
||||
updated_at INTEGER NOT NULL
|
||||
)
|
||||
""")
|
||||
|
||||
# Indices for efficient lookups
|
||||
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path)")
|
||||
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
|
||||
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(hash)")
|
||||
|
||||
# FTS5 full-text search table for keyword/BM25 matching
|
||||
fts_available = False
|
||||
fts_error = None
|
||||
if fts_enabled:
|
||||
try:
|
||||
db.execute("""
|
||||
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
|
||||
text,
|
||||
id UNINDEXED,
|
||||
path UNINDEXED,
|
||||
source UNINDEXED,
|
||||
model UNINDEXED,
|
||||
start_line UNINDEXED,
|
||||
end_line UNINDEXED
|
||||
)
|
||||
""")
|
||||
fts_available = True
|
||||
except Exception as e:
|
||||
fts_error = str(e)
|
||||
logger.warning(f"FTS5 unavailable: {fts_error}")
|
||||
|
||||
# Embedding cache table — avoids re-computing embeddings
|
||||
db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS embedding_cache (
|
||||
model TEXT NOT NULL,
|
||||
hash TEXT NOT NULL,
|
||||
embedding TEXT NOT NULL,
|
||||
dims INTEGER,
|
||||
updated_at INTEGER NOT NULL,
|
||||
PRIMARY KEY (model, hash)
|
||||
)
|
||||
""")
|
||||
db.execute(
|
||||
"CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at "
|
||||
"ON embedding_cache(updated_at)"
|
||||
)
|
||||
|
||||
# Session logs table — tracks daily session transcripts
|
||||
db.execute("""
|
||||
CREATE TABLE IF NOT EXISTS session_logs (
|
||||
session_date TEXT NOT NULL,
|
||||
channel TEXT NOT NULL DEFAULT 'slack',
|
||||
user_id TEXT,
|
||||
summary TEXT,
|
||||
raw_transcript TEXT,
|
||||
created_at INTEGER NOT NULL,
|
||||
PRIMARY KEY (session_date, channel)
|
||||
)
|
||||
""")
|
||||
|
||||
db.commit()
|
||||
|
||||
result = {"fts_available": fts_available}
|
||||
if fts_error:
|
||||
result["fts_error"] = fts_error
|
||||
return result
|
||||
Reference in New Issue
Block a user