first commit

This commit is contained in:
Tanmay Karande
2026-02-13 23:56:09 -05:00
commit ec8bd80a3d
27 changed files with 6725 additions and 0 deletions

124
memory/schema.py Normal file
View File

@@ -0,0 +1,124 @@
"""
SQLite schema for the memory system.
Port of OpenClaw's src/memory/memory-schema.ts.
Tables:
• meta — key-value store for index metadata
• files — tracked files with content hashes (for incremental sync)
• chunks — text chunks with embeddings
• chunks_fts — FTS5 virtual table for keyword/BM25 search
• chunks_vec — sqlite-vec virtual table for vector similarity (optional)
"""
import logging
import sqlite3
logger = logging.getLogger("aetheel.memory.schema")
def ensure_schema(
db: sqlite3.Connection,
*,
fts_enabled: bool = True,
) -> dict:
"""
Create all required tables if they don't exist.
Returns a dict with 'fts_available' and optionally 'fts_error'.
"""
# Meta table — stores index config (model, dimensions, etc.)
db.execute("""
CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
)
""")
# Files table — tracks which files have been indexed and their content hash
db.execute("""
CREATE TABLE IF NOT EXISTS files (
path TEXT NOT NULL,
source TEXT NOT NULL DEFAULT 'memory',
hash TEXT NOT NULL,
mtime INTEGER NOT NULL,
size INTEGER NOT NULL,
PRIMARY KEY (path, source)
)
""")
# Chunks table — stores text chunks and their embeddings
db.execute("""
CREATE TABLE IF NOT EXISTS chunks (
id TEXT PRIMARY KEY,
path TEXT NOT NULL,
source TEXT NOT NULL DEFAULT 'memory',
start_line INTEGER NOT NULL,
end_line INTEGER NOT NULL,
hash TEXT NOT NULL,
model TEXT NOT NULL,
text TEXT NOT NULL,
embedding TEXT NOT NULL,
updated_at INTEGER NOT NULL
)
""")
# Indices for efficient lookups
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_path ON chunks(path)")
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source)")
db.execute("CREATE INDEX IF NOT EXISTS idx_chunks_hash ON chunks(hash)")
# FTS5 full-text search table for keyword/BM25 matching
fts_available = False
fts_error = None
if fts_enabled:
try:
db.execute("""
CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
text,
id UNINDEXED,
path UNINDEXED,
source UNINDEXED,
model UNINDEXED,
start_line UNINDEXED,
end_line UNINDEXED
)
""")
fts_available = True
except Exception as e:
fts_error = str(e)
logger.warning(f"FTS5 unavailable: {fts_error}")
# Embedding cache table — avoids re-computing embeddings
db.execute("""
CREATE TABLE IF NOT EXISTS embedding_cache (
model TEXT NOT NULL,
hash TEXT NOT NULL,
embedding TEXT NOT NULL,
dims INTEGER,
updated_at INTEGER NOT NULL,
PRIMARY KEY (model, hash)
)
""")
db.execute(
"CREATE INDEX IF NOT EXISTS idx_embedding_cache_updated_at "
"ON embedding_cache(updated_at)"
)
# Session logs table — tracks daily session transcripts
db.execute("""
CREATE TABLE IF NOT EXISTS session_logs (
session_date TEXT NOT NULL,
channel TEXT NOT NULL DEFAULT 'slack',
user_id TEXT,
summary TEXT,
raw_transcript TEXT,
created_at INTEGER NOT NULL,
PRIMARY KEY (session_date, channel)
)
""")
db.commit()
result = {"fts_available": fts_available}
if fts_error:
result["fts_error"] = fts_error
return result