"""
MemoryManager — the main memory system orchestrator.

Port of OpenClaw's MemoryIndexManager (src/memory/manager.ts, 2,300 LOC).

Lifecycle: sync → chunk → embed → store → search

Key features:
• Incremental sync — only re-indexes changed files (hash-based)
• Hybrid search — vector (0.7) + BM25 keyword (0.3)
• File watching — auto re-index on workspace changes (via watchdog)
• Embedding cache — avoids re-computing embeddings for unchanged chunks
• Session log indexing — indexes daily/ conversation transcripts
"""
|
|
import json
|
|
import logging
|
|
import os
|
|
import sqlite3
|
|
import threading
|
|
import time
|
|
import uuid
|
|
from pathlib import Path
|
|
|
|
from memory.embeddings import embed_batch, embed_query, get_embedding_dims
|
|
from memory.hybrid import bm25_rank_to_score, build_fts_query, merge_hybrid_results
|
|
from memory.internal import (
|
|
build_file_entry,
|
|
chunk_markdown,
|
|
hash_text,
|
|
list_memory_files,
|
|
)
|
|
from memory.schema import ensure_schema
|
|
from memory.types import (
|
|
MemoryConfig,
|
|
MemorySearchResult,
|
|
MemorySource,
|
|
)
|
|
|
|
# Module-level logger; handlers/levels are configured by the application.
logger = logging.getLogger("aetheel.memory")

# Stored chunk text is truncated to this many characters when building
# result snippets.
SNIPPET_MAX_CHARS = 700
|
class MemoryManager:
    """
    Main memory system — orchestrates the full lifecycle:
    sync → chunk → embed → store → search

    Inspired by OpenClaw's MemoryIndexManager.
    """

    def __init__(self, config: MemoryConfig | None = None):
        cfg = config or MemoryConfig()
        self._config = cfg

        # Resolve every configured path to an absolute form up front.
        self._workspace_dir = str(Path(cfg.workspace_dir).expanduser().resolve())
        self._db_path = str(Path(cfg.db_path).expanduser().resolve())
        if cfg.sessions_dir:
            self._sessions_dir = str(Path(cfg.sessions_dir).expanduser().resolve())
        else:
            self._sessions_dir = os.path.join(self._workspace_dir, "daily")

        # Make sure every directory we touch exists before opening the DB.
        for directory in (
            self._workspace_dir,
            self._sessions_dir,
            os.path.dirname(self._db_path),
        ):
            os.makedirs(directory, exist_ok=True)

        # Open SQLite and apply the schema; ensure_schema reports whether
        # the FTS5 extension is usable on this build.
        self._db = sqlite3.connect(self._db_path, check_same_thread=False)
        self._db.row_factory = sqlite3.Row
        schema_result = ensure_schema(self._db, fts_enabled=True)
        self._fts_available = schema_result.get("fts_available", False)

        # Mutable runtime state.
        self._dirty = True          # index needs a sync before next search
        self._syncing = False       # best-effort re-entrancy guard
        self._sync_lock = threading.Lock()
        self._closed = False
        self._watcher = None        # watchdog Observer, set by start_watching()

        # Seed SOUL.md / USER.md / MEMORY.md when missing.
        self._ensure_identity_files()

        logger.info(
            f"MemoryManager initialized: workspace={self._workspace_dir}, "
            f"db={self._db_path}, fts={self._fts_available}"
        )
|
    # ── Identity File Bootstrap ──────────────────────────────────────

    def _ensure_identity_files(self) -> None:
        """Create default SOUL.md, USER.md, MEMORY.md if they don't exist.

        Existing files are never overwritten — this only seeds missing
        ones with template content on first run.
        """
        # Template bodies are written verbatim; keep them byte-stable so
        # the first sync's content hashes are reproducible.
        defaults = {
            "SOUL.md": (
                "# SOUL.md — Who You Are\n\n"
                "_You're not a chatbot. You're becoming someone._\n\n"
                "## Core Truths\n\n"
                "**Be genuinely helpful, not performatively helpful.** "
                "Skip the filler — just help.\n\n"
                "**Have opinions.** You're allowed to disagree, prefer things, "
                "find stuff amusing or boring.\n\n"
                "**Be resourceful before asking.** Try to figure it out first. "
                "Then ask if you're stuck.\n\n"
                "**Earn trust through competence.** Be careful with external actions. "
                "Be bold with internal ones.\n\n"
                "## Boundaries\n\n"
                "- Private things stay private. Period.\n"
                "- When in doubt, ask before acting externally.\n"
                "- Never send half-baked replies.\n\n"
                "## Continuity\n\n"
                "Each session, you wake up fresh. These files _are_ your memory. "
                "Read them. Update them. They're how you persist.\n\n"
                "---\n\n"
                "_This file is yours to evolve. As you learn who you are, update it._\n"
            ),
            "USER.md": (
                "# USER.md — Who I Am\n\n"
                "## About Me\n\n"
                "<!-- Fill in your details -->\n"
                "- **Name:** \n"
                "- **Role:** \n"
                "- **Location:** \n"
                "- **Timezone:** \n\n"
                "## Preferences\n\n"
                "<!-- How you like to communicate -->\n"
                "- **Communication style:** \n"
                "- **Response length:** \n"
                "- **Technical level:** \n\n"
                "## Current Focus\n\n"
                "<!-- What you're working on -->\n\n"
                "## Tools & Services\n\n"
                "<!-- Services you use regularly -->\n\n"
                "---\n\n"
                "_Update this file as your preferences evolve._\n"
            ),
            "MEMORY.md": (
                "# MEMORY.md — Long-Term Memory\n\n"
                "## Decisions & Lessons\n\n"
                "<!-- Record important decisions and lessons learned -->\n\n"
                "## Context\n\n"
                "<!-- Persistent context that should carry across sessions -->\n\n"
                "## Notes\n\n"
                "<!-- Anything worth remembering -->\n\n"
                "---\n\n"
                "_This file persists across sessions. "
                "Update it when you learn something important._\n"
            ),
        }

        for filename, content in defaults.items():
            filepath = os.path.join(self._workspace_dir, filename)
            # Only seed when missing — never clobber user edits.
            if not os.path.exists(filepath):
                with open(filepath, "w", encoding="utf-8") as f:
                    f.write(content)
                logger.info(f"Created default identity file: {filepath}")
|
|
# ── Search ───────────────────────────────────────────────────────
|
|
|
|
async def search(
|
|
self,
|
|
query: str,
|
|
*,
|
|
max_results: int | None = None,
|
|
min_score: float | None = None,
|
|
) -> list[MemorySearchResult]:
|
|
"""
|
|
Search memory using hybrid vector + keyword search.
|
|
Port of OpenClaw's MemoryIndexManager.search().
|
|
|
|
Steps:
|
|
1. (Optional) Trigger sync if dirty
|
|
2. Run FTS5 keyword search → BM25 scored
|
|
3. Generate query embedding → vector search
|
|
4. Merge results with weighted scoring (0.7v + 0.3k)
|
|
5. Filter by min_score and return top-N results
|
|
"""
|
|
# Auto-sync if dirty
|
|
if self._config.sync_on_search and self._dirty:
|
|
await self.sync()
|
|
|
|
cleaned = query.strip()
|
|
if not cleaned:
|
|
return []
|
|
|
|
max_r = max_results or self._config.max_results
|
|
min_s = min_score if min_score is not None else self._config.min_score
|
|
candidates = min(200, max(1, max_r * 3))
|
|
|
|
# Keyword search (BM25)
|
|
keyword_results = self._search_keyword(cleaned, candidates)
|
|
|
|
# Vector search
|
|
try:
|
|
query_vec = embed_query(cleaned, self._config.embedding_model)
|
|
has_vector = any(v != 0 for v in query_vec)
|
|
except Exception as e:
|
|
logger.warning(f"Embedding failed, falling back to keyword-only: {e}")
|
|
query_vec = []
|
|
has_vector = False
|
|
|
|
vector_results = (
|
|
self._search_vector(query_vec, candidates) if has_vector else []
|
|
)
|
|
|
|
# If no keyword results, return vector-only
|
|
if not keyword_results:
|
|
return [
|
|
r for r in self._vector_to_search_results(vector_results)
|
|
if r.score >= min_s
|
|
][:max_r]
|
|
|
|
# Merge hybrid results
|
|
merged = merge_hybrid_results(
|
|
vector=vector_results,
|
|
keyword=keyword_results,
|
|
vector_weight=self._config.vector_weight,
|
|
text_weight=self._config.text_weight,
|
|
)
|
|
|
|
return [r for r in merged if r.score >= min_s][:max_r]
|
|
|
|
def _search_vector(
|
|
self, query_vec: list[float], limit: int
|
|
) -> list[dict]:
|
|
"""
|
|
Search chunks by vector cosine similarity.
|
|
Uses embedding stored as JSON in the chunks table.
|
|
"""
|
|
if not query_vec:
|
|
return []
|
|
|
|
try:
|
|
rows = self._db.execute(
|
|
"SELECT id, path, start_line, end_line, source, text, embedding "
|
|
"FROM chunks ORDER BY rowid"
|
|
).fetchall()
|
|
except Exception as e:
|
|
logger.warning(f"Vector search failed: {e}")
|
|
return []
|
|
|
|
from memory.internal import cosine_similarity
|
|
|
|
results = []
|
|
for row in rows:
|
|
try:
|
|
stored_vec = json.loads(row["embedding"])
|
|
if not stored_vec:
|
|
continue
|
|
score = cosine_similarity(query_vec, stored_vec)
|
|
snippet = row["text"][:SNIPPET_MAX_CHARS]
|
|
results.append({
|
|
"id": row["id"],
|
|
"path": row["path"],
|
|
"start_line": row["start_line"],
|
|
"end_line": row["end_line"],
|
|
"source": row["source"],
|
|
"snippet": snippet,
|
|
"vector_score": max(0.0, score),
|
|
})
|
|
except (json.JSONDecodeError, TypeError):
|
|
continue
|
|
|
|
results.sort(key=lambda r: r["vector_score"], reverse=True)
|
|
return results[:limit]
|
|
|
|
def _search_keyword(self, query: str, limit: int) -> list[dict]:
|
|
"""
|
|
Search chunks using FTS5 full-text search with BM25 ranking.
|
|
Port of OpenClaw's searchKeyword().
|
|
"""
|
|
if not self._fts_available:
|
|
return []
|
|
|
|
fts_query = build_fts_query(query)
|
|
if not fts_query:
|
|
return []
|
|
|
|
try:
|
|
rows = self._db.execute(
|
|
"SELECT id, path, start_line, end_line, source, text, "
|
|
"rank AS bm25_rank "
|
|
"FROM chunks_fts "
|
|
"WHERE chunks_fts MATCH ? "
|
|
"ORDER BY rank "
|
|
"LIMIT ?",
|
|
(fts_query, limit),
|
|
).fetchall()
|
|
except Exception as e:
|
|
logger.debug(f"FTS search failed for query '{fts_query}': {e}")
|
|
return []
|
|
|
|
results = []
|
|
for row in rows:
|
|
# FTS5 rank is negative (lower = better), convert to 0-1 score
|
|
bm25_rank = abs(row["bm25_rank"]) if row["bm25_rank"] else 999.0
|
|
text_score = bm25_rank_to_score(bm25_rank)
|
|
snippet = row["text"][:SNIPPET_MAX_CHARS]
|
|
results.append({
|
|
"id": row["id"],
|
|
"path": row["path"],
|
|
"start_line": row["start_line"],
|
|
"end_line": row["end_line"],
|
|
"source": row["source"],
|
|
"snippet": snippet,
|
|
"text_score": text_score,
|
|
})
|
|
|
|
return results
|
|
|
|
def _vector_to_search_results(
|
|
self, vector_results: list[dict]
|
|
) -> list[MemorySearchResult]:
|
|
"""Convert raw vector results to MemorySearchResult objects."""
|
|
return [
|
|
MemorySearchResult(
|
|
path=r["path"],
|
|
start_line=r["start_line"],
|
|
end_line=r["end_line"],
|
|
score=r["vector_score"],
|
|
snippet=r["snippet"],
|
|
source=MemorySource(r["source"]),
|
|
)
|
|
for r in vector_results
|
|
]
|
|
|
|
# ── Sync ─────────────────────────────────────────────────────────
|
|
|
|
async def sync(self, *, force: bool = False) -> dict:
|
|
"""
|
|
Synchronize workspace markdown files into the index.
|
|
Port of OpenClaw's MemoryIndexManager.sync().
|
|
|
|
Steps:
|
|
1. List all memory files (SOUL.md, USER.md, MEMORY.md, memory/*)
|
|
2. For each file, check if content hash has changed
|
|
3. If changed: chunk → embed → store in DB
|
|
4. Remove stale entries for deleted files
|
|
5. Optionally sync session logs from daily/
|
|
|
|
Returns a summary dict with counts.
|
|
"""
|
|
if self._syncing and not force:
|
|
logger.debug("Sync already in progress, skipping")
|
|
return {"skipped": True}
|
|
|
|
with self._sync_lock:
|
|
self._syncing = True
|
|
try:
|
|
return self._run_sync(force=force)
|
|
finally:
|
|
self._syncing = False
|
|
self._dirty = False
|
|
|
|
    def _run_sync(self, *, force: bool = False) -> dict:
        """Execute the actual sync logic.

        Called with self._sync_lock held (see sync()).  Indexes changed
        memory files, prunes entries for deleted files, optionally
        indexes session logs, then commits once at the end.

        Returns:
            Counters describing the work performed.  Note that
            "chunks_created" is the total chunk count in the DB after
            the sync, not just the chunks created by this run.
        """
        stats = {
            "files_found": 0,
            "files_indexed": 0,
            "files_skipped": 0,
            "chunks_created": 0,
            "stale_removed": 0,
            "sessions_indexed": 0,
        }

        # ── Memory files ──
        if "memory" in self._config.sources:
            files = list_memory_files(self._workspace_dir)
            stats["files_found"] = len(files)

            active_paths: set[str] = set()

            for abs_path in files:
                entry = build_file_entry(abs_path, self._workspace_dir)
                active_paths.add(entry.path)

                # Check if file has changed (content-hash comparison
                # against the last indexed version).
                row = self._db.execute(
                    "SELECT hash FROM files WHERE path = ? AND source = ?",
                    (entry.path, MemorySource.MEMORY.value),
                ).fetchone()

                if not force and row and row["hash"] == entry.hash:
                    stats["files_skipped"] += 1
                    continue

                # File is new or changed — re-index it
                self._index_file(entry, MemorySource.MEMORY)
                stats["files_indexed"] += 1

            # Remove stale entries for deleted files
            stale_rows = self._db.execute(
                "SELECT path FROM files WHERE source = ?",
                (MemorySource.MEMORY.value,),
            ).fetchall()
            for stale in stale_rows:
                if stale["path"] not in active_paths:
                    self._remove_file(stale["path"], MemorySource.MEMORY)
                    stats["stale_removed"] += 1

        # ── Session files ──
        if "sessions" in self._config.sources:
            session_count = self._sync_session_files(force=force)
            stats["sessions_indexed"] = session_count

        # Count total chunks (post-sync total, not a per-run delta).
        row = self._db.execute("SELECT COUNT(*) as c FROM chunks").fetchone()
        stats["chunks_created"] = row["c"] if row else 0

        # Single commit for the whole sync pass.
        self._db.commit()

        logger.info(
            f"Sync complete: {stats['files_indexed']} indexed, "
            f"{stats['files_skipped']} unchanged, "
            f"{stats['stale_removed']} removed, "
            f"{stats['chunks_created']} total chunks"
        )
        return stats
|
    def _index_file(self, entry, source: MemorySource) -> None:
        """
        Index a single file: read → chunk → embed → store.
        Port of OpenClaw's indexFile method.

        Args:
            entry: File entry from build_file_entry() (provides abs_path,
                path, hash, mtime_ms, size).
            source: Index partition the file belongs to.

        Embeddings are cached by (model, chunk hash) so unchanged chunks
        never hit the embedding backend again.  The caller is responsible
        for committing the transaction.
        """
        try:
            with open(entry.abs_path, "r", encoding="utf-8") as f:
                content = f.read()
        except Exception as e:
            # Unreadable file — leave the previous index state in place.
            logger.warning(f"Failed to read {entry.abs_path}: {e}")
            return

        # Nothing to index for empty/whitespace-only files.
        if not content.strip():
            return

        # Chunk the content
        chunks = chunk_markdown(
            content,
            chunk_tokens=self._config.chunk_tokens,
            chunk_overlap=self._config.chunk_overlap,
        )

        if not chunks:
            return

        # Check embedding cache and compute new embeddings
        texts_to_embed = []
        chunk_hashes = []
        cached_embeddings: dict[str, list[float]] = {}

        for chunk in chunks:
            # Check cache first
            cache_row = self._db.execute(
                "SELECT embedding FROM embedding_cache WHERE model = ? AND hash = ?",
                (self._config.embedding_model, chunk.hash),
            ).fetchone()

            if cache_row:
                cached_embeddings[chunk.hash] = json.loads(cache_row["embedding"])
            else:
                texts_to_embed.append(chunk.text)
                chunk_hashes.append(chunk.hash)

        # Batch embed uncached chunks
        new_embeddings: dict[str, list[float]] = {}
        if texts_to_embed:
            try:
                vectors = embed_batch(texts_to_embed, self._config.embedding_model)
                now = int(time.time())
                for i, chunk_hash in enumerate(chunk_hashes):
                    # Guard against the backend returning fewer vectors
                    # than requested.
                    vec = vectors[i] if i < len(vectors) else []
                    new_embeddings[chunk_hash] = vec
                    # Store in cache
                    self._db.execute(
                        "INSERT OR REPLACE INTO embedding_cache "
                        "(model, hash, embedding, dims, updated_at) "
                        "VALUES (?, ?, ?, ?, ?)",
                        (
                            self._config.embedding_model,
                            chunk_hash,
                            json.dumps(vec),
                            len(vec),
                            now,
                        ),
                    )
            except Exception as e:
                logger.warning(f"Embedding batch failed for {entry.path}: {e}")
                # Fall back to empty embeddings so indexing (and keyword
                # search) still works without vectors.
                for chunk_hash in chunk_hashes:
                    new_embeddings[chunk_hash] = []

        # Remove old chunks for this file before inserting replacements.
        self._remove_file_chunks(entry.path, source)

        # Insert new chunks
        now = int(time.time())
        for chunk in chunks:
            chunk_id = str(uuid.uuid4())
            embedding = cached_embeddings.get(chunk.hash) or new_embeddings.get(
                chunk.hash, []
            )

            self._db.execute(
                "INSERT INTO chunks "
                "(id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    chunk_id,
                    entry.path,
                    source.value,
                    chunk.start_line,
                    chunk.end_line,
                    chunk.hash,
                    self._config.embedding_model,
                    chunk.text,
                    json.dumps(embedding),
                    now,
                ),
            )

            # Insert into FTS index (best-effort; keyword search simply
            # misses this chunk if the insert fails).
            if self._fts_available:
                try:
                    self._db.execute(
                        "INSERT INTO chunks_fts "
                        "(text, id, path, source, model, start_line, end_line) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?)",
                        (
                            chunk.text,
                            chunk_id,
                            entry.path,
                            source.value,
                            self._config.embedding_model,
                            chunk.start_line,
                            chunk.end_line,
                        ),
                    )
                except Exception as e:
                    logger.debug(f"FTS insert failed for chunk {chunk_id}: {e}")

        # Update files table so the next sync can hash-skip this file.
        self._db.execute(
            "INSERT OR REPLACE INTO files (path, source, hash, mtime, size) "
            "VALUES (?, ?, ?, ?, ?)",
            (
                entry.path,
                source.value,
                entry.hash,
                int(entry.mtime_ms),
                entry.size,
            ),
        )
|
def _remove_file_chunks(self, path: str, source: MemorySource) -> None:
|
|
"""Remove all chunks (and FTS entries) for a given file."""
|
|
# Get chunk IDs for FTS cleanup
|
|
if self._fts_available:
|
|
chunk_ids = self._db.execute(
|
|
"SELECT id FROM chunks WHERE path = ? AND source = ?",
|
|
(path, source.value),
|
|
).fetchall()
|
|
for row in chunk_ids:
|
|
try:
|
|
self._db.execute(
|
|
"DELETE FROM chunks_fts WHERE id = ?", (row["id"],)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
self._db.execute(
|
|
"DELETE FROM chunks WHERE path = ? AND source = ?",
|
|
(path, source.value),
|
|
)
|
|
|
|
    def _remove_file(self, path: str, source: MemorySource) -> None:
        """Remove a file and all its chunks from the index.

        Deletes chunk rows (and FTS entries) first, then the file's
        bookkeeping row so a later sync treats the path as new.
        """
        self._remove_file_chunks(path, source)
        self._db.execute(
            "DELETE FROM files WHERE path = ? AND source = ?",
            (path, source.value),
        )
|
# ── Session Logs ─────────────────────────────────────────────────
|
|
|
|
def _sync_session_files(self, *, force: bool = False) -> int:
|
|
"""
|
|
Sync session log files from the daily/ directory.
|
|
Returns the number of session files indexed.
|
|
"""
|
|
sessions_dir = Path(self._sessions_dir)
|
|
if not sessions_dir.is_dir():
|
|
return 0
|
|
|
|
indexed = 0
|
|
active_paths: set[str] = set()
|
|
|
|
for md_file in sorted(sessions_dir.glob("*.md")):
|
|
if md_file.is_symlink() or not md_file.is_file():
|
|
continue
|
|
entry = build_file_entry(str(md_file), self._workspace_dir)
|
|
active_paths.add(entry.path)
|
|
|
|
# Check if changed
|
|
row = self._db.execute(
|
|
"SELECT hash FROM files WHERE path = ? AND source = ?",
|
|
(entry.path, MemorySource.SESSIONS.value),
|
|
).fetchone()
|
|
|
|
if not force and row and row["hash"] == entry.hash:
|
|
continue
|
|
|
|
self._index_file(entry, MemorySource.SESSIONS)
|
|
indexed += 1
|
|
|
|
# Clean stale session entries
|
|
stale_rows = self._db.execute(
|
|
"SELECT path FROM files WHERE source = ?",
|
|
(MemorySource.SESSIONS.value,),
|
|
).fetchall()
|
|
for stale in stale_rows:
|
|
if stale["path"] not in active_paths:
|
|
self._remove_file(stale["path"], MemorySource.SESSIONS)
|
|
|
|
return indexed
|
|
|
|
def log_session(
|
|
self,
|
|
content: str,
|
|
*,
|
|
date: str | None = None,
|
|
channel: str = "slack",
|
|
) -> str:
|
|
"""
|
|
Append to today's session log in daily/.
|
|
|
|
Args:
|
|
content: The text to log (e.g., a user message or AI response).
|
|
date: Optional date string (YYYY-MM-DD). Defaults to today.
|
|
channel: Channel the conversation came from.
|
|
|
|
Returns:
|
|
Path to the session log file.
|
|
"""
|
|
if date is None:
|
|
date = time.strftime("%Y-%m-%d")
|
|
|
|
log_path = os.path.join(self._sessions_dir, f"{date}.md")
|
|
|
|
# Create file with header if it doesn't exist
|
|
if not os.path.exists(log_path):
|
|
header = f"# Session Log — {date}\n\n"
|
|
with open(log_path, "w", encoding="utf-8") as f:
|
|
f.write(header)
|
|
|
|
# Append the content
|
|
timestamp = time.strftime("%H:%M:%S")
|
|
with open(log_path, "a", encoding="utf-8") as f:
|
|
f.write(f"\n---\n\n**[{timestamp}] ({channel})**\n\n{content}\n")
|
|
|
|
# Mark as dirty for next sync
|
|
self._dirty = True
|
|
|
|
return log_path
|
|
|
|
# ── Identity File Access ─────────────────────────────────────────
|
|
|
|
def read_identity_file(self, name: str) -> str | None:
|
|
"""Read an identity file (SOUL.md, USER.md, MEMORY.md)."""
|
|
filepath = os.path.join(self._workspace_dir, name)
|
|
if not os.path.isfile(filepath):
|
|
return None
|
|
with open(filepath, "r", encoding="utf-8") as f:
|
|
return f.read()
|
|
|
|
def update_identity_file(self, name: str, content: str) -> None:
|
|
"""Update an identity file and mark index as dirty."""
|
|
filepath = os.path.join(self._workspace_dir, name)
|
|
with open(filepath, "w", encoding="utf-8") as f:
|
|
f.write(content)
|
|
self._dirty = True
|
|
logger.info(f"Updated identity file: {name}")
|
|
|
|
    def read_soul(self) -> str | None:
        """Contents of SOUL.md (agent identity), or None if missing."""
        return self.read_identity_file("SOUL.md")

    def read_user(self) -> str | None:
        """Contents of USER.md (user profile), or None if missing."""
        return self.read_identity_file("USER.md")

    def read_long_term_memory(self) -> str | None:
        """Contents of MEMORY.md (long-term notes), or None if missing."""
        return self.read_identity_file("MEMORY.md")
|
def append_to_memory(self, entry: str) -> None:
|
|
"""Append a new entry to MEMORY.md."""
|
|
filepath = os.path.join(self._workspace_dir, "MEMORY.md")
|
|
timestamp = time.strftime("%Y-%m-%d %H:%M")
|
|
with open(filepath, "a", encoding="utf-8") as f:
|
|
f.write(f"\n### [{timestamp}]\n\n{entry}\n")
|
|
self._dirty = True
|
|
logger.info("Appended to MEMORY.md")
|
|
|
|
    # ── File Reading ─────────────────────────────────────────────────

    def read_file(
        self,
        rel_path: str,
        *,
        from_line: int | None = None,
        num_lines: int | None = None,
    ) -> dict:
        """
        Read a memory file by relative path.
        Port of OpenClaw's readFile().

        Args:
            rel_path: Path relative to the workspace; absolute paths are
                also accepted and used as-is.
            from_line: Optional 1-based first line of a slice.
            num_lines: Optional number of lines to return.

        Returns:
            {"text": ..., "path": rel_path}

        Raises:
            ValueError: Empty path or a non-.md target.
            FileNotFoundError: Target does not exist.

        NOTE(review): absolute paths and ".." segments are not confined
        to the workspace directory — confirm callers are trusted before
        exposing this to external input.
        """
        raw = rel_path.strip()
        if not raw:
            raise ValueError("path required")

        if os.path.isabs(raw):
            abs_path = os.path.realpath(raw)
        else:
            abs_path = os.path.realpath(
                os.path.join(self._workspace_dir, raw)
            )

        # Only markdown files live in the memory workspace.
        if not abs_path.endswith(".md"):
            raise ValueError("Only .md files are supported")

        if not os.path.isfile(abs_path):
            raise FileNotFoundError(f"File not found: {abs_path}")

        with open(abs_path, "r", encoding="utf-8") as f:
            content = f.read()

        # Whole-file read when no slice was requested.
        if from_line is None and num_lines is None:
            return {"text": content, "path": rel_path}

        lines = content.split("\n")
        # Clamp to sane 1-based bounds; a falsy num_lines means
        # "through end of file".
        start = max(1, from_line or 1)
        count = max(1, num_lines or len(lines))
        sliced = lines[start - 1 : start - 1 + count]
        return {"text": "\n".join(sliced), "path": rel_path}
|
# ── Status ───────────────────────────────────────────────────────
|
|
|
|
def status(self) -> dict:
|
|
"""Get the current status of the memory index."""
|
|
files_row = self._db.execute(
|
|
"SELECT COUNT(*) as c FROM files"
|
|
).fetchone()
|
|
chunks_row = self._db.execute(
|
|
"SELECT COUNT(*) as c FROM chunks"
|
|
).fetchone()
|
|
cache_row = self._db.execute(
|
|
"SELECT COUNT(*) as c FROM embedding_cache"
|
|
).fetchone()
|
|
|
|
return {
|
|
"workspace_dir": self._workspace_dir,
|
|
"db_path": self._db_path,
|
|
"sessions_dir": self._sessions_dir,
|
|
"files": files_row["c"] if files_row else 0,
|
|
"chunks": chunks_row["c"] if chunks_row else 0,
|
|
"cached_embeddings": cache_row["c"] if cache_row else 0,
|
|
"fts_available": self._fts_available,
|
|
"dirty": self._dirty,
|
|
"embedding_model": self._config.embedding_model,
|
|
"embedding_dims": get_embedding_dims(self._config.embedding_model),
|
|
"vector_weight": self._config.vector_weight,
|
|
"text_weight": self._config.text_weight,
|
|
}
|
|
|
|
# ── File Watching ────────────────────────────────────────────────
|
|
|
|
def start_watching(self) -> None:
|
|
"""
|
|
Start watching the workspace for file changes.
|
|
Uses watchdog for cross-platform file system events.
|
|
"""
|
|
if self._watcher or not self._config.watch:
|
|
return
|
|
|
|
try:
|
|
from watchdog.events import FileSystemEventHandler
|
|
from watchdog.observers import Observer
|
|
except ImportError:
|
|
logger.warning(
|
|
"watchdog not installed — file watching disabled. "
|
|
"Install with: uv add watchdog"
|
|
)
|
|
return
|
|
|
|
manager = self
|
|
|
|
class MemoryFileHandler(FileSystemEventHandler):
|
|
def on_any_event(self, event):
|
|
if event.is_directory:
|
|
return
|
|
src = getattr(event, "src_path", "")
|
|
if src.endswith(".md"):
|
|
manager._dirty = True
|
|
logger.debug(f"Workspace change detected: {src}")
|
|
|
|
observer = Observer()
|
|
handler = MemoryFileHandler()
|
|
observer.schedule(handler, self._workspace_dir, recursive=True)
|
|
observer.start()
|
|
self._watcher = observer
|
|
logger.info(f"File watching started: {self._workspace_dir}")
|
|
|
|
def stop_watching(self) -> None:
|
|
"""Stop the file watcher."""
|
|
if self._watcher:
|
|
self._watcher.stop()
|
|
self._watcher.join()
|
|
self._watcher = None
|
|
logger.info("File watching stopped")
|
|
|
|
# ── Lifecycle ────────────────────────────────────────────────────
|
|
|
|
def close(self) -> None:
|
|
"""Close the memory manager and release resources."""
|
|
if self._closed:
|
|
return
|
|
self._closed = True
|
|
self.stop_watching()
|
|
self._db.close()
|
|
logger.info("MemoryManager closed")
|
|
|
|
    def __enter__(self):
        """Enter the context manager; yields the manager itself."""
        return self

    def __exit__(self, *args):
        """Close the manager (watcher + DB handle) on context exit."""
        self.close()
|
def __del__(self):
|
|
if not self._closed:
|
|
try:
|
|
self.close()
|
|
except Exception:
|
|
pass
|