first commit
memory/manager.py (new file, 839 lines)
@@ -0,0 +1,839 @@
"""
MemoryManager — the main memory system orchestrator.

Port of OpenClaw's MemoryIndexManager (src/memory/manager.ts, 2,300 LOC).

Lifecycle: sync → chunk → embed → store → search

Key features:
• Incremental sync — only re-indexes changed files (hash-based)
• Hybrid search — vector (0.7) + BM25 keyword (0.3)
• File watching — auto re-index on workspace changes (via watchdog)
• Embedding cache — avoids re-computing embeddings for unchanged chunks
• Session log indexing — indexes daily/ conversation transcripts
"""

import json
import logging
import os
import sqlite3
import threading
import time
import uuid
from pathlib import Path

from memory.embeddings import embed_batch, embed_query, get_embedding_dims
from memory.hybrid import bm25_rank_to_score, build_fts_query, merge_hybrid_results
from memory.internal import (
    build_file_entry,
    chunk_markdown,
    hash_text,
    list_memory_files,
)
from memory.schema import ensure_schema
from memory.types import (
    MemoryConfig,
    MemorySearchResult,
    MemorySource,
)

logger = logging.getLogger("aetheel.memory")

SNIPPET_MAX_CHARS = 700


class MemoryManager:
    """
    Main memory system — manages the full lifecycle:
    sync → chunk → embed → store → search

    Inspired by OpenClaw's MemoryIndexManager.
    """

    def __init__(self, config: MemoryConfig | None = None):
        self._config = config or MemoryConfig()
        self._workspace_dir = str(
            Path(self._config.workspace_dir).expanduser().resolve()
        )
        self._db_path = str(Path(self._config.db_path).expanduser().resolve())
        self._sessions_dir = (
            str(Path(self._config.sessions_dir).expanduser().resolve())
            if self._config.sessions_dir
            else os.path.join(self._workspace_dir, "daily")
        )

        # Ensure directories exist
        os.makedirs(self._workspace_dir, exist_ok=True)
        os.makedirs(self._sessions_dir, exist_ok=True)
        os.makedirs(os.path.dirname(self._db_path), exist_ok=True)

        # Open database and ensure schema
        self._db = sqlite3.connect(self._db_path, check_same_thread=False)
        self._db.row_factory = sqlite3.Row
        schema_result = ensure_schema(self._db, fts_enabled=True)
        self._fts_available = schema_result.get("fts_available", False)

        # State
        self._dirty = True
        self._syncing = False
        self._sync_lock = threading.Lock()
        self._closed = False
        self._watcher = None

        # Create default identity files if they don't exist
        self._ensure_identity_files()

        logger.info(
            f"MemoryManager initialized: workspace={self._workspace_dir}, "
            f"db={self._db_path}, fts={self._fts_available}"
        )

    # ── Identity File Bootstrap ──────────────────────────────────────

    def _ensure_identity_files(self) -> None:
        """Create default SOUL.md, USER.md, MEMORY.md if they don't exist."""
        defaults = {
            "SOUL.md": (
                "# SOUL.md — Who You Are\n\n"
                "_You're not a chatbot. You're becoming someone._\n\n"
                "## Core Truths\n\n"
                "**Be genuinely helpful, not performatively helpful.** "
                "Skip the filler — just help.\n\n"
                "**Have opinions.** You're allowed to disagree, prefer things, "
                "find stuff amusing or boring.\n\n"
                "**Be resourceful before asking.** Try to figure it out first. "
                "Then ask if you're stuck.\n\n"
                "**Earn trust through competence.** Be careful with external actions. "
                "Be bold with internal ones.\n\n"
                "## Boundaries\n\n"
                "- Private things stay private. Period.\n"
                "- When in doubt, ask before acting externally.\n"
                "- Never send half-baked replies.\n\n"
                "## Continuity\n\n"
                "Each session, you wake up fresh. These files _are_ your memory. "
                "Read them. Update them. They're how you persist.\n\n"
                "---\n\n"
                "_This file is yours to evolve. As you learn who you are, update it._\n"
            ),
            "USER.md": (
                "# USER.md — Who I Am\n\n"
                "## About Me\n\n"
                "<!-- Fill in your details -->\n"
                "- **Name:** \n"
                "- **Role:** \n"
                "- **Location:** \n"
                "- **Timezone:** \n\n"
                "## Preferences\n\n"
                "<!-- How you like to communicate -->\n"
                "- **Communication style:** \n"
                "- **Response length:** \n"
                "- **Technical level:** \n\n"
                "## Current Focus\n\n"
                "<!-- What you're working on -->\n\n"
                "## Tools & Services\n\n"
                "<!-- Services you use regularly -->\n\n"
                "---\n\n"
                "_Update this file as your preferences evolve._\n"
            ),
            "MEMORY.md": (
                "# MEMORY.md — Long-Term Memory\n\n"
                "## Decisions & Lessons\n\n"
                "<!-- Record important decisions and lessons learned -->\n\n"
                "## Context\n\n"
                "<!-- Persistent context that should carry across sessions -->\n\n"
                "## Notes\n\n"
                "<!-- Anything worth remembering -->\n\n"
                "---\n\n"
                "_This file persists across sessions. "
                "Update it when you learn something important._\n"
            ),
        }

        for filename, content in defaults.items():
            filepath = os.path.join(self._workspace_dir, filename)
            if not os.path.exists(filepath):
                with open(filepath, "w", encoding="utf-8") as f:
                    f.write(content)
                logger.info(f"Created default identity file: {filepath}")

    # ── Search ───────────────────────────────────────────────────────

    async def search(
        self,
        query: str,
        *,
        max_results: int | None = None,
        min_score: float | None = None,
    ) -> list[MemorySearchResult]:
        """
        Search memory using hybrid vector + keyword search.
        Port of OpenClaw's MemoryIndexManager.search().

        Steps:
        1. (Optional) Trigger sync if dirty
        2. Run FTS5 keyword search → BM25 scored
        3. Generate query embedding → vector search
        4. Merge results with weighted scoring (0.7v + 0.3k)
        5. Filter by min_score and return top-N results
        """
        # Auto-sync if dirty
        if self._config.sync_on_search and self._dirty:
            await self.sync()

        cleaned = query.strip()
        if not cleaned:
            return []

        max_r = max_results or self._config.max_results
        min_s = min_score if min_score is not None else self._config.min_score
        candidates = min(200, max(1, max_r * 3))

        # Keyword search (BM25)
        keyword_results = self._search_keyword(cleaned, candidates)

        # Vector search
        try:
            query_vec = embed_query(cleaned, self._config.embedding_model)
            has_vector = any(v != 0 for v in query_vec)
        except Exception as e:
            logger.warning(f"Embedding failed, falling back to keyword-only: {e}")
            query_vec = []
            has_vector = False

        vector_results = (
            self._search_vector(query_vec, candidates) if has_vector else []
        )

        # If no keyword results, return vector-only
        if not keyword_results:
            return [
                r for r in self._vector_to_search_results(vector_results)
                if r.score >= min_s
            ][:max_r]

        # Merge hybrid results
        merged = merge_hybrid_results(
            vector=vector_results,
            keyword=keyword_results,
            vector_weight=self._config.vector_weight,
            text_weight=self._config.text_weight,
        )

        return [r for r in merged if r.score >= min_s][:max_r]
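
    # Worked example of the hybrid merge above (illustrative): assuming
    # merge_hybrid_results computes the weighted sum described in the module
    # docstring, a chunk with vector_score 0.80 and text_score 0.40 merges to
    # 0.7 * 0.80 + 0.3 * 0.40 = 0.68 under the default weights, and is then
    # subject to the min_score filter.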

    def _search_vector(
        self, query_vec: list[float], limit: int
    ) -> list[dict]:
        """
        Search chunks by vector cosine similarity.
        Uses embeddings stored as JSON in the chunks table.
        """
        if not query_vec:
            return []

        try:
            rows = self._db.execute(
                "SELECT id, path, start_line, end_line, source, text, embedding "
                "FROM chunks ORDER BY rowid"
            ).fetchall()
        except Exception as e:
            logger.warning(f"Vector search failed: {e}")
            return []

        from memory.internal import cosine_similarity

        results = []
        for row in rows:
            try:
                stored_vec = json.loads(row["embedding"])
                if not stored_vec:
                    continue
                score = cosine_similarity(query_vec, stored_vec)
                snippet = row["text"][:SNIPPET_MAX_CHARS]
                results.append({
                    "id": row["id"],
                    "path": row["path"],
                    "start_line": row["start_line"],
                    "end_line": row["end_line"],
                    "source": row["source"],
                    "snippet": snippet,
                    "vector_score": max(0.0, score),
                })
            except (json.JSONDecodeError, TypeError):
                continue

        results.sort(key=lambda r: r["vector_score"], reverse=True)
        return results[:limit]

    def _search_keyword(self, query: str, limit: int) -> list[dict]:
        """
        Search chunks using FTS5 full-text search with BM25 ranking.
        Port of OpenClaw's searchKeyword().
        """
        if not self._fts_available:
            return []

        fts_query = build_fts_query(query)
        if not fts_query:
            return []

        try:
            rows = self._db.execute(
                "SELECT id, path, start_line, end_line, source, text, "
                "rank AS bm25_rank "
                "FROM chunks_fts "
                "WHERE chunks_fts MATCH ? "
                "ORDER BY rank "
                "LIMIT ?",
                (fts_query, limit),
            ).fetchall()
        except Exception as e:
            logger.debug(f"FTS search failed for query '{fts_query}': {e}")
            return []

        results = []
        for row in rows:
            # FTS5 rank is negative (lower = better), convert to 0-1 score
            bm25_rank = abs(row["bm25_rank"]) if row["bm25_rank"] else 999.0
            text_score = bm25_rank_to_score(bm25_rank)
            snippet = row["text"][:SNIPPET_MAX_CHARS]
            results.append({
                "id": row["id"],
                "path": row["path"],
                "start_line": row["start_line"],
                "end_line": row["end_line"],
                "source": row["source"],
                "snippet": snippet,
                "text_score": text_score,
            })

        return results

    def _vector_to_search_results(
        self, vector_results: list[dict]
    ) -> list[MemorySearchResult]:
        """Convert raw vector results to MemorySearchResult objects."""
        return [
            MemorySearchResult(
                path=r["path"],
                start_line=r["start_line"],
                end_line=r["end_line"],
                score=r["vector_score"],
                snippet=r["snippet"],
                source=MemorySource(r["source"]),
            )
            for r in vector_results
        ]

    # ── Sync ─────────────────────────────────────────────────────────

    async def sync(self, *, force: bool = False) -> dict:
        """
        Synchronize workspace markdown files into the index.
        Port of OpenClaw's MemoryIndexManager.sync().

        Steps:
        1. List all memory files (SOUL.md, USER.md, MEMORY.md, memory/*)
        2. For each file, check if content hash has changed
        3. If changed: chunk → embed → store in DB
        4. Remove stale entries for deleted files
        5. Optionally sync session logs from daily/

        Returns a summary dict with counts.
        """
        if self._syncing and not force:
            logger.debug("Sync already in progress, skipping")
            return {"skipped": True}

        with self._sync_lock:
            self._syncing = True
            try:
                result = self._run_sync(force=force)
                # Only clear the dirty flag after a successful sync;
                # a failed sync should be retried on the next trigger.
                self._dirty = False
                return result
            finally:
                self._syncing = False

    def _run_sync(self, *, force: bool = False) -> dict:
        """Execute the actual sync logic."""
        stats = {
            "files_found": 0,
            "files_indexed": 0,
            "files_skipped": 0,
            "chunks_total": 0,
            "stale_removed": 0,
            "sessions_indexed": 0,
        }

        # ── Memory files ──
        if "memory" in self._config.sources:
            files = list_memory_files(self._workspace_dir)
            stats["files_found"] = len(files)

            active_paths: set[str] = set()

            for abs_path in files:
                entry = build_file_entry(abs_path, self._workspace_dir)
                active_paths.add(entry.path)

                # Check if file has changed
                row = self._db.execute(
                    "SELECT hash FROM files WHERE path = ? AND source = ?",
                    (entry.path, MemorySource.MEMORY.value),
                ).fetchone()

                if not force and row and row["hash"] == entry.hash:
                    stats["files_skipped"] += 1
                    continue

                # File is new or changed — re-index it
                self._index_file(entry, MemorySource.MEMORY)
                stats["files_indexed"] += 1

            # Remove stale entries for deleted files
            stale_rows = self._db.execute(
                "SELECT path FROM files WHERE source = ?",
                (MemorySource.MEMORY.value,),
            ).fetchall()
            for stale in stale_rows:
                if stale["path"] not in active_paths:
                    self._remove_file(stale["path"], MemorySource.MEMORY)
                    stats["stale_removed"] += 1

        # ── Session files ──
        if "sessions" in self._config.sources:
            stats["sessions_indexed"] = self._sync_session_files(force=force)

        # Count total chunks now in the index
        row = self._db.execute("SELECT COUNT(*) as c FROM chunks").fetchone()
        stats["chunks_total"] = row["c"] if row else 0

        self._db.commit()

        logger.info(
            f"Sync complete: {stats['files_indexed']} indexed, "
            f"{stats['files_skipped']} unchanged, "
            f"{stats['stale_removed']} removed, "
            f"{stats['chunks_total']} total chunks"
        )
        return stats
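
    # A sync over a small fresh workspace might return, e.g. (illustrative
    # values only):
    #   {"files_found": 3, "files_indexed": 3, "files_skipped": 0,
    #    "chunks_total": 12, "stale_removed": 0, "sessions_indexed": 1}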

    def _index_file(self, entry, source: MemorySource) -> None:
        """
        Index a single file: read → chunk → embed → store.
        Port of OpenClaw's indexFile method.
        """
        try:
            with open(entry.abs_path, "r", encoding="utf-8") as f:
                content = f.read()
        except Exception as e:
            logger.warning(f"Failed to read {entry.abs_path}: {e}")
            return

        if not content.strip():
            return

        # Chunk the content
        chunks = chunk_markdown(
            content,
            chunk_tokens=self._config.chunk_tokens,
            chunk_overlap=self._config.chunk_overlap,
        )

        if not chunks:
            return

        # Check embedding cache and compute new embeddings
        texts_to_embed = []
        chunk_hashes = []
        cached_embeddings: dict[str, list[float]] = {}

        for chunk in chunks:
            # Check cache first
            cache_row = self._db.execute(
                "SELECT embedding FROM embedding_cache WHERE model = ? AND hash = ?",
                (self._config.embedding_model, chunk.hash),
            ).fetchone()

            if cache_row:
                cached_embeddings[chunk.hash] = json.loads(cache_row["embedding"])
            else:
                texts_to_embed.append(chunk.text)
                chunk_hashes.append(chunk.hash)

        # Batch embed uncached chunks
        new_embeddings: dict[str, list[float]] = {}
        if texts_to_embed:
            try:
                vectors = embed_batch(texts_to_embed, self._config.embedding_model)
                now = int(time.time())
                for i, chunk_hash in enumerate(chunk_hashes):
                    vec = vectors[i] if i < len(vectors) else []
                    new_embeddings[chunk_hash] = vec
                    # Store in cache
                    self._db.execute(
                        "INSERT OR REPLACE INTO embedding_cache "
                        "(model, hash, embedding, dims, updated_at) "
                        "VALUES (?, ?, ?, ?, ?)",
                        (
                            self._config.embedding_model,
                            chunk_hash,
                            json.dumps(vec),
                            len(vec),
                            now,
                        ),
                    )
            except Exception as e:
                logger.warning(f"Embedding batch failed for {entry.path}: {e}")
                # Fall back to empty embeddings
                for chunk_hash in chunk_hashes:
                    new_embeddings[chunk_hash] = []

        # Remove old chunks for this file
        self._remove_file_chunks(entry.path, source)

        # Insert new chunks
        now = int(time.time())
        for chunk in chunks:
            chunk_id = str(uuid.uuid4())
            embedding = cached_embeddings.get(chunk.hash) or new_embeddings.get(
                chunk.hash, []
            )

            self._db.execute(
                "INSERT INTO chunks "
                "(id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) "
                "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                (
                    chunk_id,
                    entry.path,
                    source.value,
                    chunk.start_line,
                    chunk.end_line,
                    chunk.hash,
                    self._config.embedding_model,
                    chunk.text,
                    json.dumps(embedding),
                    now,
                ),
            )

            # Insert into FTS index
            if self._fts_available:
                try:
                    self._db.execute(
                        "INSERT INTO chunks_fts "
                        "(text, id, path, source, model, start_line, end_line) "
                        "VALUES (?, ?, ?, ?, ?, ?, ?)",
                        (
                            chunk.text,
                            chunk_id,
                            entry.path,
                            source.value,
                            self._config.embedding_model,
                            chunk.start_line,
                            chunk.end_line,
                        ),
                    )
                except Exception as e:
                    logger.debug(f"FTS insert failed for chunk {chunk_id}: {e}")

        # Update files table
        self._db.execute(
            "INSERT OR REPLACE INTO files (path, source, hash, mtime, size) "
            "VALUES (?, ?, ?, ?, ?)",
            (
                entry.path,
                source.value,
                entry.hash,
                int(entry.mtime_ms),
                entry.size,
            ),
        )

    def _remove_file_chunks(self, path: str, source: MemorySource) -> None:
        """Remove all chunks (and FTS entries) for a given file."""
        # Get chunk IDs for FTS cleanup
        if self._fts_available:
            chunk_ids = self._db.execute(
                "SELECT id FROM chunks WHERE path = ? AND source = ?",
                (path, source.value),
            ).fetchall()
            for row in chunk_ids:
                try:
                    self._db.execute(
                        "DELETE FROM chunks_fts WHERE id = ?", (row["id"],)
                    )
                except Exception:
                    pass

        self._db.execute(
            "DELETE FROM chunks WHERE path = ? AND source = ?",
            (path, source.value),
        )

    def _remove_file(self, path: str, source: MemorySource) -> None:
        """Remove a file and all its chunks from the index."""
        self._remove_file_chunks(path, source)
        self._db.execute(
            "DELETE FROM files WHERE path = ? AND source = ?",
            (path, source.value),
        )

    # ── Session Logs ─────────────────────────────────────────────────

    def _sync_session_files(self, *, force: bool = False) -> int:
        """
        Sync session log files from the daily/ directory.
        Returns the number of session files indexed.
        """
        sessions_dir = Path(self._sessions_dir)
        if not sessions_dir.is_dir():
            return 0

        indexed = 0
        active_paths: set[str] = set()

        for md_file in sorted(sessions_dir.glob("*.md")):
            if md_file.is_symlink() or not md_file.is_file():
                continue
            entry = build_file_entry(str(md_file), self._workspace_dir)
            active_paths.add(entry.path)

            # Check if changed
            row = self._db.execute(
                "SELECT hash FROM files WHERE path = ? AND source = ?",
                (entry.path, MemorySource.SESSIONS.value),
            ).fetchone()

            if not force and row and row["hash"] == entry.hash:
                continue

            self._index_file(entry, MemorySource.SESSIONS)
            indexed += 1

        # Clean stale session entries
        stale_rows = self._db.execute(
            "SELECT path FROM files WHERE source = ?",
            (MemorySource.SESSIONS.value,),
        ).fetchall()
        for stale in stale_rows:
            if stale["path"] not in active_paths:
                self._remove_file(stale["path"], MemorySource.SESSIONS)

        return indexed

    def log_session(
        self,
        content: str,
        *,
        date: str | None = None,
        channel: str = "slack",
    ) -> str:
        """
        Append to today's session log in daily/.

        Args:
            content: The text to log (e.g., a user message or AI response).
            date: Optional date string (YYYY-MM-DD). Defaults to today.
            channel: Channel the conversation came from.

        Returns:
            Path to the session log file.
        """
        if date is None:
            date = time.strftime("%Y-%m-%d")

        log_path = os.path.join(self._sessions_dir, f"{date}.md")

        # Create file with header if it doesn't exist
        if not os.path.exists(log_path):
            header = f"# Session Log — {date}\n\n"
            with open(log_path, "w", encoding="utf-8") as f:
                f.write(header)

        # Append the content
        timestamp = time.strftime("%H:%M:%S")
        with open(log_path, "a", encoding="utf-8") as f:
            f.write(f"\n---\n\n**[{timestamp}] ({channel})**\n\n{content}\n")

        # Mark as dirty for next sync
        self._dirty = True

        return log_path
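
    # An appended entry, as produced by the f-string above (illustrative;
    # the content line is made up):
    #
    #   ---
    #
    #   **[14:03:22] (slack)**
    #
    #   Discussed the incremental sync design.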

    # ── Identity File Access ─────────────────────────────────────────

    def read_identity_file(self, name: str) -> str | None:
        """Read an identity file (SOUL.md, USER.md, MEMORY.md)."""
        filepath = os.path.join(self._workspace_dir, name)
        if not os.path.isfile(filepath):
            return None
        with open(filepath, "r", encoding="utf-8") as f:
            return f.read()

    def update_identity_file(self, name: str, content: str) -> None:
        """Update an identity file and mark the index as dirty."""
        filepath = os.path.join(self._workspace_dir, name)
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
        self._dirty = True
        logger.info(f"Updated identity file: {name}")

    def read_soul(self) -> str | None:
        return self.read_identity_file("SOUL.md")

    def read_user(self) -> str | None:
        return self.read_identity_file("USER.md")

    def read_long_term_memory(self) -> str | None:
        return self.read_identity_file("MEMORY.md")

    def append_to_memory(self, entry: str) -> None:
        """Append a new entry to MEMORY.md."""
        filepath = os.path.join(self._workspace_dir, "MEMORY.md")
        timestamp = time.strftime("%Y-%m-%d %H:%M")
        with open(filepath, "a", encoding="utf-8") as f:
            f.write(f"\n### [{timestamp}]\n\n{entry}\n")
        self._dirty = True
        logger.info("Appended to MEMORY.md")

    # ── File Reading ─────────────────────────────────────────────────

    def read_file(
        self,
        rel_path: str,
        *,
        from_line: int | None = None,
        num_lines: int | None = None,
    ) -> dict:
        """
        Read a memory file by relative path.
        Port of OpenClaw's readFile().
        """
        raw = rel_path.strip()
        if not raw:
            raise ValueError("path required")

        if os.path.isabs(raw):
            abs_path = os.path.realpath(raw)
        else:
            abs_path = os.path.realpath(
                os.path.join(self._workspace_dir, raw)
            )

        if not abs_path.endswith(".md"):
            raise ValueError("Only .md files are supported")

        if not os.path.isfile(abs_path):
            raise FileNotFoundError(f"File not found: {abs_path}")

        with open(abs_path, "r", encoding="utf-8") as f:
            content = f.read()

        if from_line is None and num_lines is None:
            return {"text": content, "path": rel_path}

        # from_line is 1-based; slice an inclusive window of num_lines lines
        lines = content.split("\n")
        start = max(1, from_line or 1)
        count = max(1, num_lines or len(lines))
        sliced = lines[start - 1 : start - 1 + count]
        return {"text": "\n".join(sliced), "path": rel_path}

    # ── Status ───────────────────────────────────────────────────────

    def status(self) -> dict:
        """Get the current status of the memory index."""
        files_row = self._db.execute(
            "SELECT COUNT(*) as c FROM files"
        ).fetchone()
        chunks_row = self._db.execute(
            "SELECT COUNT(*) as c FROM chunks"
        ).fetchone()
        cache_row = self._db.execute(
            "SELECT COUNT(*) as c FROM embedding_cache"
        ).fetchone()

        return {
            "workspace_dir": self._workspace_dir,
            "db_path": self._db_path,
            "sessions_dir": self._sessions_dir,
            "files": files_row["c"] if files_row else 0,
            "chunks": chunks_row["c"] if chunks_row else 0,
            "cached_embeddings": cache_row["c"] if cache_row else 0,
            "fts_available": self._fts_available,
            "dirty": self._dirty,
            "embedding_model": self._config.embedding_model,
            "embedding_dims": get_embedding_dims(self._config.embedding_model),
            "vector_weight": self._config.vector_weight,
            "text_weight": self._config.text_weight,
        }

    # ── File Watching ────────────────────────────────────────────────

    def start_watching(self) -> None:
        """
        Start watching the workspace for file changes.
        Uses watchdog for cross-platform file system events.
        """
        if self._watcher or not self._config.watch:
            return

        try:
            from watchdog.events import FileSystemEventHandler
            from watchdog.observers import Observer
        except ImportError:
            logger.warning(
                "watchdog not installed — file watching disabled. "
                "Install with: uv add watchdog"
            )
            return

        manager = self

        class MemoryFileHandler(FileSystemEventHandler):
            def on_any_event(self, event):
                if event.is_directory:
                    return
                src = getattr(event, "src_path", "")
                if src.endswith(".md"):
                    # Only mark dirty; the next search/sync re-indexes
                    manager._dirty = True
                    logger.debug(f"Workspace change detected: {src}")

        observer = Observer()
        handler = MemoryFileHandler()
        observer.schedule(handler, self._workspace_dir, recursive=True)
        observer.start()
        self._watcher = observer
        logger.info(f"File watching started: {self._workspace_dir}")

    def stop_watching(self) -> None:
        """Stop the file watcher."""
        if self._watcher:
            self._watcher.stop()
            self._watcher.join()
            self._watcher = None
            logger.info("File watching stopped")

    # ── Lifecycle ────────────────────────────────────────────────────

    def close(self) -> None:
        """Close the memory manager and release resources."""
        if self._closed:
            return
        self._closed = True
        self.stop_watching()
        self._db.close()
        logger.info("MemoryManager closed")

    def __enter__(self):
        return self

    def __exit__(self, *args):
        self.close()

    def __del__(self):
        # getattr guard: __del__ may run on a partially constructed instance
        # where _closed was never assigned.
        if not getattr(self, "_closed", True):
            try:
                self.close()
            except Exception:
                pass
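

# ── Usage sketch ──────────────────────────────────────────────────────
# Illustrative only: a minimal end-to-end run of the lifecycle from the
# module docstring (sync → search → close). Assumes MemoryConfig's defaults
# point at a writable workspace; the query string is made up.
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        with MemoryManager() as mm:
            stats = await mm.sync(force=True)
            print("sync:", stats)
            for hit in await mm.search("long-term memory"):
                print(f"{hit.path}:{hit.start_line}-{hit.end_line} "
                      f"score={hit.score:.2f}")

    asyncio.run(_demo())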