import json
import sqlite3
import time
from dataclasses import dataclass
from typing import Any, Dict, List, Optional

from .semantic_index import SemanticIndex


@dataclass
class KnowledgeEntry:
    """One stored knowledge item plus its bookkeeping fields."""

    entry_id: str
    category: str
    content: str
    metadata: Dict[str, Any]
    created_at: float  # unix timestamp (seconds)
    updated_at: float  # unix timestamp (seconds)
    access_count: int = 0  # incremented on every get_entry() hit
    importance_score: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Return a JSON-serializable dict of every field."""
        return {
            'entry_id': self.entry_id,
            'category': self.category,
            'content': self.content,
            'metadata': self.metadata,
            'created_at': self.created_at,
            'updated_at': self.updated_at,
            'access_count': self.access_count,
            'importance_score': self.importance_score,
        }


class KnowledgeStore:
    """SQLite-backed store of KnowledgeEntry rows with a semantic index.

    Rows are persisted in the ``knowledge_entries`` table; entry content is
    mirrored into an in-memory ``SemanticIndex`` for search.

    NOTE(review): the connection is opened with check_same_thread=False but
    no lock guards it, so concurrent multi-threaded use needs external
    synchronization — confirm with callers.
    """

    # Single source of truth for the column order consumed by
    # _row_to_entry (fix: the column list was copy-pasted in four queries).
    _COLUMNS = ('entry_id, category, content, metadata, created_at, '
                'updated_at, access_count, importance_score')

    def __init__(self, db_path: str):
        """Open (or create) the database at *db_path* and load the index."""
        self.db_path = db_path
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.semantic_index = SemanticIndex()
        self._initialize_store()
        self._load_index()

    def close(self) -> None:
        """Release the SQLite connection (fix: it was never closed)."""
        self.conn.close()

    def __enter__(self) -> 'KnowledgeStore':
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        self.close()

    def _initialize_store(self) -> None:
        """Create the entries table and its query indexes if missing."""
        cursor = self.conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS knowledge_entries (
                entry_id TEXT PRIMARY KEY,
                category TEXT NOT NULL,
                content TEXT NOT NULL,
                metadata TEXT,
                created_at REAL NOT NULL,
                updated_at REAL NOT NULL,
                access_count INTEGER DEFAULT 0,
                importance_score REAL DEFAULT 1.0
            )
        ''')
        cursor.execute(
            'CREATE INDEX IF NOT EXISTS idx_category '
            'ON knowledge_entries(category)')
        cursor.execute(
            'CREATE INDEX IF NOT EXISTS idx_importance '
            'ON knowledge_entries(importance_score DESC)')
        cursor.execute(
            'CREATE INDEX IF NOT EXISTS idx_created '
            'ON knowledge_entries(created_at DESC)')
        self.conn.commit()

    def _load_index(self) -> None:
        """Rebuild the in-memory semantic index from persisted rows."""
        cursor = self.conn.cursor()
        cursor.execute('SELECT entry_id, content FROM knowledge_entries')
        for entry_id, content in cursor.fetchall():
            self.semantic_index.add_document(entry_id, content)

    @staticmethod
    def _row_to_entry(row) -> KnowledgeEntry:
        """Build a KnowledgeEntry from a row selected in _COLUMNS order."""
        return KnowledgeEntry(
            entry_id=row[0],
            category=row[1],
            content=row[2],
            metadata=json.loads(row[3]) if row[3] else {},
            created_at=row[4],
            updated_at=row[5],
            access_count=row[6],
            importance_score=row[7],
        )

    def add_entry(self, entry: KnowledgeEntry) -> None:
        """Insert *entry*, replacing any existing row with the same id."""
        cursor = self.conn.cursor()
        cursor.execute(f'''
            INSERT OR REPLACE INTO knowledge_entries
            ({self._COLUMNS})
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            entry.entry_id,
            entry.category,
            entry.content,
            json.dumps(entry.metadata),
            entry.created_at,
            entry.updated_at,
            entry.access_count,
            entry.importance_score,
        ))
        self.conn.commit()
        # Assumes SemanticIndex.add_document overwrites an existing document
        # id on replace — TODO confirm; otherwise stale terms linger after
        # an entry is updated via INSERT OR REPLACE.
        self.semantic_index.add_document(entry.entry_id, entry.content)

    def get_entry(self, entry_id: str) -> Optional[KnowledgeEntry]:
        """Fetch one entry by id, bumping its access counter.

        Returns None when no row matches.
        """
        cursor = self.conn.cursor()
        cursor.execute(
            f'SELECT {self._COLUMNS} FROM knowledge_entries '
            'WHERE entry_id = ?', (entry_id,))
        row = cursor.fetchone()
        if row is None:
            return None
        cursor.execute(
            'UPDATE knowledge_entries SET access_count = access_count + 1 '
            'WHERE entry_id = ?', (entry_id,))
        self.conn.commit()
        entry = self._row_to_entry(row)
        entry.access_count += 1  # mirror the UPDATE just committed
        return entry

    def search_entries(self, query: str, category: Optional[str] = None,
                       top_k: int = 5) -> List[KnowledgeEntry]:
        """Semantic search, optionally restricted to *category*.

        Over-fetches 2*top_k candidates from the index so a category filter
        still has enough survivors to fill *top_k* results.
        """
        search_results = self.semantic_index.search(query, top_k * 2)
        cursor = self.conn.cursor()
        entries: List[KnowledgeEntry] = []
        for entry_id, _score in search_results:
            if category:
                cursor.execute(
                    f'SELECT {self._COLUMNS} FROM knowledge_entries '
                    'WHERE entry_id = ? AND category = ?',
                    (entry_id, category))
            else:
                cursor.execute(
                    f'SELECT {self._COLUMNS} FROM knowledge_entries '
                    'WHERE entry_id = ?', (entry_id,))
            row = cursor.fetchone()
            if row:
                entries.append(self._row_to_entry(row))
                if len(entries) >= top_k:
                    break
        return entries

    def get_by_category(self, category: str,
                        limit: int = 20) -> List[KnowledgeEntry]:
        """Return up to *limit* entries, most important and newest first."""
        cursor = self.conn.cursor()
        cursor.execute(
            f'SELECT {self._COLUMNS} FROM knowledge_entries '
            'WHERE category = ? '
            'ORDER BY importance_score DESC, created_at DESC LIMIT ?',
            (category, limit))
        return [self._row_to_entry(row) for row in cursor.fetchall()]

    def update_importance(self, entry_id: str,
                          importance_score: float) -> None:
        """Set an entry's importance score and refresh its updated_at."""
        cursor = self.conn.cursor()
        cursor.execute(
            'UPDATE knowledge_entries '
            'SET importance_score = ?, updated_at = ? WHERE entry_id = ?',
            (importance_score, time.time(), entry_id))
        self.conn.commit()

    def delete_entry(self, entry_id: str) -> bool:
        """Delete an entry; True if a row (and its index doc) was removed."""
        cursor = self.conn.cursor()
        cursor.execute('DELETE FROM knowledge_entries WHERE entry_id = ?',
                       (entry_id,))
        deleted = cursor.rowcount > 0
        self.conn.commit()
        if deleted:
            self.semantic_index.remove_document(entry_id)
        return deleted

    def get_statistics(self) -> Dict[str, Any]:
        """Summarize store contents: counts, category histogram, accesses."""
        cursor = self.conn.cursor()
        cursor.execute('SELECT COUNT(*) FROM knowledge_entries')
        total_entries = cursor.fetchone()[0]
        cursor.execute(
            'SELECT COUNT(DISTINCT category) FROM knowledge_entries')
        total_categories = cursor.fetchone()[0]
        cursor.execute(
            'SELECT category, COUNT(*) as count FROM knowledge_entries '
            'GROUP BY category ORDER BY count DESC')
        category_counts = {row[0]: row[1] for row in cursor.fetchall()}
        cursor.execute('SELECT SUM(access_count) FROM knowledge_entries')
        total_accesses = cursor.fetchone()[0] or 0  # SUM is NULL when empty
        return {
            'total_entries': total_entries,
            'total_categories': total_categories,
            'category_distribution': category_counts,
            'total_accesses': total_accesses,
            'vocabulary_size': len(self.semantic_index.vocabulary),
        }