import json
import sqlite3
import time
from typing import List, Dict, Any, Optional
from dataclasses import dataclass

from .semantic_index import SemanticIndex


@dataclass
class KnowledgeEntry:
    """A single stored piece of knowledge plus its usage and importance metadata."""
    entry_id: str
    category: str
    content: str
    metadata: Dict[str, Any]
    created_at: float
    updated_at: float
    access_count: int = 0
    importance_score: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        return {
            'entry_id': self.entry_id,
            'category': self.category,
            'content': self.content,
            'metadata': self.metadata,
            'created_at': self.created_at,
            'updated_at': self.updated_at,
            'access_count': self.access_count,
            'importance_score': self.importance_score
        }


class KnowledgeStore:
    """SQLite-backed store of KnowledgeEntry records paired with a semantic search index."""

    def __init__(self, db_path: str):
        self.db_path = db_path
        self.semantic_index = SemanticIndex()
        self._initialize_store()
        self._load_index()

    def _initialize_store(self):
        """Create the knowledge_entries table and its indexes if they do not exist."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS knowledge_entries (
                entry_id TEXT PRIMARY KEY,
                category TEXT NOT NULL,
                content TEXT NOT NULL,
                metadata TEXT,
                created_at REAL NOT NULL,
                updated_at REAL NOT NULL,
                access_count INTEGER DEFAULT 0,
                importance_score REAL DEFAULT 1.0
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_category ON knowledge_entries(category)
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_importance ON knowledge_entries(importance_score DESC)
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_created ON knowledge_entries(created_at DESC)
        ''')
        conn.commit()
        conn.close()

    def _load_index(self):
        """Rebuild the in-memory semantic index from all persisted entries."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('SELECT entry_id, content FROM knowledge_entries')
        for row in cursor.fetchall():
            self.semantic_index.add_document(row[0], row[1])
        conn.close()

    def add_entry(self, entry: KnowledgeEntry):
        """Insert or replace an entry and register its content with the semantic index."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('''
            INSERT OR REPLACE INTO knowledge_entries
            (entry_id, category, content, metadata, created_at, updated_at, access_count, importance_score)
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            entry.entry_id,
            entry.category,
            entry.content,
            json.dumps(entry.metadata),
            entry.created_at,
            entry.updated_at,
            entry.access_count,
            entry.importance_score
        ))
        conn.commit()
        conn.close()
        self.semantic_index.add_document(entry.entry_id, entry.content)

    def get_entry(self, entry_id: str) -> Optional[KnowledgeEntry]:
        """Fetch a single entry by id, incrementing its access count; return None if missing."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('''
            SELECT entry_id, category, content, metadata, created_at, updated_at, access_count, importance_score
            FROM knowledge_entries
            WHERE entry_id = ?
        ''', (entry_id,))
        row = cursor.fetchone()
        if row:
            cursor.execute('''
                UPDATE knowledge_entries
                SET access_count = access_count + 1
                WHERE entry_id = ?
            ''', (entry_id,))
            conn.commit()
            conn.close()
            return KnowledgeEntry(
                entry_id=row[0],
                category=row[1],
                content=row[2],
                metadata=json.loads(row[3]) if row[3] else {},
                created_at=row[4],
                updated_at=row[5],
                access_count=row[6] + 1,
                importance_score=row[7]
            )
        conn.close()
        return None

    def search_entries(self, query: str, category: Optional[str] = None,
                       top_k: int = 5) -> List[KnowledgeEntry]:
        """Semantic search over stored entries, optionally restricted to one category."""
        # Over-fetch from the index so a category filter can still fill top_k results.
        search_results = self.semantic_index.search(query, top_k * 2)
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        entries = []
        for entry_id, score in search_results:
            if category:
                cursor.execute('''
                    SELECT entry_id, category, content, metadata, created_at, updated_at, access_count, importance_score
                    FROM knowledge_entries
                    WHERE entry_id = ? AND category = ?
                ''', (entry_id, category))
            else:
                cursor.execute('''
                    SELECT entry_id, category, content, metadata, created_at, updated_at, access_count, importance_score
                    FROM knowledge_entries
                    WHERE entry_id = ?
                ''', (entry_id,))
            row = cursor.fetchone()
            if row:
                entry = KnowledgeEntry(
                    entry_id=row[0],
                    category=row[1],
                    content=row[2],
                    metadata=json.loads(row[3]) if row[3] else {},
                    created_at=row[4],
                    updated_at=row[5],
                    access_count=row[6],
                    importance_score=row[7]
                )
                entries.append(entry)
                if len(entries) >= top_k:
                    break
        conn.close()
        return entries

    def get_by_category(self, category: str, limit: int = 20) -> List[KnowledgeEntry]:
        """Return up to `limit` entries in a category, most important and newest first."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('''
            SELECT entry_id, category, content, metadata, created_at, updated_at, access_count, importance_score
            FROM knowledge_entries
            WHERE category = ?
            ORDER BY importance_score DESC, created_at DESC
            LIMIT ?
        ''', (category, limit))
        entries = []
        for row in cursor.fetchall():
            entries.append(KnowledgeEntry(
                entry_id=row[0],
                category=row[1],
                content=row[2],
                metadata=json.loads(row[3]) if row[3] else {},
                created_at=row[4],
                updated_at=row[5],
                access_count=row[6],
                importance_score=row[7]
            ))
        conn.close()
        return entries

    def update_importance(self, entry_id: str, importance_score: float):
        """Set a new importance score and refresh the updated_at timestamp."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('''
            UPDATE knowledge_entries
            SET importance_score = ?, updated_at = ?
            WHERE entry_id = ?
        ''', (importance_score, time.time(), entry_id))
        conn.commit()
        conn.close()

    def delete_entry(self, entry_id: str) -> bool:
        """Delete an entry from the database and the semantic index; return True if it existed."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('DELETE FROM knowledge_entries WHERE entry_id = ?', (entry_id,))
        deleted = cursor.rowcount > 0
        conn.commit()
        conn.close()
        if deleted:
            self.semantic_index.remove_document(entry_id)
        return deleted

    def get_statistics(self) -> Dict[str, Any]:
        """Summarize entry counts, category distribution, total accesses, and index vocabulary size."""
        conn = sqlite3.connect(self.db_path, check_same_thread=False)
        cursor = conn.cursor()
        cursor.execute('SELECT COUNT(*) FROM knowledge_entries')
        total_entries = cursor.fetchone()[0]
        cursor.execute('SELECT COUNT(DISTINCT category) FROM knowledge_entries')
        total_categories = cursor.fetchone()[0]
        cursor.execute('''
            SELECT category, COUNT(*) as count
            FROM knowledge_entries
            GROUP BY category
            ORDER BY count DESC
        ''')
        category_counts = {row[0]: row[1] for row in cursor.fetchall()}
        cursor.execute('SELECT SUM(access_count) FROM knowledge_entries')
        total_accesses = cursor.fetchone()[0] or 0
        conn.close()
        return {
            'total_entries': total_entries,
            'total_categories': total_categories,
            'category_distribution': category_counts,
            'total_accesses': total_accesses,
            'vocabulary_size': len(self.semantic_index.vocabulary)
        }
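

# Illustrative usage sketch, not part of the original module. It assumes the sibling
# SemanticIndex exposes add_document, search, remove_document, and a `vocabulary`
# attribute, exactly as referenced above, and that 'knowledge.db' is a writable path.
# Because of the relative import, run this as a module inside its package
# (python -m <package>.<this_module>) rather than as a standalone script.
if __name__ == '__main__':
    store = KnowledgeStore('knowledge.db')
    now = time.time()

    # Persist one entry; this also registers its content with the semantic index.
    store.add_entry(KnowledgeEntry(
        entry_id='kb-001',
        category='notes',
        content='SQLite persists knowledge entries while a semantic index handles search.',
        metadata={'source': 'example'},
        created_at=now,
        updated_at=now,
    ))

    # Lookup by id bumps the access count; search ranks entries by semantic similarity.
    print(store.get_entry('kb-001'))
    print(store.search_entries('semantic search', top_k=3))
    print(store.get_statistics())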