|
import json
|
|
import sqlite3
|
|
import time
|
|
from typing import List, Dict, Any, Optional
|
|
from dataclasses import dataclass
|
|
from .semantic_index import SemanticIndex
|
|
|
|
@dataclass
class KnowledgeEntry:
    """One stored piece of knowledge plus its bookkeeping fields."""

    entry_id: str
    category: str
    content: str
    metadata: Dict[str, Any]
    created_at: float
    updated_at: float
    access_count: int = 0
    importance_score: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dict (keys in declaration order)."""
        field_names = (
            'entry_id', 'category', 'content', 'metadata',
            'created_at', 'updated_at', 'access_count', 'importance_score',
        )
        return {name: getattr(self, name) for name in field_names}
|
|
|
|
class KnowledgeStore:
    """SQLite-backed persistence for KnowledgeEntry records with a semantic index.

    Every entry is mirrored into ``self.semantic_index`` so that
    ``search_entries`` can rank by relevance before hitting the database.

    NOTE(review): the connection is opened with ``check_same_thread=False``
    but no locking is performed here — callers appear to be expected to
    serialize cross-thread access themselves; confirm with call sites.
    """

    # Single source of truth for the column order used by every SELECT/INSERT,
    # so _row_to_entry's positional indexing cannot drift out of sync.
    _COLUMNS = ('entry_id, category, content, metadata, '
                'created_at, updated_at, access_count, importance_score')

    def __init__(self, db_path: str):
        """Open (or create) the database at *db_path* and build the in-memory index."""
        self.db_path = db_path
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.semantic_index = SemanticIndex()
        self._initialize_store()
        self._load_index()

    def close(self) -> None:
        """Close the underlying SQLite connection. The store is unusable afterwards."""
        self.conn.close()

    def __enter__(self) -> 'KnowledgeStore':
        """Allow use as a context manager: ``with KnowledgeStore(path) as ks: ...``."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Close the connection on context exit (exceptions propagate)."""
        self.close()

    @staticmethod
    def _row_to_entry(row) -> KnowledgeEntry:
        """Build a KnowledgeEntry from a row selected in ``_COLUMNS`` order.

        ``metadata`` is stored as a JSON string; NULL/empty decodes to ``{}``.
        """
        return KnowledgeEntry(
            entry_id=row[0],
            category=row[1],
            content=row[2],
            metadata=json.loads(row[3]) if row[3] else {},
            created_at=row[4],
            updated_at=row[5],
            access_count=row[6],
            importance_score=row[7],
        )

    def _initialize_store(self):
        """Create the entries table and its supporting indexes if absent."""
        cursor = self.conn.cursor()

        cursor.execute('''
            CREATE TABLE IF NOT EXISTS knowledge_entries (
                entry_id TEXT PRIMARY KEY,
                category TEXT NOT NULL,
                content TEXT NOT NULL,
                metadata TEXT,
                created_at REAL NOT NULL,
                updated_at REAL NOT NULL,
                access_count INTEGER DEFAULT 0,
                importance_score REAL DEFAULT 1.0
            )
        ''')

        # Secondary indexes matching the access patterns below
        # (filter by category, order by importance / recency).
        cursor.execute(
            'CREATE INDEX IF NOT EXISTS idx_category '
            'ON knowledge_entries(category)')
        cursor.execute(
            'CREATE INDEX IF NOT EXISTS idx_importance '
            'ON knowledge_entries(importance_score DESC)')
        cursor.execute(
            'CREATE INDEX IF NOT EXISTS idx_created '
            'ON knowledge_entries(created_at DESC)')

        self.conn.commit()

    def _load_index(self):
        """Populate the semantic index from every persisted entry."""
        cursor = self.conn.cursor()
        cursor.execute('SELECT entry_id, content FROM knowledge_entries')
        for entry_id, content in cursor.fetchall():
            self.semantic_index.add_document(entry_id, content)

    def add_entry(self, entry: KnowledgeEntry):
        """Persist *entry* (replacing any row with the same entry_id) and index it."""
        cursor = self.conn.cursor()
        cursor.execute(f'''
            INSERT OR REPLACE INTO knowledge_entries
            ({self._COLUMNS})
            VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ''', (
            entry.entry_id,
            entry.category,
            entry.content,
            json.dumps(entry.metadata),
            entry.created_at,
            entry.updated_at,
            entry.access_count,
            entry.importance_score,
        ))
        self.conn.commit()
        self.semantic_index.add_document(entry.entry_id, entry.content)

    def get_entry(self, entry_id: str) -> Optional[KnowledgeEntry]:
        """Fetch one entry by id, bumping its access_count; None if missing."""
        cursor = self.conn.cursor()
        cursor.execute(f'''
            SELECT {self._COLUMNS}
            FROM knowledge_entries
            WHERE entry_id = ?
        ''', (entry_id,))
        row = cursor.fetchone()
        if row is None:
            return None

        cursor.execute('''
            UPDATE knowledge_entries
            SET access_count = access_count + 1
            WHERE entry_id = ?
        ''', (entry_id,))
        self.conn.commit()

        entry = self._row_to_entry(row)
        # Mirror the UPDATE above so the returned object matches the stored row.
        entry.access_count += 1
        return entry

    def search_entries(self, query: str, category: Optional[str] = None,
                       top_k: int = 5) -> List[KnowledgeEntry]:
        """Semantically search for *query*, optionally restricted to *category*.

        Over-fetches 2x from the index so a category filter can still yield
        up to *top_k* results; entries are returned in relevance order.
        """
        search_results = self.semantic_index.search(query, top_k * 2)
        cursor = self.conn.cursor()

        entries: List[KnowledgeEntry] = []
        for entry_id, _score in search_results:
            sql = f'SELECT {self._COLUMNS} FROM knowledge_entries WHERE entry_id = ?'
            params: List[Any] = [entry_id]
            if category:
                sql += ' AND category = ?'
                params.append(category)
            cursor.execute(sql, params)

            row = cursor.fetchone()
            if row:
                entries.append(self._row_to_entry(row))
                if len(entries) >= top_k:
                    break

        return entries

    def get_by_category(self, category: str, limit: int = 20) -> List[KnowledgeEntry]:
        """Return up to *limit* entries in *category*, most important (then newest) first."""
        cursor = self.conn.cursor()
        cursor.execute(f'''
            SELECT {self._COLUMNS}
            FROM knowledge_entries
            WHERE category = ?
            ORDER BY importance_score DESC, created_at DESC
            LIMIT ?
        ''', (category, limit))
        return [self._row_to_entry(row) for row in cursor.fetchall()]

    def update_importance(self, entry_id: str, importance_score: float):
        """Set *entry_id*'s importance_score and stamp updated_at with the current time."""
        cursor = self.conn.cursor()
        cursor.execute('''
            UPDATE knowledge_entries
            SET importance_score = ?, updated_at = ?
            WHERE entry_id = ?
        ''', (importance_score, time.time(), entry_id))
        self.conn.commit()

    def delete_entry(self, entry_id: str) -> bool:
        """Delete *entry_id* from the database and index; True if a row was removed."""
        cursor = self.conn.cursor()
        cursor.execute('DELETE FROM knowledge_entries WHERE entry_id = ?', (entry_id,))
        deleted = cursor.rowcount > 0
        self.conn.commit()

        # Only touch the index when something was actually removed.
        if deleted:
            self.semantic_index.remove_document(entry_id)
        return deleted

    def get_statistics(self) -> Dict[str, Any]:
        """Return aggregate counts over the store and the semantic index."""
        cursor = self.conn.cursor()

        cursor.execute('SELECT COUNT(*) FROM knowledge_entries')
        total_entries = cursor.fetchone()[0]

        cursor.execute('SELECT COUNT(DISTINCT category) FROM knowledge_entries')
        total_categories = cursor.fetchone()[0]

        cursor.execute('''
            SELECT category, COUNT(*) as count
            FROM knowledge_entries
            GROUP BY category
            ORDER BY count DESC
        ''')
        category_counts = {row[0]: row[1] for row in cursor.fetchall()}

        cursor.execute('SELECT SUM(access_count) FROM knowledge_entries')
        # SUM over an empty table yields NULL -> None; coerce to 0.
        total_accesses = cursor.fetchone()[0] or 0

        return {
            'total_entries': total_entries,
            'total_categories': total_categories,
            'category_distribution': category_counts,
            'total_accesses': total_accesses,
            'vocabulary_size': len(self.semantic_index.vocabulary)
        }