282 lines
8.3 KiB
Python
Raw Normal View History

2025-11-04 05:17:27 +01:00
import json
import sqlite3
import time
from dataclasses import dataclass
2025-11-04 08:09:12 +01:00
from typing import Any, Dict, List, Optional
2025-11-04 05:17:27 +01:00
from .semantic_index import SemanticIndex
2025-11-04 08:09:12 +01:00
2025-11-04 05:17:27 +01:00
@dataclass
class KnowledgeEntry:
    """One stored piece of knowledge together with its bookkeeping fields."""

    entry_id: str
    category: str
    content: str
    metadata: Dict[str, Any]
    created_at: float
    updated_at: float
    access_count: int = 0
    importance_score: float = 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Return the entry as a plain dict keyed by field name.

        Values are taken from the instance as-is; in particular ``metadata``
        is the same dict object held by the entry, not a copy.
        """
        field_names = (
            "entry_id",
            "category",
            "content",
            "metadata",
            "created_at",
            "updated_at",
            "access_count",
            "importance_score",
        )
        return {name: getattr(self, name) for name in field_names}
2025-11-04 08:09:12 +01:00
2025-11-04 05:17:27 +01:00
class KnowledgeStore:
    """Persist ``KnowledgeEntry`` records in SQLite and mirror them in a semantic index.

    Rows live in the ``knowledge_entries`` table of the database at
    ``db_path``. The content of every row is also registered with an
    in-memory ``SemanticIndex``, which is rebuilt from the table each time a
    store is constructed.

    The connection is opened with ``check_same_thread=False``, so it may be
    used from threads other than the one that created it; this class does no
    locking of its own.

    A store can be used as a context manager; leaving the ``with`` block
    closes the connection.
    """

    # Shared by every SELECT so column positions always match _row_to_entry.
    _COLUMNS = (
        "entry_id, category, content, metadata, "
        "created_at, updated_at, access_count, importance_score"
    )

    def __init__(self, db_path: str):
        """Open (or create) the database at ``db_path`` and build the index."""
        self.db_path = db_path
        self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
        self.semantic_index = SemanticIndex()
        try:
            self._initialize_store()
            self._load_index()
        except Exception:
            # Do not leak the connection when setup fails part-way.
            self.conn.close()
            raise

    def __enter__(self) -> "KnowledgeStore":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying SQLite connection."""
        self.conn.close()

    @staticmethod
    def _row_to_entry(row) -> "KnowledgeEntry":
        """Build a ``KnowledgeEntry`` from a row selected with ``_COLUMNS``."""
        return KnowledgeEntry(
            entry_id=row[0],
            category=row[1],
            content=row[2],
            # metadata is stored as JSON text; NULL/empty means "no metadata".
            metadata=json.loads(row[3]) if row[3] else {},
            created_at=row[4],
            updated_at=row[5],
            # The column is nullable, so treat NULL as "never accessed".
            access_count=row[6] or 0,
            importance_score=row[7],
        )

    def _initialize_store(self) -> None:
        """Create the ``knowledge_entries`` table and its indexes if missing."""
        # `with self.conn` commits on success and rolls back on error.
        with self.conn:
            self.conn.execute(
                """
                CREATE TABLE IF NOT EXISTS knowledge_entries (
                    entry_id TEXT PRIMARY KEY,
                    category TEXT NOT NULL,
                    content TEXT NOT NULL,
                    metadata TEXT,
                    created_at REAL NOT NULL,
                    updated_at REAL NOT NULL,
                    access_count INTEGER DEFAULT 0,
                    importance_score REAL DEFAULT 1.0
                )
                """
            )
            self.conn.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_category ON knowledge_entries(category)
                """
            )
            self.conn.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_importance ON knowledge_entries(importance_score DESC)
                """
            )
            self.conn.execute(
                """
                CREATE INDEX IF NOT EXISTS idx_created ON knowledge_entries(created_at DESC)
                """
            )

    def _load_index(self) -> None:
        """Register the content of every stored row with the semantic index."""
        rows = self.conn.execute("SELECT entry_id, content FROM knowledge_entries")
        for entry_id, content in rows:
            self.semantic_index.add_document(entry_id, content)

    def add_entry(self, entry: "KnowledgeEntry") -> None:
        """Insert ``entry``, replacing any existing row with the same ``entry_id``.

        The entry's metadata is serialized with ``json.dumps``. After the row
        is committed, the entry's content is added to the semantic index.
        """
        with self.conn:
            self.conn.execute(
                """
                INSERT OR REPLACE INTO knowledge_entries
                (entry_id, category, content, metadata, created_at, updated_at, access_count, importance_score)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?)
                """,
                (
                    entry.entry_id,
                    entry.category,
                    entry.content,
                    json.dumps(entry.metadata),
                    entry.created_at,
                    entry.updated_at,
                    entry.access_count,
                    entry.importance_score,
                ),
            )
        # NOTE(review): when entry_id already exists the row is replaced and
        # add_document runs again for the same id -- confirm that
        # SemanticIndex.add_document overwrites rather than duplicates.
        self.semantic_index.add_document(entry.entry_id, entry.content)

    def get_entry(self, entry_id: str) -> Optional["KnowledgeEntry"]:
        """Return the entry with ``entry_id`` and count the access, or ``None``.

        A successful lookup increments the stored ``access_count``; the
        returned entry already reflects the incremented value.
        """
        row = self.conn.execute(
            f"SELECT {self._COLUMNS} FROM knowledge_entries WHERE entry_id = ?",
            (entry_id,),
        ).fetchone()
        if row is None:
            return None

        with self.conn:
            # COALESCE keeps the counter usable even if the column held NULL.
            self.conn.execute(
                """
                UPDATE knowledge_entries
                SET access_count = COALESCE(access_count, 0) + 1
                WHERE entry_id = ?
                """,
                (entry_id,),
            )

        entry = self._row_to_entry(row)
        entry.access_count += 1
        return entry

    def search_entries(
        self, query: str, category: Optional[str] = None, top_k: int = 5
    ) -> List["KnowledgeEntry"]:
        """Return up to ``top_k`` entries for ``query``, in index ranking order.

        The semantic index is asked for ``top_k * 2`` hits; each hit is then
        looked up in the table, optionally restricted to ``category``. Because
        only those hits are examined, a category filter can yield fewer than
        ``top_k`` entries even when more matching rows exist. Lookups here do
        not change ``access_count``.

        A non-positive ``top_k`` returns an empty list.
        """
        if top_k <= 0:
            return []

        sql = f"SELECT {self._COLUMNS} FROM knowledge_entries WHERE entry_id = ?"
        extra_params = ()
        if category:
            sql += " AND category = ?"
            extra_params = (category,)

        entries = []
        for entry_id, _score in self.semantic_index.search(query, top_k * 2):
            row = self.conn.execute(sql, (entry_id, *extra_params)).fetchone()
            if row is None:
                # Hit is not in the table or belongs to another category.
                continue
            entries.append(self._row_to_entry(row))
            if len(entries) >= top_k:
                break
        return entries

    def get_by_category(self, category: str, limit: int = 20) -> List["KnowledgeEntry"]:
        """Return up to ``limit`` entries of ``category``.

        Results are ordered by importance (highest first), then by creation
        time (newest first).
        """
        rows = self.conn.execute(
            f"""
            SELECT {self._COLUMNS}
            FROM knowledge_entries
            WHERE category = ?
            ORDER BY importance_score DESC, created_at DESC
            LIMIT ?
            """,
            (category, limit),
        )
        return [self._row_to_entry(row) for row in rows]

    def update_importance(self, entry_id: str, importance_score: float) -> None:
        """Set the importance of ``entry_id`` and stamp ``updated_at`` with now.

        Does nothing when no row has that ``entry_id``.
        """
        with self.conn:
            self.conn.execute(
                """
                UPDATE knowledge_entries
                SET importance_score = ?, updated_at = ?
                WHERE entry_id = ?
                """,
                (importance_score, time.time(), entry_id),
            )

    def delete_entry(self, entry_id: str) -> bool:
        """Delete ``entry_id`` from the table and the index.

        Returns ``True`` when a row was removed, ``False`` when none matched.
        The semantic index is only touched when a row was actually deleted.
        """
        with self.conn:
            cursor = self.conn.execute(
                "DELETE FROM knowledge_entries WHERE entry_id = ?", (entry_id,)
            )
            deleted = cursor.rowcount > 0
        if deleted:
            self.semantic_index.remove_document(entry_id)
        return deleted

    def get_statistics(self) -> Dict[str, Any]:
        """Return summary counts for the store.

        Keys: ``total_entries``, ``total_categories``,
        ``category_distribution`` (category -> row count, largest first),
        ``total_accesses`` (sum of ``access_count``; 0 for an empty table) and
        ``vocabulary_size`` (length of the semantic index vocabulary).
        """
        total_entries = self.conn.execute(
            "SELECT COUNT(*) FROM knowledge_entries"
        ).fetchone()[0]
        total_categories = self.conn.execute(
            "SELECT COUNT(DISTINCT category) FROM knowledge_entries"
        ).fetchone()[0]
        category_counts = dict(
            self.conn.execute(
                """
                SELECT category, COUNT(*) as count
                FROM knowledge_entries
                GROUP BY category
                ORDER BY count DESC
                """
            )
        )
        # SUM over zero rows yields NULL, hence the `or 0`.
        total_accesses = (
            self.conn.execute(
                "SELECT SUM(access_count) FROM knowledge_entries"
            ).fetchone()[0]
            or 0
        )
        return {
            "total_entries": total_entries,
            "total_categories": total_categories,
            "category_distribution": category_counts,
            "total_accesses": total_accesses,
            "vocabulary_size": len(self.semantic_index.vocabulary),
        }