2025-11-04 05:17:27 +01:00
|
|
|
import re
|
2025-11-04 08:09:12 +01:00
|
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
2025-11-04 05:17:27 +01:00
|
|
|
|
|
|
|
|
class AdvancedContextManager:
    """Heuristics for sizing, summarising and scoring conversation context.

    Messages are dictionaries whose ``"content"`` entry holds the message
    text.  A missing or ``None`` content is treated as empty text and any
    other non-string content is coerced with ``str()``, so messages without
    textual content never raise.
    """

    # Compiled once; both patterns are used on every call.
    _WORD_RE = re.compile(r"\b\w+\b")
    _SENTENCE_BOUNDARY_RE = re.compile(r"(?<=[.!?])\s+")

    # Base window size per task complexity label.
    _DEFAULT_THRESHOLD = 20
    _COMPLEXITY_THRESHOLDS = {
        "simple": 10,
        "medium": 20,
        "complex": 35,
        "very_complex": 50,
    }

    def __init__(self, knowledge_store=None, conversation_memory=None):
        """Store the optional collaborators; neither is used by the heuristics below."""
        self.knowledge_store = knowledge_store
        self.conversation_memory = conversation_memory

    @staticmethod
    def _content_text(message: Dict[str, Any]) -> str:
        """Return the textual content of *message*, never ``None``."""
        content = message.get("content")
        if content is None:
            return ""
        return content if isinstance(content, str) else str(content)

    def adaptive_context_window(
        self, messages: List[Dict[str, Any]], task_complexity: str = "medium"
    ) -> int:
        """Return a context window size adapted to the conversation.

        Args:
            messages: Conversation messages to analyse.
            task_complexity: One of ``"simple"``, ``"medium"``, ``"complex"``
                or ``"very_complex"``; unknown labels fall back to 20.

        Returns:
            The base threshold for the label, scaled by 1.5 when the message
            complexity score exceeds 0.7, and never below the base threshold.
        """
        base_threshold = self._COMPLEXITY_THRESHOLDS.get(
            task_complexity, self._DEFAULT_THRESHOLD
        )
        message_complexity_score = self._analyze_message_complexity(messages)

        if message_complexity_score > 0.7:
            adjusted = int(base_threshold * 1.5)
        elif message_complexity_score < 0.3:
            adjusted = int(base_threshold * 0.7)
        else:
            adjusted = base_threshold

        # NOTE(review): flooring at base_threshold means the 0.7x reduction
        # above never lowers the result. Kept as-is to preserve existing
        # behaviour -- confirm whether shrinking the window was intended.
        return max(base_threshold, adjusted)

    def _analyze_message_complexity(self, messages: List[Dict[str, Any]]) -> float:
        """Score the complexity of *messages* in the range ``[0.0, 1.0]``.

        The score is ``average content length / 100`` plus the ratio of
        distinct words to total characters, capped at 1.0.  An empty message
        list (or messages with no text) scores 0.0.
        """
        texts = [self._content_text(msg) for msg in messages]
        total_length = sum(len(text) for text in texts)
        avg_length = total_length / len(texts) if texts else 0

        unique_words = set()
        for text in texts:
            unique_words.update(self._WORD_RE.findall(text.lower()))

        vocabulary_richness = len(unique_words) / total_length if total_length > 0 else 0

        # Simple complexity score based on length and richness.
        return float(min(1.0, (avg_length / 100) + vocabulary_richness))

    def extract_key_sentences(self, text: str, top_k: int = 5) -> List[str]:
        """Return up to *top_k* highest-scoring sentences of *text*.

        Sentences are split after ``.``, ``!`` or ``?`` followed by
        whitespace.  Each sentence is scored by the mean of a length score
        (``len / 50`` capped at 1.0) and a position score (1.0 for the first
        sentence, 0.8 for the rest of the first half, 0.6 afterwards).

        Returns:
            Sentences ordered by descending score (ties keep document order);
            an empty list for blank text or a non-positive *top_k*.
        """
        if not text or not text.strip() or top_k <= 0:
            return []

        # Strip first so surrounding whitespace cannot produce empty
        # "sentences" that would pollute the ranking and the output.
        sentences = [
            sentence
            for sentence in self._SENTENCE_BOUNDARY_RE.split(text.strip())
            if sentence
        ]
        if not sentences:
            return []

        # Simple scoring based on length and position.
        halfway = len(sentences) / 2
        scored_sentences = []
        for i, sentence in enumerate(sentences):
            length_score = min(1.0, len(sentence) / 50)
            if i == 0:
                position_score = 1.0
            elif i < halfway:
                position_score = 0.8
            else:
                position_score = 0.6
            scored_sentences.append((sentence, (length_score + position_score) / 2))

        # sort() is stable, so equal scores keep their original order.
        scored_sentences.sort(key=lambda pair: pair[1], reverse=True)
        return [sentence for sentence, _ in scored_sentences[:top_k]]

    def advanced_summarize_messages(self, messages: List[Dict[str, Any]]) -> str:
        """Summarise *messages* as their three highest-scoring sentences.

        Returns:
            The key sentences joined by single spaces, or
            ``"No content to summarize."`` when there is no text.
        """
        all_content = " ".join(self._content_text(msg) for msg in messages)
        key_sentences = self.extract_key_sentences(all_content, top_k=3)
        summary = " ".join(key_sentences)
        return summary if summary else "No content to summarize."

    def score_message_relevance(self, message: Dict[str, Any], context: str) -> float:
        """Return the Jaccard similarity of the word sets of *message* and *context*.

        Words are matched case-insensitively.  Returns 0.0 when neither side
        contains any word.
        """
        content_words = set(self._WORD_RE.findall(self._content_text(message).lower()))
        context_words = set(self._WORD_RE.findall((context or "").lower()))

        union = content_words | context_words
        if not union:
            return 0.0

        return len(content_words & context_words) / len(union)
|