import re
from typing import Any, Dict, List
class AdvancedContextManager:
    """Conversation-context utilities: adaptive window sizing, message
    complexity scoring, extractive summarization, relevance scoring, and
    knowledge-base context enrichment.
    """

    def __init__(self, knowledge_store=None, conversation_memory=None):
        """Store optional backend objects.

        Args:
            knowledge_store: Object exposing ``search_entries(query, top_k)``
                returning entries with ``content`` and ``category`` attributes
                (used by ``create_enhanced_context``); may be None.
            conversation_memory: Optional conversation-history store; held but
                not used by any method visible in this class.
        """
        self.knowledge_store = knowledge_store
        self.conversation_memory = conversation_memory

    def adaptive_context_window(self, messages: List[Dict[str, Any]], complexity: str) -> int:
        """Calculate adaptive context window size based on message complexity.

        Args:
            messages: Conversation messages. NOTE(review): currently unused;
                kept for interface stability.
            complexity: One of "simple", "medium", "complex", "very_complex";
                unknown labels fall back to the "medium" multiplier.

        Returns:
            Window size in messages (base of 10 scaled by the multiplier).
        """
        base_window = 10
        complexity_multipliers = {
            "simple": 1.0,
            "medium": 2.0,
            "complex": 3.5,
            "very_complex": 5.0,
        }
        multiplier = complexity_multipliers.get(complexity, 2.0)
        return int(base_window * multiplier)

    def _analyze_message_complexity(self, messages: List[Dict[str, Any]]) -> float:
        """Analyze the complexity of messages and return a score between 0.0 and 1.0.

        Each message's score is the mean of three normalized factors: word
        count (length), sentence count (structure), and average word length
        (vocabulary). Messages with empty content are skipped but still count
        toward the final denominator.
        """
        if not messages:
            return 0.0

        total_complexity = 0.0
        for message in messages:
            content = message.get("content", "")
            if not content:
                continue

            words = content.split()
            word_count = len(words)
            # Drop empty fragments so trailing punctuation does not inflate
            # the count (re.split yields "" after a final "." / "!" / "?").
            sentence_count = len([s for s in re.split(r"[.!?]+", content) if s.strip()])
            avg_word_length = sum(len(word) for word in words) / max(word_count, 1)

            # Normalize each factor into [0, 1] before averaging.
            length_score = min(1.0, word_count / 100)
            structure_score = min(1.0, sentence_count / 10)
            vocabulary_score = min(1.0, avg_word_length / 8)

            total_complexity += (length_score + structure_score + vocabulary_score) / 3

        return min(1.0, total_complexity / len(messages))

    def extract_key_sentences(self, text: str, top_k: int = 5) -> List[str]:
        """Return up to ``top_k`` sentences ranked by a length/position heuristic.

        Args:
            text: Source text; blank input yields an empty list.
            top_k: Maximum number of sentences to return.

        Returns:
            Sentences ordered by descending score.
        """
        if not text.strip():
            return []

        # Split after sentence-final punctuation; discard empty fragments so
        # trailing whitespace cannot produce a "" sentence in the output.
        sentences = [s for s in re.split(r"(?<=[.!?])\s+", text) if s.strip()]
        if not sentences:
            return []

        # Simple scoring based on length and position.
        scored_sentences = []
        for i, sentence in enumerate(sentences):
            length_score = min(1.0, len(sentence) / 50)
            # The lead sentence scores highest; sentences in the first half
            # of the text outrank those in the second half.
            position_score = 1.0 if i == 0 else 0.8 if i < len(sentences) / 2 else 0.6
            score = (length_score + position_score) / 2
            scored_sentences.append((sentence, score))

        scored_sentences.sort(key=lambda x: x[1], reverse=True)
        return [s[0] for s in scored_sentences[:top_k]]

    def advanced_summarize_messages(self, messages: List[Dict[str, Any]]) -> str:
        """Summarize messages by joining their top-3 key sentences.

        Returns:
            The extractive summary, or "No content to summarize." when no
            scoreable content is found.
        """
        all_content = " ".join([msg.get("content", "") for msg in messages])
        key_sentences = self.extract_key_sentences(all_content, top_k=3)
        summary = " ".join(key_sentences)
        return summary if summary else "No content to summarize."

    def score_message_relevance(self, message: Dict[str, Any], context: str) -> float:
        """Score a message's relevance to ``context`` via Jaccard word overlap.

        Returns:
            ``|intersection| / |union|`` over lowercase word sets, or 0.0
            when both texts contain no words.
        """
        content = message.get("content", "")
        content_words = set(re.findall(r"\b\w+\b", content.lower()))
        context_words = set(re.findall(r"\b\w+\b", context.lower()))

        intersection = content_words & context_words
        union = content_words | context_words

        if not union:
            return 0.0

        return len(intersection) / len(union)

    def create_enhanced_context(
        self, messages: List[Dict[str, Any]], user_message: str, include_knowledge: bool = True
    ) -> tuple:
        """Create enhanced context with knowledge base integration.

        Args:
            messages: Existing conversation messages (not mutated; a shallow
                copy is extended).
            user_message: Query used to search the knowledge store.
            include_knowledge: When False, or when no store is configured,
                the messages are returned unchanged.

        Returns:
            Tuple ``(working_messages, context_info)`` where ``context_info``
            is a human-readable description of what was added.
        """
        working_messages = messages.copy()

        if include_knowledge and self.knowledge_store:
            # Search knowledge base for relevant information.
            search_results = self.knowledge_store.search_entries(user_message, top_k=3)

            if search_results:
                knowledge_parts = []
                for idx, entry in enumerate(search_results, 1):
                    content = entry.content
                    # Truncate long entries so a single entry cannot dominate
                    # the prompt budget.
                    if len(content) > 2000:
                        content = content[:2000] + "..."

                    knowledge_parts.append(f"Match {idx} (Category: {entry.category}):\n{content}")

                knowledge_message_content = (
                    "[KNOWLEDGE_BASE_CONTEXT]\nRelevant knowledge base entries:\n\n"
                    + "\n\n".join(knowledge_parts)
                )

                knowledge_message = {"role": "user", "content": knowledge_message_content}
                working_messages.append(knowledge_message)

                context_info = f"Added {len(search_results)} knowledge base entries"
            else:
                context_info = "No relevant knowledge base entries found"
        else:
            context_info = "Knowledge base integration disabled"

        return working_messages, context_info
|