# rp/pr/core/advanced_context.py

import re
from typing import Any, Dict, List


class AdvancedContextManager:
    """Heuristic helpers for sizing, summarizing and scoring chat messages.

    Messages are dictionaries whose text is read from the ``"content"`` key.
    A missing or ``None`` content is treated as empty text, and non-string
    content is converted with ``str()`` so the heuristics never raise on it.

    ``knowledge_store`` and ``conversation_memory`` are stored on the
    instance as given; none of the methods in this class read them.
    """

    # Compiled once; both patterns are used on every call.
    _WORD_RE = re.compile(r"\b\w+\b")
    # Split on whitespace that follows sentence-ending punctuation.
    _SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+")

    # Base window size for each named task complexity.
    _COMPLEXITY_THRESHOLDS = {
        "simple": 10,
        "medium": 20,
        "complex": 35,
        "very_complex": 50,
    }
    # Used when ``task_complexity`` is not a known key.
    _DEFAULT_THRESHOLD = 20

    def __init__(self, knowledge_store=None, conversation_memory=None):
        """Store the optional collaborators on the instance."""
        self.knowledge_store = knowledge_store
        self.conversation_memory = conversation_memory

    @staticmethod
    def _message_text(message: Dict[str, Any]) -> str:
        """Return the message's ``"content"`` as a string, never ``None``.

        ``dict.get("content", "")`` still yields ``None`` when the key is
        present with a ``None`` value, so that case is handled explicitly.
        """
        content = message.get("content")
        if content is None:
            return ""
        return content if isinstance(content, str) else str(content)

    def adaptive_context_window(
        self, messages: List[Dict[str, Any]], task_complexity: str = "medium"
    ) -> int:
        """Return a context-window size for the given task complexity.

        Args:
            messages: Messages whose content drives the complexity score.
            task_complexity: One of ``"simple"``, ``"medium"``, ``"complex"``
                or ``"very_complex"``; any other value falls back to 20.

        Returns:
            The base threshold, scaled by 1.5 (truncated to ``int``) when the
            message complexity score exceeds 0.7.
        """
        base_threshold = self._COMPLEXITY_THRESHOLDS.get(
            task_complexity, self._DEFAULT_THRESHOLD
        )
        score = self._analyze_message_complexity(messages)

        if score > 0.7:
            adjusted = int(base_threshold * 1.5)
        elif score < 0.3:
            adjusted = int(base_threshold * 0.7)
        else:
            adjusted = base_threshold

        # NOTE: the base threshold acts as a floor, so the low-complexity
        # shrink above never lowers the result. Kept as-is because callers
        # may rely on the window never dropping below the base threshold.
        return max(base_threshold, adjusted)

    def _analyze_message_complexity(self, messages: List[Dict[str, Any]]) -> float:
        """Score message complexity in the range 0 to 1.0.

        The score is ``average characters per message / 100`` plus
        ``unique lowercase words / total characters``, capped at 1.0.
        Empty input (or input with no text) scores 0.
        """
        texts = [self._message_text(msg) for msg in messages]
        total_length = sum(len(text) for text in texts)
        avg_length = total_length / len(texts) if texts else 0

        unique_words = set()
        for text in texts:
            unique_words.update(self._WORD_RE.findall(text.lower()))

        vocabulary_richness = len(unique_words) / total_length if total_length > 0 else 0

        # Simple complexity score based on length and richness
        return min(1.0, (avg_length / 100) + vocabulary_richness)

    def extract_key_sentences(self, text: str, top_k: int = 5) -> List[str]:
        """Return up to ``top_k`` sentences from ``text``, best score first.

        Each sentence's score is the mean of a length score
        (``len / 50`` capped at 1.0) and a position score (1.0 for the first
        sentence, 0.8 for the rest of the first half, 0.6 afterwards).
        Ties keep their original order.

        Args:
            text: Text to split into sentences.
            top_k: Maximum number of sentences to return; values ``<= 0``
                yield an empty list.

        Returns:
            The selected sentences, or ``[]`` for empty/whitespace-only text.
        """
        if not text or top_k <= 0:
            return []
        # Strip first so leading/trailing whitespace cannot produce an empty
        # "sentence" or a sentence with stray leading spaces.
        stripped = text.strip()
        if not stripped:
            return []
        sentences = self._SENTENCE_SPLIT_RE.split(stripped)

        # Simple scoring based on length and position
        halfway = len(sentences) / 2
        scored_sentences = []
        for i, sentence in enumerate(sentences):
            length_score = min(1.0, len(sentence) / 50)
            if i == 0:
                position_score = 1.0
            elif i < halfway:
                position_score = 0.8
            else:
                position_score = 0.6
            scored_sentences.append((sentence, (length_score + position_score) / 2))

        # list.sort is stable, so equal scores stay in document order.
        scored_sentences.sort(key=lambda pair: pair[1], reverse=True)
        return [sentence for sentence, _ in scored_sentences[:top_k]]

    def advanced_summarize_messages(self, messages: List[Dict[str, Any]]) -> str:
        """Summarize messages as their three highest-scoring sentences.

        Returns:
            The selected sentences joined by single spaces, or
            ``"No content to summarize."`` when there is no text.
        """
        all_content = " ".join(self._message_text(msg) for msg in messages)
        key_sentences = self.extract_key_sentences(all_content, top_k=3)
        summary = " ".join(key_sentences)
        return summary if summary else "No content to summarize."

    def score_message_relevance(self, message: Dict[str, Any], context: str) -> float:
        """Return the Jaccard similarity of message words and context words.

        Words are matched case-insensitively. The result is
        ``|intersection| / |union|`` of the two word sets, or ``0.0`` when
        both are empty.
        """
        content_words = set(self._WORD_RE.findall(self._message_text(message).lower()))
        context_words = set(self._WORD_RE.findall((context or "").lower()))

        union = content_words | context_words
        if not union:
            return 0.0

        return len(content_words & context_words) / len(union)