from rp.memory.fact_extractor import FactExtractor


class TestFactExtractor:
    """Pytest suite for ``FactExtractor``.

    Every test runs against a fresh extractor built in ``setup_method``.
    Most assertions are lower bounds ("at least one match") rather than
    exact expected outputs.
    """

    # Names starting with an underscore are not collected by pytest, so the
    # two helpers below are plain utilities shared by the tests.
    @staticmethod
    def _of_type(records, kind):
        """Return the entries of *records* whose ``"type"`` key equals *kind*."""
        return [record for record in records if record["type"] == kind]

    @staticmethod
    def _words(terms):
        """Return the first element of each entry in *terms*."""
        return [entry[0] for entry in terms]

    def setup_method(self):
        """Build the extractor used by each test."""
        self.extractor = FactExtractor()

    def test_init(self):
        """A new extractor exposes a non-empty ``fact_patterns`` attribute."""
        assert self.extractor.fact_patterns is not None
        assert len(self.extractor.fact_patterns) > 0

    def test_extract_facts_definition(self):
        """'X is a Y' sentences yield facts, including a ``definition`` one."""
        sample = (
            "John Smith is a software engineer. "
            "Python is a programming language."
        )
        found = self.extractor.extract_facts(sample)
        assert len(found) >= 2
        # At least one of the extracted facts must be tagged as a definition.
        assert len(self._of_type(found, "definition")) >= 1

    def test_extract_facts_temporal(self):
        """Sentences mentioning years yield at least one ``temporal`` fact."""
        sample = "John was born in 1990. The company was founded in 2010."
        found = self.extractor.extract_facts(sample)
        assert len(self._of_type(found, "temporal")) >= 1

    def test_extract_facts_attribution(self):
        """'invented'/'developed' sentences yield an ``attribution`` fact."""
        sample = "John invented the widget. Mary developed the software."
        found = self.extractor.extract_facts(sample)
        assert len(self._of_type(found, "attribution")) >= 1

    def test_extract_facts_numeric(self):
        """Dollar amounts yield at least one ``numeric`` fact."""
        sample = "The car costs $25,000. The house is worth $500,000."
        found = self.extractor.extract_facts(sample)
        assert len(self._of_type(found, "numeric")) >= 1

    def test_extract_facts_location(self):
        """'lives in'/'located in' sentences yield a ``location`` fact."""
        sample = (
            "John lives in San Francisco. "
            "The office is located in New York."
        )
        found = self.extractor.extract_facts(sample)
        assert len(self._of_type(found, "location")) >= 1

    def test_extract_facts_entity(self):
        """Capitalized names in the text yield at least one ``entity`` fact."""
        sample = "John Smith works at Google Inc. He uses Python programming."
        found = self.extractor.extract_facts(sample)
        assert len(self._of_type(found, "entity")) >= 1

    def test_extract_noun_phrases(self):
        """Capitalized phrases in the text appear in the extracted phrases."""
        sample = (
            "John Smith is a software engineer at Google. "
            "He works on Python Projects."
        )
        found = self.extractor._extract_noun_phrases(sample)
        for expected in ("John Smith", "Google", "Python Projects"):
            assert expected in found

    def test_extract_noun_phrases_capitalized(self):
        """All-lowercase text produces no noun phrases."""
        sample = "the quick brown fox jumps over the lazy dog"
        found = self.extractor._extract_noun_phrases(sample)
        # Nothing is capitalized, so nothing should be reported.
        assert len(found) == 0

    def test_extract_key_terms(self):
        """``top_k`` caps the result and content words are among the terms."""
        sample = (
            "Python is a programming language used for software "
            "development and data analysis."
        )
        ranked = self.extractor.extract_key_terms(sample, top_k=5)
        assert len(ranked) <= 5
        # Only a subset of the content words is checked here.
        words = self._words(ranked)
        for expected in ("programming", "language", "software"):
            assert expected in words

    def test_extract_key_terms_stopwords_filtered(self):
        """Common stopwords never show up among the key terms."""
        sample = "This is a test of the system that should work properly."
        ranked = self.extractor.extract_key_terms(sample)
        words = self._words(ranked)
        for stopword in ("this", "is", "a", "of", "the"):
            assert stopword not in words

    def test_extract_relationships_employment(self):
        """'works for'/'employed by' yield an ``employment`` relationship."""
        sample = "John works for Google. Mary is employed by Microsoft."
        found = self.extractor.extract_relationships(sample)
        assert len(self._of_type(found, "employment")) >= 1

    def test_extract_relationships_ownership(self):
        """'owns'/'has' yield at least one ``ownership`` relationship."""
        sample = "John owns a car. Mary has a house."
        found = self.extractor.extract_relationships(sample)
        assert len(self._of_type(found, "ownership")) >= 1

    def test_extract_relationships_location(self):
        """The sample text yields at least one ``location`` relationship."""
        sample = "John located in New York. The factory belongs to Google."
        found = self.extractor.extract_relationships(sample)
        assert len(self._of_type(found, "location")) >= 1

    def test_extract_relationships_usage(self):
        """'uses'/'implements' yield at least one ``usage`` relationship."""
        sample = "John uses Python. The company implements agile methodology."
        found = self.extractor.extract_relationships(sample)
        assert len(self._of_type(found, "usage")) >= 1

    def test_extract_metadata(self):
        """Metadata reports counts, URLs, emails, dates, numbers, no code."""
        sample = (
            "This is a test document. "
            "It contains some information about Python programming. "
            "You can visit https://python.org for more details. "
            "Contact john@example.com for questions. "
            "The project started in 2020 and costs $10,000."
        )
        info = self.extractor.extract_metadata(sample)
        # Scalar statistics must all be positive for non-empty prose.
        for key in ("word_count", "sentence_count", "avg_words_per_sentence"):
            assert info[key] > 0
        # Each collection must have picked up its item from the sample.
        for key in ("urls", "email_addresses", "dates", "numeric_values"):
            assert len(info[key]) > 0
        # The sample is plain prose, so the code flag stays off.
        assert info["has_code"] is False

    def test_extract_metadata_with_code(self):
        """Text containing a ``def`` snippet is flagged as having code."""
        sample = "Here is a function: def hello(): print('Hello, world!')"
        info = self.extractor.extract_metadata(sample)
        assert info["has_code"] is True

    def test_extract_metadata_with_questions(self):
        """Text made of questions is flagged as having questions."""
        sample = "What is Python? How does it work? Why use it?"
        info = self.extractor.extract_metadata(sample)
        assert info["has_questions"] is True

    def test_categorize_content_programming(self):
        """Programming vocabulary maps to the ``programming`` category."""
        sample = (
            "Python is a programming language used for code development "
            "and debugging."
        )
        labels = self.extractor.categorize_content(sample)
        assert "programming" in labels

    def test_categorize_content_data(self):
        """Database/statistics vocabulary maps to the ``data`` category."""
        sample = (
            "The database contains records and tables with statistical "
            "analysis."
        )
        labels = self.extractor.categorize_content(sample)
        assert "data" in labels

    def test_categorize_content_documentation(self):
        """Guide/tutorial vocabulary maps to the ``documentation`` category."""
        sample = "This guide explains how to use the tutorial and manual."
        labels = self.extractor.categorize_content(sample)
        assert "documentation" in labels

    def test_categorize_content_configuration(self):
        """Settings/setup vocabulary maps to the ``configuration`` category."""
        sample = "Configure the settings and setup the deployment environment."
        labels = self.extractor.categorize_content(sample)
        assert "configuration" in labels

    def test_categorize_content_testing(self):
        """Test/validate vocabulary maps to the ``testing`` category."""
        sample = (
            "Run the tests to validate the functionality and verify quality."
        )
        labels = self.extractor.categorize_content(sample)
        assert "testing" in labels

    def test_categorize_content_research(self):
        """Study/findings vocabulary maps to the ``research`` category."""
        sample = "The study investigates findings and results from the analysis."
        labels = self.extractor.categorize_content(sample)
        assert "research" in labels

    def test_categorize_content_planning(self):
        """Schedule/milestone vocabulary maps to the ``planning`` category."""
        sample = "Plan the project schedule with milestones and timeline."
        labels = self.extractor.categorize_content(sample)
        assert "planning" in labels

    def test_categorize_content_general(self):
        """Text with no category keywords is labelled ``general``."""
        sample = "This is some random text without specific keywords."
        labels = self.extractor.categorize_content(sample)
        assert "general" in labels

    def test_extract_facts_empty_text(self):
        """An empty string produces no facts."""
        found = self.extractor.extract_facts("")
        assert len(found) == 0

    def test_extract_key_terms_empty_text(self):
        """An empty string produces no key terms."""
        ranked = self.extractor.extract_key_terms("")
        assert len(ranked) == 0

    def test_extract_relationships_empty_text(self):
        """An empty string produces no relationships."""
        found = self.extractor.extract_relationships("")
        assert len(found) == 0

    def test_extract_metadata_empty_text(self):
        """An empty string yields zero word, sentence and average counts."""
        info = self.extractor.extract_metadata("")
        assert info["word_count"] == 0
        assert info["sentence_count"] == 0
        assert info["avg_words_per_sentence"] == 0.0