chore: update md, py, toml files

retoor 2026-01-01 23:27:55 +01:00
commit ccb50fbdbb
11 changed files with 1044 additions and 0 deletions

.gitignore vendored Normal file

@@ -0,0 +1,37 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.hypothesis/
*.log
.env
.venv
venv/
ENV/
.idea/
.vscode/
*.swp
*.swo
*~
.DS_Store

CHANGELOG.md Normal file

@@ -0,0 +1,10 @@
# Changelog

## Version 1.1.0 - 2026-01-01

update md, py, toml files

**Changes:** 10 files, 1034 lines

**Languages:** Markdown (182 lines), Other (68 lines), Python (729 lines), TOML (50 lines), Text (5 lines)

Makefile Normal file

@@ -0,0 +1,31 @@
.PHONY: install dev run test test-unit test-integration test-quick clean build uninstall

install:
	pip install -r requirements.txt

dev:
	pip install -e .

run:
	python -m rsearch

test: test-integration

test-unit:
	pytest tests/ -v --ignore=tests/test_providers.py

test-integration:
	pytest tests/test_providers.py -v

test-quick:
	curl -s "http://localhost:8080/health" | python -m json.tool
	curl -s "http://localhost:8080/search?query=python&count=3" | python -m json.tool

clean:
	rm -rf __pycache__ *.egg-info dist build .eggs rsearch/__pycache__ tests/__pycache__ .pytest_cache

build:
	python -m build

uninstall:
	pip uninstall -y rsearch

README.md Normal file

@@ -0,0 +1,182 @@
# rsearch

Author: retoor <retoor@molodetz.nl>

Multi-source search aggregator API that queries multiple search engines and returns unified results without requiring API keys.

## Features

- 7 search providers with automatic fallback
- No API keys required (HTML scraping + public APIs)
- Async architecture for performance
- Unified JSON response format
- Fixed provider ordering by result quality
- Comprehensive integration tests

## Search Providers

| Provider | Type | Description |
|----------|------|-------------|
| Brave | Scraping | High quality web results |
| DuckDuckGo HTML | Scraping | Reliable lightweight version |
| Bing | Scraping | Microsoft search engine |
| Mojeek | Scraping | Independent search index |
| DuckDuckGo | API | Instant answers |
| Wikipedia | API | Encyclopedia reference |
| Wikidata | API | Structured knowledge base |

## Installation

Install dependencies:

```bash
pip install -r requirements.txt
```

Install as package (development mode):

```bash
pip install -e .
```

Install with test dependencies:

```bash
pip install -e ".[test]"
```

Or using make:

```bash
make dev
```

## Usage

Run as module:

```bash
python -m rsearch
```

Or after installation:

```bash
rsearch
```

Or using make:

```bash
make run
```

### Command Line Options

```
usage: rsearch [-h] [-H HOST] [-p PORT] [-l {DEBUG,INFO,WARNING,ERROR}] [-v]

options:
  -h, --help            show help message
  -H, --host HOST       Host to bind to (default: 0.0.0.0)
  -p, --port PORT       Port to listen on (default: 8080)
  -l, --log-level       Log level: DEBUG, INFO, WARNING, ERROR (default: INFO)
  -v, --version         show version number
```

Examples:

```bash
rsearch --port 9000                   # Run on port 9000
rsearch --host 127.0.0.1 --port 3000  # Bind to localhost:3000
rsearch --log-level DEBUG             # Enable debug logging
```
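
Or use the aggregator directly from Python without the HTTP server. A minimal sketch, assuming the package is installed in the current environment:

```python
import asyncio

from rsearch import MultiSearch

# MultiSearch.search() tries each provider in order and returns the
# first successful unified response as a plain dict.
result = asyncio.run(MultiSearch().search("python programming", 5))

print(result["source"], result["count"])
for item in result["results"]:
    print(item["title"], item["url"])
```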

## Testing

Run integration tests:

```bash
make test
```

Or directly with pytest:

```bash
pytest tests/test_providers.py -v
```

Quick API test (requires running server):

```bash
make test-quick
```
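
Run a single provider test by its pytest node id (for example, the Brave test defined in `tests/test_providers.py`):

```bash
pytest "tests/test_providers.py::TestProviders::test_brave_search" -v
```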

## API Endpoints

### Search

```
GET /search?query=<q>&count=<n>
```

Parameters:

- `query`: Search term (required)
- `count`: Number of results (default: 10, max: 100)

Response:

```json
{
  "query": "python",
  "source": "brave",
  "count": 3,
  "results": [
    {
      "title": "Welcome to Python.org",
      "url": "https://www.python.org/",
      "description": "The official home of the Python Programming Language",
      "source": "brave",
      "extra": {}
    }
  ],
  "timestamp": "2024-01-01T12:00:00.000000Z",
  "success": true,
  "error": null
}
```
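
Query the endpoint from the command line (assumes a server running on the default port, same call as `make test-quick`):

```bash
curl -s "http://localhost:8080/search?query=python&count=3" | python -m json.tool
```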

### Health Check

```
GET /health
```

Response:

```json
{
  "status": "ok",
  "services": ["brave", "duckduckgo_html", "bing", "mojeek", "duckduckgo", "wikipedia", "wikidata"],
  "timestamp": "2024-01-01T12:00:00.000000Z"
}
```
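
Call the API from Python with aiohttp (already a project dependency). A minimal client sketch; the `query_rsearch` helper name is illustrative and the host/port are assumed defaults:

```python
import asyncio

import aiohttp


async def query_rsearch(query: str, count: int = 10) -> dict:
    # Calls a locally running rsearch server; adjust host/port as needed.
    params = {"query": query, "count": str(count)}
    async with aiohttp.ClientSession() as session:
        async with session.get("http://localhost:8080/search", params=params) as resp:
            return await resp.json()


if __name__ == "__main__":
    data = asyncio.run(query_rsearch("python", 3))
    print(data["source"], [r["title"] for r in data["results"]])
```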

## Project Structure

```
rsearch/
├── rsearch/
│   ├── __init__.py
│   ├── __main__.py
│   └── app.py
├── tests/
│   ├── __init__.py
│   └── test_providers.py
├── requirements.txt
├── pyproject.toml
├── Makefile
└── README.md
```

## License

MIT

pyproject.toml Normal file

@@ -0,0 +1,50 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "rsearch"
version = "1.1.0"
description = "Multi-source search aggregator API"
authors = [
    {name = "retoor", email = "retoor@molodetz.nl"}
]
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
]
dependencies = [
    "aiohttp>=3.9.0",
    "beautifulsoup4>=4.12.0",
    "lxml>=5.0.0",
]

[project.optional-dependencies]
test = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
]

[project.scripts]
rsearch = "rsearch:main"

[project.urls]
Homepage = "https://github.com/retoor/rsearch"

[tool.setuptools.packages.find]
where = ["."]
include = ["rsearch*"]

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]

requirements.txt Normal file

@@ -0,0 +1,5 @@
aiohttp>=3.9.0
beautifulsoup4>=4.12.0
lxml>=5.0.0
pytest>=7.0.0
pytest-asyncio>=0.21.0

rsearch/__init__.py Normal file

@@ -0,0 +1,6 @@
# retoor <retoor@molodetz.nl>
from rsearch.app import MultiSearch, create_app, main
__version__ = "1.1.0"
__all__ = ["MultiSearch", "create_app", "main"]

rsearch/__main__.py Normal file

@@ -0,0 +1,6 @@
# retoor <retoor@molodetz.nl>
from rsearch.app import main
if __name__ == "__main__":
    main()

rsearch/app.py Normal file

@@ -0,0 +1,549 @@
# retoor <retoor@molodetz.nl>
import aiohttp
import argparse
import asyncio
from aiohttp import web
from datetime import datetime
from html import unescape
from typing import Dict, Optional, List, Callable
from urllib.parse import unquote, parse_qs, urlparse
import random
import logging
import re
import sys
from bs4 import BeautifulSoup
logger = logging.getLogger("search-api")


class MultiSearch:
    def __init__(self):
        self.services: List[Callable[[str, int], asyncio.Future]] = [
            self.brave_search,
            self.duckduckgo_html_search,
            self.bing_search,
            self.mojeek_search,
            self.duckduckgo_search,
            self.wikipedia_search,
            self.wikidata_search,
        ]
        self.user_agents = [
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        ]

    def _headers(self) -> Dict[str, str]:
        return {
            "User-Agent": random.choice(self.user_agents),
            "Accept": "application/json, text/html;q=0.9",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
        }

    async def _safe_get(
        self,
        session: aiohttp.ClientSession,
        url: str,
        params: Optional[Dict] = None,
        timeout: int = 10,
    ) -> Optional[Dict]:
        try:
            async with session.get(
                url,
                params=params,
                headers=self._headers(),
                timeout=aiohttp.ClientTimeout(total=timeout),
            ) as resp:
                if resp.status != 200:
                    return None
                try:
                    return await resp.json(content_type=None)
                except Exception:
                    text = await resp.text()
                    return {"_raw": text}
        except Exception as e:
            logger.warning(f"GET {url} failed: {e}")
            return None

    def _scrape_headers(self) -> Dict[str, str]:
        ua = random.choice(self.user_agents)
        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "DNT": "1",
        }
        if "Chrome" in ua and "Edg" not in ua:
            headers["Sec-Ch-Ua"] = '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
            headers["Sec-Ch-Ua-Mobile"] = "?0"
            headers["Sec-Ch-Ua-Platform"] = '"Linux"' if "Linux" in ua else '"Windows"'
        headers["Sec-Fetch-Dest"] = "document"
        headers["Sec-Fetch-Mode"] = "navigate"
        headers["Sec-Fetch-Site"] = "none"
        headers["Sec-Fetch-User"] = "?1"
        return headers

    async def _safe_get_html(
        self,
        session: aiohttp.ClientSession,
        url: str,
        params: Optional[Dict] = None,
        timeout: int = 15,
    ) -> Optional[str]:
        try:
            async with session.get(
                url,
                params=params,
                headers=self._scrape_headers(),
                timeout=aiohttp.ClientTimeout(total=timeout),
                allow_redirects=True,
            ) as resp:
                if resp.status != 200:
                    return None
                return await resp.text()
        except Exception as e:
            logger.warning(f"HTML GET {url} failed: {e}")
            return None

    def _clean_html(self, html: str) -> str:
        if not html:
            return ""
        text = re.sub(r'<[^>]+>', ' ', html)
        text = re.sub(r'\s+', ' ', text)
        return unescape(text).strip()

    async def duckduckgo_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://api.duckduckgo.com/"
        params = {
            "q": query,
            "format": "json",
            "no_html": "1",
            "skip_disambig": "1",
        }
        async with aiohttp.ClientSession() as session:
            data = await self._safe_get(session, url, params, timeout=10)
        if not data or not isinstance(data, dict):
            return None
        results = []
        if data.get("AbstractText"):
            results.append(
                {
                    "title": data.get("Heading", "Instant Answer"),
                    "url": data.get("AbstractURL", ""),
                    "description": data.get("AbstractText", "")[:500],
                    "source": "duckduckgo_instant",
                    "extra": {
                        "image": data.get("Image", ""),
                        "abstract_source": data.get("AbstractSource", ""),
                    },
                }
            )
        for item in data.get("Results", [])[: max(0, count - len(results))]:
            results.append(
                {
                    "title": item.get("Text", "")[:200],
                    "url": item.get("FirstURL", ""),
                    "description": item.get("Text", "")[:500],
                    "source": "duckduckgo",
                    "extra": {
                        "icon": item.get("Icon", ""),
                    },
                }
            )
        if not results:
            return None
        return self._wrap(query, "duckduckgo", results[:count])

    async def wikipedia_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://en.wikipedia.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": query,
            "srlimit": min(count, 50),
        }
        async with aiohttp.ClientSession() as session:
            data = await self._safe_get(session, url, params, timeout=10)
        if not data or not isinstance(data, dict):
            return None
        results = []
        for item in data.get("query", {}).get("search", [])[:count]:
            snippet = item.get("snippet", "")
            snippet = snippet.replace("<span class='searchmatch'>", "").replace(
                "</span>", ""
            )
            snippet = unescape(snippet)
            title = item.get("title", "")
            results.append(
                {
                    "title": title,
                    "url": "https://en.wikipedia.org/wiki/" + title.replace(" ", "_"),
                    "description": snippet[:500],
                    "source": "wikipedia",
                    "extra": {
                        "pageid": item.get("pageid"),
                        "size": item.get("size"),
                        "wordcount": item.get("wordcount"),
                        "timestamp": item.get("timestamp"),
                    },
                }
            )
        if not results:
            return None
        return self._wrap(query, "wikipedia", results)

    async def wikidata_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://www.wikidata.org/w/api.php"
        params = {
            "action": "wbsearchentities",
            "search": query,
            "format": "json",
            "language": "en",
            "limit": min(count, 50),
        }
        async with aiohttp.ClientSession() as session:
            data = await self._safe_get(session, url, params, timeout=10)
        if not data or not isinstance(data, dict):
            return None
        results = []
        for item in data.get("search", [])[:count]:
            results.append(
                {
                    "title": item.get("label", ""),
                    "url": item.get("url", ""),
                    "description": (item.get("description") or "")[:500],
                    "source": "wikidata",
                    "extra": {
                        "id": item.get("id"),
                        "aliases": (item.get("aliases") or [])[:5],
                    },
                }
            )
        if not results:
            return None
        return self._wrap(query, "wikidata", results)

    async def jina_search(self, query: str, count: int) -> Optional[Dict]:
        # This may change; if JSON not available, this will just return None
        url = f"https://s.jina.ai/{query}"
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(
                    url,
                    headers={"Accept": "application/json", **self._headers()},
                    timeout=aiohttp.ClientTimeout(total=15),
                ) as resp:
                    if resp.status != 200:
                        return None
                    data = await resp.json()
            except Exception as e:
                logger.warning(f"Jina search failed: {e}")
                return None
        if not isinstance(data, dict):
            return None
        raw_list = data.get("data") or data.get("results") or []
        if not isinstance(raw_list, list):
            return None
        results = []
        for item in raw_list[:count]:
            results.append(
                {
                    "title": (item.get("title") or "")[:200],
                    "url": item.get("url", ""),
                    "description": (item.get("description") or "")[:500],
                    "source": "jina",
                    "extra": {},
                }
            )
        if not results:
            return None
        return self._wrap(query, "jina", results)

    async def brave_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://search.brave.com/search"
        params = {"q": query, "source": "web"}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
        if not html:
            return None
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for snippet in soup.select('div[data-type="web"][data-pos]')[:count]:
            link = snippet.select_one('a[href^="http"]')
            title_div = snippet.select_one('div[class*="title"]')
            desc_div = snippet.select_one('div.generic-snippet div.content')
            if not desc_div:
                desc_div = snippet.select_one('div[class*="snippet-description"]')
            if link:
                title = ""
                if title_div:
                    title = title_div.get("title") or title_div.get_text(strip=True)
                desc = ""
                if desc_div:
                    desc = desc_div.get_text(strip=True)
                results.append({
                    "title": title[:200],
                    "url": link.get("href", ""),
                    "description": desc[:500],
                    "source": "brave",
                    "extra": {},
                })
        if not results:
            return None
        return self._wrap(query, "brave", results)

    async def duckduckgo_html_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://html.duckduckgo.com/html/"
        params = {"q": query}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
        if not html:
            return None
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for result in soup.select('div.result.results_links')[:count * 2]:
            link = result.select_one('a.result__a')
            snippet = result.select_one('a.result__snippet')
            if not link:
                continue
            result_url = link.get("href", "")
            if "uddg=" in result_url:
                parsed = urlparse(result_url)
                qs = parse_qs(parsed.query)
                if "uddg" in qs:
                    result_url = unquote(qs["uddg"][0])
            elif result_url.startswith("//"):
                result_url = "https:" + result_url
            if result_url.startswith("/") or "duckduckgo.com" in result_url:
                continue
            results.append({
                "title": link.get_text(strip=True)[:200],
                "url": result_url,
                "description": (snippet.get_text(strip=True) if snippet else "")[:500],
                "source": "duckduckgo_html",
                "extra": {},
            })
            if len(results) >= count:
                break
        if not results:
            return None
        return self._wrap(query, "duckduckgo_html", results)

    async def bing_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://www.bing.com/search"
        params = {"q": query, "count": min(count, 50)}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
        if not html:
            return None
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for item in soup.select('li.b_algo')[:count]:
            link = item.select_one('h2 a')
            desc = item.select_one('p')
            if link:
                results.append({
                    "title": link.get_text(strip=True)[:200],
                    "url": link.get("href", ""),
                    "description": (desc.get_text(strip=True) if desc else "")[:500],
                    "source": "bing",
                    "extra": {},
                })
        if not results:
            return None
        return self._wrap(query, "bing", results)

    async def mojeek_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://www.mojeek.com/search"
        params = {"q": query}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
        if not html:
            return None
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for item in soup.select('ul.results-standard li')[:count]:
            link = item.select_one('a.ob')
            title_el = item.select_one('a.title')
            desc = item.select_one('p.s')
            if link:
                title = title_el.get_text(strip=True) if title_el else link.get_text(strip=True)
                results.append({
                    "title": title[:200],
                    "url": link.get("href", ""),
                    "description": (desc.get_text(strip=True) if desc else "")[:500],
                    "source": "mojeek",
                    "extra": {},
                })
        if not results:
            return None
        return self._wrap(query, "mojeek", results)

    def _wrap(self, query: str, service: str, results: List[Dict]) -> Dict:
        return {
            "query": query,
            "source": service,
            "count": len(results),
            "results": results,
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "success": True,
            "error": None,
        }

    async def search(self, query: str, count: int) -> Dict:
        query = (query or "").strip()
        if not query:
            return {
                "query": "",
                "source": "none",
                "count": 0,
                "results": [],
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "success": False,
                "error": "Empty query",
            }
        count = max(1, min(int(count), 100))
        services = self.services
        logger.info(f"search '{query}' count={count} services={len(services)}")
        for fn in services:
            name = fn.__name__
            try:
                result = await asyncio.wait_for(fn(query, count), timeout=20)
            except asyncio.TimeoutError:
                logger.warning(f"{name} timed out")
                continue
            except Exception as e:
                logger.warning(f"{name} failed: {e}")
                continue
            if result and result.get("success") and result.get("count", 0) > 0:
                logger.info(f"using {result.get('source')} for '{query}'")
                return result
        return {
            "query": query,
            "source": "none",
            "count": 0,
            "results": [],
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "success": False,
            "error": "All services failed",
        }


searcher = MultiSearch()


async def handle_search(request: web.Request) -> web.Response:
    q = request.query.get("query", "")
    count_raw = request.query.get("count", "10")
    try:
        count = int(count_raw)
    except ValueError:
        count = 10
    result = await searcher.search(q, count)
    status = 200 if result.get("success") else 400
    return web.json_response(result, status=status)


async def handle_health(request: web.Request) -> web.Response:
    return web.json_response(
        {
            "status": "ok",
            "services": [
                "brave",
                "duckduckgo_html",
                "bing",
                "mojeek",
                "duckduckgo",
                "wikipedia",
                "wikidata",
            ],
            "timestamp": datetime.utcnow().isoformat() + "Z",
        }
    )


def create_app() -> web.Application:
    app = web.Application()
    app.router.add_get("/search", handle_search)
    app.router.add_get("/health", handle_health)
    return app


def parse_args(args=None):
    parser = argparse.ArgumentParser(
        prog="rsearch",
        description="Multi-source search aggregator API"
    )
    parser.add_argument(
        "-H", "--host",
        default="0.0.0.0",
        help="Host to bind to (default: 0.0.0.0)"
    )
    parser.add_argument(
        "-p", "--port",
        type=int,
        default=8080,
        help="Port to listen on (default: 8080)"
    )
    parser.add_argument(
        "-l", "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="Log level (default: INFO)"
    )
    parser.add_argument(
        "-v", "--version",
        action="version",
        version="%(prog)s 1.1.0"
    )
    return parser.parse_args(args)


def main(args=None):
    opts = parse_args(args)
    logging.basicConfig(
        level=getattr(logging, opts.log_level),
        format="%(asctime)s %(levelname)s %(name)s: %(message)s"
    )
    app = create_app()
    logger.info(f"Starting server on {opts.host}:{opts.port}")
    web.run_app(app, host=opts.host, port=opts.port, print=None)


if __name__ == "__main__":
    main()

tests/__init__.py Normal file

@@ -0,0 +1 @@
# retoor <retoor@molodetz.nl>

tests/test_providers.py Normal file

@@ -0,0 +1,167 @@
# retoor <retoor@molodetz.nl>
import asyncio

import pytest

from rsearch.app import MultiSearch


@pytest.fixture
def searcher():
    return MultiSearch()


@pytest.fixture
def query():
    return "python programming"


@pytest.fixture
def count():
    return 3


class TestProviders:
    @pytest.mark.asyncio
    async def test_brave_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.brave_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "brave"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "title" in first
        assert "url" in first
        assert "description" in first

    @pytest.mark.asyncio
    async def test_duckduckgo_html_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.duckduckgo_html_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "duckduckgo_html"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "title" in first
        assert "url" in first
        assert first["url"].startswith("http")

    @pytest.mark.asyncio
    async def test_bing_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.bing_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "bing"
        assert len(result.get("results", [])) > 0

    @pytest.mark.asyncio
    async def test_mojeek_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.mojeek_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "mojeek"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert not first["title"].startswith("http"), "Title should not be a URL"

    @pytest.mark.asyncio
    async def test_duckduckgo_api_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.duckduckgo_search(query, count), timeout=20)
        if result is not None:
            assert result.get("source") == "duckduckgo"
            if result.get("count", 0) > 0:
                assert result.get("success") is True

    @pytest.mark.asyncio
    async def test_wikipedia_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.wikipedia_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "wikipedia"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "wikipedia.org" in first["url"]

    @pytest.mark.asyncio
    async def test_wikidata_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.wikidata_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "wikidata"
        assert len(result.get("results", [])) > 0


class TestSearchAggregator:
    @pytest.mark.asyncio
    async def test_search_returns_results(self, searcher, query, count):
        result = await searcher.search(query, count)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("query") == query
        assert "timestamp" in result
        assert "results" in result

    @pytest.mark.asyncio
    async def test_search_empty_query(self, searcher, count):
        result = await searcher.search("", count)
        assert result is not None
        assert result.get("success") is False
        assert result.get("error") == "Empty query"

    @pytest.mark.asyncio
    async def test_search_count_limit(self, searcher, query):
        result = await searcher.search(query, 5)
        assert result is not None
        if result.get("success"):
            assert result.get("count", 0) <= 5

    @pytest.mark.asyncio
    async def test_result_format(self, searcher, query, count):
        result = await searcher.search(query, count)
        assert "query" in result
        assert "source" in result
        assert "count" in result
        assert "results" in result
        assert "timestamp" in result
        assert "success" in result
        assert "error" in result


class TestAllProviders:
    @pytest.mark.asyncio
    async def test_all_providers_return_valid_format(self, searcher, query, count):
        providers = [
            ("brave", searcher.brave_search),
            ("duckduckgo_html", searcher.duckduckgo_html_search),
            ("bing", searcher.bing_search),
            ("mojeek", searcher.mojeek_search),
            ("duckduckgo", searcher.duckduckgo_search),
            ("wikipedia", searcher.wikipedia_search),
            ("wikidata", searcher.wikidata_search),
        ]
        for name, fn in providers:
            try:
                result = await asyncio.wait_for(fn(query, count), timeout=20)
                if result is not None:
                    assert "query" in result, f"{name}: missing query"
                    assert "source" in result, f"{name}: missing source"
                    assert "count" in result, f"{name}: missing count"
                    assert "results" in result, f"{name}: missing results"
                    assert "timestamp" in result, f"{name}: missing timestamp"
                    assert "success" in result, f"{name}: missing success"
            except asyncio.TimeoutError:
                pytest.skip(f"{name} timed out")


if __name__ == "__main__":
    pytest.main([__file__, "-v"])