chore: update md, py, toml files

commit ccb50fbdbb

37 .gitignore vendored Normal file
@@ -0,0 +1,37 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.hypothesis/
*.log
.env
.venv
venv/
ENV/
.idea/
.vscode/
*.swp
*.swo
*~
.DS_Store

10 CHANGELOG.md Normal file
@@ -0,0 +1,10 @@
# Changelog



## Version 1.1.0 - 2026-01-01

update md, py, toml files

**Changes:** 10 files, 1034 lines
**Languages:** Markdown (182 lines), Other (68 lines), Python (729 lines), TOML (50 lines), Text (5 lines)

31 Makefile Normal file
@@ -0,0 +1,31 @@
.PHONY: install dev run test test-unit test-integration clean build uninstall

install:
	pip install -r requirements.txt

dev:
	pip install -e .

run:
	python -m rsearch

test: test-integration

test-unit:
	pytest tests/ -v --ignore=tests/test_providers.py

test-integration:
	pytest tests/test_providers.py -v

test-quick:
	curl -s "http://localhost:8080/health" | python -m json.tool
	curl -s "http://localhost:8080/search?query=python&count=3" | python -m json.tool

clean:
	rm -rf __pycache__ *.egg-info dist build .eggs rsearch/__pycache__ tests/__pycache__ .pytest_cache

build:
	python -m build

uninstall:
	pip uninstall -y rsearch

182 README.md Normal file
@@ -0,0 +1,182 @@
# rsearch

Author: retoor <retoor@molodetz.nl>

Multi-source search aggregator API that queries multiple search engines and returns unified results without requiring API keys.

## Features

- 7 search providers with automatic fallback
- No API keys required (HTML scraping + public APIs)
- Async architecture for performance
- Unified JSON response format
- Fixed provider ordering by result quality
- Comprehensive integration tests

## Search Providers

| Provider | Type | Description |
|----------|------|-------------|
| Brave | Scraping | High quality web results |
| DuckDuckGo HTML | Scraping | Reliable lightweight version |
| Bing | Scraping | Microsoft search engine |
| Mojeek | Scraping | Independent search index |
| DuckDuckGo | API | Instant answers |
| Wikipedia | API | Encyclopedia reference |
| Wikidata | API | Structured knowledge base |

## Installation

Install dependencies:

```bash
pip install -r requirements.txt
```

Install as package (development mode):

```bash
pip install -e .
```

Install with test dependencies:

```bash
pip install -e ".[test]"
```

Or using make:

```bash
make dev
```

## Usage

Run as module:

```bash
python -m rsearch
```

Or after installation:

```bash
rsearch
```

Or using make:

```bash
make run
```
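Or drive the aggregator directly from asyncio code, without the HTTP server (a minimal sketch, assuming the package is installed):

```python
# Sketch: call MultiSearch.search() directly instead of going through HTTP.
import asyncio

from rsearch import MultiSearch


async def demo() -> None:
    searcher = MultiSearch()
    # Returns the unified response dict of the first provider that succeeds.
    result = await searcher.search("python programming", 3)
    print(result["source"], result["count"])
    for item in result["results"]:
        print("-", item["title"], item["url"])


asyncio.run(demo())
```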

### Command Line Options

```
usage: rsearch [-h] [-H HOST] [-p PORT] [-l {DEBUG,INFO,WARNING,ERROR}] [-v]

options:
  -h, --help        show help message
  -H, --host HOST   Host to bind to (default: 0.0.0.0)
  -p, --port PORT   Port to listen on (default: 8080)
  -l, --log-level   Log level: DEBUG, INFO, WARNING, ERROR (default: INFO)
  -v, --version     show version number
```

Examples:

```bash
rsearch --port 9000                    # Run on port 9000
rsearch --host 127.0.0.1 --port 3000   # Bind to localhost:3000
rsearch --log-level DEBUG              # Enable debug logging
```

## Testing

Run integration tests:

```bash
make test
```

Or directly with pytest:

```bash
pytest tests/test_providers.py -v
```

Quick API test (requires running server):

```bash
make test-quick
```

## API Endpoints

### Search

```
GET /search?query=<q>&count=<n>
```

Parameters:
- `query`: Search term (required)
- `count`: Number of results (default: 10, max: 100)

Response:
```json
{
  "query": "python",
  "source": "brave",
  "count": 3,
  "results": [
    {
      "title": "Welcome to Python.org",
      "url": "https://www.python.org/",
      "description": "The official home of the Python Programming Language",
      "source": "brave",
      "extra": {}
    }
  ],
  "timestamp": "2024-01-01T12:00:00.000000Z",
  "success": true,
  "error": null
}
```
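
For example, the endpoint can be queried from Python with aiohttp (a sketch only; `fetch_results` is an illustrative helper, and a server is assumed to be running on localhost:8080):

```python
# Sketch of a /search client; fetch_results is not part of the rsearch package.
import asyncio

import aiohttp


async def fetch_results(query: str, count: int = 10) -> list:
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "http://localhost:8080/search",
            params={"query": query, "count": str(count)},
        ) as resp:
            data = await resp.json()
    # Failures still come back as JSON, with success=false and an error message.
    if not data.get("success"):
        raise RuntimeError(data.get("error") or "search failed")
    return data["results"]


if __name__ == "__main__":
    for item in asyncio.run(fetch_results("python", count=3)):
        print(item["source"], item["title"], item["url"])
```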

### Health Check

```
GET /health
```

Response:
```json
{
  "status": "ok",
  "services": ["brave", "duckduckgo_html", "bing", "mojeek", "duckduckgo", "wikipedia", "wikidata"],
  "timestamp": "2024-01-01T12:00:00.000000Z"
}
```

## Project Structure

```
rsearch/
├── rsearch/
│   ├── __init__.py
│   ├── __main__.py
│   └── app.py
├── tests/
│   ├── __init__.py
│   └── test_providers.py
├── requirements.txt
├── pyproject.toml
├── Makefile
└── README.md
```

## License

MIT

50 pyproject.toml Normal file
@@ -0,0 +1,50 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "rsearch"
version = "1.1.0"
description = "Multi-source search aggregator API"
authors = [
    {name = "retoor", email = "retoor@molodetz.nl"}
]
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
]
dependencies = [
    "aiohttp>=3.9.0",
    "beautifulsoup4>=4.12.0",
    "lxml>=5.0.0",
]

[project.optional-dependencies]
test = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
]

[project.scripts]
rsearch = "rsearch:main"

[project.urls]
Homepage = "https://github.com/retoor/rsearch"

[tool.setuptools.packages.find]
where = ["."]
include = ["rsearch*"]

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]

5 requirements.txt Normal file
@@ -0,0 +1,5 @@
aiohttp>=3.9.0
beautifulsoup4>=4.12.0
lxml>=5.0.0
pytest>=7.0.0
pytest-asyncio>=0.21.0

6 rsearch/__init__.py Normal file
@@ -0,0 +1,6 @@
# retoor <retoor@molodetz.nl>

from rsearch.app import MultiSearch, create_app, main

__version__ = "1.1.0"
__all__ = ["MultiSearch", "create_app", "main"]

6 rsearch/__main__.py Normal file
@@ -0,0 +1,6 @@
# retoor <retoor@molodetz.nl>

from rsearch.app import main

if __name__ == "__main__":
    main()

549 rsearch/app.py Normal file
@@ -0,0 +1,549 @@
# retoor <retoor@molodetz.nl>

import aiohttp
import argparse
import asyncio
from aiohttp import web
from datetime import datetime
from html import unescape
from typing import Dict, Optional, List, Callable
from urllib.parse import unquote, parse_qs, urlparse
import random
import logging
import re
import sys
from bs4 import BeautifulSoup
logger = logging.getLogger("search-api")


class MultiSearch:
    def __init__(self):
        self.services: List[Callable[[str, int], asyncio.Future]] = [
            self.brave_search,
            self.duckduckgo_html_search,
            self.bing_search,
            self.mojeek_search,
            self.duckduckgo_search,
            self.wikipedia_search,
            self.wikidata_search,
        ]
        self.user_agents = [
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        ]

    def _headers(self) -> Dict[str, str]:
        return {
            "User-Agent": random.choice(self.user_agents),
            "Accept": "application/json, text/html;q=0.9",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
        }

    async def _safe_get(
        self,
        session: aiohttp.ClientSession,
        url: str,
        params: Optional[Dict] = None,
        timeout: int = 10,
    ) -> Optional[Dict]:
        try:
            async with session.get(
                url,
                params=params,
                headers=self._headers(),
                timeout=aiohttp.ClientTimeout(total=timeout),
            ) as resp:
                if resp.status != 200:
                    return None
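                # Some APIs serve JSON with a non-JSON content type, so skip
                # aiohttp's content-type check; if parsing still fails, hand
                # back the raw body instead of erroring out.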
                try:
                    return await resp.json(content_type=None)
                except Exception:
                    text = await resp.text()
                    return {"_raw": text}
        except Exception as e:
            logger.warning(f"GET {url} failed: {e}")
            return None

    def _scrape_headers(self) -> Dict[str, str]:
        ua = random.choice(self.user_agents)
        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "DNT": "1",
        }
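        # Client-hint and Sec-Fetch headers are only added for Chrome UAs;
        # Edge is excluded explicitly because its UA string also contains "Chrome".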
if "Chrome" in ua and "Edg" not in ua:
|
||||||
|
headers["Sec-Ch-Ua"] = '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
|
||||||
|
headers["Sec-Ch-Ua-Mobile"] = "?0"
|
||||||
|
headers["Sec-Ch-Ua-Platform"] = '"Linux"' if "Linux" in ua else '"Windows"'
|
||||||
|
headers["Sec-Fetch-Dest"] = "document"
|
||||||
|
headers["Sec-Fetch-Mode"] = "navigate"
|
||||||
|
headers["Sec-Fetch-Site"] = "none"
|
||||||
|
headers["Sec-Fetch-User"] = "?1"
|
||||||
|
return headers
|
||||||
|
|
||||||
|
async def _safe_get_html(
|
||||||
|
self,
|
||||||
|
session: aiohttp.ClientSession,
|
||||||
|
url: str,
|
||||||
|
params: Optional[Dict] = None,
|
||||||
|
timeout: int = 15,
|
||||||
|
) -> Optional[str]:
|
||||||
|
try:
|
||||||
|
async with session.get(
|
||||||
|
url,
|
||||||
|
params=params,
|
||||||
|
headers=self._scrape_headers(),
|
||||||
|
timeout=aiohttp.ClientTimeout(total=timeout),
|
||||||
|
allow_redirects=True,
|
||||||
|
) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
return None
|
||||||
|
return await resp.text()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"HTML GET {url} failed: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _clean_html(self, html: str) -> str:
|
||||||
|
if not html:
|
||||||
|
return ""
|
||||||
|
text = re.sub(r'<[^>]+>', ' ', html)
|
||||||
|
text = re.sub(r'\s+', ' ', text)
|
||||||
|
return unescape(text).strip()
|
||||||
|
|
||||||
|
async def duckduckgo_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
url = "https://api.duckduckgo.com/"
|
||||||
|
params = {
|
||||||
|
"q": query,
|
||||||
|
"format": "json",
|
||||||
|
"no_html": "1",
|
||||||
|
"skip_disambig": "1",
|
||||||
|
}
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
data = await self._safe_get(session, url, params, timeout=10)
|
||||||
|
if not data or not isinstance(data, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
results = []
|
||||||
|
|
||||||
|
if data.get("AbstractText"):
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"title": data.get("Heading", "Instant Answer"),
|
||||||
|
"url": data.get("AbstractURL", ""),
|
||||||
|
"description": data.get("AbstractText", "")[:500],
|
||||||
|
"source": "duckduckgo_instant",
|
||||||
|
"extra": {
|
||||||
|
"image": data.get("Image", ""),
|
||||||
|
"abstract_source": data.get("AbstractSource", ""),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
for item in data.get("Results", [])[: max(0, count - len(results))]:
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"title": item.get("Text", "")[:200],
|
||||||
|
"url": item.get("FirstURL", ""),
|
||||||
|
"description": item.get("Text", "")[:500],
|
||||||
|
"source": "duckduckgo",
|
||||||
|
"extra": {
|
||||||
|
"icon": item.get("Icon", ""),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return self._wrap(query, "duckduckgo", results[:count])
|
||||||
|
|
||||||
|
async def wikipedia_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
url = "https://en.wikipedia.org/w/api.php"
|
||||||
|
params = {
|
||||||
|
"action": "query",
|
||||||
|
"format": "json",
|
||||||
|
"list": "search",
|
||||||
|
"srsearch": query,
|
||||||
|
"srlimit": min(count, 50),
|
||||||
|
}
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
data = await self._safe_get(session, url, params, timeout=10)
|
||||||
|
if not data or not isinstance(data, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for item in data.get("query", {}).get("search", [])[:count]:
|
||||||
|
snippet = item.get("snippet", "")
|
||||||
|
snippet = snippet.replace("<span class='searchmatch'>", "").replace(
|
||||||
|
"</span>", ""
|
||||||
|
)
|
||||||
|
snippet = unescape(snippet)
|
||||||
|
title = item.get("title", "")
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"title": title,
|
||||||
|
"url": "https://en.wikipedia.org/wiki/" + title.replace(" ", "_"),
|
||||||
|
"description": snippet[:500],
|
||||||
|
"source": "wikipedia",
|
||||||
|
"extra": {
|
||||||
|
"pageid": item.get("pageid"),
|
||||||
|
"size": item.get("size"),
|
||||||
|
"wordcount": item.get("wordcount"),
|
||||||
|
"timestamp": item.get("timestamp"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return self._wrap(query, "wikipedia", results)
|
||||||
|
|
||||||
|
async def wikidata_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
url = "https://www.wikidata.org/w/api.php"
|
||||||
|
params = {
|
||||||
|
"action": "wbsearchentities",
|
||||||
|
"search": query,
|
||||||
|
"format": "json",
|
||||||
|
"language": "en",
|
||||||
|
"limit": min(count, 50),
|
||||||
|
}
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
data = await self._safe_get(session, url, params, timeout=10)
|
||||||
|
if not data or not isinstance(data, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for item in data.get("search", [])[:count]:
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"title": item.get("label", ""),
|
||||||
|
"url": item.get("url", ""),
|
||||||
|
"description": (item.get("description") or "")[:500],
|
||||||
|
"source": "wikidata",
|
||||||
|
"extra": {
|
||||||
|
"id": item.get("id"),
|
||||||
|
"aliases": (item.get("aliases") or [])[:5],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return self._wrap(query, "wikidata", results)
|
||||||
|
|
||||||
|
async def jina_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
# This may change; if JSON not available, this will just return None
|
||||||
|
url = f"https://s.jina.ai/{query}"
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
try:
|
||||||
|
async with session.get(
|
||||||
|
url,
|
||||||
|
headers={"Accept": "application/json", **self._headers()},
|
||||||
|
timeout=aiohttp.ClientTimeout(total=15),
|
||||||
|
) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
return None
|
||||||
|
data = await resp.json()
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Jina search failed: {e}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
if not isinstance(data, dict):
|
||||||
|
return None
|
||||||
|
|
||||||
|
raw_list = data.get("data") or data.get("results") or []
|
||||||
|
if not isinstance(raw_list, list):
|
||||||
|
return None
|
||||||
|
|
||||||
|
results = []
|
||||||
|
for item in raw_list[:count]:
|
||||||
|
results.append(
|
||||||
|
{
|
||||||
|
"title": (item.get("title") or "")[:200],
|
||||||
|
"url": item.get("url", ""),
|
||||||
|
"description": (item.get("description") or "")[:500],
|
||||||
|
"source": "jina",
|
||||||
|
"extra": {},
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
|
||||||
|
return self._wrap(query, "jina", results)
|
||||||
|
|
||||||
|
async def brave_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
url = "https://search.brave.com/search"
|
||||||
|
params = {"q": query, "source": "web"}
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
html = await self._safe_get_html(session, url, params)
|
||||||
|
if not html:
|
||||||
|
return None
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
results = []
|
||||||
|
for snippet in soup.select('div[data-type="web"][data-pos]')[:count]:
|
||||||
|
link = snippet.select_one('a[href^="http"]')
|
||||||
|
title_div = snippet.select_one('div[class*="title"]')
|
||||||
|
desc_div = snippet.select_one('div.generic-snippet div.content')
|
||||||
|
if not desc_div:
|
||||||
|
desc_div = snippet.select_one('div[class*="snippet-description"]')
|
||||||
|
if link:
|
||||||
|
title = ""
|
||||||
|
if title_div:
|
||||||
|
title = title_div.get("title") or title_div.get_text(strip=True)
|
||||||
|
desc = ""
|
||||||
|
if desc_div:
|
||||||
|
desc = desc_div.get_text(strip=True)
|
||||||
|
results.append({
|
||||||
|
"title": title[:200],
|
||||||
|
"url": link.get("href", ""),
|
||||||
|
"description": desc[:500],
|
||||||
|
"source": "brave",
|
||||||
|
"extra": {},
|
||||||
|
})
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
return self._wrap(query, "brave", results)
|
||||||
|
|
||||||
|
async def duckduckgo_html_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
url = "https://html.duckduckgo.com/html/"
|
||||||
|
params = {"q": query}
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
html = await self._safe_get_html(session, url, params)
|
||||||
|
if not html:
|
||||||
|
return None
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
results = []
|
||||||
|
for result in soup.select('div.result.results_links')[:count * 2]:
|
||||||
|
link = result.select_one('a.result__a')
|
||||||
|
snippet = result.select_one('a.result__snippet')
|
||||||
|
if not link:
|
||||||
|
continue
|
||||||
|
result_url = link.get("href", "")
|
||||||
|
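            # DuckDuckGo's HTML frontend wraps external links in a redirect of
            # the form /l/?uddg=<url-encoded target>; unwrap it to get the real URL.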
if "uddg=" in result_url:
|
||||||
|
parsed = urlparse(result_url)
|
||||||
|
qs = parse_qs(parsed.query)
|
||||||
|
if "uddg" in qs:
|
||||||
|
result_url = unquote(qs["uddg"][0])
|
||||||
|
elif result_url.startswith("//"):
|
||||||
|
result_url = "https:" + result_url
|
||||||
|
if result_url.startswith("/") or "duckduckgo.com" in result_url:
|
||||||
|
continue
|
||||||
|
results.append({
|
||||||
|
"title": link.get_text(strip=True)[:200],
|
||||||
|
"url": result_url,
|
||||||
|
"description": (snippet.get_text(strip=True) if snippet else "")[:500],
|
||||||
|
"source": "duckduckgo_html",
|
||||||
|
"extra": {},
|
||||||
|
})
|
||||||
|
if len(results) >= count:
|
||||||
|
break
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
return self._wrap(query, "duckduckgo_html", results)
|
||||||
|
|
||||||
|
async def bing_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
url = "https://www.bing.com/search"
|
||||||
|
params = {"q": query, "count": min(count, 50)}
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
html = await self._safe_get_html(session, url, params)
|
||||||
|
if not html:
|
||||||
|
return None
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
results = []
|
||||||
|
for item in soup.select('li.b_algo')[:count]:
|
||||||
|
link = item.select_one('h2 a')
|
||||||
|
desc = item.select_one('p')
|
||||||
|
if link:
|
||||||
|
results.append({
|
||||||
|
"title": link.get_text(strip=True)[:200],
|
||||||
|
"url": link.get("href", ""),
|
||||||
|
"description": (desc.get_text(strip=True) if desc else "")[:500],
|
||||||
|
"source": "bing",
|
||||||
|
"extra": {},
|
||||||
|
})
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
return self._wrap(query, "bing", results)
|
||||||
|
|
||||||
|
async def mojeek_search(self, query: str, count: int) -> Optional[Dict]:
|
||||||
|
url = "https://www.mojeek.com/search"
|
||||||
|
params = {"q": query}
|
||||||
|
async with aiohttp.ClientSession() as session:
|
||||||
|
html = await self._safe_get_html(session, url, params)
|
||||||
|
if not html:
|
||||||
|
return None
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html, "html.parser")
|
||||||
|
results = []
|
||||||
|
for item in soup.select('ul.results-standard li')[:count]:
|
||||||
|
link = item.select_one('a.ob')
|
||||||
|
title_el = item.select_one('a.title')
|
||||||
|
desc = item.select_one('p.s')
|
||||||
|
if link:
|
||||||
|
title = title_el.get_text(strip=True) if title_el else link.get_text(strip=True)
|
||||||
|
results.append({
|
||||||
|
"title": title[:200],
|
||||||
|
"url": link.get("href", ""),
|
||||||
|
"description": (desc.get_text(strip=True) if desc else "")[:500],
|
||||||
|
"source": "mojeek",
|
||||||
|
"extra": {},
|
||||||
|
})
|
||||||
|
if not results:
|
||||||
|
return None
|
||||||
|
return self._wrap(query, "mojeek", results)
|
||||||
|
|
||||||
|
def _wrap(self, query: str, service: str, results: List[Dict]) -> Dict:
|
||||||
|
return {
|
||||||
|
"query": query,
|
||||||
|
"source": service,
|
||||||
|
"count": len(results),
|
||||||
|
"results": results,
|
||||||
|
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||||
|
"success": True,
|
||||||
|
"error": None,
|
||||||
|
}
|
||||||
|
|
||||||
|
async def search(self, query: str, count: int) -> Dict:
|
||||||
|
query = (query or "").strip()
|
||||||
|
if not query:
|
||||||
|
return {
|
||||||
|
"query": "",
|
||||||
|
"source": "none",
|
||||||
|
"count": 0,
|
||||||
|
"results": [],
|
||||||
|
"timestamp": datetime.utcnow().isoformat() + "Z",
|
||||||
|
"success": False,
|
||||||
|
"error": "Empty query",
|
||||||
|
}
|
||||||
|
|
||||||
|
count = max(1, min(int(count), 100))
|
||||||
|
services = self.services
|
||||||
|
|
||||||
|
logger.info(f"search '{query}' count={count} services={len(services)}")
|
||||||
|
|
||||||
|
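        # Fallback strategy: providers are tried strictly in the fixed quality
        # order declared in __init__; the first one that returns a successful,
        # non-empty result short-circuits the loop and is returned as-is.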
        for fn in services:
            name = fn.__name__
            try:
                result = await asyncio.wait_for(fn(query, count), timeout=20)
            except asyncio.TimeoutError:
                logger.warning(f"{name} timed out")
                continue
            except Exception as e:
                logger.warning(f"{name} failed: {e}")
                continue

            if result and result.get("success") and result.get("count", 0) > 0:
                logger.info(f"using {result.get('source')} for '{query}'")
                return result

        return {
            "query": query,
            "source": "none",
            "count": 0,
            "results": [],
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "success": False,
            "error": "All services failed",
        }


searcher = MultiSearch()


async def handle_search(request: web.Request) -> web.Response:
    q = request.query.get("query", "")
    count_raw = request.query.get("count", "10")
    try:
        count = int(count_raw)
    except ValueError:
        count = 10
    result = await searcher.search(q, count)
    status = 200 if result.get("success") else 400
    return web.json_response(result, status=status)


async def handle_health(request: web.Request) -> web.Response:
    return web.json_response(
        {
            "status": "ok",
            "services": [
                "brave",
                "duckduckgo_html",
                "bing",
                "mojeek",
                "duckduckgo",
                "wikipedia",
                "wikidata",
            ],
            "timestamp": datetime.utcnow().isoformat() + "Z",
        }
    )


def create_app() -> web.Application:
    app = web.Application()
    app.router.add_get("/search", handle_search)
    app.router.add_get("/health", handle_health)
    return app


def parse_args(args=None):
    parser = argparse.ArgumentParser(
        prog="rsearch",
        description="Multi-source search aggregator API"
    )
    parser.add_argument(
        "-H", "--host",
        default="0.0.0.0",
        help="Host to bind to (default: 0.0.0.0)"
    )
    parser.add_argument(
        "-p", "--port",
        type=int,
        default=8080,
        help="Port to listen on (default: 8080)"
    )
    parser.add_argument(
        "-l", "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="Log level (default: INFO)"
    )
    parser.add_argument(
        "-v", "--version",
        action="version",
version="%(prog)s 1.0.0"
|
||||||
|
    )
    return parser.parse_args(args)


def main(args=None):
    opts = parse_args(args)
    logging.basicConfig(
        level=getattr(logging, opts.log_level),
        format="%(asctime)s %(levelname)s %(name)s: %(message)s"
    )
    app = create_app()
    logger.info(f"Starting server on {opts.host}:{opts.port}")
    web.run_app(app, host=opts.host, port=opts.port, print=None)


if __name__ == "__main__":
    main()

1 tests/__init__.py Normal file
@@ -0,0 +1 @@
# retoor <retoor@molodetz.nl>

167 tests/test_providers.py Normal file
@@ -0,0 +1,167 @@
# retoor <retoor@molodetz.nl>

import asyncio
import pytest
from rsearch.app import MultiSearch


@pytest.fixture
def searcher():
    return MultiSearch()


@pytest.fixture
def query():
    return "python programming"


@pytest.fixture
def count():
    return 3


class TestProviders:

    @pytest.mark.asyncio
    async def test_brave_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.brave_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "brave"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "title" in first
        assert "url" in first
        assert "description" in first

    @pytest.mark.asyncio
    async def test_duckduckgo_html_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.duckduckgo_html_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "duckduckgo_html"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "title" in first
        assert "url" in first
        assert first["url"].startswith("http")

    @pytest.mark.asyncio
    async def test_bing_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.bing_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "bing"
        assert len(result.get("results", [])) > 0

    @pytest.mark.asyncio
    async def test_mojeek_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.mojeek_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "mojeek"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert not first["title"].startswith("http"), "Title should not be a URL"

    @pytest.mark.asyncio
    async def test_duckduckgo_api_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.duckduckgo_search(query, count), timeout=20)
        if result is not None:
            assert result.get("source") == "duckduckgo"
            if result.get("count", 0) > 0:
                assert result.get("success") is True

    @pytest.mark.asyncio
    async def test_wikipedia_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.wikipedia_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "wikipedia"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "wikipedia.org" in first["url"]

    @pytest.mark.asyncio
    async def test_wikidata_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.wikidata_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "wikidata"
        assert len(result.get("results", [])) > 0


class TestSearchAggregator:

    @pytest.mark.asyncio
    async def test_search_returns_results(self, searcher, query, count):
        result = await searcher.search(query, count)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("query") == query
        assert "timestamp" in result
        assert "results" in result

    @pytest.mark.asyncio
    async def test_search_empty_query(self, searcher, count):
        result = await searcher.search("", count)
        assert result is not None
        assert result.get("success") is False
        assert result.get("error") == "Empty query"

    @pytest.mark.asyncio
    async def test_search_count_limit(self, searcher, query):
        result = await searcher.search(query, 5)
        assert result is not None
        if result.get("success"):
            assert result.get("count", 0) <= 5

    @pytest.mark.asyncio
    async def test_result_format(self, searcher, query, count):
        result = await searcher.search(query, count)
        assert "query" in result
        assert "source" in result
        assert "count" in result
        assert "results" in result
        assert "timestamp" in result
        assert "success" in result
        assert "error" in result


class TestAllProviders:

    @pytest.mark.asyncio
    async def test_all_providers_return_valid_format(self, searcher, query, count):
        providers = [
            ("brave", searcher.brave_search),
            ("duckduckgo_html", searcher.duckduckgo_html_search),
            ("bing", searcher.bing_search),
            ("mojeek", searcher.mojeek_search),
            ("duckduckgo", searcher.duckduckgo_search),
            ("wikipedia", searcher.wikipedia_search),
            ("wikidata", searcher.wikidata_search),
        ]

        for name, fn in providers:
            try:
                result = await asyncio.wait_for(fn(query, count), timeout=20)
                if result is not None:
                    assert "query" in result, f"{name}: missing query"
                    assert "source" in result, f"{name}: missing source"
                    assert "count" in result, f"{name}: missing count"
                    assert "results" in result, f"{name}: missing results"
                    assert "timestamp" in result, f"{name}: missing timestamp"
                    assert "success" in result, f"{name}: missing success"
            except asyncio.TimeoutError:
                pytest.skip(f"{name} timed out")


if __name__ == "__main__":
    pytest.main([__file__, "-v"])