chore: update md, py, toml files
Commit: ccb50fbdbb
.gitignore (vendored, new file, 37 lines)
@@ -0,0 +1,37 @@
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
.pytest_cache/
.coverage
htmlcov/
.tox/
.nox/
.hypothesis/
*.log
.env
.venv
venv/
ENV/
.idea/
.vscode/
*.swp
*.swo
*~
.DS_Store
CHANGELOG.md (new file, 10 lines)
@@ -0,0 +1,10 @@
# Changelog

## Version 1.1.0 - 2026-01-01

update md, py, toml files

**Changes:** 10 files, 1034 lines
**Languages:** Markdown (182 lines), Other (68 lines), Python (729 lines), TOML (50 lines), Text (5 lines)
Makefile (new file, 31 lines)
@@ -0,0 +1,31 @@
.PHONY: install dev run test test-unit test-integration clean build uninstall

install:
	pip install -r requirements.txt

dev:
	pip install -e .

run:
	python -m rsearch

test: test-integration

test-unit:
	pytest tests/ -v --ignore=tests/test_providers.py

test-integration:
	pytest tests/test_providers.py -v

test-quick:
	curl -s "http://localhost:8080/health" | python -m json.tool
	curl -s "http://localhost:8080/search?query=python&count=3" | python -m json.tool

clean:
	rm -rf __pycache__ *.egg-info dist build .eggs rsearch/__pycache__ tests/__pycache__ .pytest_cache

build:
	python -m build

uninstall:
	pip uninstall -y rsearch
README.md (new file, 182 lines)
@@ -0,0 +1,182 @@
# rsearch

Author: retoor <retoor@molodetz.nl>

Multi-source search aggregator API that queries multiple search engines and returns unified results without requiring API keys.

## Features

- 7 search providers with automatic fallback
- No API keys required (HTML scraping + public APIs)
- Async architecture for performance
- Unified JSON response format
- Fixed provider ordering by result quality
- Comprehensive integration tests

## Search Providers

| Provider | Type | Description |
|----------|------|-------------|
| Brave | Scraping | High quality web results |
| DuckDuckGo HTML | Scraping | Reliable lightweight version |
| Bing | Scraping | Microsoft search engine |
| Mojeek | Scraping | Independent search index |
| DuckDuckGo | API | Instant answers |
| Wikipedia | API | Encyclopedia reference |
| Wikidata | API | Structured knowledge base |

## Installation

Install dependencies:

```bash
pip install -r requirements.txt
```

Install as package (development mode):

```bash
pip install -e .
```

Install with test dependencies:

```bash
pip install -e ".[test]"
```

Or using make:

```bash
make dev
```

## Usage

Run as module:

```bash
python -m rsearch
```

Or after installation:

```bash
rsearch
```

Or using make:

```bash
make run
```

### Command Line Options

```
usage: rsearch [-h] [-H HOST] [-p PORT] [-l {DEBUG,INFO,WARNING,ERROR}] [-v]

options:
  -h, --help        show help message
  -H, --host HOST   Host to bind to (default: 0.0.0.0)
  -p, --port PORT   Port to listen on (default: 8080)
  -l, --log-level   Log level: DEBUG, INFO, WARNING, ERROR (default: INFO)
  -v, --version     show version number
```

Examples:

```bash
rsearch --port 9000                     # Run on port 9000
rsearch --host 127.0.0.1 --port 3000    # Bind to localhost:3000
rsearch --log-level DEBUG               # Enable debug logging
```

## Testing

Run integration tests:

```bash
make test
```

Or directly with pytest:

```bash
pytest tests/test_providers.py -v
```

Quick API test (requires running server):

```bash
make test-quick
```

## API Endpoints

### Search

```
GET /search?query=<q>&count=<n>
```

Parameters:
- `query`: Search term (required)
- `count`: Number of results (default: 10, max: 100)

Response:
```json
{
  "query": "python",
  "source": "brave",
  "count": 3,
  "results": [
    {
      "title": "Welcome to Python.org",
      "url": "https://www.python.org/",
      "description": "The official home of the Python Programming Language",
      "source": "brave",
      "extra": {}
    }
  ],
  "timestamp": "2024-01-01T12:00:00.000000Z",
  "success": true,
  "error": null
}
```
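
For quick experimentation against a locally running server, a minimal Python client might look like the sketch below (this assumes the default `localhost:8080` bind and uses `aiohttp`, which the project already depends on):

```python
# Sketch: query a locally running rsearch instance and print the results.
# Assumes the server was started with the default host/port (0.0.0.0:8080).
import asyncio

import aiohttp


async def fetch(query: str, count: int = 5) -> dict:
    params = {"query": query, "count": str(count)}
    async with aiohttp.ClientSession() as session:
        async with session.get("http://localhost:8080/search", params=params) as resp:
            return await resp.json()


if __name__ == "__main__":
    data = asyncio.run(fetch("python"))
    for item in data.get("results", []):
        print(f"{item['title']} -> {item['url']}")
```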
### Health Check

```
GET /health
```

Response:
```json
{
  "status": "ok",
  "services": ["brave", "duckduckgo_html", "bing", "mojeek", "duckduckgo", "wikipedia", "wikidata"],
  "timestamp": "2024-01-01T12:00:00.000000Z"
}
```

## Project Structure

```
rsearch/
├── rsearch/
│   ├── __init__.py
│   ├── __main__.py
│   └── app.py
├── tests/
│   ├── __init__.py
│   └── test_providers.py
├── requirements.txt
├── pyproject.toml
├── Makefile
└── README.md
```

## License

MIT
pyproject.toml (new file, 50 lines)
@@ -0,0 +1,50 @@
[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "rsearch"
version = "1.1.0"
description = "Multi-source search aggregator API"
authors = [
    {name = "retoor", email = "retoor@molodetz.nl"}
]
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.9"
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Internet :: WWW/HTTP :: Indexing/Search",
]
dependencies = [
    "aiohttp>=3.9.0",
    "beautifulsoup4>=4.12.0",
    "lxml>=5.0.0",
]

[project.optional-dependencies]
test = [
    "pytest>=7.0.0",
    "pytest-asyncio>=0.21.0",
]

[project.scripts]
rsearch = "rsearch:main"

[project.urls]
Homepage = "https://github.com/retoor/rsearch"

[tool.setuptools.packages.find]
where = ["."]
include = ["rsearch*"]

[tool.pytest.ini_options]
asyncio_mode = "auto"
testpaths = ["tests"]
requirements.txt (new file, 5 lines)
@@ -0,0 +1,5 @@
aiohttp>=3.9.0
beautifulsoup4>=4.12.0
lxml>=5.0.0
pytest>=7.0.0
pytest-asyncio>=0.21.0
rsearch/__init__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
# retoor <retoor@molodetz.nl>

from rsearch.app import MultiSearch, create_app, main

__version__ = "1.0.0"
__all__ = ["MultiSearch", "create_app", "main"]
rsearch/__main__.py (new file, 6 lines)
@@ -0,0 +1,6 @@
# retoor <retoor@molodetz.nl>

from rsearch.app import main

if __name__ == "__main__":
    main()
rsearch/app.py (new file, 549 lines)
@@ -0,0 +1,549 @@
# retoor <retoor@molodetz.nl>

import aiohttp
import argparse
import asyncio
from aiohttp import web
from datetime import datetime
from html import unescape
from typing import Dict, Optional, List, Callable
from urllib.parse import unquote, parse_qs, urlparse
import random
import logging
import re
import sys
from bs4 import BeautifulSoup

logger = logging.getLogger("search-api")


class MultiSearch:
    def __init__(self):
        self.services: List[Callable[[str, int], asyncio.Future]] = [
            self.brave_search,
            self.duckduckgo_html_search,
            self.bing_search,
            self.mojeek_search,
            self.duckduckgo_search,
            self.wikipedia_search,
            self.wikidata_search,
        ]
        self.user_agents = [
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0",
        ]

    def _headers(self) -> Dict[str, str]:
        return {
            "User-Agent": random.choice(self.user_agents),
            "Accept": "application/json, text/html;q=0.9",
            "Accept-Language": "en-US,en;q=0.9",
            "Connection": "keep-alive",
        }

    async def _safe_get(
        self,
        session: aiohttp.ClientSession,
        url: str,
        params: Optional[Dict] = None,
        timeout: int = 10,
    ) -> Optional[Dict]:
        try:
            async with session.get(
                url,
                params=params,
                headers=self._headers(),
                timeout=aiohttp.ClientTimeout(total=timeout),
            ) as resp:
                if resp.status != 200:
                    return None
                try:
                    return await resp.json(content_type=None)
                except Exception:
                    text = await resp.text()
                    return {"_raw": text}
        except Exception as e:
            logger.warning(f"GET {url} failed: {e}")
            return None

    def _scrape_headers(self) -> Dict[str, str]:
        ua = random.choice(self.user_agents)
        headers = {
            "User-Agent": ua,
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "DNT": "1",
        }
        if "Chrome" in ua and "Edg" not in ua:
            headers["Sec-Ch-Ua"] = '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"'
            headers["Sec-Ch-Ua-Mobile"] = "?0"
            headers["Sec-Ch-Ua-Platform"] = '"Linux"' if "Linux" in ua else '"Windows"'
        headers["Sec-Fetch-Dest"] = "document"
        headers["Sec-Fetch-Mode"] = "navigate"
        headers["Sec-Fetch-Site"] = "none"
        headers["Sec-Fetch-User"] = "?1"
        return headers

    async def _safe_get_html(
        self,
        session: aiohttp.ClientSession,
        url: str,
        params: Optional[Dict] = None,
        timeout: int = 15,
    ) -> Optional[str]:
        try:
            async with session.get(
                url,
                params=params,
                headers=self._scrape_headers(),
                timeout=aiohttp.ClientTimeout(total=timeout),
                allow_redirects=True,
            ) as resp:
                if resp.status != 200:
                    return None
                return await resp.text()
        except Exception as e:
            logger.warning(f"HTML GET {url} failed: {e}")
            return None

    def _clean_html(self, html: str) -> str:
        if not html:
            return ""
        text = re.sub(r'<[^>]+>', ' ', html)
        text = re.sub(r'\s+', ' ', text)
        return unescape(text).strip()

    async def duckduckgo_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://api.duckduckgo.com/"
        params = {
            "q": query,
            "format": "json",
            "no_html": "1",
            "skip_disambig": "1",
        }
        async with aiohttp.ClientSession() as session:
            data = await self._safe_get(session, url, params, timeout=10)
            if not data or not isinstance(data, dict):
                return None

            results = []

            if data.get("AbstractText"):
                results.append(
                    {
                        "title": data.get("Heading", "Instant Answer"),
                        "url": data.get("AbstractURL", ""),
                        "description": data.get("AbstractText", "")[:500],
                        "source": "duckduckgo_instant",
                        "extra": {
                            "image": data.get("Image", ""),
                            "abstract_source": data.get("AbstractSource", ""),
                        },
                    }
                )

            for item in data.get("Results", [])[: max(0, count - len(results))]:
                results.append(
                    {
                        "title": item.get("Text", "")[:200],
                        "url": item.get("FirstURL", ""),
                        "description": item.get("Text", "")[:500],
                        "source": "duckduckgo",
                        "extra": {
                            "icon": item.get("Icon", ""),
                        },
                    }
                )

            if not results:
                return None

            return self._wrap(query, "duckduckgo", results[:count])

    async def wikipedia_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://en.wikipedia.org/w/api.php"
        params = {
            "action": "query",
            "format": "json",
            "list": "search",
            "srsearch": query,
            "srlimit": min(count, 50),
        }
        async with aiohttp.ClientSession() as session:
            data = await self._safe_get(session, url, params, timeout=10)
            if not data or not isinstance(data, dict):
                return None

            results = []
            for item in data.get("query", {}).get("search", [])[:count]:
                snippet = item.get("snippet", "")
                snippet = snippet.replace("<span class='searchmatch'>", "").replace(
                    "</span>", ""
                )
                snippet = unescape(snippet)
                title = item.get("title", "")
                results.append(
                    {
                        "title": title,
                        "url": "https://en.wikipedia.org/wiki/" + title.replace(" ", "_"),
                        "description": snippet[:500],
                        "source": "wikipedia",
                        "extra": {
                            "pageid": item.get("pageid"),
                            "size": item.get("size"),
                            "wordcount": item.get("wordcount"),
                            "timestamp": item.get("timestamp"),
                        },
                    }
                )

            if not results:
                return None

            return self._wrap(query, "wikipedia", results)

    async def wikidata_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://www.wikidata.org/w/api.php"
        params = {
            "action": "wbsearchentities",
            "search": query,
            "format": "json",
            "language": "en",
            "limit": min(count, 50),
        }
        async with aiohttp.ClientSession() as session:
            data = await self._safe_get(session, url, params, timeout=10)
            if not data or not isinstance(data, dict):
                return None

            results = []
            for item in data.get("search", [])[:count]:
                results.append(
                    {
                        "title": item.get("label", ""),
                        "url": item.get("url", ""),
                        "description": (item.get("description") or "")[:500],
                        "source": "wikidata",
                        "extra": {
                            "id": item.get("id"),
                            "aliases": (item.get("aliases") or [])[:5],
                        },
                    }
                )

            if not results:
                return None

            return self._wrap(query, "wikidata", results)

    async def jina_search(self, query: str, count: int) -> Optional[Dict]:
        # This may change; if JSON not available, this will just return None
        url = f"https://s.jina.ai/{query}"
        async with aiohttp.ClientSession() as session:
            try:
                async with session.get(
                    url,
                    headers={"Accept": "application/json", **self._headers()},
                    timeout=aiohttp.ClientTimeout(total=15),
                ) as resp:
                    if resp.status != 200:
                        return None
                    data = await resp.json()
            except Exception as e:
                logger.warning(f"Jina search failed: {e}")
                return None

            if not isinstance(data, dict):
                return None

            raw_list = data.get("data") or data.get("results") or []
            if not isinstance(raw_list, list):
                return None

            results = []
            for item in raw_list[:count]:
                results.append(
                    {
                        "title": (item.get("title") or "")[:200],
                        "url": item.get("url", ""),
                        "description": (item.get("description") or "")[:500],
                        "source": "jina",
                        "extra": {},
                    }
                )

            if not results:
                return None

            return self._wrap(query, "jina", results)

    async def brave_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://search.brave.com/search"
        params = {"q": query, "source": "web"}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
            if not html:
                return None

            soup = BeautifulSoup(html, "html.parser")
            results = []
            for snippet in soup.select('div[data-type="web"][data-pos]')[:count]:
                link = snippet.select_one('a[href^="http"]')
                title_div = snippet.select_one('div[class*="title"]')
                desc_div = snippet.select_one('div.generic-snippet div.content')
                if not desc_div:
                    desc_div = snippet.select_one('div[class*="snippet-description"]')
                if link:
                    title = ""
                    if title_div:
                        title = title_div.get("title") or title_div.get_text(strip=True)
                    desc = ""
                    if desc_div:
                        desc = desc_div.get_text(strip=True)
                    results.append({
                        "title": title[:200],
                        "url": link.get("href", ""),
                        "description": desc[:500],
                        "source": "brave",
                        "extra": {},
                    })
            if not results:
                return None
            return self._wrap(query, "brave", results)

    async def duckduckgo_html_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://html.duckduckgo.com/html/"
        params = {"q": query}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
            if not html:
                return None

            soup = BeautifulSoup(html, "html.parser")
            results = []
            for result in soup.select('div.result.results_links')[:count * 2]:
                link = result.select_one('a.result__a')
                snippet = result.select_one('a.result__snippet')
                if not link:
                    continue
                result_url = link.get("href", "")
                if "uddg=" in result_url:
                    parsed = urlparse(result_url)
                    qs = parse_qs(parsed.query)
                    if "uddg" in qs:
                        result_url = unquote(qs["uddg"][0])
                elif result_url.startswith("//"):
                    result_url = "https:" + result_url
                if result_url.startswith("/") or "duckduckgo.com" in result_url:
                    continue
                results.append({
                    "title": link.get_text(strip=True)[:200],
                    "url": result_url,
                    "description": (snippet.get_text(strip=True) if snippet else "")[:500],
                    "source": "duckduckgo_html",
                    "extra": {},
                })
                if len(results) >= count:
                    break
            if not results:
                return None
            return self._wrap(query, "duckduckgo_html", results)

    async def bing_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://www.bing.com/search"
        params = {"q": query, "count": min(count, 50)}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
            if not html:
                return None

            soup = BeautifulSoup(html, "html.parser")
            results = []
            for item in soup.select('li.b_algo')[:count]:
                link = item.select_one('h2 a')
                desc = item.select_one('p')
                if link:
                    results.append({
                        "title": link.get_text(strip=True)[:200],
                        "url": link.get("href", ""),
                        "description": (desc.get_text(strip=True) if desc else "")[:500],
                        "source": "bing",
                        "extra": {},
                    })
            if not results:
                return None
            return self._wrap(query, "bing", results)

    async def mojeek_search(self, query: str, count: int) -> Optional[Dict]:
        url = "https://www.mojeek.com/search"
        params = {"q": query}
        async with aiohttp.ClientSession() as session:
            html = await self._safe_get_html(session, url, params)
            if not html:
                return None

            soup = BeautifulSoup(html, "html.parser")
            results = []
            for item in soup.select('ul.results-standard li')[:count]:
                link = item.select_one('a.ob')
                title_el = item.select_one('a.title')
                desc = item.select_one('p.s')
                if link:
                    title = title_el.get_text(strip=True) if title_el else link.get_text(strip=True)
                    results.append({
                        "title": title[:200],
                        "url": link.get("href", ""),
                        "description": (desc.get_text(strip=True) if desc else "")[:500],
                        "source": "mojeek",
                        "extra": {},
                    })
            if not results:
                return None
            return self._wrap(query, "mojeek", results)

    def _wrap(self, query: str, service: str, results: List[Dict]) -> Dict:
        return {
            "query": query,
            "source": service,
            "count": len(results),
            "results": results,
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "success": True,
            "error": None,
        }

    async def search(self, query: str, count: int) -> Dict:
        query = (query or "").strip()
        if not query:
            return {
                "query": "",
                "source": "none",
                "count": 0,
                "results": [],
                "timestamp": datetime.utcnow().isoformat() + "Z",
                "success": False,
                "error": "Empty query",
            }

        count = max(1, min(int(count), 100))
        services = self.services

        logger.info(f"search '{query}' count={count} services={len(services)}")

        for fn in services:
            name = fn.__name__
            try:
                result = await asyncio.wait_for(fn(query, count), timeout=20)
            except asyncio.TimeoutError:
                logger.warning(f"{name} timed out")
                continue
            except Exception as e:
                logger.warning(f"{name} failed: {e}")
                continue

            if result and result.get("success") and result.get("count", 0) > 0:
                logger.info(f"using {result.get('source')} for '{query}'")
                return result

        return {
            "query": query,
            "source": "none",
            "count": 0,
            "results": [],
            "timestamp": datetime.utcnow().isoformat() + "Z",
            "success": False,
            "error": "All services failed",
        }


searcher = MultiSearch()


async def handle_search(request: web.Request) -> web.Response:
    q = request.query.get("query", "")
    count_raw = request.query.get("count", "10")
    try:
        count = int(count_raw)
    except ValueError:
        count = 10
    result = await searcher.search(q, count)
    status = 200 if result.get("success") else 400
    return web.json_response(result, status=status)


async def handle_health(request: web.Request) -> web.Response:
    return web.json_response(
        {
            "status": "ok",
            "services": [
                "brave",
                "duckduckgo_html",
                "bing",
                "mojeek",
                "duckduckgo",
                "wikipedia",
                "wikidata",
            ],
            "timestamp": datetime.utcnow().isoformat() + "Z",
        }
    )


def create_app() -> web.Application:
    app = web.Application()
    app.router.add_get("/search", handle_search)
    app.router.add_get("/health", handle_health)
    return app


def parse_args(args=None):
    parser = argparse.ArgumentParser(
        prog="rsearch",
        description="Multi-source search aggregator API"
    )
    parser.add_argument(
        "-H", "--host",
        default="0.0.0.0",
        help="Host to bind to (default: 0.0.0.0)"
    )
    parser.add_argument(
        "-p", "--port",
        type=int,
        default=8080,
        help="Port to listen on (default: 8080)"
    )
    parser.add_argument(
        "-l", "--log-level",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        default="INFO",
        help="Log level (default: INFO)"
    )
    parser.add_argument(
        "-v", "--version",
        action="version",
        version="%(prog)s 1.0.0"
    )
    return parser.parse_args(args)


def main(args=None):
    opts = parse_args(args)
    logging.basicConfig(
        level=getattr(logging, opts.log_level),
        format="%(asctime)s %(levelname)s %(name)s: %(message)s"
    )
    app = create_app()
    logger.info(f"Starting server on {opts.host}:{opts.port}")
    web.run_app(app, host=opts.host, port=opts.port, print=None)


if __name__ == "__main__":
    main()
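
Illustrative usage note (not part of this commit): because `MultiSearch.search` is an ordinary coroutine and `create_app` returns a plain aiohttp application, the module above can also be driven directly from Python without starting the bundled server. A minimal sketch:

```python
# Sketch: use MultiSearch programmatically instead of via the HTTP API.
# Assumes the package from this commit is installed (e.g. via `pip install -e .`).
import asyncio

from rsearch.app import MultiSearch, create_app


async def demo() -> None:
    searcher = MultiSearch()
    # Providers are tried in self.services order; the first one that returns
    # results wins, and its name is reported in the "source" field.
    result = await searcher.search("python programming", 3)
    print(result["source"], result["count"])


if __name__ == "__main__":
    asyncio.run(demo())
    # create_app() can likewise be embedded in an existing aiohttp runner
    # instead of going through rsearch's main().
```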
tests/__init__.py (new file, 1 line)
@@ -0,0 +1 @@
# retoor <retoor@molodetz.nl>
tests/test_providers.py (new file, 167 lines)
@@ -0,0 +1,167 @@
# retoor <retoor@molodetz.nl>

import asyncio
import pytest
from rsearch.app import MultiSearch


@pytest.fixture
def searcher():
    return MultiSearch()


@pytest.fixture
def query():
    return "python programming"


@pytest.fixture
def count():
    return 3


class TestProviders:

    @pytest.mark.asyncio
    async def test_brave_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.brave_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "brave"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "title" in first
        assert "url" in first
        assert "description" in first

    @pytest.mark.asyncio
    async def test_duckduckgo_html_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.duckduckgo_html_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "duckduckgo_html"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "title" in first
        assert "url" in first
        assert first["url"].startswith("http")

    @pytest.mark.asyncio
    async def test_bing_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.bing_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "bing"
        assert len(result.get("results", [])) > 0

    @pytest.mark.asyncio
    async def test_mojeek_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.mojeek_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "mojeek"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert not first["title"].startswith("http"), "Title should not be a URL"

    @pytest.mark.asyncio
    async def test_duckduckgo_api_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.duckduckgo_search(query, count), timeout=20)
        if result is not None:
            assert result.get("source") == "duckduckgo"
            if result.get("count", 0) > 0:
                assert result.get("success") is True

    @pytest.mark.asyncio
    async def test_wikipedia_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.wikipedia_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "wikipedia"
        assert len(result.get("results", [])) > 0
        first = result["results"][0]
        assert "wikipedia.org" in first["url"]

    @pytest.mark.asyncio
    async def test_wikidata_search(self, searcher, query, count):
        result = await asyncio.wait_for(searcher.wikidata_search(query, count), timeout=20)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("source") == "wikidata"
        assert len(result.get("results", [])) > 0


class TestSearchAggregator:

    @pytest.mark.asyncio
    async def test_search_returns_results(self, searcher, query, count):
        result = await searcher.search(query, count)
        assert result is not None
        assert result.get("success") is True
        assert result.get("count", 0) > 0
        assert result.get("query") == query
        assert "timestamp" in result
        assert "results" in result

    @pytest.mark.asyncio
    async def test_search_empty_query(self, searcher, count):
        result = await searcher.search("", count)
        assert result is not None
        assert result.get("success") is False
        assert result.get("error") == "Empty query"

    @pytest.mark.asyncio
    async def test_search_count_limit(self, searcher, query):
        result = await searcher.search(query, 5)
        assert result is not None
        if result.get("success"):
            assert result.get("count", 0) <= 5

    @pytest.mark.asyncio
    async def test_result_format(self, searcher, query, count):
        result = await searcher.search(query, count)
        assert "query" in result
        assert "source" in result
        assert "count" in result
        assert "results" in result
        assert "timestamp" in result
        assert "success" in result
        assert "error" in result


class TestAllProviders:

    @pytest.mark.asyncio
    async def test_all_providers_return_valid_format(self, searcher, query, count):
        providers = [
            ("brave", searcher.brave_search),
            ("duckduckgo_html", searcher.duckduckgo_html_search),
            ("bing", searcher.bing_search),
            ("mojeek", searcher.mojeek_search),
            ("duckduckgo", searcher.duckduckgo_search),
            ("wikipedia", searcher.wikipedia_search),
            ("wikidata", searcher.wikidata_search),
        ]

        for name, fn in providers:
            try:
                result = await asyncio.wait_for(fn(query, count), timeout=20)
                if result is not None:
                    assert "query" in result, f"{name}: missing query"
                    assert "source" in result, f"{name}: missing source"
                    assert "count" in result, f"{name}: missing count"
                    assert "results" in result, f"{name}: missing results"
                    assert "timestamp" in result, f"{name}: missing timestamp"
                    assert "success" in result, f"{name}: missing success"
            except asyncio.TimeoutError:
                pytest.skip(f"{name} timed out")


if __name__ == "__main__":
    pytest.main([__file__, "-v"])