477 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			477 lines
		
	
	
		
			16 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								# SPDX-License-Identifier: AGPL-3.0-or-later
							 | 
						||
| 
								 | 
							
								"""Implementations for caching favicons.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:py:obj:`FaviconCacheConfig`:
							 | 
						||
| 
								 | 
							
								  Configuration of the favicon cache
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:py:obj:`FaviconCache`:
							 | 
						||
| 
								 | 
							
								  Abstract base class for the implementation of a favicon cache.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:py:obj:`FaviconCacheSQLite`:
							 | 
						||
| 
								 | 
							
								  Favicon cache that manages the favicon BLOBs in a SQLite DB.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								:py:obj:`FaviconCacheNull`:
							 | 
						||
| 
								 | 
							
								  Fallback solution if the configured cache cannot be used for system reasons.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								----
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								"""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from __future__ import annotations
							 | 
						||
| 
								 | 
							
								from typing import Literal
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import abc
							 | 
						||
| 
								 | 
							
								import dataclasses
							 | 
						||
| 
								 | 
							
								import hashlib
							 | 
						||
| 
								 | 
							
								import logging
							 | 
						||
| 
								 | 
							
								import pathlib
							 | 
						||
| 
								 | 
							
								import sqlite3
							 | 
						||
| 
								 | 
							
								import tempfile
							 | 
						||
| 
								 | 
							
								import time
							 | 
						||
| 
								 | 
							
								import typer
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from pydantic import BaseModel
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from searx import sqlitedb
							 | 
						||
| 
								 | 
							
								from searx import logger
							 | 
						||
| 
								 | 
							
								from searx.utils import humanize_bytes, humanize_number
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Logger of this module, a child of the logger imported from ``searx``.
								logger = logger.getChild('favicons.cache')

								# Typer application to which the CLI commands of this module are attached.
								app = typer.Typer()

								# Marker stored in ``blob_map.sha256`` instead of a hash value when a resolver
								# delivered no data for an authority (see ``FaviconCacheSQLite.set``).
								FALLBACK_ICON = b"FALLBACK_ICON"

								# The global cache instance; only declared here, it is bound by ``init()``.
								CACHE: "FaviconCache"
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								@app.command()
								def state():
								    """show state of the cache"""
								    # Fetch the statistics of the global cache and print the rendered report.
								    stats = CACHE.state()
								    print(stats.report())
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								@app.command()
								def maintenance(force: bool = True, debug: bool = False):
								    """perform maintenance of the cache"""
								    # Logging setup: with ``debug`` the root logger is switched to DEBUG,
								    # otherwise the root handlers are removed and only the messages of this
								    # module's logger are written (unformatted) to a stream handler.
								    root_log = logging.getLogger()
								    if not debug:
								        root_log.handlers = []
								        console = logging.StreamHandler()
								        console.setFormatter(logging.Formatter("%(message)s"))
								        logger.addHandler(console)
								        logger.setLevel(logging.DEBUG)
								    else:
								        root_log.setLevel(logging.DEBUG)

								    # Snapshot the cache statistics before and after the maintenance run and
								    # report the difference.
								    before = CACHE.state()
								    CACHE.maintenance(force=force)
								    after = CACHE.state()
								    reduced_by = before - after

								    print("The cache has been reduced by:")
								    print(reduced_by.report("\n- {descr}: {val}").lstrip("\n"))
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def init(cfg: "FaviconCacheConfig"):
								    """Bind the global ``CACHE`` to the implementation selected by ``cfg.db_type``.

								    ``sqlite``:
								      :py:obj:`FaviconCacheSQLite`, or :py:obj:`FaviconCacheNull` when the
								      SQLite library is too old.

								    ``mem``:
								      :py:obj:`FaviconCacheMEM` (an error is logged, not meant for production).

								    Any other value raises :py:obj:`NotImplementedError`.
								    """

								    global CACHE  # pylint: disable=global-statement

								    db_type = cfg.db_type
								    if db_type not in ("sqlite", "mem"):
								        raise NotImplementedError(f"favicons db_type '{cfg.db_type}' is unknown")

								    if db_type == "mem":
								        logger.error("Favicons are cached in memory, don't use this in production!")
								        CACHE = FaviconCacheMEM(cfg)
								        return

								    # db_type is "sqlite".  Tuple comparison: a version tuple like (3, 35, 0)
								    # compares greater than (3, 35), so 3.35.x and newer pass this check.
								    if sqlite3.sqlite_version_info <= (3, 35):
								        logger.critical(
								            "Disable favicon caching completely: SQLite library (%s) is too old! (require >= 3.35)",
								            sqlite3.sqlite_version,
								        )
								        CACHE = FaviconCacheNull(cfg)
								        return

								    CACHE = FaviconCacheSQLite(cfg)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class FaviconCacheConfig(BaseModel):
								    """Settings of the favicon cache: the backend type, the location of the
								    database and the limits / intervals used by the maintenance."""

								    db_type: Literal["sqlite", "mem"] = "sqlite"
								    """Type of the database:

								    ``sqlite``:
								      :py:obj:`.cache.FaviconCacheSQLite`

								    ``mem``:
								      :py:obj:`.cache.FaviconCacheMEM` (not recommended)
								    """

								    # default: a file in the system's temporary directory
								    db_url: pathlib.Path = pathlib.Path(tempfile.gettempdir(), "faviconcache.db")
								    """URL of the SQLite DB, the path to the database file."""

								    HOLD_TIME: int = 30 * 24 * 60 * 60  # 30 days
								    """Hold time (default in sec.), after which a BLOB is removed from the cache."""

								    LIMIT_TOTAL_BYTES: int = 50 * 1024 * 1024  # 50 MB
								    """Maximum of bytes (default) stored in the cache of all blobs.  Note: The
								    limit is only reached at each maintenance interval after which the oldest
								    BLOBs are deleted; the limit is exceeded during the maintenance period. If
								    the maintenance period is *too long* or maintenance is switched off
								    completely, the cache grows uncontrollably."""

								    BLOB_MAX_BYTES: int = 20 * 1024  # 20 KB
								    """The maximum BLOB size in bytes that a favicon may have so that it can be
								    saved in the cache.  If the favicon is larger, it is not saved in the cache
								    and must be requested by the client via the proxy."""

								    MAINTENANCE_PERIOD: int = 60 * 60  # one hour
								    """Maintenance period in seconds / when :py:obj:`MAINTENANCE_MODE` is set to
								    ``auto``."""

								    MAINTENANCE_MODE: Literal["auto", "off"] = "auto"
								    """Type of maintenance mode

								    ``auto``:
								      Maintenance is carried out automatically as part of the maintenance
								      intervals (:py:obj:`MAINTENANCE_PERIOD`); no external process is required.

								    ``off``:
								      Maintenance is switched off and must be carried out by an external process
								      if required.
								    """
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								@dataclasses.dataclass
								class FaviconCacheStats:
								    """Dataclass which provides information on the status of the cache.

								    All fields default to ``None``; a ``None`` field is rendered as ``--`` by
								    :py:obj:`FaviconCacheStats.report`.
								    """

								    favicons: int | None = None
								    bytes: int | None = None
								    domains: int | None = None
								    resolvers: int | None = None

								    # Rows of (field name, description, formatter applied by report()).  The
								    # name is not annotated, so it is a plain class attribute and not a
								    # dataclass field.
								    field_descr = (
								        ("favicons", "number of favicons in cache", humanize_number),
								        ("bytes", "total size (approx. bytes) of cache", humanize_bytes),
								        ("domains", "total number of domains in cache", humanize_number),
								        ("resolvers", "number of resolvers", str),
								    )

								    def __sub__(self, other) -> FaviconCacheStats:
								        """Return the field-wise difference ``self - other``.

								        A field that is ``None`` on either side is left at its default
								        (``None``) in the result.

								        :raises TypeError: if ``other`` is not an instance of this class.
								        """
								        if not isinstance(other, self.__class__):
								            # the message names the operator that was actually used (``-``)
								            raise TypeError(
								                f"unsupported operand type(s) for -: "
								                f"'{type(self).__name__}' and '{type(other).__name__}'"
								            )
								        kwargs = {}
								        for field, _, _ in self.field_descr:
								            self_val, other_val = getattr(self, field), getattr(other, field)
								            if None in (self_val, other_val):
								                continue
								            if isinstance(self_val, int):
								                kwargs[field] = self_val - other_val
								            else:
								                kwargs[field] = self_val
								        return self.__class__(**kwargs)

								    def report(self, fmt: str = "{descr}: {val}\n"):
								        """Render one line per entry of ``field_descr`` and return the joined
								        string.

								        :param fmt: format string with the placeholders ``{descr}`` and
								          ``{val}``, applied once per field.
								        """
								        lines = []
								        for field, descr, cast in self.field_descr:
								            val = getattr(self, field)
								            # unset values are shown as "--", all others pass the formatter
								            val = "--" if val is None else cast(val)
								            lines.append(fmt.format(descr=descr, val=val))
								        return "".join(lines)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class FaviconCache(abc.ABC):
								    """Interface every favicon cache implementation has to provide."""

								    @abc.abstractmethod
								    def __init__(self, cfg: FaviconCacheConfig):
								        """Build the cache instance from the configuration ``cfg``."""

								    @abc.abstractmethod
								    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
								        """Look up the entry registered for ``(resolver, authority)``.

								        Returns ``None`` if the cache has no entry, otherwise the tuple
								        ``(data, mime)`` that has been registered in the cache."""

								    @abc.abstractmethod
								    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
								        """Register ``data`` and its mime-type in the cache.  If ``data`` is
								        ``None``, the :py:obj:`FALLBACK_ICON` is registered in the cache."""

								    @abc.abstractmethod
								    def state(self) -> FaviconCacheStats:
								        """Return a :py:obj:`FaviconCacheStats` (key/values) describing the
								        current state of the cache."""

								    @abc.abstractmethod
								    def maintenance(self, force=False):
								        """Perform the maintenance of the cache."""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class FaviconCacheNull(FaviconCache):
								    """Cache implementation that stores nothing.  ``init()`` falls back to it
								    when :py:obj:`FaviconCacheSQLite` cannot be used because the available
								    SQLite library is too old."""

								    def __init__(self, cfg: FaviconCacheConfig):
								        """The configuration is accepted but ignored."""

								    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
								        """Always ``None``: there is never an entry in this cache."""
								        return None

								    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
								        """Always ``False``: nothing is stored."""
								        return False

								    def state(self):
								        """Statistics of an empty cache (zero favicons)."""
								        return FaviconCacheStats(favicons=0)

								    def maintenance(self, force=False):
								        """Nothing to maintain."""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class FaviconCacheSQLite(sqlitedb.SQLiteAppl, FaviconCache):
							 | 
						||
| 
								 | 
							
								    """Favicon cache that manages the favicon BLOBs in a SQLite DB.  The DB
							 | 
						||
| 
								 | 
							
								    model in the SQLite DB is implemented using the abstract class
							 | 
						||
| 
								 | 
							
								    :py:obj:`sqlitedb.SQLiteAppl`.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    The following configurations are required / supported:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    - :py:obj:`FaviconCacheConfig.db_url`
							 | 
						||
| 
								 | 
							
								    - :py:obj:`FaviconCacheConfig.HOLD_TIME`
							 | 
						||
| 
								 | 
							
								    - :py:obj:`FaviconCacheConfig.LIMIT_TOTAL_BYTES`
							 | 
						||
| 
								 | 
							
								    - :py:obj:`FaviconCacheConfig.BLOB_MAX_BYTES`
							 | 
						||
| 
								 | 
							
								    - :py:obj:`MAINTENANCE_PERIOD`
							 | 
						||
| 
								 | 
							
								    - :py:obj:`MAINTENANCE_MODE`
							 | 
						||
| 
								 | 
							
								    """
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    DB_SCHEMA = 1
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    DDL_BLOBS = """\
							 | 
						||
| 
								 | 
							
								CREATE TABLE IF NOT EXISTS blobs (
							 | 
						||
| 
								 | 
							
								  sha256     TEXT,
							 | 
						||
| 
								 | 
							
								  bytes_c    INTEGER,
							 | 
						||
| 
								 | 
							
								  mime       TEXT NOT NULL,
							 | 
						||
| 
								 | 
							
								  data       BLOB NOT NULL,
							 | 
						||
| 
								 | 
							
								  PRIMARY KEY (sha256))"""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    """Table to store BLOB objects by their sha256 hash values."""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    DDL_BLOB_MAP = """\
							 | 
						||
| 
								 | 
							
								CREATE TABLE IF NOT EXISTS blob_map (
							 | 
						||
| 
								 | 
							
								    m_time     INTEGER DEFAULT (strftime('%s', 'now')),  -- last modified (unix epoch) time in sec.
							 | 
						||
| 
								 | 
							
								    sha256     TEXT,
							 | 
						||
| 
								 | 
							
								    resolver   TEXT,
							 | 
						||
| 
								 | 
							
								    authority  TEXT,
							 | 
						||
| 
								 | 
							
								    PRIMARY KEY (resolver, authority))"""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    """Table to map from (resolver, authority) to sha256 hash values."""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    DDL_CREATE_TABLES = {
							 | 
						||
| 
								 | 
							
								        "blobs": DDL_BLOBS,
							 | 
						||
| 
								 | 
							
								        "blob_map": DDL_BLOB_MAP,
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    SQL_DROP_LEFTOVER_BLOBS = (
							 | 
						||
| 
								 | 
							
								        "DELETE FROM blobs WHERE sha256 IN ("
							 | 
						||
| 
								 | 
							
								        " SELECT b.sha256"
							 | 
						||
| 
								 | 
							
								        "   FROM blobs b"
							 | 
						||
| 
								 | 
							
								        "   LEFT JOIN blob_map bm"
							 | 
						||
| 
								 | 
							
								        "     ON b.sha256 = bm.sha256"
							 | 
						||
| 
								 | 
							
								        "  WHERE bm.sha256 IS NULL)"
							 | 
						||
| 
								 | 
							
								    )
							 | 
						||
| 
								 | 
							
								    """Delete blobs.sha256 (BLOBs) no longer in blob_map.sha256."""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    SQL_ITER_BLOBS_SHA256_BYTES_C = (
							 | 
						||
| 
								 | 
							
								        "SELECT b.sha256, b.bytes_c FROM blobs b"
							 | 
						||
| 
								 | 
							
								        "  JOIN blob_map bm "
							 | 
						||
| 
								 | 
							
								        "    ON b.sha256 = bm.sha256"
							 | 
						||
| 
								 | 
							
								        " ORDER BY bm.m_time ASC"
							 | 
						||
| 
								 | 
							
								    )
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    SQL_INSERT_BLOBS = (
							 | 
						||
| 
								 | 
							
								        "INSERT INTO blobs (sha256, bytes_c, mime, data) VALUES (?, ?, ?, ?)"
							 | 
						||
| 
								 | 
							
								        "    ON CONFLICT (sha256) DO NOTHING"
							 | 
						||
| 
								 | 
							
								    )  # fmt: skip
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    SQL_INSERT_BLOB_MAP = (
							 | 
						||
| 
								 | 
							
								        "INSERT INTO blob_map (sha256, resolver, authority) VALUES (?, ?, ?)"
							 | 
						||
| 
								 | 
							
								        "    ON CONFLICT DO UPDATE "
							 | 
						||
| 
								 | 
							
								        "   SET sha256=excluded.sha256, m_time=strftime('%s', 'now')"
							 | 
						||
| 
								 | 
							
								    )
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def __init__(self, cfg: FaviconCacheConfig):
								        """An instance of the favicon cache is build up from the configuration.

								        :param cfg: configuration of the cache; ``cfg.db_url`` is handed over
								          to the constructor of the base class and ``cfg`` itself is kept in
								          ``self.cfg``.
								        """

								        # ``db_url`` is declared as ``pathlib.Path`` in FaviconCacheConfig and a
								        # Path object never compares equal to a str; compare the string form so
								        # that the warning is actually issued for an in-memory DB.
								        if str(cfg.db_url) == ":memory:":
								            logger.critical("don't use SQLite DB in :memory: in production!!")
								        super().__init__(cfg.db_url)
								        self.cfg = cfg
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def __call__(self, resolver: str, authority: str) -> None | tuple[None | bytes, None | str]:
								        """Look up the favicon registered for ``(resolver, authority)``.

								        Returns ``None`` if ``blob_map`` has no row for the pair.  Returns
								        ``(None, None)`` if the row holds the :py:obj:`FALLBACK_ICON` marker or
								        if no BLOB exists for the mapped hash, otherwise ``(data, mime)`` from
								        the ``blobs`` table.
								        """

								        map_row = self.DB.execute(
								            "SELECT sha256 FROM blob_map WHERE resolver = ? AND authority = ?",
								            (resolver, authority),
								        ).fetchone()
								        if map_row is None:
								            return None

								        digest = map_row[0]
								        if digest == FALLBACK_ICON:
								            return None, None

								        blob_row = self.DB.execute(
								            "SELECT data, mime FROM blobs WHERE sha256 = ?",
								            (digest,),
								        ).fetchone()
								        if blob_row is None:
								            return None, None

								        data, mime = blob_row
								        return data, mime
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
								        """Register ``data`` / ``mime`` for ``(resolver, authority)`` in the DB.

								        If ``data`` is ``None`` the :py:obj:`FALLBACK_ICON` marker is written to
								        ``blob_map`` and no BLOB is stored.

								        :returns: ``True`` if the mapping has been written, ``False`` if the
								          favicon was rejected (``data`` without a mime-type, or ``data``
								          larger than ``cfg.BLOB_MAX_BYTES``).
								        """

								        if self.cfg.MAINTENANCE_MODE == "auto" and int(time.time()) > self.next_maintenance_time:
								            # Should automatic maintenance be moved to a new thread?
								            self.maintenance()

								        if data is not None and mime is None:
								            logger.error(
								                "favicon resolver %s tries to cache mime-type None for authority %s",
								                resolver,
								                authority,
								            )
								            return False

								        bytes_c = len(data or b"")
								        if bytes_c > self.cfg.BLOB_MAX_BYTES:
								            # arguments are passed lazily, the message is only formatted when
								            # the record is actually emitted
								            logger.info(
								                "favicon of resolver: %s / authority: %s to big to cache (bytes: %s) ",
								                resolver,
								                authority,
								                bytes_c,
								            )
								            return False

								        if data is None:
								            sha256 = FALLBACK_ICON
								        else:
								            sha256 = hashlib.sha256(data).hexdigest()

								        with self.connect() as conn:
								            # the fallback marker has no BLOB, only the mapping is stored
								            if sha256 != FALLBACK_ICON:
								                conn.execute(self.SQL_INSERT_BLOBS, (sha256, bytes_c, mime, data))
								            conn.execute(self.SQL_INSERT_BLOB_MAP, (sha256, resolver, authority))

								        return True
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    @property
								    def next_maintenance_time(self) -> int:
								        """Point in time (unix epoch) at which the next maintenance is due: the
								        m_time of the ``LAST_MAINTENANCE`` property plus the configured
								        ``MAINTENANCE_PERIOD``."""

								        period = self.cfg.MAINTENANCE_PERIOD
								        last_run = self.properties.m_time("LAST_MAINTENANCE")
								        return period + last_run
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def maintenance(self, force=False):
								        """Run the clean-up cycle of the favicon DB.

								        Unless ``force`` is true, nothing is done while the current time is
								        still before :py:obj:`next_maintenance_time`.  Otherwise the
								        ``LAST_MAINTENANCE`` property is touched and, on one DB connection:

								        1. ``blob_map`` rows whose ``m_time`` lies more than ``cfg.HOLD_TIME``
								           seconds in the past are deleted,
								        2. ``SQL_DROP_LEFTOVER_BLOBS`` is executed and
								        3. if the sum of ``bytes_c`` over all BLOBs exceeds
								           ``cfg.LIMIT_TOTAL_BYTES``, BLOBs (and the ``blob_map`` rows that
								           reference them) are deleted in the order
								           ``SQL_ITER_BLOBS_SHA256_BYTES_C`` yields them, until the dropped
								           bytes exceed the excess.

								        Values are handed to SQLite as bound parameters instead of being
								        formatted into the SQL text.

								        :param force: run the maintenance even if the next maintenance time
								            has not been reached yet.
								        """

								        # Prevent parallel DB maintenance cycles from other DB connections
								        # (e.g. in multi thread or process environments).
								        if not force and int(time.time()) < self.next_maintenance_time:
								            logger.debug("no maintenance required yet, next maintenance interval is in the future")
								            return
								        self.properties.set("LAST_MAINTENANCE", "")  # hint: this (also) sets the m_time of the property!

								        # do maintenance tasks
								        with self.connect() as conn:

								            # drop items not in HOLD time
								            res = conn.execute(
								                "DELETE FROM blob_map"
								                " WHERE cast(m_time as integer) < cast(strftime('%s', 'now') as integer) - ?",
								                (self.cfg.HOLD_TIME,),
								            )
								            logger.debug("dropped %s obsolete blob_map items from db", res.rowcount)
								            res = conn.execute(self.SQL_DROP_LEFTOVER_BLOBS)
								            logger.debug("dropped %s obsolete BLOBS from db", res.rowcount)

								            # drop old items to be in LIMIT_TOTAL_BYTES
								            total_bytes = conn.execute("SELECT SUM(bytes_c) FROM blobs").fetchone()[0] or 0
								            if total_bytes > self.cfg.LIMIT_TOTAL_BYTES:

								                excess = total_bytes - self.cfg.LIMIT_TOTAL_BYTES
								                dropped_bytes = 0
								                sha_list = []
								                for sha256, bytes_c in conn.execute(self.SQL_ITER_BLOBS_SHA256_BYTES_C):
								                    sha_list.append(sha256)
								                    dropped_bytes += bytes_c
								                    if dropped_bytes > excess:
								                        break
								                if sha_list:
								                    # One bound parameter per statement execution: no quoting
								                    # issues and no statement that grows with the list.
								                    params = [(sha256,) for sha256 in sha_list]
								                    conn.executemany("DELETE FROM blobs WHERE sha256 = ?", params)
								                    conn.executemany("DELETE FROM blob_map WHERE sha256 = ?", params)
								                    logger.debug("dropped %s blobs with total size of %s bytes", len(sha_list), dropped_bytes)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def _query_val(self, sql, default=None):
								        """Execute ``sql`` on ``self.DB`` and return the first column of the first row.

								        ``default`` is returned when the query yields no row or when that
								        first column is ``NULL``.
								        """
								        row = self.DB.execute(sql).fetchone()
								        first = None if row is None else row[0]
								        return default if first is None else first
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    def state(self) -> FaviconCacheStats:
								        """Collect the counters of the cache from the DB.

								        Each counter is read with :py:obj:`_query_val` (default ``0``): the
								        number of BLOBs, the sum of their ``bytes_c`` and the number of
								        distinct authorities and resolvers found in ``blob_map``.
								        """
								        queries = {
								            "favicons": "SELECT count(*) FROM blobs",
								            "bytes": "SELECT SUM(bytes_c) FROM blobs",
								            "domains": "SELECT count(*) FROM (SELECT authority FROM blob_map GROUP BY authority)",
								            "resolvers": "SELECT count(*) FROM (SELECT resolver FROM blob_map GROUP BY resolver)",
								        }
								        counters = {field: self._query_val(sql, 0) for field, sql in queries.items()}
								        return FaviconCacheStats(**counters)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								class FaviconCacheMEM(FaviconCache):
								    """Favicon cache that keeps every favicon in the memory of the process.

								    It is only a proof of concept.

								    .. attention::

								       Don't use it in production, it will blow up your memory!!

								    """

								    def __init__(self, cfg):
								        """Store ``cfg`` and start with empty in-memory tables."""
								        self.cfg = cfg
								        # sha256 hex digest --> favicon data
								        self._data = {}
								        # "<resolver>:<authority>" --> (sha256 hex digest, mime type)
								        self._sha_mime = {}

								    def __call__(self, resolver: str, authority: str) -> None | tuple[bytes | None, str | None]:
								        """Look up the favicon cached for ``resolver`` / ``authority``.

								        Returns ``None`` if nothing is cached for this pair, otherwise a tuple
								        ``(data, mime)``; ``data`` is ``None`` when the cached entry is the
								        ``FALLBACK_ICON`` placeholder.
								        """
								        key = f"{resolver}:{authority}"
								        digest, mime = self._sha_mime.get(key, (None, None))
								        if digest is None:
								            return None
								        blob = self._data.get(digest)
								        return (None if blob == FALLBACK_ICON else blob), mime

								    def set(self, resolver: str, authority: str, mime: str | None, data: bytes | None) -> bool:
								        """Cache ``data`` / ``mime`` for ``resolver`` / ``authority``.

								        ``data=None`` caches the ``FALLBACK_ICON`` placeholder (without a
								        mime type).  Data without a mime type is rejected: an error is logged
								        and ``False`` is returned.  Otherwise ``True`` is returned.
								        """
								        if data is not None and mime is None:
								            logger.error(
								                "favicon resolver %s tries to cache mime-type None for authority %s",
								                resolver,
								                authority,
								            )
								            return False

								        if data is None:
								            data, mime = FALLBACK_ICON, None

								        sha256 = hashlib.sha256(data).hexdigest()
								        self._data[sha256] = data
								        self._sha_mime[f"{resolver}:{authority}"] = (sha256, mime)
								        return True

								    def state(self):
								        """Return stats in which only the number of stored items is set."""
								        return FaviconCacheStats(favicons=len(self._data))

								    def maintenance(self, force=False):
								        """Do nothing; this cache has no maintenance tasks."""
							 |