From dba569462d0e9c4dbd77a54bb42ef5c3b1916142 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Wed, 19 Apr 2023 17:20:03 +0200
Subject: [PATCH 01/10] [mod] limiter: reduce request rates for requests
 without a ping

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/plugins/limiter.py         | 27 +++++++++++++++++++++++++--
 searx/templates/simple/base.html |  3 +++
 searx/webapp.py                  |  8 +++++++-
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index 46c82f588..c7d74248b 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -18,7 +18,7 @@ from flask import request
 
 from searx import redisdb
 from searx.plugins import logger
-from searx.redislib import incr_sliding_window
+from searx.redislib import incr_sliding_window, secret_hash
 
 name = "Request limiter"
 description = "Limit the number of request"
@@ -41,6 +41,18 @@ block_user_agent = re.compile(
     + r')'
 )
 
+PING_KEY = 'SearXNG_limiter.ping'
+TOKEN_KEY = 'SearXNG_limiter.token'
+
+
+def ping():
+    redis_client = redisdb.client()
+    user_agent = request.headers.get('User-Agent', 'unknown')
+    x_forwarded_for = request.headers.get('X-Forwarded-For', '')
+
+    ping_key = PING_KEY + user_agent + x_forwarded_for
+    redis_client.set(secret_hash(ping_key), 1, ex=600)
+
 
 def is_accepted_request() -> bool:
     # pylint: disable=too-many-return-statements
@@ -57,9 +69,20 @@ def is_accepted_request() -> bool:
 
     if request.path == '/search':
 
+        c_burst_max = 2
+        c_10min_max = 10
+
+        ping_key = PING_KEY + user_agent + x_forwarded_for
+        if redis_client.get(secret_hash(ping_key)):
+            logger.debug('got a ping')
+            c_burst_max = 15
+            c_10min_max = 150
+        else:
+            logger.debug('missing a ping')
+
         c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
         c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
-        if c_burst > 15 or c_10min > 150:
+        if c_burst > c_burst_max or c_10min > c_10min_max:
             logger.debug("BLOCK %s: to many request", x_forwarded_for)
             return False
 
diff --git a/searx/templates/simple/base.html b/searx/templates/simple/base.html
index a31ff07ee..dfe4ea265 100644
--- a/searx/templates/simple/base.html
+++ b/searx/templates/simple/base.html
@@ -17,6 +17,9 @@
   {% else %}
   <link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
   {% endif %}
+  {% if get_setting('server.limiter') %}
+  <link rel="stylesheet" href="/limiter.css" type="text/css" media="screen" />
+  {% endif %}
   {% block styles %}{% endblock %}
   <!--[if gte IE 9]>-->
   <script src="{{ url_for('static', filename='js/searxng.head.min.js') }}" client_settings="{{ client_settings }}"></script>
diff --git a/searx/webapp.py b/searx/webapp.py
index 79255652f..67265e542 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -93,7 +93,7 @@ from searx.utils import (
 )
 from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
 from searx.query import RawTextQuery
-from searx.plugins import Plugin, plugins, initialize as plugin_initialize
+from searx.plugins import limiter, Plugin, plugins, initialize as plugin_initialize
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
 from searx.preferences import (
     Preferences,
@@ -642,6 +642,12 @@ def health():
     return Response('OK', mimetype='text/plain')
 
 
+@app.route('/limiter.css', methods=['GET', 'POST'])
+def limiter_css():
+    limiter.ping()
+    return Response('', mimetype='text/css')
+
+
 @app.route('/search', methods=['GET', 'POST'])
 def search():
     """Search query in q and return results.

From 5226044c13817688a5ca3461743844dca4ed3d2b Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Wed, 19 Apr 2023 18:59:23 +0200
Subject: [PATCH 02/10] [mod] limiter: add random token to the limiter URL

By adding a random component to the limiter URL, a bot can no longer send a ping
by requesting a static URL.

Related: https://github.com/searxng/searxng/pull/2357#issuecomment-1518525094
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/plugins/limiter.py         | 25 ++++++++++++++++++++++++-
 searx/templates/simple/base.html |  2 +-
 searx/webapp.py                  |  8 +++++---
 3 files changed, 30 insertions(+), 5 deletions(-)

diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index c7d74248b..69bd576d4 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -14,6 +14,8 @@ Enable the plugin in ``settings.yml``:
 """
 
 import re
+import string
+import random
 from flask import request
 
 from searx import redisdb
@@ -54,6 +56,27 @@ def ping():
     redis_client.set(secret_hash(ping_key), 1, ex=600)
 
 
+def get_token():
+    redis_client = redisdb.client()
+    if not redis_client:
+        # This function is also called when limiter is inactive / no redis DB
+        # (see render function in webapp.py)
+        return '12345678'
+    token = redis_client.get(TOKEN_KEY)
+    if token:
+        token = token.decode('UTF-8')
+    else:
+        token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8))
+        redis_client.set(TOKEN_KEY, token, ex=600)
+    return token
+
+
+def token_is_valid(token):
+    valid = token == get_token()
+    logger.debug("token is valid --> %s", valid)
+    return valid
+
+
 def is_accepted_request() -> bool:
     # pylint: disable=too-many-return-statements
     redis_client = redisdb.client()
@@ -83,7 +106,7 @@ def is_accepted_request() -> bool:
         c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
         c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
         if c_burst > c_burst_max or c_10min > c_10min_max:
-            logger.debug("BLOCK %s: to many request", x_forwarded_for)
+            logger.debug("BLOCK %s: too many request", x_forwarded_for)
             return False
 
         if len(request.headers.get('Accept-Language', '').strip()) == '':
diff --git a/searx/templates/simple/base.html b/searx/templates/simple/base.html
index dfe4ea265..9f7cdbb8e 100644
--- a/searx/templates/simple/base.html
+++ b/searx/templates/simple/base.html
@@ -18,7 +18,7 @@
   <link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
   {% endif %}
   {% if get_setting('server.limiter') %}
-  <link rel="stylesheet" href="/limiter.css" type="text/css" media="screen" />
+  <link rel="stylesheet" href="{{ url_for('limiter_css', token=limiter_token) }}" type="text/css" media="screen" />
   {% endif %}
   {% block styles %}{% endblock %}
   <!--[if gte IE 9]>-->
diff --git a/searx/webapp.py b/searx/webapp.py
index 67265e542..815bfcabd 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -416,6 +416,7 @@ def render(template_name: str, **kwargs):
     kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
     kwargs['cookies'] = request.cookies
     kwargs['errors'] = request.errors
+    kwargs['limiter_token'] = limiter.get_token()
 
     # values from the preferences
     kwargs['preferences'] = request.preferences
@@ -642,9 +643,10 @@ def health():
     return Response('OK', mimetype='text/plain')
 
 
-@app.route('/limiter.css', methods=['GET', 'POST'])
-def limiter_css():
-    limiter.ping()
+@app.route('/limiter<token>.css', methods=['GET', 'POST'])
+def limiter_css(token=None):
+    if limiter.token_is_valid(token):
+        limiter.ping()
     return Response('', mimetype='text/css')
 
 

From 1ec325adccc427fe05cf08da9a2d9d63da7365f4 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Tue, 23 May 2023 18:16:37 +0200
Subject: [PATCH 03/10] [mod] limiter -> botdetection: modularization and
 documentation

In order to be able to meet the outstanding requirements, the implementation is
modularized and supplemented with documentation.

This patch does not contain functional change, except it fixes issue #2455

----

Activate the limiter in settings.yml and simulate a bot request with::

    curl -H 'Accept-Language: de-DE,en-US;q=0.7,en;q=0.3' \
         -H 'Accept: text/html' \
         -H 'User-Agent: xyz' \
         -H 'Accept-Encoding: gzip' \
         'http://127.0.0.1:8888/search?q=foo'

In the LOG:

    DEBUG   searx.botdetection.link_token : missing ping for this request: .....

Since ``BURST_MAX_SUSPICIOUS = 2`` you can repeat the query above two times
before you get a "Too Many Requests" response.

Closes: https://github.com/searxng/searxng/issues/2455
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 docs/admin/engines/settings.rst            |   2 +-
 docs/src/searx.botdetection.rst            |  45 ++++++
 docs/src/searx.plugins.limiter.rst         |  13 --
 searx/botdetection/__init__.py             |  26 ++++
 searx/botdetection/http_accept.py          |  24 ++++
 searx/botdetection/http_accept_encoding.py |  26 ++++
 searx/botdetection/http_accept_language.py |  23 +++
 searx/botdetection/http_connection.py      |  23 +++
 searx/botdetection/http_user_agent.py      |  54 +++++++
 searx/botdetection/ip_limit.py             |  90 ++++++++++++
 searx/botdetection/limiter.py              |  79 +++++++++++
 searx/botdetection/link_token.py           | 126 +++++++++++++++++
 searx/plugins/limiter.py                   | 157 +++------------------
 searx/templates/simple/base.html           |   2 +-
 searx/webapp.py                            |  12 +-
 15 files changed, 541 insertions(+), 161 deletions(-)
 create mode 100644 docs/src/searx.botdetection.rst
 delete mode 100644 docs/src/searx.plugins.limiter.rst
 create mode 100644 searx/botdetection/__init__.py
 create mode 100644 searx/botdetection/http_accept.py
 create mode 100644 searx/botdetection/http_accept_encoding.py
 create mode 100644 searx/botdetection/http_accept_language.py
 create mode 100644 searx/botdetection/http_connection.py
 create mode 100644 searx/botdetection/http_user_agent.py
 create mode 100644 searx/botdetection/ip_limit.py
 create mode 100644 searx/botdetection/limiter.py
 create mode 100644 searx/botdetection/link_token.py

diff --git a/docs/admin/engines/settings.rst b/docs/admin/engines/settings.rst
index f9a1dad4f..63478f441 100644
--- a/docs/admin/engines/settings.rst
+++ b/docs/admin/engines/settings.rst
@@ -235,7 +235,7 @@ Global Settings
 
 ``limiter`` :
   Rate limit the number of request on the instance, block some bots.  The
-  :ref:`limiter plugin` requires a :ref:`settings redis` database.
+  :ref:`limiter src` requires a :ref:`settings redis` database.
 
 .. _image_proxy:
 
diff --git a/docs/src/searx.botdetection.rst b/docs/src/searx.botdetection.rst
new file mode 100644
index 000000000..85e0ce4cd
--- /dev/null
+++ b/docs/src/searx.botdetection.rst
@@ -0,0 +1,45 @@
+.. _botdetection:
+
+=============
+Bot Detection
+=============
+
+.. contents:: Contents
+   :depth: 2
+   :local:
+   :backlinks: entry
+
+.. automodule:: searx.botdetection
+  :members:
+
+.. automodule:: searx.botdetection.limiter
+  :members:
+
+
+Rate limit
+==========
+
+.. automodule:: searx.botdetection.ip_limit
+  :members:
+
+.. automodule:: searx.botdetection.link_token
+  :members:
+
+
+Probe HTTP headers
+==================
+
+.. automodule:: searx.botdetection.http_accept
+  :members:
+
+.. automodule:: searx.botdetection.http_accept_encoding
+  :members:
+
+.. automodule:: searx.botdetection.http_accept_language
+  :members:
+
+.. automodule:: searx.botdetection.http_connection
+  :members:
+
+.. automodule:: searx.botdetection.http_user_agent
+  :members:
diff --git a/docs/src/searx.plugins.limiter.rst b/docs/src/searx.plugins.limiter.rst
deleted file mode 100644
index 75d06f5c2..000000000
--- a/docs/src/searx.plugins.limiter.rst
+++ /dev/null
@@ -1,13 +0,0 @@
-.. _limiter plugin:
-
-==============
-Limiter Plugin
-==============
-
-.. sidebar:: info
-
-   The :ref:`limiter plugin` requires a :ref:`Redis <settings redis>` database.
-
-.. automodule:: searx.plugins.limiter
-  :members:
-
diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py
new file mode 100644
index 000000000..78a7d30f3
--- /dev/null
+++ b/searx/botdetection/__init__.py
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+""".. _botdetection src:
+
+Bot detection methods
+---------------------
+
+The methods implemented in this python package are used by the :ref:`limiter src`.
+
+"""
+
+import flask
+
+
+def dump_request(request: flask.Request):
+    return (
+        "%s: '%s'" % (request.headers.get('X-Forwarded-For'), request.path)
+        + " || form: %s" % request.form
+        + " || Accept: %s" % request.headers.get('Accept')
+        + " || Accept-Language: %s" % request.headers.get('Accept-Language')
+        + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
+        + " || Content-Type: %s" % request.headers.get('Content-Type')
+        + " || Content-Length: %s" % request.headers.get('Content-Length')
+        + " || Connection: %s" % request.headers.get('Connection')
+        + " || User-Agent: %s" % request.headers.get('User-Agent')
+    )
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py
new file mode 100644
index 000000000..1ab7cb4c1
--- /dev/null
+++ b/searx/botdetection/http_accept.py
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_accept``
+----------------------
+
+The ``http_accept`` method evaluates a request as the request of a bot if the
+Accept_ header ..
+
+- did not contain ``text/html``
+
+.. _Accept:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+    if 'text/html' not in request.accept_mimetypes:
+        return 429, "bot detected, HTTP header Accept did not contain text/html"
+    return None
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
new file mode 100644
index 000000000..ae630fd68
--- /dev/null
+++ b/searx/botdetection/http_accept_encoding.py
@@ -0,0 +1,26 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_accept_encoding``
+-------------------------------
+
+The ``http_accept_encoding`` method evaluates a request as the request of a
+bot if the Accept-Encoding_ header ..
+
+- contains neither ``gzip`` nor ``deflate`` (a request is only rated as a bot
+  request if both values are missing)
+
+.. _Accept-Encoding:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+    accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
+    if not ('gzip' in accept_list or 'deflate' in accept_list):
+        return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
+    return None
diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py
new file mode 100644
index 000000000..06743802e
--- /dev/null
+++ b/searx/botdetection/http_accept_language.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_accept_language``
+-------------------------------
+
+The ``http_accept_language`` method evaluates a request as the request of a bot
+if the Accept-Language_ header is unset.
+
+.. _Accept-Language:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language
+
+"""
+
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+    if request.headers.get('Accept-Language', '').strip() == '':
+        return 429, "bot detected, missing HTTP header Accept-Language"
+    return None
diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py
new file mode 100644
index 000000000..f61f5e48c
--- /dev/null
+++ b/searx/botdetection/http_connection.py
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_connection``
+--------------------------
+
+The ``http_connection`` method evaluates a request as the request of a bot if
+the Connection_ header is set to ``close``.
+
+.. _Connection:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
+
+"""
+
+
+from typing import Optional, Tuple
+import flask
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+    if request.headers.get('Connection', '').strip() == 'close':
+        return 429, "bot detected, HTTP header 'Connection=close'"
+    return None
diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py
new file mode 100644
index 000000000..892ae0bd9
--- /dev/null
+++ b/searx/botdetection/http_user_agent.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``http_user_agent``
+--------------------------
+
+The ``http_user_agent`` method evaluates a request as the request of a bot if
+the User-Agent_ header is unset or matches the regular expression
+:py:obj:`USER_AGENT`.
+
+.. _User-Agent:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
+
+"""
+
+from typing import Optional, Tuple
+import re
+import flask
+
+USER_AGENT = (
+    r'('
+    + r'unknown'
+    + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
+    + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
+    + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
+    + r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
+    + r'|ZmEu|BLEXBot|bitlybot'
+    # unmaintained Farside instances
+    + r'|'
+    + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
+    # other bots and client to block
+    + '|.*PetalBot.*'
+    + r')'
+)
+"""Regular expression that matches to User-Agent_ from known *bots*"""
+
+_regexp = None
+
+
+def regexp_user_agent():
+    global _regexp  # pylint: disable=global-statement
+    if not _regexp:
+        _regexp = re.compile(USER_AGENT)
+    return _regexp
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+    user_agent = request.headers.get('User-Agent', 'unknown')
+    if regexp_user_agent().match(user_agent):
+        return (
+            429,
+            f"bot detected, HTTP header User-Agent: {user_agent}",
+        )
+    return None
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
new file mode 100644
index 000000000..fce3f8b67
--- /dev/null
+++ b/searx/botdetection/ip_limit.py
@@ -0,0 +1,90 @@
+"""
+Method ``ip_limit``
+-------------------
+
+The ``ip_limit`` method counts requests from an IP in *sliding windows*.  If
+there are too many requests in a sliding window, the request is evaluated as a
+bot request.  This method requires a redis DB and needs a HTTP X-Forwarded-For_
+header.  To protect privacy only the hash value of an IP is stored in the redis
+DB and for a maximum of 10 minutes.
+
+The :py:obj:`link_token` method is used to investigate whether a request is
+*suspicious*.  If the :py:obj:`link_token` method is activated and a request is
+*suspicious* the request rates are reduced:
+
+- :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
+- :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
+
+.. _X-Forwarded-For:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+from searx import redisdb
+from searx import logger
+from searx.redislib import incr_sliding_window
+
+from . import link_token
+
+logger = logger.getChild('botdetection.ip_limit')
+
+BURST_WINDOW = 20
+"""Time (sec) before sliding window for *burst* requests expires."""
+
+BURST_MAX = 15
+"""Maximum requests from one IP in the :py:obj:`BURST_WINDOW`"""
+
+BURST_MAX_SUSPICIOUS = 2
+"""Maximum of suspicious requests from one IP in the :py:obj:`BURST_WINDOW`"""
+
+LONG_WINDOW = 600
+"""Time (sec) before the longer sliding window expires."""
+
+LONG_MAX = 150
+"""Maximum requests from one IP in the :py:obj:`LONG_WINDOW`"""
+
+LONG_MAX_SUSPICIOUS = 10
+"""Maximum suspicious requests from one IP in the :py:obj:`LONG_WINDOW`"""
+
+API_WONDOW = 3600
+"""Time (sec) before sliding window for API requests (format != html) expires."""
+
+API_MAX = 4
+"""Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+    """Rate-limit the request by its X-Forwarded-For value; return ``(429, msg)`` to block, else ``None``."""
+    redis_client = redisdb.client()
+
+    x_forwarded_for = request.headers.get('X-Forwarded-For', '')
+    if not x_forwarded_for:
+        logger.error("missing HTTP header X-Forwarded-For")
+
+    if request.args.get('format', 'html') != 'html':
+        c = incr_sliding_window(redis_client, 'IP limit - API_WONDOW:' + x_forwarded_for, API_WONDOW)
+        if c > API_MAX:
+            # f-string, so the client address is actually interpolated into the message
+            return 429, f"BLOCK {x_forwarded_for}: API limit exceeded"
+
+    suspicious = link_token.is_suspicious(request)
+    if suspicious:
+        c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
+        if c > BURST_MAX_SUSPICIOUS:
+            return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS"
+
+        c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
+        if c > LONG_MAX_SUSPICIOUS:
+            return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS"
+    else:
+        c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
+        if c > BURST_MAX:
+            return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX"
+
+        c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
+        if c > LONG_MAX:
+            return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX"
+    return None
diff --git a/searx/botdetection/limiter.py b/searx/botdetection/limiter.py
new file mode 100644
index 000000000..71044c312
--- /dev/null
+++ b/searx/botdetection/limiter.py
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+""".. _limiter src:
+
+Limiter
+=======
+
+.. sidebar:: info
+
+   The limiter requires a :ref:`Redis <settings redis>` database.
+
+Bot protection / IP rate limitation.  The intention of rate limitation is to
+limit suspicious requests from an IP.  The motivation behind this is the fact
+that SearXNG passes through requests from bots and is thus classified as a bot
+itself.  As a result, the SearXNG engine then receives a CAPTCHA or is blocked
+by the search engine (the origin) in some other way.
+
+To avoid blocking, the requests from bots to SearXNG must also be blocked, this
+is the task of the limiter.  To perform this task, the limiter uses the methods
+from the :py:obj:`searx.botdetection`.
+
+To enable the limiter activate:
+
+.. code:: yaml
+
+   server:
+     ...
+     limiter: true  # rate limit the number of request on the instance, block some bots
+
+and set the redis-url connection. Check the value, it depends on your redis DB
+(see :ref:`settings redis`), by example:
+
+.. code:: yaml
+
+   redis:
+     url: unix:///usr/local/searxng-redis/run/redis.sock?db=0
+
+"""
+
+from typing import Optional, Tuple
+import flask
+
+from searx.botdetection import (
+    http_accept,
+    http_accept_encoding,
+    http_accept_language,
+    http_connection,
+    http_user_agent,
+    ip_limit,
+)
+
+
+def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+    if request.path == '/healthz':
+        return None
+
+    for func in [
+        http_user_agent,
+    ]:
+        val = func.filter_request(request)
+        if val is not None:
+            return val
+
+    if request.path == '/search':
+
+        for func in [
+            http_accept,
+            http_accept_encoding,
+            http_accept_language,
+            http_connection,
+            http_user_agent,
+            ip_limit,
+        ]:
+            val = func.filter_request(request)
+            if val is not None:
+                return val
+
+    return None
diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py
new file mode 100644
index 000000000..8ef215f6c
--- /dev/null
+++ b/searx/botdetection/link_token.py
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""
+Method ``link_token``
+---------------------
+
+The ``link_token`` method evaluates a request as :py:obj:`suspicious
+<is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
+client.  By adding a random component (the token) to the URL, a bot can not send
+a ping by requesting a static URL.
+
+.. note::
+
+   This method requires a redis DB and needs a HTTP X-Forwarded-For_ header.
+
+To make use of this method a flask URL route needs to be added:
+
+.. code:: python
+
+   @app.route('/client<token>.css', methods=['GET', 'POST'])
+   def client_token(token=None):
+       link_token.ping(request, token)
+       return Response('', mimetype='text/css')
+
+And in the HTML template from flask a stylesheet link is needed (the value of
+``link_token`` comes from :py:obj:`get_token`):
+
+.. code:: html
+
+   <link rel="stylesheet"
+         href="{{ url_for('client_token', token=link_token) }}"
+         type="text/css" />
+
+.. _X-Forwarded-For:
+   https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
+
+"""
+
+import string
+import random
+import flask
+
+from searx import logger
+from searx import redisdb
+from searx.redislib import secret_hash
+
+TOKEN_LIVE_TIME = 600
+"""Lifetime (sec) of limiter's CSS token."""
+
+PING_KEY = 'SearXNG_limiter.ping'
+TOKEN_KEY = 'SearXNG_limiter.token'
+
+logger = logger.getChild('botdetection.link_token')
+
+
+def is_suspicious(request: flask.Request):
+    """Checks if there is a valid ping for this request, if not this request is
+    rated as *suspicious*"""
+    redis_client = redisdb.client()
+    if not redis_client:
+        return False
+
+    ping_key = get_ping_key(request)
+    if not redis_client.get(ping_key):
+        logger.warning(
+            "missing ping (IP: %s) / request: %s",
+            request.headers.get('X-Forwarded-For', ''),
+            ping_key,
+        )
+        return True
+
+    logger.debug("found ping for this request: %s", ping_key)
+    return False
+
+
+def ping(request: flask.Request, token: str):
+    """This function is called by a request to URL ``/client<token>.css``"""
+    redis_client = redisdb.client()
+    if not redis_client:
+        return
+    if not token_is_valid(token):
+        return
+    ping_key = get_ping_key(request)
+    logger.debug("store ping for: %s", ping_key)
+    redis_client.set(ping_key, 1, ex=TOKEN_LIVE_TIME)
+
+
+def get_ping_key(request: flask.Request):
+    """Generates a hashed key that fits (more or less) to a request.  At least
+    X-Forwarded-For_ is needed to be able to assign the request to an IP.
+
+    """
+    return secret_hash(
+        PING_KEY
+        + request.headers.get('X-Forwarded-For', '')
+        + request.headers.get('Accept-Language', '')
+        + request.headers.get('User-Agent', '')
+    )
+
+
+def token_is_valid(token) -> bool:
+    valid = token == get_token()
+    logger.debug("token is valid --> %s", valid)
+    return valid
+
+
+def get_token() -> str:
+    """Returns current token.  If there is no currently active token a new token
+    is generated randomly and stored in the redis DB.
+
+    - :py:obj:`TOKEN_LIVE_TIME`
+    - :py:obj:`TOKEN_KEY`
+
+    """
+    redis_client = redisdb.client()
+    if not redis_client:
+        # This function is also called when limiter is inactive / no redis DB
+        # (see render function in webapp.py)
+        return '12345678'
+    token = redis_client.get(TOKEN_KEY)
+    if token:
+        token = token.decode('UTF-8')
+    else:
+        token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(16))
+        redis_client.set(TOKEN_KEY, token, ex=TOKEN_LIVE_TIME)
+    return token
diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index 69bd576d4..d9566b92b 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -1,165 +1,42 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # pyright: basic
-"""Some bot protection / rate limitation
+"""see :ref:`limiter src`"""
 
-To monitor rate limits and protect privacy the IP addresses are getting stored
-with a hash so the limiter plugin knows who to block.  A redis database is
-needed to store the hash values.
-
-Enable the plugin in ``settings.yml``:
-
-- ``server.limiter: true``
-- ``redis.url: ...`` check the value, see :ref:`settings redis`
-"""
-
-import re
-import string
-import random
-from flask import request
+import flask
 
 from searx import redisdb
 from searx.plugins import logger
-from searx.redislib import incr_sliding_window, secret_hash
+from searx.botdetection import limiter
+from searx.botdetection import dump_request
 
 name = "Request limiter"
 description = "Limit the number of request"
 default_on = False
 preference_section = 'service'
+
 logger = logger.getChild('limiter')
 
-block_user_agent = re.compile(
-    r'('
-    + r'unknown'
-    + r'|[Cc][Uu][Rr][Ll]|[wW]get|Scrapy|splash|JavaFX|FeedFetcher|python-requests|Go-http-client|Java|Jakarta|okhttp'
-    + r'|HttpClient|Jersey|Python|libwww-perl|Ruby|SynHttpClient|UniversalFeedParser|Googlebot|GoogleImageProxy'
-    + r'|bingbot|Baiduspider|yacybot|YandexMobileBot|YandexBot|Yahoo! Slurp|MJ12bot|AhrefsBot|archive.org_bot|msnbot'
-    + r'|MJ12bot|SeznamBot|linkdexbot|Netvibes|SMTBot|zgrab|James BOT|Sogou|Abonti|Pixray|Spinn3r|SemrushBot|Exabot'
-    + r'|ZmEu|BLEXBot|bitlybot'
-    # unmaintained Farside instances
-    + r'|'
-    + re.escape(r'Mozilla/5.0 (compatible; Farside/0.1.0; +https://farside.link)')
-    + '|.*PetalBot.*'
-    + r')'
-)
-
-PING_KEY = 'SearXNG_limiter.ping'
-TOKEN_KEY = 'SearXNG_limiter.token'
-
-
-def ping():
-    redis_client = redisdb.client()
-    user_agent = request.headers.get('User-Agent', 'unknown')
-    x_forwarded_for = request.headers.get('X-Forwarded-For', '')
-
-    ping_key = PING_KEY + user_agent + x_forwarded_for
-    redis_client.set(secret_hash(ping_key), 1, ex=600)
-
-
-def get_token():
-    redis_client = redisdb.client()
-    if not redis_client:
-        # This function is also called when limiter is inactive / no redis DB
-        # (see render function in webapp.py)
-        return '12345678'
-    token = redis_client.get(TOKEN_KEY)
-    if token:
-        token = token.decode('UTF-8')
-    else:
-        token = ''.join(random.choice(string.ascii_lowercase + string.digits) for _ in range(8))
-        redis_client.set(TOKEN_KEY, token, ex=600)
-    return token
-
-
-def token_is_valid(token):
-    valid = token == get_token()
-    logger.debug("token is valid --> %s", valid)
-    return valid
-
-
-def is_accepted_request() -> bool:
-    # pylint: disable=too-many-return-statements
-    redis_client = redisdb.client()
-    user_agent = request.headers.get('User-Agent', 'unknown')
-    x_forwarded_for = request.headers.get('X-Forwarded-For', '')
-
-    if request.path == '/healthz':
-        return True
-
-    if block_user_agent.match(user_agent):
-        logger.debug("BLOCK %s: %s --> detected User-Agent: %s" % (x_forwarded_for, request.path, user_agent))
-        return False
-
-    if request.path == '/search':
-
-        c_burst_max = 2
-        c_10min_max = 10
-
-        ping_key = PING_KEY + user_agent + x_forwarded_for
-        if redis_client.get(secret_hash(ping_key)):
-            logger.debug('got a ping')
-            c_burst_max = 15
-            c_10min_max = 150
-        else:
-            logger.debug('missing a ping')
-
-        c_burst = incr_sliding_window(redis_client, 'IP limit, burst' + x_forwarded_for, 20)
-        c_10min = incr_sliding_window(redis_client, 'IP limit, 10 minutes' + x_forwarded_for, 600)
-        if c_burst > c_burst_max or c_10min > c_10min_max:
-            logger.debug("BLOCK %s: too many request", x_forwarded_for)
-            return False
-
-        if len(request.headers.get('Accept-Language', '').strip()) == '':
-            logger.debug("BLOCK %s: missing Accept-Language", x_forwarded_for)
-            return False
-
-        if request.headers.get('Connection') == 'close':
-            logger.debug("BLOCK %s: got Connection=close", x_forwarded_for)
-            return False
-
-        accept_encoding_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
-        if 'gzip' not in accept_encoding_list and 'deflate' not in accept_encoding_list:
-            logger.debug("BLOCK %s: suspicious Accept-Encoding", x_forwarded_for)
-            return False
-
-        if 'text/html' not in request.accept_mimetypes:
-            logger.debug("BLOCK %s: Accept-Encoding misses text/html", x_forwarded_for)
-            return False
-
-        if request.args.get('format', 'html') != 'html':
-            c = incr_sliding_window(redis_client, 'API limit' + x_forwarded_for, 3600)
-            if c > 4:
-                logger.debug("BLOCK %s: API limit exceeded", x_forwarded_for)
-                return False
-
-    logger.debug(
-        "OK %s: '%s'" % (x_forwarded_for, request.path)
-        + " || form: %s" % request.form
-        + " || Accept: %s" % request.headers.get('Accept', '')
-        + " || Accept-Language: %s" % request.headers.get('Accept-Language', '')
-        + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding', '')
-        + " || Content-Type: %s" % request.headers.get('Content-Type', '')
-        + " || Content-Length: %s" % request.headers.get('Content-Length', '')
-        + " || Connection: %s" % request.headers.get('Connection', '')
-        + " || User-Agent: %s" % user_agent
-    )
-
-    return True
-
 
 def pre_request():
-    if not is_accepted_request():
-        return 'Too Many Requests', 429
+    """See :ref:`flask.Flask.before_request`"""
+
+    val = limiter.filter_request(flask.request)
+    if val is not None:
+        http_status, msg = val
+        client_ip = flask.request.headers.get('X-Forwarded-For', '<unknown>')
+        logger.error("BLOCK (IP %s): %s" % (client_ip, msg))
+        return 'Too Many Requests', http_status
+
+    logger.debug("OK: %s" % dump_request(flask.request))
     return None
 
 
-def init(app, settings):
+def init(app: flask.Flask, settings) -> bool:
     if not settings['server']['limiter']:
         return False
-
     if not redisdb.client():
-        logger.error("The limiter requires Redis")  # pylint: disable=undefined-variable
+        logger.error("The limiter requires Redis")
         return False
-
     app.before_request(pre_request)
     return True
diff --git a/searx/templates/simple/base.html b/searx/templates/simple/base.html
index 9f7cdbb8e..3c6ed11c7 100644
--- a/searx/templates/simple/base.html
+++ b/searx/templates/simple/base.html
@@ -18,7 +18,7 @@
   <link rel="stylesheet" href="{{ url_for('static', filename='css/searxng.min.css') }}" type="text/css" media="screen" />
   {% endif %}
   {% if get_setting('server.limiter') %}
-  <link rel="stylesheet" href="{{ url_for('limiter_css', token=limiter_token) }}" type="text/css" media="screen" />
+  <link rel="stylesheet" href="{{ url_for('client_token', token=link_token) }}" type="text/css" />
   {% endif %}
   {% block styles %}{% endblock %}
   <!--[if gte IE 9]>-->
diff --git a/searx/webapp.py b/searx/webapp.py
index 815bfcabd..d6322447a 100755
--- a/searx/webapp.py
+++ b/searx/webapp.py
@@ -93,7 +93,8 @@ from searx.utils import (
 )
 from searx.version import VERSION_STRING, GIT_URL, GIT_BRANCH
 from searx.query import RawTextQuery
-from searx.plugins import limiter, Plugin, plugins, initialize as plugin_initialize
+from searx.plugins import Plugin, plugins, initialize as plugin_initialize
+from searx.botdetection import link_token
 from searx.plugins.oa_doi_rewrite import get_doi_resolver
 from searx.preferences import (
     Preferences,
@@ -416,7 +417,7 @@ def render(template_name: str, **kwargs):
     kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint
     kwargs['cookies'] = request.cookies
     kwargs['errors'] = request.errors
-    kwargs['limiter_token'] = limiter.get_token()
+    kwargs['link_token'] = link_token.get_token()
 
     # values from the preferences
     kwargs['preferences'] = request.preferences
@@ -643,10 +644,9 @@ def health():
     return Response('OK', mimetype='text/plain')
 
 
-@app.route('/limiter<token>.css', methods=['GET', 'POST'])
-def limiter_css(token=None):
-    if limiter.token_is_valid(token):
-        limiter.ping()
+@app.route('/client<token>.css', methods=['GET', 'POST'])
+def client_token(token=None):
+    link_token.ping(request, token)
     return Response('', mimetype='text/css')
 
 

From 66fdec0eb92bf11c0bc477d6fb1df3dc783e4dcb Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Fri, 26 May 2023 17:24:43 +0200
Subject: [PATCH 04/10] [mod] limiter: add config file
 /etc/searxng/limiter.toml

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 requirements.txt                           |   1 +
 searx/botdetection/http_accept.py          |   5 +-
 searx/botdetection/http_accept_encoding.py |   5 +-
 searx/botdetection/http_accept_language.py |   6 +-
 searx/botdetection/http_connection.py      |   6 +-
 searx/botdetection/http_user_agent.py      |   6 +-
 searx/botdetection/ip_limit.py             |  11 +-
 searx/botdetection/limiter.py              |  43 ++-
 searx/botdetection/limiter.toml            |   3 +
 searx/plugins/limiter.py                   |   1 +
 searx/tools/__init__.py                    |   8 +
 searx/tools/config.py                      | 376 +++++++++++++++++++++
 12 files changed, 459 insertions(+), 12 deletions(-)
 create mode 100644 searx/botdetection/limiter.toml
 create mode 100644 searx/tools/__init__.py
 create mode 100644 searx/tools/config.py

diff --git a/requirements.txt b/requirements.txt
index 0bb3eafb0..9e3de3a46 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -16,3 +16,4 @@ redis==4.5.5
 markdown-it-py==2.2.0
 typing_extensions==4.6.2
 fasttext-predict==0.9.2.1
+pytomlpp==1.0.13
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py
index 1ab7cb4c1..23670a283 100644
--- a/searx/botdetection/http_accept.py
+++ b/searx/botdetection/http_accept.py
@@ -13,12 +13,15 @@ Accept_ header ..
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept
 
 """
+# pylint: disable=unused-argument
 
 from typing import Optional, Tuple
 import flask
 
+from searx.tools import config
 
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
     if 'text/html' not in request.accept_mimetypes:
         return 429, "bot detected, HTTP header Accept did not contain text/html"
     return None
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
index ae630fd68..191249711 100644
--- a/searx/botdetection/http_accept_encoding.py
+++ b/searx/botdetection/http_accept_encoding.py
@@ -14,12 +14,15 @@ bot if the Accept-Encoding_ header ..
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Encoding
 
 """
+# pylint: disable=unused-argument
 
 from typing import Optional, Tuple
 import flask
 
+from searx.tools import config
 
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
     accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
     if not ('gzip' in accept_list or 'deflate' in accept_list):
         return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py
index 06743802e..558a216cf 100644
--- a/searx/botdetection/http_accept_language.py
+++ b/searx/botdetection/http_accept_language.py
@@ -11,13 +11,15 @@ if the Accept-Language_ header is unset.
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
 
 """
-
+# pylint: disable=unused-argument
 
 from typing import Optional, Tuple
 import flask
 
+from searx.tools import config
 
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
     if request.headers.get('Accept-Language', '').strip() == '':
         return 429, "bot detected, missing HTTP header Accept-Language"
     return None
diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py
index f61f5e48c..0ef24a7b8 100644
--- a/searx/botdetection/http_connection.py
+++ b/searx/botdetection/http_connection.py
@@ -11,13 +11,15 @@ the Connection_ header is set to ``close``.
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Connection
 
 """
-
+# pylint: disable=unused-argument
 
 from typing import Optional, Tuple
 import flask
 
+from searx.tools import config
 
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
     if request.headers.get('Connection', '').strip() == 'close':
         return 429, "bot detected, HTTP header 'Connection=close'"
     return None
diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py
index 892ae0bd9..3d1ec9173 100644
--- a/searx/botdetection/http_user_agent.py
+++ b/searx/botdetection/http_user_agent.py
@@ -12,11 +12,15 @@ the User-Agent_ header is unset or matches the regular expression
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/User-Agent
 
 """
+# pylint: disable=unused-argument
 
 from typing import Optional, Tuple
 import re
 import flask
 
+from searx.tools import config
+
+
 USER_AGENT = (
     r'('
     + r'unknown'
@@ -44,7 +48,7 @@ def regexp_user_agent():
     return _regexp
 
 
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
     user_agent = request.headers.get('User-Agent', 'unknown')
     if regexp_user_agent().match(user_agent):
         return (
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index fce3f8b67..2646920c2 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -1,4 +1,5 @@
-"""
+""".. _botdetection.ip_limit:
+
 Method ``ip_limit``
 -------------------
 
@@ -22,6 +23,8 @@ The :py:obj:`link_token` method is used to investigate whether a request is
 
 from typing import Optional, Tuple
 import flask
+from searx.tools import config
+
 
 from searx import redisdb
 from searx import logger
@@ -56,7 +59,7 @@ API_MAX = 4
 """Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
 
 
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
     redis_client = redisdb.client()
 
     x_forwarded_for = request.headers.get('X-Forwarded-For', '')
@@ -68,7 +71,9 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
         if c > API_MAX:
             return 429, "BLOCK %s: API limit exceeded"
 
-    suspicious = link_token.is_suspicious(request)
+    suspicious = False
+    if cfg['botdetection.ip_limit.link_token']:
+        suspicious = link_token.is_suspicious(request)
 
     if suspicious:
         c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
diff --git a/searx/botdetection/limiter.py b/searx/botdetection/limiter.py
index 71044c312..cc1e00b3c 100644
--- a/searx/botdetection/limiter.py
+++ b/searx/botdetection/limiter.py
@@ -38,8 +38,11 @@ and set the redis-url connection. Check the value, it depends on your redis DB
 """
 
 from typing import Optional, Tuple
+from pathlib import Path
 import flask
+import pytomlpp as toml
 
+from searx.tools import config
 from searx.botdetection import (
     http_accept,
     http_accept_encoding,
@@ -49,6 +52,42 @@ from searx.botdetection import (
     ip_limit,
 )
 
+LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
+"""Base configuration (schema) of the botdetection."""
+
+LIMITER_CFG = Path('/etc/searxng/limiter.toml')
+"""Local Limiter configuration."""
+
+CFG_DEPRECATED = {
+    # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests.  Don't use it in your real project config."
+}
+
+CFG = config.Config({}, {})
+
+
+def init_cfg(log):
+    global CFG  # pylint: disable=global-statement
+    CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED)
+
+    if not LIMITER_CFG.exists():
+        log.warning("missing config file: %s", LIMITER_CFG)
+        return
+
+    log.warning("load config file: %s", LIMITER_CFG)
+    try:
+        upd_cfg = toml.load(LIMITER_CFG)
+    except toml.DecodeError as exc:
+        msg = str(exc).replace('\t', '').replace('\n', ' ')
+        log.error("%s: %s", LIMITER_CFG, msg)
+        raise
+
+    is_valid, issue_list = CFG.validate(upd_cfg)
+    for msg in issue_list:
+        log.error(str(msg))
+    if not is_valid:
+        raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't cutomize limiter configuration from!")
+    CFG.update(upd_cfg)
+
 
 def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
 
@@ -58,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
     for func in [
         http_user_agent,
     ]:
-        val = func.filter_request(request)
+        val = func.filter_request(request, CFG)
         if val is not None:
             return val
 
@@ -72,7 +111,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
             http_user_agent,
             ip_limit,
         ]:
-            val = func.filter_request(request)
+            val = func.filter_request(request, CFG)
             if val is not None:
                 return val
 
diff --git a/searx/botdetection/limiter.toml b/searx/botdetection/limiter.toml
new file mode 100644
index 000000000..30cd1b53c
--- /dev/null
+++ b/searx/botdetection/limiter.toml
@@ -0,0 +1,3 @@
+[botdetection.ip_limit]
+
+link_token = true
\ No newline at end of file
diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index d9566b92b..92b0aa2a0 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -38,5 +38,6 @@ def init(app: flask.Flask, settings) -> bool:
     if not redisdb.client():
         logger.error("The limiter requires Redis")
         return False
+    limiter.init_cfg(logger)
     app.before_request(pre_request)
     return True
diff --git a/searx/tools/__init__.py b/searx/tools/__init__.py
new file mode 100644
index 000000000..08e6d982f
--- /dev/null
+++ b/searx/tools/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+""".. _tools src:
+
+A collection of *utilities* used by SearXNG, but without SearXNG specific
+peculiarities.
+
+"""
diff --git a/searx/tools/config.py b/searx/tools/config.py
new file mode 100644
index 000000000..f998031ba
--- /dev/null
+++ b/searx/tools/config.py
@@ -0,0 +1,376 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+"""Configuration class :py:class:`Config` with deep-update, schema validation
+and deprecated names.
+
+The :py:class:`Config` class implements a configuration that is based on
+structured dictionaries.  The configuration schema is defined in a dictionary
+structure and the configuration data is given in a dictionary structure.
+"""
+from __future__ import annotations
+
+import copy
+import typing
+import logging
+import pathlib
+import pytomlpp as toml
+
+__all__ = ['Config', 'UNSET', 'SchemaIssue']
+
+log = logging.getLogger(__name__)
+
+
+class FALSE:
+    """Class of ``False`` singleton"""
+
+    # pylint: disable=multiple-statements
+    def __init__(self, msg):
+        self.msg = msg
+
+    def __bool__(self):
+        return False
+
+    def __str__(self):
+        return self.msg
+
+    __repr__ = __str__
+
+
+UNSET = FALSE('<UNSET>')
+
+
+class SchemaIssue(ValueError):
+    """Exception to store and/or raise a message from a schema issue."""
+
+    def __init__(self, level: typing.Literal['warn', 'invalid'], msg: str):
+        self.level = level
+        super().__init__(msg)
+
+    def __str__(self):
+        return f"[cfg schema {self.level}] {self.args[0]}"
+
+
+class Config:
+    """Base class used for configuration"""
+
+    UNSET = UNSET
+
+    @classmethod
+    def from_toml(cls, schema_file: pathlib.Path, cfg_file: pathlib.Path, deprecated: dict) -> Config:
+
+        # init schema
+
+        log.debug("load schema file: %s", schema_file)
+        cfg = cls(cfg_schema=toml.load(schema_file), deprecated=deprecated)
+        if not cfg_file.exists():
+            log.warning("missing config file: %s", cfg_file)
+            return cfg
+
+        # load configuration
+
+        log.debug("load config file: %s", cfg_file)
+        try:
+            upd_cfg = toml.load(cfg_file)
+        except toml.DecodeError as exc:
+            msg = str(exc).replace('\t', '').replace('\n', ' ')
+            log.error("%s: %s", cfg_file, msg)
+            raise
+
+        is_valid, issue_list = cfg.validate(upd_cfg)
+        for msg in issue_list:
+            log.error(str(msg))
+        if not is_valid:
+            raise TypeError(f"schema of {cfg_file} is invalid!")
+        cfg.update(upd_cfg)
+        return cfg
+
+    def __init__(self, cfg_schema: typing.Dict, deprecated: typing.Dict[str, str]):
+        """Constructor of class Config.
+
+        :param cfg_schema: Schema of the configuration
+        :param deprecated: dictionary that maps deprecated configuration names to messages
+
+        These values are needed for validation, see :py:obj:`validate`.
+
+        """
+        self.cfg_schema = cfg_schema
+        self.deprecated = deprecated
+        self.cfg = copy.deepcopy(cfg_schema)
+
+    def __getitem__(self, key: str):
+        return self.get(key)
+
+    def validate(self, cfg: dict):
+        """Validation of dictionary ``cfg`` against ``self.cfg_schema``.
+        Validation is done by :py:obj:`validate`."""
+
+        return validate(self.cfg_schema, cfg, self.deprecated)
+
+    def update(self, upd_cfg: dict):
+        """Update this configuration by ``upd_cfg``."""
+
+        dict_deepupdate(self.cfg, upd_cfg)
+
+    def default(self, name: str):
+        """Returns default value of field ``name`` in ``self.cfg_schema``."""
+        return value(name, self.cfg_schema)
+
+    def get(self, name: str, default=UNSET, replace=True):
+        """Returns the value to which ``name`` points in the configuration.
+
+        If there is no such ``name`` in the config and the ``default`` is
+        :py:obj:`UNSET`, a :py:obj:`KeyError` is raised.
+        """
+
+        parent = self._get_parent_dict(name)
+        val = parent.get(name.split('.')[-1], UNSET)
+        if val is UNSET:
+            if default is UNSET:
+                raise KeyError(name)
+            val = default
+
+        if replace and isinstance(val, str):
+            val = val % self
+        return val
+
+    def set(self, name: str, val):
+        """Set the value to which ``name`` points in the configuration.
+
+        If there is no such ``name`` in the config, a :py:obj:`KeyError` is
+        raised.
+        """
+        parent = self._get_parent_dict(name)
+        parent[name.split('.')[-1]] = val
+
+    def _get_parent_dict(self, name):
+        parent_name = '.'.join(name.split('.')[:-1])
+        if parent_name:
+            parent = value(parent_name, self.cfg)
+        else:
+            parent = self.cfg
+        if (parent is UNSET) or (not isinstance(parent, dict)):
+            raise KeyError(parent_name)
+        return parent
+
+    def path(self, name: str, default=UNSET):
+        """Get a :py:class:`pathlib.Path` object from a config string."""
+
+        val = self.get(name, default)
+        if val is UNSET:
+            if default is UNSET:
+                raise KeyError(name)
+            return default
+        return pathlib.Path(str(val))
+
+    def pyobj(self, name, default=UNSET):
+        """Get python object referred to by the fully qualified name (FQN) in the
+        config string."""
+
+        fqn = self.get(name, default)
+        if fqn is UNSET:
+            if default is UNSET:
+                raise KeyError(name)
+            return default
+        (modulename, name) = str(fqn).rsplit('.', 1)
+        m = __import__(modulename, {}, {}, [name], 0)
+        return getattr(m, name)
+
+
+# working with dictionaries
+
+
+def value(name: str, data_dict: dict):
+    """Returns the value to which ``name`` points in the ``data_dict``.
+
+    .. code:: python
+
+        >>> data_dict = {
+                "foo": {"bar": 1 },
+                "bar": {"foo": 2 },
+                "foobar": [1, 2, 3],
+            }
+        >>> value('foobar', data_dict)
+        [1, 2, 3]
+        >>> value('foo.bar', data_dict)
+        1
+        >>> value('foo.bar.xxx', data_dict)
+        <UNSET>
+
+    """
+
+    ret_val = data_dict
+    for part in name.split('.'):
+        # a non-dict (a leaf value or UNSET) has no children: the name is unset
+        if not isinstance(ret_val, dict):
+            return UNSET
+        ret_val = ret_val.get(part, UNSET)
+    return ret_val
+
+
+def validate(
+    schema_dict: typing.Dict, data_dict: typing.Dict, deprecated: typing.Dict[str, str]
+) -> typing.Tuple[bool, list]:
+
+    """Deep validation of dictionary in ``data_dict`` against dictionary in
+    ``schema_dict``.  Argument ``deprecated`` is a dictionary that maps deprecated
+    configuration names to messages::
+
+        deprecated = {
+            "foo.bar" : "config 'foo.bar' is deprecated, use 'bar.foo'",
+            "..."     : "..."
+        }
+
+    The function returns a python tuple ``(is_valid, issue_list)``:
+
+    ``is_valid``:
+      A bool value indicating ``data_dict`` is valid or not.
+
+    ``issue_list``:
+      A list of messages (:py:obj:`SchemaIssue`) from the validation::
+
+          [schema warn] data_dict: deprecated 'fontlib.foo': <DEPRECATED['foo.bar']>
+          [schema invalid] data_dict: key unknown 'fontlib.foo'
+          [schema invalid] data_dict: type mismatch 'fontlib.foo': expected ..., is ...
+
+    If ``schema_dict`` or ``data_dict`` is not a dictionary type a
+    :py:obj:`SchemaIssue` is raised.
+
+    """
+    names = []
+    is_valid = True
+    issue_list = []
+
+    if not isinstance(schema_dict, dict):
+        raise SchemaIssue('invalid', "schema_dict is not a dict type")
+    if not isinstance(data_dict, dict):
+        raise SchemaIssue('invalid', f"data_dict issue{'.'.join(names)} is not a dict type")
+
+    is_valid, issue_list = _validate(names, issue_list, schema_dict, data_dict, deprecated)
+    return is_valid, issue_list
+
+
+def _validate(
+    names: typing.List,
+    issue_list: typing.List,
+    schema_dict: typing.Dict,
+    data_dict: typing.Dict,
+    deprecated: typing.Dict[str, str],
+) -> typing.Tuple[bool, typing.List]:
+
+    is_valid = True
+
+    for key, data_value in data_dict.items():
+
+        names.append(key)
+        name = '.'.join(names)
+
+        deprecated_msg = deprecated.get(name)
+        # print("XXX %s: key %s //   data_value: %s" % (name, key, data_value))
+        if deprecated_msg:
+            issue_list.append(SchemaIssue('warn', f"data_dict '{name}': deprecated - {deprecated_msg}"))
+
+        schema_value = value(name, schema_dict)
+        # print("YYY %s: key %s // schema_value: %s" % (name, key, schema_value))
+        if schema_value is UNSET:
+            if not deprecated_msg:
+                issue_list.append(SchemaIssue('invalid', f"data_dict '{name}': key unknown in schema_dict"))
+                is_valid = False
+
+        elif type(schema_value) != type(data_value):  # pylint: disable=unidiomatic-typecheck
+            issue_list.append(
+                SchemaIssue(
+                    'invalid',
+                    (f"data_dict: type mismatch '{name}':" f" expected {type(schema_value)}, is: {type(data_value)}"),
+                )
+            )
+            is_valid = False
+
+        elif isinstance(data_value, dict):
+            _valid, _ = _validate(names, issue_list, schema_dict, data_value, deprecated)
+            is_valid = is_valid and _valid
+        names.pop()
+
+    return is_valid, issue_list
+
+
+def dict_deepupdate(base_dict: dict, upd_dict: dict, names=None):
+    """Deep-update of dictionary in ``base_dict`` by dictionary in ``upd_dict``.
+
+    For each ``upd_key`` & ``upd_val`` pair in ``upd_dict``:
+
+    0. If types of ``base_dict[upd_key]`` and ``upd_val`` do not match raise a
+       :py:obj:`TypeError`.
+
+    1. If ``base_dict[upd_key]`` is a dict: recursively deep-update it by ``upd_val``.
+
+    2. If ``base_dict[upd_key]`` not exist: set ``base_dict[upd_key]`` from a
+       (deep-) copy of ``upd_val``.
+
+    3. If ``upd_val`` is a list, extend list in ``base_dict[upd_key]`` by the
+       list in ``upd_val``.
+
+    4. If ``upd_val`` is a set, update set in ``base_dict[upd_key]`` by set in
+       ``upd_val``.
+    """
+    # pylint: disable=too-many-branches
+    if not isinstance(base_dict, dict):
+        raise TypeError("argument 'base_dict' is not a ditionary type")
+    if not isinstance(upd_dict, dict):
+        raise TypeError("argument 'upd_dict' is not a ditionary type")
+
+    if names is None:
+        names = []
+
+    for upd_key, upd_val in upd_dict.items():
+        # For each upd_key & upd_val pair in upd_dict:
+
+        if isinstance(upd_val, dict):
+
+            if upd_key in base_dict:
+                # if base_dict[upd_key] exists, recursively deep-update it
+                if not isinstance(base_dict[upd_key], dict):
+                    raise TypeError(f"type mismatch {'.'.join(names)}: is not a dict type in base_dict")
+                dict_deepupdate(
+                    base_dict[upd_key],
+                    upd_val,
+                    names
+                    + [
+                        upd_key,
+                    ],
+                )
+
+            else:
+                # if base_dict[upd_key] does not exist, set base_dict[upd_key] from deepcopy of upd_val
+                base_dict[upd_key] = copy.deepcopy(upd_val)
+
+        elif isinstance(upd_val, list):
+
+            if upd_key in base_dict:
+                # if base_dict[upd_key] exists, base_dict[upd_key] is extended by
+                # the list from upd_val
+                if not isinstance(base_dict[upd_key], list):
+                    raise TypeError(f"type mismatch {'.'.join(names)}: is not a list type in base_dict")
+                base_dict[upd_key].extend(upd_val)
+
+            else:
+                # if base_dict[upd_key] doesn't exist, set base_dict[upd_key] from a deepcopy of the
+                # list in upd_val.
+                base_dict[upd_key] = copy.deepcopy(upd_val)
+
+        elif isinstance(upd_val, set):
+
+            if upd_key in base_dict:
+                # if base_dict[upd_key] exists, base_dict[upd_key] is updated by the set in upd_val
+                if not isinstance(base_dict[upd_key], set):
+                    raise TypeError(f"type mismatch {'.'.join(names)}: is not a set type in base_dict")
+                base_dict[upd_key].update(upd_val.copy())
+
+            else:
+                # if base_dict[upd_key] doesn't exist, set base_dict[upd_key] from a copy of the
+                # set in upd_val
+                base_dict[upd_key] = upd_val.copy()
+
+        else:
+            # for any other type of upd_val replace or add base_dict[upd_key] by a copy
+            # of upd_val
+            base_dict[upd_key] = copy.copy(upd_val)

From 9d7456fd6c49fbd96f03f6a5dedd6ba05e924d0a Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Sat, 27 May 2023 18:58:06 +0200
Subject: [PATCH 05/10] [fix] limiter.toml: botdetection.ip_limit turn off
 link_token by default

To activate the ``link_token`` method in the ``ip_limit`` method add the
following to your ``/etc/searxng/limiter.toml``::

   [botdetection.ip_limit]
   link_token = true

Related: https://github.com/searxng/searxng/pull/2357#issuecomment-1554116941
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/botdetection/ip_limit.py  | 15 ++++++++++++---
 searx/botdetection/limiter.toml |  2 +-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index 2646920c2..e72015190 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -9,9 +9,18 @@ bot request.  This method requires a redis DB and needs a HTTP X-Forwarded-For_
 header.  To take privacy only the hash value of an IP is stored in the redis DB
 and at least for a maximum of 10 minutes.
 
-The :py:obj:`link_token` method is used to investigate whether a request is
-*suspicious*.  If the :py:obj:`link_token` method is activated and a request is
-*suspicious* the request rates are reduced:
+The :py:obj:`.link_token` method can be used to investigate whether a request is
+*suspicious*.  To activate the :py:obj:`.link_token` method in the
+:py:obj:`.ip_limit` method add the following to your
+``/etc/searxng/limiter.toml``:
+
+.. code:: toml
+
+   [botdetection.ip_limit]
+   link_token = true
+
+If the :py:obj:`.link_token` method is activated and a request is *suspicious*
+the request rates are reduced:
 
 - :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
 - :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
diff --git a/searx/botdetection/limiter.toml b/searx/botdetection/limiter.toml
index 30cd1b53c..28c4e7589 100644
--- a/searx/botdetection/limiter.toml
+++ b/searx/botdetection/limiter.toml
@@ -1,3 +1,3 @@
 [botdetection.ip_limit]
 
-link_token = true
\ No newline at end of file
+link_token = false
\ No newline at end of file

From 52f1452c09ab2ec74aa5898d9ea749f33a71a814 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Sat, 27 May 2023 21:36:34 +0200
Subject: [PATCH 06/10] [mod] limiter: ip_limit - monitor suspicious IPs

To intercept bots that get their IPs from a range of IPs, there is a
``SUSPICIOUS_IP_WINDOW``.  In this window the suspicious IPs are stored for a
longer time.  IPs stored in this sliding window have a maximum of
``SUSPICIOUS_IP_MAX`` accesses before they are blocked.  As soon as the IP makes
a request that is not suspicious, the sliding window for this IP is dropped.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/botdetection/ip_limit.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index e72015190..9cffff7f0 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -25,6 +25,13 @@ the request rates are reduced:
 - :py:obj:`BURST_MAX` -> :py:obj:`BURST_MAX_SUSPICIOUS`
 - :py:obj:`LONG_MAX` -> :py:obj:`LONG_MAX_SUSPICIOUS`
 
+To intercept bots that get their IPs from a range of IPs, there is a
+:py:obj:`SUSPICIOUS_IP_WINDOW`.  In this window the suspicious IPs are stored
+for a longer time.  IPs stored in this sliding window have a maximum of
+:py:obj:`SUSPICIOUS_IP_MAX` accesses before they are blocked.  As soon as the IP
+makes a request that is not suspicious, the sliding window for this IP is
+droped.
+
 .. _X-Forwarded-For:
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
 
@@ -37,7 +44,7 @@ from searx.tools import config
 
 from searx import redisdb
 from searx import logger
-from searx.redislib import incr_sliding_window
+from searx.redislib import incr_sliding_window, drop_counter
 
 from . import link_token
 
@@ -67,6 +74,12 @@ API_WONDOW = 3600
 API_MAX = 4
 """Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
 
+SUSPICIOUS_IP_WINDOW = 3600 * 24
+"""Time (sec) before sliding window for one suspicious IP expires."""
+
+SUSPICIOUS_IP_MAX = 3
+"""Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
+
 
 def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
     redis_client = redisdb.client()
@@ -81,10 +94,18 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple
             return 429, "BLOCK %s: API limit exceeded"
 
     suspicious = False
+    suspicious_ip_counter = 'IP limit - SUSPICIOUS_IP_WINDOW:' + x_forwarded_for
+
     if cfg['botdetection.ip_limit.link_token']:
         suspicious = link_token.is_suspicious(request)
 
     if suspicious:
+
+        # this IP is suspicious: count requests from this IP
+        c = incr_sliding_window(redis_client, suspicious_ip_counter, SUSPICIOUS_IP_WINDOW)
+        if c > SUSPICIOUS_IP_MAX:
+            return 429, f"bot detected, too many request from {x_forwarded_for} in SUSPICIOUS_IP_WINDOW"
+
         c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
         if c > BURST_MAX_SUSPICIOUS:
             return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS"
@@ -94,6 +115,11 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple
             return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS"
 
     else:
+
+        if cfg['botdetection.ip_limit.link_token']:
+            # this IP is no longer suspicious: release ip again / delete the counter of this IP
+            drop_counter(redis_client, suspicious_ip_counter)
+
         c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
         if c > BURST_MAX:
             return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX"

From b8c7c2c9aa604fd1fb7be5559c9ad025ceb17aa4 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Sun, 28 May 2023 18:58:31 +0200
Subject: [PATCH 07/10] [mod] botdetection - improve ip_limit and link_token
 methods

- counting requests in LONG_WINDOW and BURST_WINDOW is not needed when the
  request is validated by the link_token method [1]

- renew a ping-key on validation [2], this is needed for infinite scrolling,
  where no new token (CSS) is loaded. / this does not fix the BURST_MAX issue in
  the vanilla limiter

- normalize the counter names of the ip_limit method to 'ip_limit.*'

- integrate the ip_limit method straightforwardly in the limiter plugin, without
  intermediate code: ip_limit now returns None or a werkzeug.Response object
  that the plugin can pass on to the flask application (previously a tuple was
  returned and converted by intermediate code)

[1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566113277
[2] https://github.com/searxng/searxng/pull/2357#discussion_r1208542206
[3] https://github.com/searxng/searxng/pull/2357#issuecomment-1566125979

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/botdetection/__init__.py             | 16 +---
 searx/botdetection/_helpers.py             | 93 ++++++++++++++++++++++
 searx/botdetection/http_accept.py          |  8 +-
 searx/botdetection/http_accept_encoding.py |  8 +-
 searx/botdetection/http_accept_language.py |  8 +-
 searx/botdetection/http_connection.py      |  8 +-
 searx/botdetection/http_user_agent.py      | 11 ++-
 searx/botdetection/ip_limit.py             | 61 +++++++-------
 searx/botdetection/limiter.py              | 11 ++-
 searx/botdetection/link_token.py           | 43 +++++++---
 searx/plugins/limiter.py                   | 14 +---
 11 files changed, 197 insertions(+), 84 deletions(-)
 create mode 100644 searx/botdetection/_helpers.py

diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py
index 78a7d30f3..b4de0f9c8 100644
--- a/searx/botdetection/__init__.py
+++ b/searx/botdetection/__init__.py
@@ -9,18 +9,4 @@ The methods implemented in this python package are use by the :ref:`limiter src`
 
 """
 
-import flask
-
-
-def dump_request(request: flask.Request):
-    return (
-        "%s: '%s'" % (request.headers.get('X-Forwarded-For'), request.path)
-        + " || form: %s" % request.form
-        + " || Accept: %s" % request.headers.get('Accept')
-        + " || Accept-Language: %s" % request.headers.get('Accept-Language')
-        + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
-        + " || Content-Type: %s" % request.headers.get('Content-Type')
-        + " || Content-Length: %s" % request.headers.get('Content-Length')
-        + " || Connection: %s" % request.headers.get('Connection')
-        + " || User-Agent: %s" % request.headers.get('User-Agent')
-    )
+from ._helpers import dump_request
diff --git a/searx/botdetection/_helpers.py b/searx/botdetection/_helpers.py
new file mode 100644
index 000000000..b034b980b
--- /dev/null
+++ b/searx/botdetection/_helpers.py
@@ -0,0 +1,93 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=missing-module-docstring, invalid-name
+
+from typing import Optional
+import flask
+import werkzeug
+
+from searx import logger
+
+logger = logger.getChild('botdetection')
+
+
+def dump_request(request: flask.Request):
+    return (
+        "%s: %s" % (get_real_ip(request), request.path)
+        + " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
+        + " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
+        + " || form: %s" % request.form
+        + " || Accept: %s" % request.headers.get('Accept')
+        + " || Accept-Language: %s" % request.headers.get('Accept-Language')
+        + " || Accept-Encoding: %s" % request.headers.get('Accept-Encoding')
+        + " || Content-Type: %s" % request.headers.get('Content-Type')
+        + " || Content-Length: %s" % request.headers.get('Content-Length')
+        + " || Connection: %s" % request.headers.get('Connection')
+        + " || User-Agent: %s" % request.headers.get('User-Agent')
+    )
+
+
+def too_many_requests(request: flask.Request, log_msg: str) -> Optional[werkzeug.Response]:
+    log_prefix = 'BLOCK %s: ' % get_real_ip(request)
+    logger.debug(log_prefix + log_msg)
+    return flask.make_response(('Too Many Requests', 429))
+
+
+def get_real_ip(request: flask.Request) -> str:
+    """Returns real IP of the request.  Since not all proxies set all the HTTP
+    headers and incoming headers can be faked it may happen that the IP cannot
+    be determined correctly.
+
+    .. sidebar:: :py:obj:`flask.Request.remote_addr`
+
+       SearXNG uses Werkzeug's ProxyFix_ (with it default ``x_for=1``).
+
+    This function tries to get the remote IP in the order listed below,
+    additional some tests are done and if inconsistencies or errors are
+    detected, they are logged.
+
+    The remote IP of the request is taken from (first match):
+
+    - X-Forwarded-For_ header
+    - `X-real-IP header <https://github.com/searxng/searxng/issues/1237#issuecomment-1147564516>`__
+    - :py:obj:`flask.Request.remote_addr`
+
+    .. _ProxyFix:
+       https://werkzeug.palletsprojects.com/middleware/proxy_fix/
+
+    .. _X-Forwarded-For:
+      https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
+
+    """
+
+    forwarded_for = request.headers.get("X-Forwarded-For")
+    real_ip = request.headers.get('X-Real-IP')
+    remote_addr = request.remote_addr
+    logger.debug("X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr)
+
+    if not forwarded_for:
+        logger.error("X-Forwarded-For header is not set!")
+    else:
+        from .limiter import get_cfg  # pylint: disable=import-outside-toplevel, cyclic-import
+
+        forwarded_for = [x.strip() for x in forwarded_for.split(',')]
+        x_for: int = get_cfg()['real_ip.x_for']
+        forwarded_for = forwarded_for[-min(len(forwarded_for), x_for)]
+
+    if not real_ip:
+        logger.error("X-Real-IP header is not set!")
+
+    if forwarded_for and real_ip and forwarded_for != real_ip:
+        logger.warning("IP from X-Real-IP (%s) is not equal to IP from X-Forwarded-For (%s)", real_ip, forwarded_for)
+
+    if forwarded_for and remote_addr and forwarded_for != remote_addr:
+        logger.warning(
+            "IP from WSGI environment (%s) is not equal to IP from X-Forwarded-For (%s)", remote_addr, forwarded_for
+        )
+
+    if real_ip and remote_addr and real_ip != remote_addr:
+        logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
+
+    request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
+    logger.debug("get_real_ip() -> %s", request_ip)
+    return request_ip
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py
index 23670a283..60e2330ae 100644
--- a/searx/botdetection/http_accept.py
+++ b/searx/botdetection/http_accept.py
@@ -15,13 +15,15 @@ Accept_ header ..
 """
 # pylint: disable=unused-argument
 
-from typing import Optional, Tuple
+from typing import Optional
 import flask
+import werkzeug
 
 from searx.tools import config
+from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
     if 'text/html' not in request.accept_mimetypes:
-        return 429, "bot detected, HTTP header Accept did not contain text/html"
+        return too_many_requests(request, "HTTP header Accept did not contain text/html")
     return None
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
index 191249711..5301c5d9d 100644
--- a/searx/botdetection/http_accept_encoding.py
+++ b/searx/botdetection/http_accept_encoding.py
@@ -16,14 +16,16 @@ bot if the Accept-Encoding_ header ..
 """
 # pylint: disable=unused-argument
 
-from typing import Optional, Tuple
+from typing import Optional
 import flask
+import werkzeug
 
 from searx.tools import config
+from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
     accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
     if not ('gzip' in accept_list or 'deflate' in accept_list):
-        return 429, "bot detected, HTTP header Accept-Encoding did not contain gzip nor deflate"
+        return too_many_requests(request, "HTTP header Accept-Encoding did not contain gzip nor deflate")
     return None
diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py
index 558a216cf..060f67ec0 100644
--- a/searx/botdetection/http_accept_language.py
+++ b/searx/botdetection/http_accept_language.py
@@ -13,13 +13,15 @@ if the Accept-Language_ header is unset.
 """
 # pylint: disable=unused-argument
 
-from typing import Optional, Tuple
+from typing import Optional
 import flask
+import werkzeug
 
 from searx.tools import config
+from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
     if request.headers.get('Accept-Language', '').strip() == '':
-        return 429, "bot detected, missing HTTP header Accept-Language"
+        return too_many_requests(request, "missing HTTP header Accept-Language")
     return None
diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py
index 0ef24a7b8..e718dfe3f 100644
--- a/searx/botdetection/http_connection.py
+++ b/searx/botdetection/http_connection.py
@@ -13,13 +13,15 @@ the Connection_ header is set to ``close``.
 """
 # pylint: disable=unused-argument
 
-from typing import Optional, Tuple
+from typing import Optional
 import flask
+import werkzeug
 
 from searx.tools import config
+from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
     if request.headers.get('Connection', '').strip() == 'close':
-        return 429, "bot detected, HTTP header 'Connection=close'"
+        return too_many_requests(request, "HTTP header 'Connection=close")
     return None
diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py
index 3d1ec9173..70309e975 100644
--- a/searx/botdetection/http_user_agent.py
+++ b/searx/botdetection/http_user_agent.py
@@ -14,11 +14,13 @@ the User-Agent_ header is unset or matches the regular expression
 """
 # pylint: disable=unused-argument
 
-from typing import Optional, Tuple
+from typing import Optional
 import re
 import flask
+import werkzeug
 
 from searx.tools import config
+from ._helpers import too_many_requests
 
 
 USER_AGENT = (
@@ -48,11 +50,8 @@ def regexp_user_agent():
     return _regexp
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
     user_agent = request.headers.get('User-Agent', 'unknown')
     if regexp_user_agent().match(user_agent):
-        return (
-            429,
-            f"bot detected, HTTP header User-Agent: {user_agent}",
-        )
+        return too_many_requests(request, f"bot detected, HTTP header User-Agent: {user_agent}")
     return None
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index 9cffff7f0..e7fa57187 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
 """.. _botdetection.ip_limit:
 
 Method ``ip_limit``
@@ -37,16 +39,18 @@ droped.
 
 """
 
-from typing import Optional, Tuple
+from typing import Optional
 import flask
+import werkzeug
 from searx.tools import config
 
-
 from searx import redisdb
 from searx import logger
 from searx.redislib import incr_sliding_window, drop_counter
 
 from . import link_token
+from ._helpers import too_many_requests
+
 
 logger = logger.getChild('botdetection.ip_limit')
 
@@ -81,50 +85,51 @@ SUSPICIOUS_IP_MAX = 3
 """Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[Tuple[int, str]]:
+def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
+    # pylint: disable=too-many-return-statements
     redis_client = redisdb.client()
 
-    x_forwarded_for = request.headers.get('X-Forwarded-For', '')
-    if not x_forwarded_for:
+    client_ip = request.headers.get('X-Forwarded-For', '')
+    if not client_ip:
         logger.error("missing HTTP header X-Forwarded-For")
 
     if request.args.get('format', 'html') != 'html':
-        c = incr_sliding_window(redis_client, 'IP limit - API_WONDOW:' + x_forwarded_for, API_WONDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW)
         if c > API_MAX:
-            return 429, "BLOCK %s: API limit exceeded"
-
-    suspicious = False
-    suspicious_ip_counter = 'IP limit - SUSPICIOUS_IP_WINDOW:' + x_forwarded_for
+            return too_many_requests(request, "too many request in API_WINDOW")
 
     if cfg['botdetection.ip_limit.link_token']:
-        suspicious = link_token.is_suspicious(request)
 
-    if suspicious:
+        suspicious = link_token.is_suspicious(request, True)
+
+        if not suspicious:
+            # this IP is no longer suspicious: release ip again / delete the counter of this IP
+            drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip)
+            return None
 
         # this IP is suspicious: count requests from this IP
-        c = incr_sliding_window(redis_client, suspicious_ip_counter, SUSPICIOUS_IP_WINDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip, SUSPICIOUS_IP_WINDOW)
         if c > SUSPICIOUS_IP_MAX:
-            return 429, f"bot detected, too many request from {x_forwarded_for} in SUSPICIOUS_IP_WINDOW"
+            logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", client_ip)
+            return flask.redirect(flask.url_for('index'), code=302)
 
-        c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
         if c > BURST_MAX_SUSPICIOUS:
-            return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX_SUSPICIOUS"
+            return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
 
-        c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
         if c > LONG_MAX_SUSPICIOUS:
-            return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX_SUSPICIOUS"
+            return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
 
-    else:
+        return None
 
-        if cfg['botdetection.ip_limit.link_token']:
-            # this IP is no longer suspicious: release ip again / delete the counter of this IP
-            drop_counter(redis_client, suspicious_ip_counter)
+    # vanilla limiter without extensions counts BURST_MAX and LONG_MAX
+    c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
+    if c > BURST_MAX:
+        return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX)")
 
-        c = incr_sliding_window(redis_client, 'IP limit - BURST_WINDOW:' + x_forwarded_for, BURST_WINDOW)
-        if c > BURST_MAX:
-            return 429, f"bot detected, too many request from {x_forwarded_for} in BURST_MAX"
+    c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
+    if c > LONG_MAX:
+        return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX)")
 
-        c = incr_sliding_window(redis_client, 'IP limit - LONG_WINDOW:' + x_forwarded_for, LONG_WINDOW)
-        if c > LONG_MAX:
-            return 429, f"bot detected, too many request from {x_forwarded_for} in LONG_MAX"
     return None
diff --git a/searx/botdetection/limiter.py b/searx/botdetection/limiter.py
index cc1e00b3c..93826684f 100644
--- a/searx/botdetection/limiter.py
+++ b/searx/botdetection/limiter.py
@@ -42,6 +42,7 @@ from pathlib import Path
 import flask
 import pytomlpp as toml
 
+from searx import logger
 from searx.tools import config
 from searx.botdetection import (
     http_accept,
@@ -62,7 +63,13 @@ CFG_DEPRECATED = {
     # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests.  Don't use it in your real project config."
 }
 
-CFG = config.Config({}, {})
+CFG = None
+
+
+def get_cfg() -> config.Config:
+    if CFG is None:
+        init_cfg(logger)
+    return CFG
 
 
 def init_cfg(log):
@@ -73,7 +80,7 @@ def init_cfg(log):
         log.warning("missing config file: %s", LIMITER_CFG)
         return
 
-    log.warning("load config file: %s", LIMITER_CFG)
+    log.info("load config file: %s", LIMITER_CFG)
     try:
         upd_cfg = toml.load(LIMITER_CFG)
     except toml.DecodeError as exc:
diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py
index 8ef215f6c..376d06d61 100644
--- a/searx/botdetection/link_token.py
+++ b/searx/botdetection/link_token.py
@@ -47,15 +47,24 @@ from searx.redislib import secret_hash
 TOKEN_LIVE_TIME = 600
 """Livetime (sec) of limiter's CSS token."""
 
+PING_LIVE_TIME = 3600
+"""Livetime (sec) of the ping-key from a client (request)"""
+
 PING_KEY = 'SearXNG_limiter.ping'
+"""Prefix of all ping-keys generated by :py:obj:`get_ping_key`"""
+
 TOKEN_KEY = 'SearXNG_limiter.token'
+"""Key for which the current token is stored in the DB"""
 
 logger = logger.getChild('botdetection.link_token')
 
 
-def is_suspicious(request: flask.Request):
+def is_suspicious(request: flask.Request, renew: bool = False):
     """Checks if there is a valid ping for this request, if not this request is
-    rated as *suspicious*"""
+    rated as *suspicious*.  If a valid ping exists and argument ``renew`` is
+    ``True`` the expire time of this ping is reset to :py:obj:`PING_LIVE_TIME`.
+
+    """
     redis_client = redisdb.client()
     if not redis_client:
         return False
@@ -69,12 +78,19 @@ def is_suspicious(request: flask.Request):
         )
         return True
 
-    logger.debug("found ping for this request: %s", ping_key)
+    if renew:
+        redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
+
+    logger.debug("found ping for client request: %s", ping_key)
     return False
 
 
 def ping(request: flask.Request, token: str):
-    """This function is called by a request to URL ``/client<token>.css``"""
+    """This function is called by a request to URL ``/client<token>.css``.  If
+    ``token`` is valid a :py:obj:`PING_KEY` for the client is stored in the DB.
+    The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
+
+    """
     redis_client = redisdb.client()
     if not redis_client:
         return
@@ -82,19 +98,24 @@ def ping(request: flask.Request, token: str):
         return
     ping_key = get_ping_key(request)
     logger.debug("store ping for: %s", ping_key)
-    redis_client.set(ping_key, 1, ex=TOKEN_LIVE_TIME)
+    redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
 
 
 def get_ping_key(request: flask.Request):
-    """Generates a hashed key that fits (more or less) to a request.  At least
-    X-Forwarded-For_ is needed to be able to assign the request to an IP.
+    """Generates a hashed key that fits (more or less) to a client (request).
+    At least X-Forwarded-For_ is needed to be able to assign the request to an
+    IP.
 
     """
-    return secret_hash(
+    return (
         PING_KEY
-        + request.headers.get('X-Forwarded-For', '')
-        + request.headers.get('Accept-Language', '')
-        + request.headers.get('User-Agent', '')
+        + "["
+        + secret_hash(
+            request.headers.get('X-Forwarded-For', '')
+            + request.headers.get('Accept-Language', '')
+            + request.headers.get('User-Agent', '')
+        )
+        + "]"
     )
 
 
diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index 92b0aa2a0..7edbb1ce0 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -20,16 +20,10 @@ logger = logger.getChild('limiter')
 
 def pre_request():
     """See :ref:`flask.Flask.before_request`"""
-
-    val = limiter.filter_request(flask.request)
-    if val is not None:
-        http_status, msg = val
-        client_ip = flask.request.headers.get('X-Forwarded-For', '<unknown>')
-        logger.error("BLOCK (IP %s): %s" % (client_ip, msg))
-        return 'Too Many Requests', http_status
-
-    logger.debug("OK: %s" % dump_request(flask.request))
-    return None
+    ret_val = limiter.filter_request(flask.request)
+    if ret_val is None:
+        logger.debug("OK: %s" % dump_request(flask.request))
+    return ret_val
 
 
 def init(app: flask.Flask, settings) -> bool:

From 38431d2e142b7da6a9b48aad203f02a2eff7e6fd Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Mon, 29 May 2023 19:46:37 +0200
Subject: [PATCH 08/10] [fix] correct determination of the IP for the request

To determine the IP of a request correctly, the function
botdetection.get_real_ip() is implemented.  This function is used in the
ip_limit and link_token methods of the botdetection and in the self_info
plugin.

A documentation about the X-Forwarded-For header has been added.

[1] https://github.com/searxng/searxng/pull/2357#issuecomment-1566211059

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/botdetection/__init__.py   | 20 +++++++++++++++++---
 searx/botdetection/ip_limit.py   |  6 ++----
 searx/botdetection/limiter.toml  |  7 ++++++-
 searx/botdetection/link_token.py |  7 +++----
 searx/plugins/self_info.py       | 31 +++++++------------------------
 tests/unit/test_plugins.py       | 12 +++++++-----
 6 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py
index b4de0f9c8..c903b0bb4 100644
--- a/searx/botdetection/__init__.py
+++ b/searx/botdetection/__init__.py
@@ -2,11 +2,25 @@
 # lint: pylint
 """.. _botdetection src:
 
-Bot detection methods
----------------------
+X-Forwarded-For
+===============
 
-The methods implemented in this python package are use by the :ref:`limiter src`.
+.. attention::
+
+   A correct setup of the HTTP request headers ``X-Forwarded-For`` and
+   ``X-Real-IP`` is essential to be able to assign a request to an IP correctly:
+
+   - `NGINX RequestHeader`_
+   - `Apache RequestHeader`_
+
+.. _NGINX RequestHeader:
+    https://docs.searxng.org/admin/installation-nginx.html#nginx-s-searxng-site
+.. _Apache RequestHeader:
+    https://docs.searxng.org/admin/installation-apache.html#apache-s-searxng-site
+
+.. autofunction:: searx.botdetection.get_real_ip
 
 """
 
 from ._helpers import dump_request
+from ._helpers import get_real_ip
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index e7fa57187..268285dd9 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -49,7 +49,7 @@ from searx import logger
 from searx.redislib import incr_sliding_window, drop_counter
 
 from . import link_token
-from ._helpers import too_many_requests
+from ._helpers import too_many_requests, get_real_ip
 
 
 logger = logger.getChild('botdetection.ip_limit')
@@ -89,9 +89,7 @@ def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkz
     # pylint: disable=too-many-return-statements
     redis_client = redisdb.client()
 
-    client_ip = request.headers.get('X-Forwarded-For', '')
-    if not client_ip:
-        logger.error("missing HTTP header X-Forwarded-For")
+    client_ip = get_real_ip(request)
 
     if request.args.get('format', 'html') != 'html':
         c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW)
diff --git a/searx/botdetection/limiter.toml b/searx/botdetection/limiter.toml
index 28c4e7589..af797d32c 100644
--- a/searx/botdetection/limiter.toml
+++ b/searx/botdetection/limiter.toml
@@ -1,3 +1,8 @@
 [botdetection.ip_limit]
 
-link_token = false
\ No newline at end of file
+link_token = false
+
+[real_ip]
+
+# Number of values to trust for X-Forwarded-For.
+x_for = 1
diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py
index 376d06d61..a83214a33 100644
--- a/searx/botdetection/link_token.py
+++ b/searx/botdetection/link_token.py
@@ -43,6 +43,7 @@ import flask
 from searx import logger
 from searx import redisdb
 from searx.redislib import secret_hash
+from ._helpers import get_real_ip
 
 TOKEN_LIVE_TIME = 600
 """Livetime (sec) of limiter's CSS token."""
@@ -73,7 +74,7 @@ def is_suspicious(request: flask.Request, renew: bool = False):
     if not redis_client.get(ping_key):
         logger.warning(
             "missing ping (IP: %s) / request: %s",
-            request.headers.get('X-Forwarded-For', ''),
+            get_real_ip(request),
             ping_key,
         )
         return True
@@ -111,9 +112,7 @@ def get_ping_key(request: flask.Request):
         PING_KEY
         + "["
         + secret_hash(
-            request.headers.get('X-Forwarded-For', '')
-            + request.headers.get('Accept-Language', '')
-            + request.headers.get('User-Agent', '')
+            get_real_ip(request) + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
         )
         + "]"
     )
diff --git a/searx/plugins/self_info.py b/searx/plugins/self_info.py
index fbe4518b5..8079ee0d4 100644
--- a/searx/plugins/self_info.py
+++ b/searx/plugins/self_info.py
@@ -1,21 +1,11 @@
-'''
-searx is free software: you can redistribute it and/or modify
-it under the terms of the GNU Affero General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
+# SPDX-License-Identifier: AGPL-3.0-or-later
+# lint: pylint
+# pylint: disable=missing-module-docstring,invalid-name
 
-searx is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU Affero General Public License for more details.
-
-You should have received a copy of the GNU Affero General Public License
-along with searx. If not, see < http://www.gnu.org/licenses/ >.
-
-(C) 2015 by Adam Tauber, <asciimoo@gmail.com>
-'''
-from flask_babel import gettext
 import re
+from flask_babel import gettext
+
+from searx.botdetection._helpers import get_real_ip
 
 name = gettext('Self Information')
 description = gettext('Displays your IP if the query is "ip" and your user agent if the query contains "user agent".')
@@ -28,18 +18,11 @@ query_examples = ''
 p = re.compile('.*user[ -]agent.*', re.IGNORECASE)
 
 
-# attach callback to the post search hook
-#  request: flask request object
-#  ctx: the whole local context of the pre search hook
 def post_search(request, search):
     if search.search_query.pageno > 1:
         return True
     if search.search_query.query == 'ip':
-        x_forwarded_for = request.headers.getlist("X-Forwarded-For")
-        if x_forwarded_for:
-            ip = x_forwarded_for[0]
-        else:
-            ip = request.remote_addr
+        ip = get_real_ip(request)
         search.result_container.answers['ip'] = {'answer': ip}
     elif p.match(search.search_query.query):
         ua = request.user_agent
diff --git a/tests/unit/test_plugins.py b/tests/unit/test_plugins.py
index 28df835e5..0d555fdc0 100644
--- a/tests/unit/test_plugins.py
+++ b/tests/unit/test_plugins.py
@@ -50,9 +50,13 @@ class SelfIPTest(SearxTestCase):
         self.assertTrue(len(store.plugins) == 1)
 
         # IP test
-        request = Mock(remote_addr='127.0.0.1')
-        request.headers.getlist.return_value = []
-        search = get_search_mock(query='ip', pageno=1)
+        request = Mock()
+        request.remote_addr = '127.0.0.1'
+        request.headers = {'X-Forwarded-For': '1.2.3.4, 127.0.0.1', 'X-Real-IP': '127.0.0.1'}
+        search = get_search_mock(
+            query='ip',
+            pageno=1,
+        )
         store.call(store.plugins, 'post_search', request, search)
         self.assertTrue('127.0.0.1' in search.result_container.answers["ip"]["answer"])
 
@@ -62,7 +66,6 @@ class SelfIPTest(SearxTestCase):
 
         # User agent test
         request = Mock(user_agent='Mock')
-        request.headers.getlist.return_value = []
 
         search = get_search_mock(query='user-agent', pageno=1)
         store.call(store.plugins, 'post_search', request, search)
@@ -98,7 +101,6 @@ class HashPluginTest(SearxTestCase):
         self.assertTrue(len(store.plugins) == 1)
 
         request = Mock(remote_addr='127.0.0.1')
-        request.headers.getlist.return_value = []
 
         # MD5
         search = get_search_mock(query='md5 test', pageno=1)

From 281e36f4b7848374535d5e953050ae73423191ca Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Thu, 1 Jun 2023 15:41:48 +0200
Subject: [PATCH 09/10] [fix] limiter: replace real_ip by IPv4/v6 network

Closes: https://github.com/searxng/searxng/issues/2477
Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/botdetection/__init__.py             |  1 +
 searx/botdetection/_helpers.py             | 42 ++++++++++++---
 searx/botdetection/http_accept.py          | 16 ++++--
 searx/botdetection/http_accept_encoding.py | 16 ++++--
 searx/botdetection/http_accept_language.py | 14 +++--
 searx/botdetection/http_connection.py      | 16 ++++--
 searx/botdetection/http_user_agent.py      | 16 ++++--
 searx/botdetection/ip_limit.py             | 49 ++++++++++-------
 searx/botdetection/limiter.py              | 61 ++++++++++------------
 searx/botdetection/limiter.toml            | 22 ++++++--
 searx/botdetection/link_token.py           | 54 +++++++++++--------
 searx/plugins/limiter.py                   |  7 +--
 12 files changed, 208 insertions(+), 106 deletions(-)

diff --git a/searx/botdetection/__init__.py b/searx/botdetection/__init__.py
index c903b0bb4..fcd8e5630 100644
--- a/searx/botdetection/__init__.py
+++ b/searx/botdetection/__init__.py
@@ -24,3 +24,4 @@ X-Forwarded-For
 
 from ._helpers import dump_request
 from ._helpers import get_real_ip
+from ._helpers import too_many_requests
diff --git a/searx/botdetection/_helpers.py b/searx/botdetection/_helpers.py
index b034b980b..8e0156d6e 100644
--- a/searx/botdetection/_helpers.py
+++ b/searx/botdetection/_helpers.py
@@ -1,11 +1,19 @@
 # SPDX-License-Identifier: AGPL-3.0-or-later
 # lint: pylint
 # pylint: disable=missing-module-docstring, invalid-name
+from __future__ import annotations
 
-from typing import Optional
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+    IPv6Address,
+    ip_address,
+    ip_network,
+)
 import flask
 import werkzeug
 
+from searx.tools import config
 from searx import logger
 
 logger = logger.getChild('botdetection')
@@ -13,7 +21,7 @@ logger = logger.getChild('botdetection')
 
 def dump_request(request: flask.Request):
     return (
-        "%s: %s" % (get_real_ip(request), request.path)
+        request.path
         + " || X-Forwarded-For: %s" % request.headers.get('X-Forwarded-For')
         + " || X-Real-IP: %s" % request.headers.get('X-Real-IP')
         + " || form: %s" % request.form
@@ -27,12 +35,30 @@ def dump_request(request: flask.Request):
     )
 
 
-def too_many_requests(request: flask.Request, log_msg: str) -> Optional[werkzeug.Response]:
-    log_prefix = 'BLOCK %s: ' % get_real_ip(request)
-    logger.debug(log_prefix + log_msg)
+def too_many_requests(network: IPv4Network | IPv6Network, log_msg: str) -> werkzeug.Response | None:
+    """Returns an HTTP 429 response object and writes a DEBUG message to the
+    'botdetection' logger.  This function is used in part by the filter methods
+    to return the default ``Too Many Requests`` response.
+
+    """
+
+    logger.debug("BLOCK %s: %s", network.compressed, log_msg)
     return flask.make_response(('Too Many Requests', 429))
 
 
+def get_network(real_ip: str, cfg: config.Config) -> IPv4Network | IPv6Network:
+    """Returns the (client) network that the real_ip is part of."""
+
+    ip = ip_address(real_ip)
+    if isinstance(ip, IPv6Address):
+        prefix = cfg['real_ip.ipv6_prefix']
+    else:
+        prefix = cfg['real_ip.ipv4_prefix']
+    network = ip_network(f"{real_ip}/{prefix}", strict=False)
+    # logger.debug("get_network(): %s", network.compressed)
+    return network
+
+
 def get_real_ip(request: flask.Request) -> str:
     """Returns real IP of the request.  Since not all proxies set all the HTTP
     headers and incoming headers can be faked it may happen that the IP cannot
@@ -63,7 +89,9 @@ def get_real_ip(request: flask.Request) -> str:
     forwarded_for = request.headers.get("X-Forwarded-For")
     real_ip = request.headers.get('X-Real-IP')
     remote_addr = request.remote_addr
-    logger.debug("X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr)
+    # logger.debug(
+    #     "X-Forwarded-For: %s || X-Real-IP: %s || request.remote_addr: %s", forwarded_for, real_ip, remote_addr
+    # )
 
     if not forwarded_for:
         logger.error("X-Forwarded-For header is not set!")
@@ -89,5 +117,5 @@ def get_real_ip(request: flask.Request) -> str:
         logger.warning("IP from WSGI environment (%s) is not equal to IP from X-Real-IP (%s)", remote_addr, real_ip)
 
     request_ip = forwarded_for or real_ip or remote_addr or '0.0.0.0'
-    logger.debug("get_real_ip() -> %s", request_ip)
+    # logger.debug("get_real_ip() -> %s", request_ip)
     return request_ip
diff --git a/searx/botdetection/http_accept.py b/searx/botdetection/http_accept.py
index 60e2330ae..b78a86278 100644
--- a/searx/botdetection/http_accept.py
+++ b/searx/botdetection/http_accept.py
@@ -15,7 +15,12 @@ Accept_ header ..
 """
 # pylint: disable=unused-argument
 
-from typing import Optional
+from __future__ import annotations
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
+
 import flask
 import werkzeug
 
@@ -23,7 +28,12 @@ from searx.tools import config
 from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: flask.Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
+
     if 'text/html' not in request.accept_mimetypes:
-        return too_many_requests(request, "HTTP header Accept did not contain text/html")
+        return too_many_requests(network, "HTTP header Accept did not contain text/html")
     return None
diff --git a/searx/botdetection/http_accept_encoding.py b/searx/botdetection/http_accept_encoding.py
index 5301c5d9d..60718a4ca 100644
--- a/searx/botdetection/http_accept_encoding.py
+++ b/searx/botdetection/http_accept_encoding.py
@@ -16,7 +16,12 @@ bot if the Accept-Encoding_ header ..
 """
 # pylint: disable=unused-argument
 
-from typing import Optional
+from __future__ import annotations
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
+
 import flask
 import werkzeug
 
@@ -24,8 +29,13 @@ from searx.tools import config
 from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: flask.Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
+
     accept_list = [l.strip() for l in request.headers.get('Accept-Encoding', '').split(',')]
     if not ('gzip' in accept_list or 'deflate' in accept_list):
-        return too_many_requests(request, "HTTP header Accept-Encoding did not contain gzip nor deflate")
+        return too_many_requests(network, "HTTP header Accept-Encoding did not contain gzip nor deflate")
     return None
diff --git a/searx/botdetection/http_accept_language.py b/searx/botdetection/http_accept_language.py
index 060f67ec0..395d28bfd 100644
--- a/searx/botdetection/http_accept_language.py
+++ b/searx/botdetection/http_accept_language.py
@@ -12,8 +12,12 @@ if the Accept-Language_ header is unset.
 
 """
 # pylint: disable=unused-argument
+from __future__ import annotations
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
 
-from typing import Optional
 import flask
 import werkzeug
 
@@ -21,7 +25,11 @@ from searx.tools import config
 from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: flask.Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
     if request.headers.get('Accept-Language', '').strip() == '':
-        return too_many_requests(request, "missing HTTP header Accept-Language")
+        return too_many_requests(network, "missing HTTP header Accept-Language")
     return None
diff --git a/searx/botdetection/http_connection.py b/searx/botdetection/http_connection.py
index e718dfe3f..ee0d80a23 100644
--- a/searx/botdetection/http_connection.py
+++ b/searx/botdetection/http_connection.py
@@ -13,7 +13,12 @@ the Connection_ header is set to ``close``.
 """
 # pylint: disable=unused-argument
 
-from typing import Optional
+from __future__ import annotations
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
+
 import flask
 import werkzeug
 
@@ -21,7 +26,12 @@ from searx.tools import config
 from ._helpers import too_many_requests
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: flask.Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
+
     if request.headers.get('Connection', '').strip() == 'close':
-        return too_many_requests(request, "HTTP header 'Connection=close")
+        return too_many_requests(network, "HTTP header 'Connection=close")
     return None
diff --git a/searx/botdetection/http_user_agent.py b/searx/botdetection/http_user_agent.py
index 70309e975..17025f68b 100644
--- a/searx/botdetection/http_user_agent.py
+++ b/searx/botdetection/http_user_agent.py
@@ -14,8 +14,13 @@ the User-Agent_ header is unset or matches the regular expression
 """
 # pylint: disable=unused-argument
 
-from typing import Optional
+from __future__ import annotations
 import re
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
+
 import flask
 import werkzeug
 
@@ -50,8 +55,13 @@ def regexp_user_agent():
     return _regexp
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: flask.Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
+
     user_agent = request.headers.get('User-Agent', 'unknown')
     if regexp_user_agent().match(user_agent):
-        return too_many_requests(request, f"bot detected, HTTP header User-Agent: {user_agent}")
+        return too_many_requests(network, f"bot detected, HTTP header User-Agent: {user_agent}")
     return None
diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index 268285dd9..46e026371 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -38,8 +38,12 @@ droped.
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
 
 """
+from __future__ import annotations
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
 
-from typing import Optional
 import flask
 import werkzeug
 from searx.tools import config
@@ -49,7 +53,7 @@ from searx import logger
 from searx.redislib import incr_sliding_window, drop_counter
 
 from . import link_token
-from ._helpers import too_many_requests, get_real_ip
+from ._helpers import too_many_requests
 
 
 logger = logger.getChild('botdetection.ip_limit')
@@ -85,49 +89,58 @@ SUSPICIOUS_IP_MAX = 3
 """Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`."""
 
 
-def filter_request(request: flask.Request, cfg: config.Config) -> Optional[werkzeug.Response]:
+def filter_request(
+    network: IPv4Network | IPv6Network,
+    request: flask.Request,
+    cfg: config.Config,
+) -> werkzeug.Response | None:
+
     # pylint: disable=too-many-return-statements
     redis_client = redisdb.client()
 
-    client_ip = get_real_ip(request)
+    if network.is_link_local and not cfg['botdetection.ip_limit.filter_link_local']:
+        logger.debug("network %s is link-local -> not monitored by ip_limit method", network.compressed)
+        return None
 
     if request.args.get('format', 'html') != 'html':
-        c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + client_ip, API_WONDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.API_WONDOW:' + network.compressed, API_WONDOW)
         if c > API_MAX:
-            return too_many_requests(request, "too many request in API_WINDOW")
+            return too_many_requests(network, "too many request in API_WINDOW")
 
     if cfg['botdetection.ip_limit.link_token']:
 
-        suspicious = link_token.is_suspicious(request, True)
+        suspicious = link_token.is_suspicious(network, request, True)
 
         if not suspicious:
             # this IP is no longer suspicious: release ip again / delete the counter of this IP
-            drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip)
+            drop_counter(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed)
             return None
 
         # this IP is suspicious: count requests from this IP
-        c = incr_sliding_window(redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + client_ip, SUSPICIOUS_IP_WINDOW)
+        c = incr_sliding_window(
+            redis_client, 'ip_limit.SUSPICIOUS_IP_WINDOW' + network.compressed, SUSPICIOUS_IP_WINDOW
+        )
         if c > SUSPICIOUS_IP_MAX:
-            logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", client_ip)
+            logger.error("BLOCK: too many request from %s in SUSPICIOUS_IP_WINDOW (redirect to /)", network)
             return flask.redirect(flask.url_for('index'), code=302)
 
-        c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
         if c > BURST_MAX_SUSPICIOUS:
-            return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
+            return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX_SUSPICIOUS)")
 
-        c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
+        c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
         if c > LONG_MAX_SUSPICIOUS:
-            return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
+            return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX_SUSPICIOUS)")
 
         return None
 
     # vanilla limiter without extensions counts BURST_MAX and LONG_MAX
-    c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + client_ip, BURST_WINDOW)
+    c = incr_sliding_window(redis_client, 'ip_limit.BURST_WINDOW' + network.compressed, BURST_WINDOW)
     if c > BURST_MAX:
-        return too_many_requests(request, "too many request in BURST_WINDOW (BURST_MAX)")
+        return too_many_requests(network, "too many request in BURST_WINDOW (BURST_MAX)")
 
-    c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + client_ip, LONG_WINDOW)
+    c = incr_sliding_window(redis_client, 'ip_limit.LONG_WINDOW' + network.compressed, LONG_WINDOW)
     if c > LONG_MAX:
-        return too_many_requests(request, "too many request in LONG_WINDOW (LONG_MAX)")
+        return too_many_requests(network, "too many request in LONG_WINDOW (LONG_MAX)")
 
     return None
diff --git a/searx/botdetection/limiter.py b/searx/botdetection/limiter.py
index 93826684f..18ffc8407 100644
--- a/searx/botdetection/limiter.py
+++ b/searx/botdetection/limiter.py
@@ -37,14 +37,16 @@ and set the redis-url connection. Check the value, it depends on your redis DB
 
 """
 
-from typing import Optional, Tuple
+from __future__ import annotations
+
 from pathlib import Path
 import flask
-import pytomlpp as toml
+import werkzeug
 
-from searx import logger
 from searx.tools import config
-from searx.botdetection import (
+from searx import logger
+
+from . import (
     http_accept,
     http_accept_encoding,
     http_accept_language,
@@ -53,6 +55,16 @@ from searx.botdetection import (
     ip_limit,
 )
 
+from ._helpers import (
+    get_network,
+    get_real_ip,
+    dump_request,
+)
+
+logger = logger.getChild('botdetection.limiter')
+
+CFG: config.Config = None  # type: ignore
+
 LIMITER_CFG_SCHEMA = Path(__file__).parent / "limiter.toml"
 """Base configuration (schema) of the botdetection."""
 
@@ -63,40 +75,21 @@ CFG_DEPRECATED = {
     # "dummy.old.foo": "config 'dummy.old.foo' exists only for tests.  Don't use it in your real project config."
 }
 
-CFG = None
-
 
 def get_cfg() -> config.Config:
+    global CFG  # pylint: disable=global-statement
     if CFG is None:
-        init_cfg(logger)
+        CFG = config.Config.from_toml(LIMITER_CFG_SCHEMA, LIMITER_CFG, CFG_DEPRECATED)
     return CFG
 
 
-def init_cfg(log):
-    global CFG  # pylint: disable=global-statement
-    CFG = config.Config(cfg_schema=toml.load(LIMITER_CFG_SCHEMA), deprecated=CFG_DEPRECATED)
+def filter_request(request: flask.Request) -> werkzeug.Response | None:
 
-    if not LIMITER_CFG.exists():
-        log.warning("missing config file: %s", LIMITER_CFG)
-        return
-
-    log.info("load config file: %s", LIMITER_CFG)
-    try:
-        upd_cfg = toml.load(LIMITER_CFG)
-    except toml.DecodeError as exc:
-        msg = str(exc).replace('\t', '').replace('\n', ' ')
-        log.error("%s: %s", LIMITER_CFG, msg)
-        raise
-
-    is_valid, issue_list = CFG.validate(upd_cfg)
-    for msg in issue_list:
-        log.error(str(msg))
-    if not is_valid:
-        raise TypeError(f"schema of {LIMITER_CFG} is invalid, can't cutomize limiter configuration from!")
-    CFG.update(upd_cfg)
-
-
-def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
+    cfg = get_cfg()
+    real_ip = get_real_ip(request)
+    network = get_network(real_ip, cfg)
+    if network.is_link_local:
+        return None
 
     if request.path == '/healthz':
         return None
@@ -104,7 +97,7 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
     for func in [
         http_user_agent,
     ]:
-        val = func.filter_request(request, CFG)
+        val = func.filter_request(network, request, cfg)
         if val is not None:
             return val
 
@@ -118,8 +111,8 @@ def filter_request(request: flask.Request) -> Optional[Tuple[int, str]]:
             http_user_agent,
             ip_limit,
         ]:
-            val = func.filter_request(request, CFG)
+            val = func.filter_request(network, request, cfg)
             if val is not None:
                 return val
-
+    logger.debug(f"OK {network}: %s", dump_request(flask.request))
     return None
diff --git a/searx/botdetection/limiter.toml b/searx/botdetection/limiter.toml
index af797d32c..71a231e8f 100644
--- a/searx/botdetection/limiter.toml
+++ b/searx/botdetection/limiter.toml
@@ -1,8 +1,22 @@
-[botdetection.ip_limit]
-
-link_token = false
-
 [real_ip]
 
 # Number of values to trust for X-Forwarded-For.
+
 x_for = 1
+
+# The prefix defines the number of leading bits in an address that are compared
+# to determine whether or not an address is part of a (client) network.
+
+ipv4_prefix = 32
+ipv6_prefix = 48
+
+[botdetection.ip_limit]
+
+# To get unlimited access in a local network, by default link-local addresses
+# (networks) are not monitored by the ip_limit
+filter_link_local = false
+
+# activate link_token method in the ip_limit method
+link_token = false
+
+
diff --git a/searx/botdetection/link_token.py b/searx/botdetection/link_token.py
index a83214a33..11a6a56b5 100644
--- a/searx/botdetection/link_token.py
+++ b/searx/botdetection/link_token.py
@@ -6,7 +6,7 @@ Method ``link_token``
 
 The ``link_token`` method evaluates a request as :py:obj:`suspicious
 <is_suspicious>` if the URL ``/client<token>.css`` is not requested by the
-client.  By adding a random component (the token) in the URL a bot can not send
+client.  By adding a random component (the token) in the URL, a bot can not send
 a ping by request a static URL.
 
 .. note::
@@ -35,6 +35,11 @@ And in the HTML template from flask a stylesheet link is needed (the value of
    https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/X-Forwarded-For
 
 """
+from __future__ import annotations
+from ipaddress import (
+    IPv4Network,
+    IPv6Network,
+)
 
 import string
 import random
@@ -43,7 +48,11 @@ import flask
 from searx import logger
 from searx import redisdb
 from searx.redislib import secret_hash
-from ._helpers import get_real_ip
+
+from ._helpers import (
+    get_network,
+    get_real_ip,
+)
 
 TOKEN_LIVE_TIME = 600
 """Livetime (sec) of limiter's CSS token."""
@@ -60,29 +69,26 @@ TOKEN_KEY = 'SearXNG_limiter.token'
 logger = logger.getChild('botdetection.link_token')
 
 
-def is_suspicious(request: flask.Request, renew: bool = False):
-    """Checks if there is a valid ping for this request, if not this request is
-    rated as *suspicious*.  If a valid ping exists and argument ``renew`` is
-    ``True`` the expire time of this ping is reset to :py:obj:`PING_LIVE_TIME`.
+def is_suspicious(network: IPv4Network | IPv6Network, request: flask.Request, renew: bool = False):
+    """Checks whether a valid ping exists for this (client) network, if not
+    this request is rated as *suspicious*.  If a valid ping exists and argument
+    ``renew`` is ``True`` the expire time of this ping is reset to
+    :py:obj:`PING_LIVE_TIME`.
 
     """
     redis_client = redisdb.client()
     if not redis_client:
         return False
 
-    ping_key = get_ping_key(request)
+    ping_key = get_ping_key(network, request)
     if not redis_client.get(ping_key):
-        logger.warning(
-            "missing ping (IP: %s) / request: %s",
-            get_real_ip(request),
-            ping_key,
-        )
+        logger.warning("missing ping (IP: %s) / request: %s", network.compressed, ping_key)
         return True
 
     if renew:
         redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
 
-    logger.debug("found ping for client request: %s", ping_key)
+    logger.debug("found ping for (client) network %s -> %s", network.compressed, ping_key)
     return False
 
 
@@ -92,27 +98,31 @@ def ping(request: flask.Request, token: str):
     The expire time of this ping-key is :py:obj:`PING_LIVE_TIME`.
 
     """
+    from . import limiter  # pylint: disable=import-outside-toplevel, cyclic-import
+
     redis_client = redisdb.client()
     if not redis_client:
         return
     if not token_is_valid(token):
         return
-    ping_key = get_ping_key(request)
-    logger.debug("store ping for: %s", ping_key)
+
+    cfg = limiter.get_cfg()
+    real_ip = get_real_ip(request)
+    network = get_network(real_ip, cfg)
+
+    ping_key = get_ping_key(network, request)
+    logger.debug("store ping_key for (client) network %s (IP %s) -> %s", network.compressed, real_ip, ping_key)
     redis_client.set(ping_key, 1, ex=PING_LIVE_TIME)
 
 
-def get_ping_key(request: flask.Request):
-    """Generates a hashed key that fits (more or less) to a client (request).
-    At least X-Forwarded-For_ is needed to be able to assign the request to an
-    IP.
-
-    """
+def get_ping_key(network: IPv4Network | IPv6Network, request: flask.Request) -> str:
+    """Generates a hashed key that fits (more or less) to a *WEB-browser
+    session* in a network."""
     return (
         PING_KEY
         + "["
         + secret_hash(
-            get_real_ip(request) + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
+            network.compressed + request.headers.get('Accept-Language', '') + request.headers.get('User-Agent', '')
         )
         + "]"
     )
diff --git a/searx/plugins/limiter.py b/searx/plugins/limiter.py
index 7edbb1ce0..a8beb5e88 100644
--- a/searx/plugins/limiter.py
+++ b/searx/plugins/limiter.py
@@ -8,7 +8,6 @@ import flask
 from searx import redisdb
 from searx.plugins import logger
 from searx.botdetection import limiter
-from searx.botdetection import dump_request
 
 name = "Request limiter"
 description = "Limit the number of request"
@@ -20,10 +19,7 @@ logger = logger.getChild('limiter')
 
 def pre_request():
     """See :ref:`flask.Flask.before_request`"""
-    ret_val = limiter.filter_request(flask.request)
-    if ret_val is None:
-        logger.debug("OK: %s" % dump_request(flask.request))
-    return ret_val
+    return limiter.filter_request(flask.request)
 
 
 def init(app: flask.Flask, settings) -> bool:
@@ -32,6 +28,5 @@ def init(app: flask.Flask, settings) -> bool:
     if not redisdb.client():
         logger.error("The limiter requires Redis")
         return False
-    limiter.init_cfg(logger)
     app.before_request(pre_request)
     return True

From 80af38d37b21dc6e5edbf27bd22310db42a6f923 Mon Sep 17 00:00:00 2001
From: Markus Heiser <markus.heiser@darmarit.de>
Date: Thu, 1 Jun 2023 16:00:49 +0200
Subject: [PATCH 10/10] [mod] increase SUSPICIOUS_IP_WINDOW from one day to 30
 days

In my tests I see bots rotating IPs (with endless IP lists).  If such a bot has
100 IPs and has three attempts (SUSPICIOUS_IP_MAX = 3) then it can successfully
send up to 300 requests in one day while rotating the IP.  To block the bots for
a longer period of time the SUSPICIOUS_IP_WINDOW, as the time period in which an
IP is observed, must be increased.

For normal WEB-browsers this is no problem, because the SUSPICIOUS_IP_WINDOW is
deleted as soon as the CSS with the token is loaded.

SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
  Time (sec) before sliding window for one suspicious IP expires.

SUSPICIOUS_IP_MAX = 3
  Maximum requests from one suspicious IP in the :py:obj:`SUSPICIOUS_IP_WINDOW`.

Signed-off-by: Markus Heiser <markus.heiser@darmarit.de>
---
 searx/botdetection/ip_limit.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/searx/botdetection/ip_limit.py b/searx/botdetection/ip_limit.py
index 46e026371..bb4229f0e 100644
--- a/searx/botdetection/ip_limit.py
+++ b/searx/botdetection/ip_limit.py
@@ -82,7 +82,7 @@ API_WONDOW = 3600
 API_MAX = 4
 """Maximum requests from one IP in the :py:obj:`API_WONDOW`"""
 
-SUSPICIOUS_IP_WINDOW = 3600 * 24
+SUSPICIOUS_IP_WINDOW = 3600 * 24 * 30
 """Time (sec) before sliding window for one suspicious IP expires."""
 
 SUSPICIOUS_IP_MAX = 3