Merge 023a646b0a into a1e2b25467
				
					
				
			This commit is contained in:
		
						commit
						e8c87b883a
					
				
							
								
								
									
										9
									
								
								docs/dev/plugins/rerank.rst
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								docs/dev/plugins/rerank.rst
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,9 @@ | |||||||
|  | .. _rerank plugin: | ||||||
|  | 
 | ||||||
|  | ================ | ||||||
|  | Rerank | ||||||
|  | ================ | ||||||
|  | 
 | ||||||
|  | .. automodule:: searx.plugins.rerank | ||||||
|  |   :members: | ||||||
|  | 
 | ||||||
| @ -68,6 +68,9 @@ class PluginInfo: | |||||||
|     keywords: list[str] = field(default_factory=list) |     keywords: list[str] = field(default_factory=list) | ||||||
|     """See :py:obj:`Plugin.keywords`""" |     """See :py:obj:`Plugin.keywords`""" | ||||||
| 
 | 
 | ||||||
|  |     is_allowed: bool = True | ||||||
|  |     """Switch to disable plugin completely, without the user preference.""" | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class Plugin(abc.ABC): | class Plugin(abc.ABC): | ||||||
|     """Abstract base class of all Plugins.""" |     """Abstract base class of all Plugins.""" | ||||||
|  | |||||||
							
								
								
									
										115
									
								
								searx/plugins/rerank.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								searx/plugins/rerank.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,115 @@ | |||||||
|  | # SPDX-License-Identifier: AGPL-3.0-or-later | ||||||
|  | # pylint: disable=missing-module-docstring, missing-class-docstring | ||||||
|  | from __future__ import annotations | ||||||
|  | import typing | ||||||
|  | 
 | ||||||
|  | from flask_babel import gettext | ||||||
|  | 
 | ||||||
|  | from searx import settings | ||||||
|  | from searx.plugins import Plugin, PluginInfo | ||||||
|  | from searx.result_types import EngineResults | ||||||
|  | 
 | ||||||
|  | if typing.TYPE_CHECKING: | ||||||
|  |     from searx.search import SearchWithPlugins | ||||||
|  |     from searx.extended_types import SXNG_Request | ||||||
|  | 
 | ||||||
|  | try: | ||||||
|  |     import bm25s | ||||||
|  | except ImportError: | ||||||
|  |     # Import error is ignored because the admin has to install bm25s manually to use the plugin | ||||||
|  |     bm25s = None | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
class SXNGPlugin(Plugin):
    """Plugin which reranks the search results using the Okapi BM25 algorithm.

    This plugin utilizes the ``bm25s`` library to reorder search results based
    on their relevance to the search query, potentially improving the quality
    of results.  Before enabling this plugin, ensure you have installed the
    ``bm25s`` pip package, e.g. by installing it directly via pip or by adding
    it to the project's ``requirements.txt`` file.

    **Configuration**

    To enable the Rerank plugin, add it to the ``enabled_plugins`` list in your
    ``settings.yml`` file:

    .. code:: yaml

       enabled_plugins:
         ..
         - 'Rerank plugin'

    By default, the plugin retains the information about which engines found a
    particular result.  Results that appear in multiple engine results will
    receive a score boost.  This approach might be relevant if you wish results
    found by different engines to be prioritized.  You can modify this
    behaviour by configuring the ``remove_extra_engines`` setting.  If
    ``remove_extra_engines`` is set to ``true``, the original engine list is
    reduced to only the first engine.  This is useful when you prefer the
    reranking to not be affected by any potential overlap of results from
    different engines.

    .. code:: yaml

       rerank:
         remove_extra_engines: true

    """

    id = "rerank"
    default_on = False

    def __init__(self):
        """Read the ``rerank`` settings and register the plugin info.

        The plugin is marked as not allowed (``is_allowed=False``) when the
        optional ``bm25s`` package could not be imported.
        """
        super().__init__()

        # Language codes passed to bm25s as stopword list; every other locale
        # falls back to 'en' in post_search().
        self.stopword_langs = ['en', 'de', 'nl', 'fr', 'es', 'pt', 'it', 'ru', 'sv', 'no', 'zh']

        # ``rerank:`` may be missing or present-but-empty (None) in settings.yml;
        # both cases must behave like "option not set".
        rerank_settings = settings.get('rerank') or {}
        self.remove_extra_engines = rerank_settings.get('remove_extra_engines')

        self.info = PluginInfo(
            id=self.id,
            name=gettext("Rerank plugin"),
            description=gettext("""Rerank search results, ignoring original engine ranking"""),
            preference_section="general",
            is_allowed=bm25s is not None,
        )

    def post_search(self, request: "SXNG_Request", search: "SearchWithPlugins") -> EngineResults:
        """Overwrite the ``positions`` of the merged results with a BM25 ranking.

        Each merged result is indexed as ``"<title> | <content> | <url>"`` and
        ranked against the search query.  The results are modified in place.

        :param request: the current request (not used by this plugin).
        :param search: the search whose ``result_container`` is reranked.
        :return: an empty :py:obj:`EngineResults` if ``bm25s`` is not
            installed, otherwise the container's list of merged results.
        """
        results = EngineResults()

        if not bm25s:
            return results

        # pylint: disable=protected-access
        # NOTE(review): from here on the container's own merged list is
        # returned, not new results — confirm the caller does not add the
        # returned items to the container a second time.
        results = search.result_container._merged_results
        if not results:
            # Nothing to rank; avoid indexing an empty corpus / retrieving k=0.
            return results

        query = search.search_query.query
        locale = search.search_query.locale

        # Determine the stopwords based on the selected locale
        stopwords = locale.language if locale and locale.language in self.stopword_langs else 'en'

        retriever = bm25s.BM25()
        result_tokens = bm25s.tokenize(
            [
                f"{result.get('title', '')} | {result.get('content', '')} | {result.get('url', '')}"
                for result in results
            ],
            stopwords=stopwords,
        )
        retriever.index(result_tokens)

        query_tokens = bm25s.tokenize(query, stopwords=stopwords)

        # Retrieve ranked indices of results based on the query tokens
        indices = retriever.retrieve(query_tokens, k=len(results), return_as='documents', show_progress=False)

        # indices[0] is the ranking for the single query; positions are 1-based.
        for position, index in enumerate(indices[0], start=1):
            result = results[index]
            if 'positions' not in result:
                continue
            if self.remove_extra_engines:
                # Only keep the main engine and set our ranking
                result['positions'] = [position]
                result['engines'] = {result['engine']}
            else:
                # Overwrite all engine positions with the new ranking.
                # Results returned from multiple engines will still get a score boost
                result['positions'] = [position] * len(result['positions'])

        return results
| @ -249,6 +249,7 @@ outgoing: | |||||||
| #   - 'Hostnames plugin'  # see 'hostnames' configuration below | #   - 'Hostnames plugin'  # see 'hostnames' configuration below | ||||||
| #   - 'Open Access DOI rewrite' | #   - 'Open Access DOI rewrite' | ||||||
| #   - 'Tor check plugin' | #   - 'Tor check plugin' | ||||||
|  | #   - 'Rerank plugin'  # requires the bm25s python dependency to be installed | ||||||
| 
 | 
 | ||||||
| # Configuration of the "Hostnames plugin": | # Configuration of the "Hostnames plugin": | ||||||
| # | # | ||||||
|  | |||||||
| @ -38,7 +38,7 @@ | |||||||
| 
 | 
 | ||||||
| {%- macro plugin_preferences(section) -%} | {%- macro plugin_preferences(section) -%} | ||||||
|   {%- for plugin in plugins_storage -%} |   {%- for plugin in plugins_storage -%} | ||||||
|     {%- if plugin.preference_section == section -%} |     {%- if plugin.preference_section == section and plugin.is_allowed -%} | ||||||
|       <fieldset>{{- '' -}} |       <fieldset>{{- '' -}} | ||||||
| 	<legend>{{ _(plugin.name) }}</legend>{{- '' -}} | 	<legend>{{ _(plugin.name) }}</legend>{{- '' -}} | ||||||
| 	<div class="value"> | 	<div class="value"> | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user