| 
									
										
										
										
											2021-09-10 12:43:33 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2024-03-11 14:06:26 +01:00
										 |  |  | # pylint: disable=missing-module-docstring | 
					
						
							| 
									
										
										
										
											2021-09-10 12:43:33 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							| 
									
										
										
										
											2022-02-07 16:45:48 +01:00
										 |  |  | from urllib.parse import urlunparse, urlparse | 
					
						
							| 
									
										
										
										
											2024-03-11 14:06:26 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | from flask_babel import gettext | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-09-10 12:43:33 +02:00
										 |  |  | from searx import settings | 
					
						
							|  |  |  | from searx.plugins import logger | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | name = gettext('Hostname replace') | 
					
						
							|  |  |  | description = gettext('Rewrite result hostnames or remove results based on the hostname') | 
					
						
							|  |  |  | default_on = False | 
					
						
							|  |  |  | preference_section = 'general' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | plugin_id = 'hostname_replace' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | replacements = {re.compile(p): r for (p, r) in settings[plugin_id].items()} if plugin_id in settings else {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | logger = logger.getChild(plugin_id) | 
					
						
							|  |  |  | parsed = 'parsed_url' | 
					
						
							| 
									
										
										
										
											2022-02-13 16:12:46 +01:00
										 |  |  | _url_fields = ['iframe_src', 'audio_src'] | 
					
						
							| 
									
										
										
										
											2021-09-10 12:43:33 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-11 14:06:26 +01:00
										 |  |  | def on_result(_request, _search, result): | 
					
						
							| 
									
										
										
										
											2022-02-13 12:34:04 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-08 18:22:31 +01:00
										 |  |  |     for pattern, replacement in replacements.items(): | 
					
						
							| 
									
										
										
										
											2022-02-13 12:34:04 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         if parsed in result: | 
					
						
							|  |  |  |             if pattern.search(result[parsed].netloc): | 
					
						
							|  |  |  |                 # to keep or remove this result from the result list depends | 
					
						
							|  |  |  |                 # (only) on the 'parsed_url' | 
					
						
							|  |  |  |                 if not replacement: | 
					
						
							|  |  |  |                     return False | 
					
						
							|  |  |  |                 result[parsed] = result[parsed]._replace(netloc=pattern.sub(replacement, result[parsed].netloc)) | 
					
						
							|  |  |  |                 result['url'] = urlunparse(result[parsed]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for url_field in _url_fields: | 
					
						
							|  |  |  |             if result.get(url_field): | 
					
						
							|  |  |  |                 url_src = urlparse(result[url_field]) | 
					
						
							|  |  |  |                 if pattern.search(url_src.netloc): | 
					
						
							|  |  |  |                     if not replacement: | 
					
						
							|  |  |  |                         del result[url_field] | 
					
						
							|  |  |  |                     else: | 
					
						
							|  |  |  |                         url_src = url_src._replace(netloc=pattern.sub(replacement, url_src.netloc)) | 
					
						
							|  |  |  |                         result[url_field] = urlunparse(url_src) | 
					
						
							| 
									
										
										
										
											2021-09-10 12:43:33 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return True |