| 
									
										
										
										
											2022-01-30 16:05:08 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2023-09-15 09:53:03 +02:00
										 |  |  | """Processors for engine-type: ``online_url_search``
 | 
					
						
							| 
									
										
										
										
											2022-01-30 16:05:08 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | from .online import OnlineProcessor | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | re_search_urls = { | 
					
						
							|  |  |  |     'http': re.compile(r'https?:\/\/[^ ]*'), | 
					
						
							|  |  |  |     'ftp': re.compile(r'ftps?:\/\/[^ ]*'), | 
					
						
							|  |  |  |     'data:image': re.compile('data:image/[^; ]*;base64,[^ ]*'), | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class OnlineUrlSearchProcessor(OnlineProcessor): | 
					
						
							|  |  |  |     """Processor class used by ``online_url_search`` engines.""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     engine_type = 'online_url_search' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_params(self, search_query, engine_category): | 
					
						
							| 
									
										
										
										
											2022-09-29 20:54:46 +02:00
										 |  |  |         """Returns a set of :ref:`request params <engine request online>` or ``None`` if
 | 
					
						
							|  |  |  |         search query does not match to :py:obj:`re_search_urls`. | 
					
						
							| 
									
										
										
										
											2022-08-01 16:42:33 +02:00
										 |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2022-09-29 20:54:46 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-30 16:05:08 +01:00
										 |  |  |         params = super().get_params(search_query, engine_category) | 
					
						
							|  |  |  |         if params is None: | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         url_match = False | 
					
						
							|  |  |  |         search_urls = {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for k, v in re_search_urls.items(): | 
					
						
							|  |  |  |             m = v.search(search_query.query) | 
					
						
							|  |  |  |             v = None | 
					
						
							|  |  |  |             if m: | 
					
						
							|  |  |  |                 url_match = True | 
					
						
							|  |  |  |                 v = m[0] | 
					
						
							|  |  |  |             search_urls[k] = v | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not url_match: | 
					
						
							|  |  |  |             return None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         params['search_urls'] = search_urls | 
					
						
							|  |  |  |         return params |