| 
									
										
										
										
											2024-03-11 14:06:26 +01:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							|  |  |  | # pylint: disable=missing-module-docstring | 
					
						
							| 
									
										
										
										
											2015-06-09 16:16:07 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							| 
									
										
										
										
											2020-08-06 17:42:46 +02:00
										 |  |  | from urllib.parse import urlunparse, parse_qsl, urlencode | 
					
						
							| 
									
										
										
										
											2015-06-09 16:16:07 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-11 14:06:26 +01:00
										 |  |  | from flask_babel import gettext | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-12-27 09:26:22 +01:00
										 |  |  | regexes = { | 
					
						
							|  |  |  |     re.compile(r'utm_[^&]+'), | 
					
						
							|  |  |  |     re.compile(r'(wkey|wemail)[^&]*'), | 
					
						
							|  |  |  |     re.compile(r'(_hsenc|_hsmi|hsCtaTracking|__hssc|__hstc|__hsfp)[^&]*'), | 
					
						
							|  |  |  |     re.compile(r'&$'), | 
					
						
							|  |  |  | } | 
					
						
							| 
									
										
										
										
											2015-06-09 16:16:07 +02:00
										 |  |  | 
 | 
					
						
# Plugin metadata.  ``name`` and ``description`` are passed through
# ``gettext`` so they can be translated.
# NOTE(review): these four module-level names are presumably read by the
# plugin loader / preferences UI -- confirm against the plugin framework.
name = gettext('Tracker URL remover')
description = gettext('Remove trackers arguments from the returned URL')
# presumably: the plugin is enabled unless the user turns it off -- confirm
default_on = True
# presumably: the preferences section under which the plugin is listed -- confirm
preference_section = 'privacy'
					
						
							| 
									
										
										
										
											2015-06-09 16:16:07 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-11 14:06:26 +01:00
										 |  |  | def on_result(_request, _search, result): | 
					
						
							| 
									
										
										
										
											2019-09-23 17:14:32 +02:00
										 |  |  |     if 'parsed_url' not in result: | 
					
						
							|  |  |  |         return True | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-10-22 14:01:53 +02:00
										 |  |  |     query = result['parsed_url'].query | 
					
						
							| 
									
										
										
										
											2015-06-09 16:16:07 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-18 14:27:15 +02:00
										 |  |  |     if query == "": | 
					
						
							| 
									
										
										
										
											2015-06-15 20:34:02 +02:00
										 |  |  |         return True | 
					
						
							| 
									
										
										
										
											2019-10-14 14:58:20 +02:00
										 |  |  |     parsed_query = parse_qsl(query) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-10-23 08:17:00 +02:00
										 |  |  |     changes = 0 | 
					
						
							| 
									
										
										
										
											2019-10-14 15:09:39 +02:00
										 |  |  |     for i, (param_name, _) in enumerate(list(parsed_query)): | 
					
						
							| 
									
										
										
										
											2019-10-14 14:58:20 +02:00
										 |  |  |         for reg in regexes: | 
					
						
							|  |  |  |             if reg.match(param_name): | 
					
						
							| 
									
										
										
										
											2019-10-23 08:17:00 +02:00
										 |  |  |                 parsed_query.pop(i - changes) | 
					
						
							|  |  |  |                 changes += 1 | 
					
						
							|  |  |  |                 result['parsed_url'] = result['parsed_url']._replace(query=urlencode(parsed_query)) | 
					
						
							|  |  |  |                 result['url'] = urlunparse(result['parsed_url']) | 
					
						
							| 
									
										
										
										
											2019-10-14 14:58:20 +02:00
										 |  |  |                 break | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-06-09 16:16:07 +02:00
										 |  |  |     return True |