| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | #!/usr/bin/env python | 
					
						
							| 
									
										
										
										
											2021-10-03 15:12:09 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | """This script saves `Ahmia's blacklist`_ for onion sites.
 | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data | 
					
						
							|  |  |  | ...  <.github/workflows/data-update.yml>`). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | .. _Ahmia's blacklist: https://ahmia.fi/blacklist/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2023-02-10 13:40:12 +01:00
										 |  |  | # pylint: disable=use-dict-literal | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import requests | 
					
						
							| 
									
										
										
										
											2024-03-10 15:33:23 +01:00
										 |  |  | from searx.data import data_dir | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-10 15:33:23 +01:00
# Destination of the generated blacklist file, located in searx's data
# directory (``data_dir``).
DATA_FILE = data_dir / 'ahmia_blacklist.txt'
# Address requested by fetch_ahmia_blacklist(); the response body is split on
# whitespace to obtain the blacklist entries.
URL = 'https://ahmia.fi/blacklist/banned/'
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fetch_ahmia_blacklist(): | 
					
						
							|  |  |  |     resp = requests.get(URL, timeout=3.0) | 
					
						
							|  |  |  |     if resp.status_code != 200: | 
					
						
							| 
									
										
										
										
											2023-02-10 13:40:12 +01:00
										 |  |  |         # pylint: disable=broad-exception-raised | 
					
						
							| 
									
										
										
										
											2024-03-10 15:33:23 +01:00
										 |  |  |         raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code)  # type: ignore | 
					
						
							| 
									
										
										
										
											2022-01-03 12:58:48 +01:00
										 |  |  |     return resp.text.split() | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     blacklist = fetch_ahmia_blacklist() | 
					
						
							| 
									
										
										
										
											2024-03-10 15:33:23 +01:00
										 |  |  |     blacklist.sort() | 
					
						
							|  |  |  |     with DATA_FILE.open("w", encoding='utf-8') as f: | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  |         f.write('\n'.join(blacklist)) |