| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | #!/usr/bin/env python | 
					
						
							| 
									
										
										
										
											2022-01-03 12:58:48 +01:00
										 |  |  | # lint: pylint | 
					
						
							| 
									
										
										
										
											2021-10-03 15:12:09 +02:00
										 |  |  | # SPDX-License-Identifier: AGPL-3.0-or-later | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | """This script saves `Ahmia's blacklist`_ for onion sites.
 | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | Output file: :origin:`searx/data/ahmia_blacklist.txt` (:origin:`CI Update data | 
					
						
							|  |  |  | ...  <.github/workflows/data-update.yml>`). | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | .. _Ahmia's blacklist: https://ahmia.fi/blacklist/ | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | """
 | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-02-25 17:42:52 +01:00
										 |  |  | from os.path import join | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | import requests | 
					
						
							|  |  |  | from searx import searx_dir | 
					
						
							|  |  |  | 
 | 
					
						
# Endpoint requested by fetch_ahmia_blacklist() to download Ahmia's blacklist.
URL = 'https://ahmia.fi/blacklist/banned/'
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def fetch_ahmia_blacklist(): | 
					
						
							|  |  |  |     resp = requests.get(URL, timeout=3.0) | 
					
						
							|  |  |  |     if resp.status_code != 200: | 
					
						
							|  |  |  |         raise Exception("Error fetching Ahmia blacklist, HTTP code " + resp.status_code) | 
					
						
							| 
									
										
										
										
											2022-01-03 12:58:48 +01:00
										 |  |  |     return resp.text.split() | 
					
						
							| 
									
										
										
										
											2020-10-19 08:55:57 +02:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def get_ahmia_blacklist_filename(): | 
					
						
							|  |  |  |     return join(join(searx_dir, "data"), "ahmia_blacklist.txt") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  | if __name__ == '__main__': | 
					
						
							|  |  |  |     blacklist = fetch_ahmia_blacklist() | 
					
						
							| 
									
										
										
										
											2022-01-03 12:58:48 +01:00
										 |  |  |     with open(get_ahmia_blacklist_filename(), "w", encoding='utf-8') as f: | 
					
						
							| 
									
										
										
										
											2022-01-03 12:40:06 +01:00
										 |  |  |         f.write('\n'.join(blacklist)) |