Merge pull request #97 from pointhi/https
Implementing https rewrite support
This commit is contained in:
		
						commit
						20400c40c3
					
				| @ -17,6 +17,7 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| 
 | ||||
| from os import environ | ||||
| from os.path import realpath, dirname, join, abspath | ||||
| from searx.https_rewrite import load_https_rules | ||||
| try: | ||||
|     from yaml import load | ||||
| except: | ||||
| @ -34,7 +35,16 @@ if 'SEARX_SETTINGS_PATH' in environ: | ||||
| else: | ||||
|     settings_path = join(searx_dir, 'settings.yml') | ||||
| 
 | ||||
| if 'SEARX_HTTPS_REWRITE_PATH' in environ: | ||||
|     https_rewrite_path = environ['SEARX_HTTPS_REWRITE_PATH'] | ||||
| else: | ||||
|     https_rewrite_path = join(searx_dir, 'https_rules') | ||||
| 
 | ||||
| # load settings | ||||
| with open(settings_path) as settings_yaml: | ||||
|     settings = load(settings_yaml) | ||||
| 
 | ||||
| # load https rules only if https rewrite is enabled | ||||
| if settings.get('server', {}).get('https_rewrite'): | ||||
|     # load https rules | ||||
|     load_https_rules(https_rewrite_path) | ||||
|  | ||||
| @ -1,14 +1,141 @@ | ||||
| ''' | ||||
| searx is free software: you can redistribute it and/or modify | ||||
| it under the terms of the GNU Affero General Public License as published by | ||||
| the Free Software Foundation, either version 3 of the License, or | ||||
| (at your option) any later version. | ||||
| 
 | ||||
| searx is distributed in the hope that it will be useful, | ||||
| but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
| GNU Affero General Public License for more details. | ||||
| 
 | ||||
| You should have received a copy of the GNU Affero General Public License | ||||
| along with searx. If not, see < http://www.gnu.org/licenses/ >. | ||||
| 
 | ||||
| (C) 2013- by Adam Tauber, <asciimoo@gmail.com> | ||||
| ''' | ||||
| 
 | ||||
| import re | ||||
| from lxml import etree | ||||
| from os import listdir | ||||
| from os.path import isfile, join | ||||
| 
 | ||||
| 
 | ||||
| # https://gitweb.torproject.org/\ | ||||
| # pde/https-everywhere.git/tree/4.0:/src/chrome/content/rules | ||||
| 
 | ||||
| # HTTPS rewrite rules | ||||
| https_rules = ( | ||||
|     # from | ||||
|     (re.compile(r'^http://(www\.|m\.|)?xkcd\.(?:com|org)/', re.I | re.U), | ||||
|      # to | ||||
|      r'https://\1xkcd.com/'), | ||||
|     (re.compile(r'^https?://(?:ssl)?imgs\.xkcd\.com/', re.I | re.U), | ||||
|      r'https://sslimgs.xkcd.com/'), | ||||
| ) | ||||
| https_rules = [] | ||||
| 
 | ||||
| 
 | ||||
| # load single ruleset from a xml file | ||||
def load_single_https_ruleset(filepath):
    '''Parse one HTTPS Everywhere style ruleset file.

    filepath is handed unchanged to etree.parse().

    Returns an empty tuple when the file cannot be parsed, when the root
    element is not <ruleset>, when the ruleset carries a non-empty
    "default_off" or "platform" attribute, when it declares no usable
    <target host="...">, or when the combined host regex does not compile.

    Otherwise returns a tuple (target_hosts, rules, exclusions):
      target_hosts -- compiled, case-insensitive regex built from all
                      <target> hosts, anchored at the start only
      rules        -- list of (from_pattern, to_template) string tuples;
                      the from_pattern is left uncompiled
      exclusions   -- list of compiled regexes from <exclusion pattern>

    An invalid <exclusion> pattern still raises re.error.
    '''
    # init parser
    parser = etree.XMLParser()

    # load and parse xml-file
    try:
        tree = etree.parse(filepath, parser)
    except Exception:
        # best effort: a missing or malformed file is skipped, but
        # KeyboardInterrupt / SystemExit are no longer swallowed
        # TODO, error message
        return ()

    # get root node
    root = tree.getroot()

    # check if root is a node with the name ruleset
    # TODO improve parsing
    if root.tag != 'ruleset':
        return ()

    # check if rule is deactivated by default
    if root.attrib.get('default_off'):
        return ()

    # check if rule does only work for specific platforms
    if root.attrib.get('platform'):
        return ()

    hosts = []
    rules = []
    exclusions = []

    # parse children of the ruleset
    for child in root:
        # this child defines a target
        if child.tag == 'target':
            host = child.attrib.get('host')
            if not host:
                continue

            # convert host-rule to valid regex:
            # escape dots, turn the "*" wildcard into ".*"
            hosts.append(host.replace('.', r'\.').replace('*', '.*'))

        # this child defines a rule
        elif child.tag == 'rule':
            rule_from = child.attrib.get('from')
            rule_to = child.attrib.get('to')
            if not rule_from or not rule_to:
                continue

            # "from" is already a regex, so it is kept verbatim: a "$"
            # there is an end-of-string anchor, and rewriting it to "\"
            # would leave a pattern that does not compile.
            # Only the javascript group references ($1, $2, ...) of the
            # "to" template are converted to python ones (\1, \2, ...).
            rule_to = re.sub(r'\$(\d)', r'\\\1', rule_to)

            # NOTE: rule_from stays an uncompiled string; per the original
            # comment it is compiled by the caller (webapp.py)
            rules.append((rule_from, rule_to))

        # this child defines an exclusion
        elif child.tag == 'exclusion':
            pattern = child.attrib.get('pattern')
            if not pattern:
                continue

            exclusions.append(re.compile(pattern))

    # without any target the regex below would be '^()', which matches
    # every host; such a ruleset is not usable
    if not hosts:
        return ()

    # convert list of possible hosts to a simple regex
    # TODO compress regex to improve performance
    try:
        target_hosts = re.compile('^(' + '|'.join(hosts) + ')', re.I | re.U)
    except re.error:
        return ()

    # return ruleset
    return (target_hosts, rules, exclusions)
| 
 | ||||
| 
 | ||||
| # load all https rewrite rules | ||||
def load_https_rules(rules_path):
    '''Load every "*.xml" ruleset found directly inside rules_path.

    Each file is parsed with load_single_https_ruleset(); files for which
    it returns an empty result are skipped, all others are appended to the
    module-level https_rules list.  Finally the total number of entries in
    https_rules is printed.

    Errors raised by listdir() (e.g. a missing directory) are not caught.
    '''
    # search all xml files which are stored in the https rule directory;
    # sorted() because listdir() returns entries in arbitrary order and the
    # position of a ruleset inside https_rules should be reproducible.
    # join() inserts the path separator itself, so rules_path needs no
    # trailing '/'.
    xml_files = [join(rules_path, name)
                 for name in sorted(listdir(rules_path))
                 if name.endswith('.xml') and isfile(join(rules_path, name))]

    # load xml-files
    for ruleset_file in xml_files:
        # calculate rewrite-rules
        ruleset = load_single_https_ruleset(ruleset_file)

        # skip if no ruleset returned
        if not ruleset:
            continue

        # append ruleset
        https_rules.append(ruleset)

    print(' * {n} https-rules loaded'.format(n=len(https_rules)))
|  | ||||
							
								
								
									
										17
									
								
								searx/https_rules/00README
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								searx/https_rules/00README
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
| <!--  | ||||
| This directory contains web site rewriting rules for the | ||||
| HTTPS Everywhere software, available from | ||||
| https://www.eff.org/https-everywhere | ||||
| 
 | ||||
| These rules were contributed to the project by users and aim to | ||||
| enable routine secure access to as many different web sites as | ||||
| possible.  They are automatically installed together with the | ||||
| HTTPS Everywhere software.  The presence of these rules does not | ||||
| mean that an HTTPS Everywhere user accessed, or intended to | ||||
| access, any particular web site. | ||||
| 
 | ||||
| For information about how to create additional HTTPS Everywhere | ||||
| rewriting rules to add support for new sites, please see | ||||
| 
 | ||||
| https://www.eff.org/https-everywhere/rulesets | ||||
| --> | ||||
							
								
								
									
										56
									
								
								searx/https_rules/Bing.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										56
									
								
								searx/https_rules/Bing.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,56 @@ | ||||
| <!-- | ||||
| 	For other Microsoft coverage, see Microsoft.xml. | ||||
| 
 | ||||
| 
 | ||||
| 	CDN buckets: | ||||
| 
 | ||||
| 		- a134.lm.akamai.net | ||||
| 
 | ||||
| 			- akam.bing.com | ||||
| 			- *.mm.bing.net | ||||
| 
 | ||||
| 
 | ||||
| 	Nonfunctional domains: | ||||
| 
 | ||||
| 		- m2.cn.bing.com | ||||
| 		- origin.bj1.bing.com | ||||
| 		- blogs.bing.com | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- bing.com subdomains: | ||||
| 
 | ||||
| 			- (www.) | ||||
| 			- c.bing		(tracking beacons) | ||||
| 			- cn.bing | ||||
| 			- h.bing | ||||
| 			- ssl | ||||
| 			- testfamilysafety.bing | ||||
| 			- udc.bing | ||||
| 			- (www.)bing | ||||
| 
 | ||||
| 		- *.mm.bing.net | ||||
| 		- api.bing.com | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Bing"> | ||||
| 
 | ||||
| 	<target host="bing.com" /> | ||||
| 	<target host="*.bing.com" /> | ||||
| 	<target host="*.mm.bing.net" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<securecookie host=".*\.bing\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://((?:c|cn|h|ssl|testfamilysafety|udc|www)\.)?bing\.com/" | ||||
| 		to="https://$1bing.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://([^/:@]*)\.mm\.bing\.net/" | ||||
| 		to="https://$1.mm.bing.com/"/> | ||||
| 
 | ||||
| 	<rule from="^http://([^/:@]*)\.api\.bing\.net/" | ||||
| 		to="https://$1.api.bing.com/"/> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										69
									
								
								searx/https_rules/Dailymotion.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								searx/https_rules/Dailymotion.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,69 @@ | ||||
| <!-- | ||||
| 	Nonfunctional domains: | ||||
| 
 | ||||
| 		- blog.dailymotion.com | ||||
| 		- press.dailymotion.com		(shows steaw.com, CN: www.steaw.com) | ||||
| 		- proxy-46.dailymotion.com | ||||
| 		- publicite.dailymotion.com | ||||
| 		- publisher.dailymotion.com	(reset) | ||||
| 		- vid.ak.dmcdn.net		(403, Akamai) | ||||
| 		- vid2.ak.dmcdn.net		(504, akamai) | ||||
| 
 | ||||
| 
 | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- ak2.static.dailymotion.com	(mismatched, CN: *.dmcdn.net) | ||||
| 		- support.dmcloud.net		(mismatched, CN: *.zendesk.com) | ||||
| 
 | ||||
| 
 | ||||
| 	Partially covered domains: | ||||
| 
 | ||||
| 		- (www.)dailymotion.com | ||||
| 
 | ||||
| 			- cdn/manifest/video/\w+.mnft 403s | ||||
| 			- crossdomain.xml breaks videos | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Dailymotion (default off)" default_off="breaks some embedded videos"> | ||||
| 
 | ||||
| 	<target host="dailymotion.com" /> | ||||
| 	<!-- | ||||
| 		* for cross-domain cookie. | ||||
| 					--> | ||||
| 	<target host="*.dailymotion.com" /> | ||||
| 		<!-- | ||||
| 			https://mail1.eff.org/pipermail/https-everywhere-rules/2012-July/001241.html | ||||
| 													--> | ||||
| 		<exclusion pattern="^http://(?:www\.)?dailymotion\.com/(?:cdn/[\w-]+/video/|crossdomain\.xml$)" /> | ||||
| 	<target host="ak2.static.dailymotion.com" /> | ||||
| 	<target host="*.dmcdn.net" /> | ||||
| 	<target host="dmcloud.net" /> | ||||
| 	<target host="*.dmcloud.net" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Testing wrt embedded breakage. | ||||
| 
 | ||||
| 		securecookie host="^.*\.dailymotion\.com$" name=".+" /--> | ||||
| 	<!-- | ||||
| 		Omniture tracking cookies: | ||||
| 						--> | ||||
| 	<securecookie host="^\.dailymotion\.com$" name="^s_\w+$" /> | ||||
| 	<securecookie host="^www\.dailymotion\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(erroracct\.|www\.)?dailymotion\.com/" | ||||
| 		to="https://$1dailymotion.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(s\d|static(?:\d|s\d-ssl))\.dmcdn\.net/" | ||||
| 		to="https://$1.dmcdn.net/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://ak2\.static\.dailymotion\.com/" | ||||
| 		to="https://static1-ssl.dmcdn.net/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(s\.|www\.)?dmcloud\.net/" | ||||
| 		to="https://$1dmcloud.net/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://support\.dmcloud\.net/" | ||||
| 		to="https://dmcloud.zendesk.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										53
									
								
								searx/https_rules/Deviantart.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								searx/https_rules/Deviantart.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | ||||
| <!-- | ||||
| 	For problematic rules, see Deviantart-mismatches.xml. | ||||
| 
 | ||||
| 
 | ||||
| 	Other deviantArt rulesets: | ||||
| 
 | ||||
| 		- Sta.sh.xml | ||||
| 
 | ||||
| 
 | ||||
| 	ToDo: Find edgecast URL for /(fc|th)\d+. | ||||
| 
 | ||||
| 
 | ||||
| 	Mixed content: | ||||
| 
 | ||||
| 		- Images on *.....com from e.deviantart.net * | ||||
| 
 | ||||
| 	* Secured by us | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="DeviantArt (pending)" default_off="site operator says not ready yet"> | ||||
| 
 | ||||
| 	<target host="deviantart.com" /> | ||||
| 	<target host="*.deviantart.com" /> | ||||
| 	<target host="deviantart.net" /> | ||||
| 	<target host="*.deviantart.net" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Not secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^\.deviantart\.com$" name="^userinfo$" /--> | ||||
| 
 | ||||
| 	<securecookie host="^\.deviantart\.com$" name=".*" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Redirects from com to net, but does so successfully by itself. | ||||
| 										--> | ||||
| 	<rule from="^http://([aei]|fc\d\d|s[ht]|th\d\d)\.deviantart\.(com|net)/" | ||||
| 		to="https://$1.deviantart.$2/" /> | ||||
| 
 | ||||
| 	<!--	This handles everything that isn't in the first rule. | ||||
| 		Namely, usernames, backend, fc, th, and (www.). | ||||
| 			These domains present a cert that is only | ||||
| 		valid for .com. | ||||
| 			Note that .net isn't used on DA, but .net does | ||||
| 		redirect to .com, and we shouldn't break what would | ||||
| 		otherwise work. | ||||
| 			Mustn't rewrite from https here, as doing so | ||||
| 		would conflict with the first rule. | ||||
| 								--> | ||||
| 	<rule from="^http://([^/:@\.]+\.)?deviantart\.(?:com|net)/" | ||||
| 		to="https://$1deviantart.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										38
									
								
								searx/https_rules/DuckDuckGo.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								searx/https_rules/DuckDuckGo.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,38 @@ | ||||
| <!-- | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- www.dukgo.com		(mismatched, CN: dukgo.com) | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- (www.)dukgo.com	(www → ^) | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="DuckDuckGo"> | ||||
|   <target host="duckduckgo.com" /> | ||||
|   <target host="*.duckduckgo.com" /> | ||||
|   <target host="ddg.gg" /> | ||||
|   <target host="duck.co" /> | ||||
|   <target host="i.duck.co" /> | ||||
| 	<target host="dukgo.com" /> | ||||
| 	<target host="www.dukgo.com" /> | ||||
| 
 | ||||
|   <exclusion pattern="^http://(help|meme)\.duckduckgo\.com/" /> | ||||
| 
 | ||||
| 	<securecookie host="^duck\.co$" name=".*"/> | ||||
| 
 | ||||
|   <rule from="^http://duckduckgo\.com/" to="https://duckduckgo.com/"/> | ||||
|   <rule from="^http://([^/:@\.]+)\.duckduckgo\.com/" to="https://$1.duckduckgo.com/"/> | ||||
| 	<!-- TODO: What does ddg.gg/foo do? Runs query foo, redirects to homepage, or error? --> | ||||
|     <rule from="^http://ddg\.gg/$" to="https://duckduckgo.com/" /> | ||||
| 	 | ||||
|     <rule from="^http://duck\.co/" to="https://duck.co/" /> | ||||
| 
 | ||||
| 	<rule from="^http://i\.duck\.co/" | ||||
| 		to="https://duckduckgo.com/"/> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?dukgo\.com/" | ||||
| 		to="https://dukgo.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										44
									
								
								searx/https_rules/Flickr.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								searx/https_rules/Flickr.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | ||||
| <!-- | ||||
| 	For other Yahoo coverage, see Yahoo.xml. | ||||
| 
 | ||||
| 
 | ||||
| 	These altnames don't exist: | ||||
| 
 | ||||
| 		- www.blog.flickr.net | ||||
| 		- www.code.flickr.net | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Flickr"> | ||||
| 
 | ||||
| 	<target host="flic.kr" /> | ||||
| 	<target host="*.flic.kr" /> | ||||
| 	<target host="flickr.com" /> | ||||
| 	<target host="*.flickr.com" /> | ||||
| 	<target host="*.flickr.net" /> | ||||
| 	<target host="*.staticflickr.com" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Not secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^\.flic\.kr$" name="^BX$" /--> | ||||
| 
 | ||||
| 	<securecookie host="^\.flic\.kr$" name=".+" /> | ||||
| 	<securecookie host=".*\.flickr\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://flic\.kr/" | ||||
| 		to="https://flic.kr/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(api\.|www\.)?flickr\.com/" | ||||
| 		to="https://$1flickr.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://s(ecure|tatic)\.flickr\.com/" | ||||
| 		to="https://s$1.flickr.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(c2|farm\d+)\.static(\.)?flickr\.com/" | ||||
| 		to="https://$1.static$2flickr.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(blog|code)\.flickr\.net/" | ||||
| 		to="https://$1.flickr.net/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										11
									
								
								searx/https_rules/Github-Pages.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								searx/https_rules/Github-Pages.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,11 @@ | ||||
| <!-- | ||||
| 	For other GitHub coverage, see Github.xml. | ||||
| --> | ||||
| <ruleset name="GitHub Pages"> | ||||
| 
 | ||||
| 	<target host="*.github.io" /> | ||||
| 
 | ||||
| 	<rule from="^http://([^/@:\.]+)\.github\.io/" | ||||
| 		to="https://$1.github.io/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										94
									
								
								searx/https_rules/Github.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								searx/https_rules/Github.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,94 @@ | ||||
| <!-- | ||||
| 	Other GitHub rulesets: | ||||
| 
 | ||||
| 		- Github-Pages.xml | ||||
| 		- Guag.es.xml | ||||
| 		- Speaker_Deck.com.xml | ||||
| 
 | ||||
| 
 | ||||
| 	CDN buckets: | ||||
| 
 | ||||
| 		- github-images.s3.amazonaws.com | ||||
| 		- github.global.ssl.fastly.net | ||||
| 		- a248.e.akamai.net/assets.github.com/ | ||||
| 		- a248.e.akamai.net/camo.github.com/ | ||||
| 		- s3.amazonaws.com/github/ | d24z2fz21y4fag.cloudfront.net | ||||
| 		- github.myshopify.com | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- github.com subdomains: | ||||
| 
 | ||||
| 			- (www.) | ||||
| 			- assets\d+ | ||||
| 			- assets-cdn | ||||
| 			- bounty | ||||
| 			- cloud | ||||
| 			- f.cloud | ||||
| 			- codeload | ||||
| 			- developer | ||||
| 			- eclipse | ||||
| 			- enterprise | ||||
| 			- gist | ||||
| 			- gist-assets | ||||
| 			- help | ||||
| 			- identicons | ||||
| 			- jobs | ||||
| 			- mac | ||||
| 			- mobile | ||||
| 			- nodeload | ||||
| 			- octodex | ||||
| 			- pages | ||||
| 			- raw | ||||
| 			- rg3 | ||||
| 			- shop | ||||
| 			- status | ||||
| 			- support | ||||
| 			- training | ||||
| 			- try | ||||
| 			- wiki | ||||
| 			- windows | ||||
| 
 | ||||
| 		- collector.githubapp.com | ||||
| 
 | ||||
| 		- githubusercontent.com | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="GitHub"> | ||||
| 
 | ||||
| 	<target host="github.com" /> | ||||
| 	<target host="*.github.com" /> | ||||
| 	<target host="github.io" /> | ||||
| 	<target host="*.githubusercontent.com" /> | ||||
| 	<target host="collector.githubapp.com" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^github\.com$" name="^(_gh_sess|tz|user_session)$" /--> | ||||
| 	<!--securecookie host="^\.github\.com$" name="^(dotcom_user|logged_in)$" /--> | ||||
| 	<!--securecookie host="^enterprise\.github\.com$" name="^(_enterprise_web|request_method)$" /--> | ||||
| 	<!--securecookie host="^gist\.github\.com$" name="^_gist_session$" /--> | ||||
| 	<!--securecookie host="^help\.github\.com$" name="^_help_session$" /--> | ||||
| 	<!-- | ||||
| 		Not secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^status\.github\.com$" name="^rack\.session$" /--> | ||||
| 
 | ||||
| 	<securecookie host="^(?:.*\.)?github\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://((?:assets\d+|assets-cdn|bounty|cloud|f\.cloud|codeload|developer|eclipse|enterprise|gist|gist-assets|help|identicons|jobs|mac|mobile|nodeload|octodex|pages|raw|rg3|shop|status|support|training|try|wiki|windows|www)\.)?github\.com/" | ||||
| 		to="https://$1github.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://collector\.githubapp\.com/" | ||||
| 		to="https://collector.githubapp.com/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://github\.io/" | ||||
| 		to="https://pages.github.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://([^/@:\.]+)\.githubusercontent\.com/" | ||||
| 		to="https://$1.githubusercontent.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										26
									
								
								searx/https_rules/Google-mismatches.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								searx/https_rules/Google-mismatches.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,26 @@ | ||||
| <!-- | ||||
| 
 | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- (www.)apture.com	(works, mismatched, CN: *.google.com) | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google (mismatches)" default_off="mismatches"> | ||||
| 
 | ||||
| 	<!--	Akamai	--> | ||||
| 	<target host="js.admeld.com"/> | ||||
| 	<target host="apture.com" /> | ||||
| 	<target host="www.apture.com" /> | ||||
| 	<target host="googleartproject.com"/> | ||||
| 	<target host="www.googleartproject.com"/> | ||||
| 
 | ||||
| 	<rule from="^http://js\.admeld\.com/" | ||||
| 		to="https://js.admeld.com/"/> | ||||
| 
 | ||||
| 	<rule from="^https?://(?:www\.)?apture\.com/" | ||||
| 		to="https://apture.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?googleartproject\.com/" | ||||
| 		to="https://www.googleartproject.com/"/> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										14
									
								
								searx/https_rules/Google.org.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								searx/https_rules/Google.org.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| <!-- | ||||
| 	For other Google coverage, see GoogleServices.xml. | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google.org"> | ||||
| 
 | ||||
| 	<target host="google.org" /> | ||||
| 	<target host="www.google.org" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(www\.)?google\.org/" | ||||
| 		to="https://$1google.org/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										143
									
								
								searx/https_rules/GoogleAPIs.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										143
									
								
								searx/https_rules/GoogleAPIs.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,143 @@ | ||||
| <!-- | ||||
| 	For other Google coverage, see GoogleServices.xml. | ||||
| 
 | ||||
| 
 | ||||
| 	Nonfunctional domains: | ||||
| 
 | ||||
| 		- hosted.gmodules.com * | ||||
| 		- img0.gmodules.com * | ||||
| 		- p.gmodules.com * | ||||
| 
 | ||||
| 	* 404; mismatched, CN: *.googleusercontent.com | ||||
| 
 | ||||
| 
 | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- gmodules.com			(503, CN: www.google.com) | ||||
| 		- www.gmodules.com		(503, CN: *.googleusercontent.com) | ||||
| 		- gstatic.com			(404, valid cert) | ||||
| 		- api.recaptcha.net		(works; mismatched, CN: google.com) | ||||
| 
 | ||||
| 
 | ||||
| 	Partially covered domains: | ||||
| 
 | ||||
| 		- (www.)gmodules.com		(→ www.google.com) | ||||
| 		- (www.)google.com | ||||
| 		- chart.apis.google.com		(→ chart.googleapis.com) | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- api.google.com | ||||
| 
 | ||||
| 		- *.clients.google.com: | ||||
| 
 | ||||
| 			- linkhelp | ||||
| 
 | ||||
| 		- ssl.google-analytics.com | ||||
| 		- www.google-analytics.com | ||||
| 
 | ||||
| 		- googleapis.com subdomains: | ||||
| 
 | ||||
| 			- ajax | ||||
| 			- chart | ||||
| 			- *.commondatastorage | ||||
| 			- fonts | ||||
| 			- *.storage | ||||
| 			- www | ||||
| 
 | ||||
| 		- gstatic.com subdomains: | ||||
| 
 | ||||
| 			- (www.)	(^ → www) | ||||
| 			- csi | ||||
| 			- encrypted-tbn\d | ||||
| 			- g0 | ||||
| 			- *.metric | ||||
| 			- ssl | ||||
| 			- t\d | ||||
| 
 | ||||
| 		- api.recaptcha.net	(→ www.google.com) | ||||
| 		- api-secure.recaptcha.net | ||||
| 		- gdata.youtube.com | ||||
| 
 | ||||
| 
 | ||||
| 	ssl.google-analytics.com/ga.js sets __utm\w wildcard | ||||
| 	cookies on whichever domain it is loaded from. | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google APIs"> | ||||
| 
 | ||||
| 	<target host="gmodules.com" /> | ||||
| 	<target host="www.gmodules.com" /> | ||||
| 	<target host="google.com" /> | ||||
| 	<target host="apis.google.com" /> | ||||
| 	<target host="*.apis.google.com" /> | ||||
| 	<target host="*.clients.google.com" /> | ||||
| 	<target host="www.google.com" /> | ||||
| 	<target host="*.google-analytics.com" /> | ||||
| 	<target host="*.googleapis.com" /> | ||||
| 	<target host="gstatic.com" /> | ||||
| 	<target host="*.gstatic.com" /> | ||||
| 	<!--	Captive portal detection redirects to this URL, and many captive | ||||
| 		portals break TLS, so exempt this redirect URL. | ||||
| 		See GitHub bug #368 | ||||
| 							--> | ||||
| 		<exclusion pattern="^http://www\.gstatic\.com/generate_204" /> | ||||
| 	<target host="*.recaptcha.net" /> | ||||
| 	<target host="gdata.youtube.com" /> | ||||
| 		<exclusion pattern="^http://gdata\.youtube\.com/crossdomain\.xml" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<securecookie host="^ssl\.google-analytics\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?gmodules\.com/ig/images/" | ||||
| 		to="https://www.google.com/ig/images/" /> | ||||
| 
 | ||||
| 	<!--	jsapi was causing problems on some sites that embed google maps: | ||||
| 		https://trac.torproject.org/projects/tor/ticket/2335 | ||||
| 		Apparently now fixed; thanks, Google! | ||||
| 							--> | ||||
| 	<rule from="^http://(?:www\.)?google\.com/(afsonline/|chart|jsapi|recaptcha/|uds)" | ||||
| 		to="https://www.google.com/$1" /> | ||||
| 
 | ||||
| 	<rule from="^http://(api|[\w-]+\.client)s\.google\.com/" | ||||
| 		to="https://$1s.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://chart\.apis\.google\.com/chart" | ||||
| 		to="https://chart.googleapis.com/chart" /> | ||||
| 
 | ||||
| 	<rule from="^http://(ssl|www)\.google-analytics\.com/" | ||||
| 		to="https://$1.google-analytics.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(ajax|chart|fonts|www)\.googleapis\.com/" | ||||
| 		to="https://$1.googleapis.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://([^@:\./]+\.)?(commondata)?storage\.googleapis\.com/" | ||||
| 		to="https://$1$2storage.googleapis.com/" /> | ||||
| 
 | ||||
| 	<!--	There is an interesting question about whether we should | ||||
| 		append &strip=1 to all cache URLs.  This causes them to load | ||||
| 		without images and styles, which is more secure but can look | ||||
| 		worse. | ||||
| 			Without &strip=1, the images and styles from the cached | ||||
| 		pages still load from the original, typically unencrypted, page. | ||||
| 			With &strip=1, the cached page will be text-only and | ||||
| 		will come exclusively from Google's HTTPS server. | ||||
| 									--> | ||||
| 	<rule from="^http://(?:www\.)?gstatic\.com/" | ||||
| 		to="https://www.gstatic.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(csi|encrypted-tbn\d|g0|[\w-]+\.metric|ssl|t\d)\.gstatic\.com/" | ||||
| 		to="https://$1.gstatic.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://api\.recaptcha\.net/" | ||||
| 		to="https://www.google.com/recaptcha/api/" /> | ||||
| 
 | ||||
| 	<rule from="^http://api-secure\.recaptcha\.net/" | ||||
| 		to="https://api-secure.recaptcha.net/" /> | ||||
| 
 | ||||
| 	<rule from="^http://gdata\.youtube\.com/" | ||||
| 		to="https://gdata.youtube.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										6
									
								
								searx/https_rules/GoogleCanada.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								searx/https_rules/GoogleCanada.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | ||||
| <ruleset name="GoogleCanada"> | ||||
| 	<target host="google.ca" /> | ||||
| 	<target host="*.google.ca" /> | ||||
| 	<rule from="^http://([^/:@\.]+)\.google\.ca/finance" to="https://$1.google.ca/finance"/> | ||||
| </ruleset> | ||||
| 
 | ||||
							
								
								
									
										65
									
								
								searx/https_rules/GoogleImages.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										65
									
								
								searx/https_rules/GoogleImages.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,65 @@ | ||||
| <!-- | ||||
| 	For other Google coverage, see GoogleServices.xml. | ||||
| 
 | ||||
| 
 | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- www.google.bo * | ||||
| 		- www.google.co * | ||||
| 		- www.google.ec * | ||||
| 		- www.google.in * | ||||
| 		- www.google.kr * | ||||
| 		- www.google.com.kz ** | ||||
| 		- www.google.com.lk * | ||||
| 		- www.google.mx ** | ||||
| 		- www.google.sg * | ||||
| 		- www.google.sl * | ||||
| 		- www.google.ug * | ||||
| 		- www.google.vn * | ||||
| 
 | ||||
| 	* 404; mismatched, CN: google.com | ||||
| 	** Works; mismatched, CN: google.com | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google Images"> | ||||
| 
 | ||||
| 	<target host="google.*" /> | ||||
| 	<target host="www.google.*" /> | ||||
| 	<target host="google.co.*" /> | ||||
| 	<target host="www.google.co.*" /> | ||||
| 	<target host="google.com" /> | ||||
| 	<target host="images.google.com" /> | ||||
| 	<target host="google.com.*" /> | ||||
| 	<target host="www.google.com.*" /> | ||||
| 		<!-- | ||||
| 			Only handle image-related paths in this ruleset: | ||||
| 										--> | ||||
| 		<exclusion pattern="^http://(?:www\.)?google(?:\.com?)?\.\w{2,3}/(?!(?:advanced_image_search|imghp|.*tb(?:m=isch|s=sbi)))" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.com/" | ||||
| 		to="https://www.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://images\.google\.com/" | ||||
| 		to="https://images.google.com/" /> | ||||
| 
 | ||||
| 	<!--	First handle problematic domains: | ||||
| 							--> | ||||
| 	<rule from="^http://(?:www\.)?google\.co/" | ||||
| 		to="https://www.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.(?:co\.)?(in|kr|ug)/" | ||||
| 		to="https://www.google.co.$1/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.(?:com\.)?(kz|lk)/" | ||||
| 		to="https://www.google.$1/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.(?:com\.)?(bo|ec|mx|sg|sl|vn)/" | ||||
| 		to="https://www.google.com.$1/" /> | ||||
| 
 | ||||
| 	<!--	And then the rest: | ||||
| 					--> | ||||
| 	<rule from="^http://(?:www\.)?google\.(com?\.)?(ae|ar|at|au|bg|bh|br|ca|ch|cl|co|cr|cu|de|eg|es|fi|fr|gh|gt|hr|id|ie|il|it|jo|jp|jm|ke|kw|lb|ly|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|se|sv|th|tr|uk|uy|ve|za|zw)/" | ||||
| 		to="https://www.google.$1$2/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										78
									
								
								searx/https_rules/GoogleMainSearch.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										78
									
								
								searx/https_rules/GoogleMainSearch.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,78 @@ | ||||
| <ruleset name="Search www.google.com"> | ||||
| 
 | ||||
| <!--  | ||||
| Enabling this ruleset should cause searches to go to | ||||
| https://www.google.com rather than https://encrypted.google.com.  Note that | ||||
| the filename is important; it must be before GoogleSearch.xml in a bash | ||||
| expansion of src/chrome/content/rules/*.xml in order to take precedence.  | ||||
| --> | ||||
| 
 | ||||
|   <target host="*.google.com" /> | ||||
|   <target host="google.com" /> | ||||
|   <target host="www.google.com.*" /> | ||||
|   <target host="google.com.*" /> | ||||
|   <target host="www.google.co.*" /> | ||||
|   <target host="google.co.*" /> | ||||
|   <target host="www.google.*" /> | ||||
|   <target host="google.*" /> | ||||
|   <!-- beyond clients1 these do not currently exist in the ccTLDs, | ||||
|        but just in case... --> | ||||
|   <target host="clients1.google.com.*" /> | ||||
|   <target host="clients2.google.com.*" /> | ||||
|   <target host="clients3.google.com.*" /> | ||||
|   <target host="clients4.google.com.*" /> | ||||
|   <target host="clients5.google.com.*" /> | ||||
|   <target host="clients6.google.com.*" /> | ||||
|   <target host="clients1.google.co.*" /> | ||||
|   <target host="clients2.google.co.*" /> | ||||
|   <target host="clients3.google.co.*" /> | ||||
|   <target host="clients4.google.co.*" /> | ||||
|   <target host="clients5.google.co.*" /> | ||||
|   <target host="clients6.google.co.*" /> | ||||
|   <target host="clients1.google.*" /> | ||||
|   <target host="clients2.google.*" /> | ||||
|   <target host="clients3.google.*" /> | ||||
|   <target host="clients4.google.*" /> | ||||
|   <target host="clients5.google.*" /> | ||||
|   <target host="clients6.google.*" /> | ||||
| 
 | ||||
|   <rule from="^http://www\.google\.com/$" | ||||
|           to="https://www.google.com/"/> | ||||
| 
 | ||||
|   <!-- The most basic case. --> | ||||
| 
 | ||||
|   <rule from="^http://(?:www\.)?google\.com/search" | ||||
|           to="https://www.google.com/search"/> | ||||
| 
 | ||||
|   <!-- A very annoying exception that we seem to need for the basic case --> | ||||
| 
 | ||||
|   <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" /> | ||||
|   <exclusion pattern="^http://clients[0-9]\.google\.com/.*client=products.*" /> | ||||
|   <exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" /> | ||||
| 
 | ||||
|   <!-- https://trac.torproject.org/projects/tor/ticket/9713 --> | ||||
| 
 | ||||
|   <exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" /> | ||||
| 
 | ||||
|   <!-- This is necessary for image results links from web search results --> | ||||
| 
 | ||||
|   <exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" /> | ||||
| 
 | ||||
|   <rule from="^http://(?:www\.)?google\.com/webhp" | ||||
|           to="https://www.google.com/webhp"/> | ||||
| 
 | ||||
|   <rule from="^http://(?:www\.)?google\.com/#" | ||||
|           to="https://www.google.com/#"/> | ||||
| 
 | ||||
|   <rule from="^http://(?:www\.)?google\.com/$" | ||||
|           to="https://www.google.com/"/> | ||||
| 
 | ||||
|    <!-- Completion urls look like this: | ||||
| 
 | ||||
| http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n | ||||
| 
 | ||||
|    --> | ||||
|   <rule from="^http://clients[0-9]\.google\.com/complete/search" | ||||
|           to="https://clients1.google.com/complete/search"/> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										67
									
								
								searx/https_rules/GoogleMaps.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								searx/https_rules/GoogleMaps.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,67 @@ | ||||
| <!-- | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- khms * | ||||
| 		- khms[0-3] * | ||||
| 
 | ||||
| 	* $ 404s | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- google.com subdomains: | ||||
| 
 | ||||
| 			- khms | ||||
| 			- khms[0-3] | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google Maps"> | ||||
| 
 | ||||
| 	<target host="maps.google.*" /> | ||||
| 		<!-- | ||||
| 			https://trac.torproject.org/projects/tor/ticket/8627 | ||||
| 										--> | ||||
| 		<exclusion pattern="^http://maps\.google\.com/local_url" /> | ||||
| 		<exclusion pattern="^http://maps\.google\.gr/transitathens" /> | ||||
| 	<target host="maps.google.co.*" /> | ||||
| 	<target host="khms.google.com" /> | ||||
| 	<target host="khms0.google.com" /> | ||||
| 	<target host="khms1.google.com" /> | ||||
| 	<target host="khms2.google.com" /> | ||||
| 	<target host="khms3.google.com" /> | ||||
| 	<target host="maps-api-ssl.google.com" /> | ||||
| 	<target host="mw2.google.com" /> | ||||
| 	<target host="maps.google.com.*" /> | ||||
| 	<target host="maps.googleapis.com" /> | ||||
| 		<!-- | ||||
| 			https://mail1.eff.org/pipermail/https-everywhere-rules/2012-September/001317.html | ||||
| 														--> | ||||
| 		<!--exclusion pattern="^http://maps\.googleapis\.com/map(files/lib/map_1_20\.swf|sapi/publicapi\?file=flashapi)" /--> | ||||
| 		<exclusion pattern="^http://maps\.googleapis\.com/map(?:files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)" /> | ||||
| 	<target host="maps.gstatic.com" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--securecookie host="^maps\.google\.(com?\.)?(au|ca|gh|ie|in|jm|ke|lk|my|n[agz]|pk|rw|sl|sg|ug|uk|za|zw)$" name=".+" /--> | ||||
| 	<securecookie host="^maps\.google\.[\w.]{2,6}$" name=".+" /> | ||||
| 	<securecookie host="^maps\.g(?:oogle|oogleapis|static)\.com$" name=".+" /> | ||||
| 	<securecookie host="^maps-api-ssl\.google\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://maps\.google\.([^/]+)/" | ||||
| 		to="https://maps.google.$1/" /> | ||||
| 
 | ||||
| 	<!--	http://khms.../$ 404s: | ||||
| 					--> | ||||
| 	<rule from="^http://khms\d?\.google\.com/+\??$" | ||||
| 		to="https://www.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(khms\d?|maps-api-ssl|mw2)\.google\.com/" | ||||
| 		to="https://$1.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://maps\.g(oogleapis|static)\.com/" | ||||
| 		to="https://maps.g$1.com/" /> | ||||
| 
 | ||||
| 	<rule from="^https://maps\.googleapis\.com/map(?=files/lib/map_\d+_\d+\.swf|sapi/publicapi\?file=flashapi)" | ||||
| 		to="http://maps.googleapis.com/map" downgrade="1" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										6
									
								
								searx/https_rules/GoogleMelange.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								searx/https_rules/GoogleMelange.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,6 @@ | ||||
| <ruleset name="GoogleMelange"> | ||||
|   <target host="www.google-melange.com" /> | ||||
|   <target host="google-melange.com" /> | ||||
| 
 | ||||
|   <rule from="^http://(www\.)?google-melange\.com/" to="https://www.google-melange.com/" /> | ||||
| </ruleset> | ||||
							
								
								
									
										135
									
								
								searx/https_rules/GoogleSearch.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										135
									
								
								searx/https_rules/GoogleSearch.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,135 @@ | ||||
| <ruleset name="Google Search"> | ||||
| 
 | ||||
| 	<target host="google.com" /> | ||||
| 	<target host="*.google.com" /> | ||||
| 	<target host="google.com.*" /> | ||||
| 	<target host="www.google.com.*" /> | ||||
| 	<target host="google.co.*" /> | ||||
| 	<target host="www.google.co.*" /> | ||||
| 	<target host="google.*" /> | ||||
| 	<target host="www.google.*" /> | ||||
| 	<!-- | ||||
| 		Beyond clients1 these do not currently | ||||
| 		exist in the ccTLDs, but just in case... | ||||
| 							--> | ||||
| 	<target host="clients1.google.com.*" /> | ||||
| 	<target host="clients2.google.com.*" /> | ||||
| 	<target host="clients3.google.com.*" /> | ||||
| 	<target host="clients4.google.com.*" /> | ||||
| 	<target host="clients5.google.com.*" /> | ||||
| 	<target host="clients6.google.com.*" /> | ||||
| 	<target host="clients1.google.co.*" /> | ||||
| 	<target host="clients2.google.co.*" /> | ||||
| 	<target host="clients3.google.co.*" /> | ||||
| 	<target host="clients4.google.co.*" /> | ||||
| 	<target host="clients5.google.co.*" /> | ||||
| 	<target host="clients6.google.co.*" /> | ||||
| 	<target host="clients1.google.*" /> | ||||
| 	<target host="clients2.google.*" /> | ||||
| 	<target host="clients3.google.*" /> | ||||
| 	<target host="clients4.google.*" /> | ||||
| 	<target host="clients5.google.*" /> | ||||
| 	<target host="clients6.google.*" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Some Google pages can generate naive links back to the | ||||
| 		unencrypted version of encrypted.google.com, which is | ||||
| 		a 301 but theoretically vulnerable to SSL stripping. | ||||
| 									--> | ||||
| 	<rule from="^http://encrypted\.google\.com/" | ||||
| 		to="https://encrypted.google.com/" /> | ||||
| 
 | ||||
| 	<!--	The most basic case. | ||||
| 					--> | ||||
| 	<rule from="^http://(?:www\.)?google\.com/search" | ||||
| 		to="https://encrypted.google.com/search" /> | ||||
| 
 | ||||
| 	<!--	A very annoying exception that we | ||||
| 		seem to need for the basic case | ||||
| 						--> | ||||
| 	<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbs=shop" /> | ||||
| 	<exclusion pattern="^http://clients\d\.google\.com/.*client=products.*" /> | ||||
| 	<exclusion pattern="^http://suggestqueries\.google\.com/.*client=.*" /> | ||||
| 
 | ||||
|   <!-- https://trac.torproject.org/projects/tor/ticket/9713  | ||||
|          --> | ||||
| 
 | ||||
|   <exclusion pattern="^http://clients[0-9]\.google\.com/ocsp" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	This is necessary for image results | ||||
| 		links from web search results | ||||
| 						--> | ||||
| 	<exclusion pattern="^http://(?:www\.)?google\.com/search.*tbm=isch.*" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.com/about" | ||||
| 		to="https://www.google.com/about" /> | ||||
| 
 | ||||
| 	<!--	There are two distinct cases for these firefox searches	--> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox/?$" | ||||
| 		to="https://encrypted.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google(?:\.com?)?\.[a-z]{2}/firefox" | ||||
| 		to="https://encrypted.google.com/webhp" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.com/webhp" | ||||
| 		to="https://encrypted.google.com/webhp" /> | ||||
| 
 | ||||
| 	<rule from="^http://codesearch\.google\.com/" | ||||
| 		to="https://codesearch.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.com/codesearch" | ||||
| 		to="https://www.google.com/codesearch" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.com/#" | ||||
| 		to="https://encrypted.google.com/#" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.com/$" | ||||
| 		to="https://encrypted.google.com/" /> | ||||
| 
 | ||||
| 	<!--	Google supports IPv6 search, including | ||||
| 		HTTPS with a valid certificate!	--> | ||||
| 	<rule from="^http://ipv6\.google\.com/" | ||||
| 		to="https://ipv6.google.com/" /> | ||||
| 
 | ||||
| 	<!--	most google international sites look like | ||||
| 		"google.fr", some look like "google.co.jp", | ||||
| 		and some crazy ones like "google.com.au"	--> | ||||
| 
 | ||||
| 	<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/(search\?|#)" | ||||
| 		to="https://$1google$2.$3/$4" /> | ||||
| 
 | ||||
| 	<!--	Language preference setting	--> | ||||
| 	<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/setprefs" | ||||
| 	to="https://$1google$2.$3/setprefs" /> | ||||
| 
 | ||||
| 	<!--	Completion urls look like this: | ||||
| 
 | ||||
| http://clients2.google.co.jp/complete/search?hl=ja&client=hp&expIds=17259,24660,24729,24745&q=m&cp=1 HTTP/1.1\r\n | ||||
| 
 | ||||
| 		--> | ||||
| 	<rule from="^http://clients\d\.google\.com/complete/search" | ||||
| 		to="https://clients1.google.com/complete/search" /> | ||||
| 
 | ||||
| 	<rule from="^http://clients\d\.google(\.com?\.[a-z]{2})/complete/search" | ||||
| 		to="https://clients1.google.$1/complete/search" /> | ||||
| 
 | ||||
| 	<rule from="^http://clients\d\.google\.([a-z]{2})/complete/search" | ||||
| 		to="https://clients1.google.$1/complete/search" /> | ||||
| 
 | ||||
| 	<rule from="^http://suggestqueries\.google\.com/complete/search" | ||||
| 		to="https://clients1.google.com/complete/search" /> | ||||
| 
 | ||||
| 	<rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?$" | ||||
| 		to="https://$1google.$2$3/" /> | ||||
| 
 | ||||
| 	<!--	If there are URL parameters, keep them.	--> | ||||
| 	<rule from="^http://(www\.)?google\.(com?\.)?([a-z]{2})/(?:webhp)?\?" | ||||
| 		to="https://$1google.$2$3/webhp?" /> | ||||
| 
 | ||||
| 	<!-- teapot --> | ||||
| 	<rule from="^http://(www\.)?google(\.com?)?\.([a-z]{2})/teapot" | ||||
| 		to="https://$1google$2.$3/teapot" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										345
									
								
								searx/https_rules/GoogleServices.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										345
									
								
								searx/https_rules/GoogleServices.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,345 @@ | ||||
| <!-- | ||||
| 	Other Google rulesets: | ||||
| 
 | ||||
| 		- 2mdn.net.xml | ||||
| 		- Admeld.xml | ||||
| 		- ChannelIntelligence.com.xml | ||||
| 		- Doubleclick.net.xml | ||||
| 		- FeedBurner.xml | ||||
| 		- Google.org.xml | ||||
| 		- GoogleAPIs.xml | ||||
| 		- Google_App_Engine.xml | ||||
| 		- GoogleImages.xml | ||||
| 		- GoogleShopping.xml | ||||
| 		- Ingress.xml | ||||
| 		- Meebo.xml | ||||
| 		- Orkut.xml | ||||
| 		- Postini.xml | ||||
| 		- WebM_Project.org.xml | ||||
| 
 | ||||
| 
 | ||||
| 	Nonfunctional domains: | ||||
| 
 | ||||
| 		- feedproxy.google.com			(404, valid cert) | ||||
| 		- partnerpage.google.com * | ||||
| 		- safebrowsing.clients.google.com	(404, mismatched) | ||||
| 		- (www.)googlesyndicatedsearch.com	(404; mismatched, CN: google.com) | ||||
| 		- buttons.googlesyndication.com * | ||||
| 
 | ||||
| 	* 404, valid cert | ||||
| 
 | ||||
| 
 | ||||
| 	Nonfunctional google.com paths: | ||||
| 
 | ||||
| 		- analytics	(redirects to http) | ||||
| 		- imgres | ||||
| 		- gadgets * | ||||
| 		- hangouts	(404) | ||||
| 		- u/		(404) | ||||
| 
 | ||||
| 	* Redirects to http | ||||
| 
 | ||||
| 
 | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- www.goo.gl		(404; mismatched, CN: *.google.com) | ||||
| 
 | ||||
| 		- google.com subdomains: | ||||
| 
 | ||||
| 			- books		(googlebooks/, images/, & intl/ 404, but works when rewritten to www) | ||||
| 			- cbks0 **** | ||||
| 			- earth * | ||||
| 			- gg		($ 404s) | ||||
| 			- knoll * | ||||
| 			- scholar ** | ||||
| 			- trends * | ||||
| 
 | ||||
| 		- news.google.cctld ** | ||||
| 		- scholar.google.cctld ** | ||||
| 		- *-opensocial.googleusercontent.com *** | ||||
| 
 | ||||
| 	**** $ 404s | ||||
| 	* 404, valid cert | ||||
| 	** Redirects to http, valid cert | ||||
| 	*** Breaks followers widget - https://trac.torproject.org/projects/tor/ticket/7294 | ||||
| 
 | ||||
| 
 | ||||
| 	Partially covered domains: | ||||
| 
 | ||||
| 		- google.cctld subdomains: | ||||
| 
 | ||||
| 			- scholar	(→ www) | ||||
| 
 | ||||
| 		- google.com subdomains: | ||||
| 
 | ||||
| 			- (www.) | ||||
| 			- cbks0		($ 404s) | ||||
| 			- gg		($ 404s) | ||||
| 			- news		(→ www) | ||||
| 			- scholar	(→ www) | ||||
| 
 | ||||
| 		- *.googleusercontent.com	(*-opensocial excluded) | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- lh[3-6].ggpht.com | ||||
| 		- (www.)goo.gl		(www → ^) | ||||
| 
 | ||||
| 		- google.com subdomains: | ||||
| 
 | ||||
| 			- accounts | ||||
| 			- adwords | ||||
| 			- apis | ||||
| 			- appengine | ||||
| 			- books		(→ encrypted) | ||||
| 			- calendar | ||||
| 			- checkout | ||||
| 			- chrome | ||||
| 			- clients[12] | ||||
| 			- code | ||||
| 			- *.corp | ||||
| 			- developers | ||||
| 			- dl | ||||
| 			- docs | ||||
| 			- docs\d | ||||
| 			- \d.docs | ||||
| 			- drive | ||||
| 			- earth		(→ www) | ||||
| 			- encrypted | ||||
| 			- encrypted-tbn[123] | ||||
| 			- feedburner | ||||
| 			- fiber | ||||
| 			- finance | ||||
| 			- glass | ||||
| 			- groups | ||||
| 			- health | ||||
| 			- helpouts | ||||
| 			- history | ||||
| 			- hostedtalkgadget | ||||
| 			- id | ||||
| 			- investor | ||||
| 			- knol | ||||
| 			- knoll		(→ knol) | ||||
| 			- lh\d | ||||
| 			- mail | ||||
| 			- chatenabled.mail | ||||
| 			- pack | ||||
| 			- picasaweb | ||||
| 			- pki | ||||
| 			- play | ||||
| 			- plus | ||||
| 			- plusone | ||||
| 			- productforums | ||||
| 			- profiles | ||||
| 			- safebrowsing-cache | ||||
| 			- cert-test.sandbox | ||||
| 			- plus.sandbox | ||||
| 			- sb-ssl | ||||
| 			- script | ||||
| 			- security | ||||
| 			- services | ||||
| 			- servicessites | ||||
| 			- sites | ||||
| 			- spreadsheets | ||||
| 			- spreadsheets\d | ||||
| 			- support | ||||
| 			- talk | ||||
| 			- talkgadget | ||||
| 			- tbn2			(→ encrypted-tbn2) | ||||
| 			- tools | ||||
| 			- trends		(→ www) | ||||
| 
 | ||||
| 		- partner.googleadservices.com | ||||
| 		- (www.)googlecode.com | ||||
| 		- *.googlecode.com	(per-project subdomains) | ||||
| 		- googlesource.com | ||||
| 		- *.googlesource.com | ||||
| 		- pagead2.googlesyndication.com | ||||
| 		- tpc.googlesyndication.com | ||||
| 		- mail-attachment.googleusercontent.com | ||||
| 		- webcache.googleusercontent.com | ||||
| 
 | ||||
| 
 | ||||
| 	XXX: Needs more testing | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google Services"> | ||||
| 
 | ||||
| 	<target host="*.ggpht.com" /> | ||||
| 	<target host="gmail.com" /> | ||||
| 	<target host="www.gmail.com" /> | ||||
| 	<target host="goo.gl" /> | ||||
| 	<target host="www.goo.gl" /> | ||||
| 	<target host="google.*" /> | ||||
| 	<target host="accounts.google.*" /> | ||||
| 	<target host="adwords.google.*" /> | ||||
| 	<target host="finance.google.*" /> | ||||
| 	<target host="groups.google.*" /> | ||||
| 	<target host="it.google.*" /> | ||||
| 	<target host="news.google.*" /> | ||||
| 		<exclusion pattern="^http://(?:news\.)?google\.com/(?:archivesearch|newspapers)" /> | ||||
| 	<target host="picasaweb.google.*" /> | ||||
| 	<target host="scholar.google.*" /> | ||||
| 	<target host="www.google.*" /> | ||||
| 	<target host="*.google.ca" /> | ||||
| 	<target host="google.co.*" /> | ||||
| 	<target host="accounts.google.co.*" /> | ||||
| 	<target host="adwords.google.co.*" /> | ||||
| 	<target host="finance.google.co.*" /> | ||||
| 	<target host="groups.google.co.*" /> | ||||
| 	<target host="id.google.co.*" /> | ||||
| 	<target host="news.google.co.*" /> | ||||
| 	<target host="picasaweb.google.co.*" /> | ||||
| 	<target host="scholar.google.co.*" /> | ||||
| 	<target host="www.google.co.*" /> | ||||
| 	<target host="google.com" /> | ||||
| 	<target host="*.google.com" /> | ||||
| 		<exclusion pattern="^http://(?:www\.)?google\.com/analytics/*(?:/[^/]+)?(?:\?.*)?$" /> | ||||
| 		<!--exclusion pattern="^http://books\.google\.com/(?!books/(\w+\.js|css/|javascript/)|favicon\.ico|googlebooks/|images/|intl/)" /--> | ||||
| 		<exclusion pattern="^http://cbks0\.google\.com/(?:$|\?)" /> | ||||
| 		<exclusion pattern="^http://gg\.google\.com/(?!csi(?:$|\?))" /> | ||||
| 	<target host="google.com.*" /> | ||||
| 	<target host="accounts.google.com.*" /> | ||||
| 	<target host="adwords.google.com.*" /> | ||||
| 	<target host="groups.google.com.*" /> | ||||
| 	<target host="id.google.com.*" /> | ||||
| 	<target host="news.google.com.*" /> | ||||
| 	<target host="picasaweb.google.com.*" /> | ||||
| 	<target host="scholar.google.com.*" /> | ||||
| 	<target host="www.google.com.*" /> | ||||
| 	<target host="partner.googleadservices.com" /> | ||||
| 	<target host="googlecode.com" /> | ||||
| 	<target host="*.googlecode.com" /> | ||||
| 	<target host="googlemail.com" /> | ||||
| 	<target host="www.googlemail.com" /> | ||||
| 	<target host="googlesource.com" /> | ||||
| 	<target host="*.googlesource.com" /> | ||||
| 	<target host="*.googlesyndication.com" /> | ||||
| 	<target host="www.googletagservices.com" /> | ||||
| 	<target host="googleusercontent.com" /> | ||||
| 	<target host="*.googleusercontent.com" /> | ||||
| 		<!-- | ||||
| 			Necessary for the Followers widget: | ||||
| 
 | ||||
| 				 https://trac.torproject.org/projects/tor/ticket/7294 | ||||
| 											--> | ||||
| 		<exclusion pattern="http://[^@:\./]+-opensocial\.googleusercontent\.com" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Can we secure any of these wildcard cookies safely? | ||||
| 									--> | ||||
| 	<!--securecookie host="^\.google\.com$" name="^(hl|I4SUserLocale|NID|PREF|S)$" /--> | ||||
| 	<!--securecookie host="^\.google\.[\w.]{2,6}$" name="^(hl|I4SUserLocale|NID|PREF|S|S_awfe)$" /--> | ||||
| 	<securecookie host="^(?:accounts|adwords|\.code|login\.corp|developers|docs|\d\.docs|fiber|mail|picasaweb|plus|\.?productforums|support)\.google\.[\w.]{2,6}$" name=".+" /> | ||||
| 	<securecookie host="^www\.google\.com$" name="^GoogleAccountsLocale_session$" /> | ||||
| 	<securecookie host="^mail-attachment\.googleusercontent\.com$" name=".+" /> | ||||
| 	<securecookie host="^gmail\.com$" name=".+" /> | ||||
| 	<securecookie host="^www\.gmail\.com$" name=".+" /> | ||||
| 	<securecookie host="^googlemail\.com$" name=".+" /> | ||||
| 	<securecookie host="^www\.googlemail\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--    - lh 3-6 exist | ||||
| 		- All appear identical | ||||
| 		- Identical to lh\d.googleusercontent.com | ||||
| 					--> | ||||
| 	<rule from="^http://lh(\d)\.ggpht\.com/" | ||||
| 		to="https://lh$1.ggpht.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://lh(\d)\.google\.ca/" | ||||
| 		to="https://lh$1.google.ca/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(www\.)?g(oogle)?mail\.com/" | ||||
| 		to="https://$1g$2mail.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?goo\.gl/" | ||||
| 		to="https://goo.gl/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Redirects to http when rewritten to www: | ||||
| 							--> | ||||
| 	<rule from="^http://books\.google\.com/" | ||||
| 		to="https://encrypted.google.com/" /> | ||||
| 
 | ||||
| 	<!--	tisp$ 404s: | ||||
| 				--> | ||||
| 	<rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/tisp(?=$|\?)" | ||||
| 		to="https://www.google.$1/tisp/" /> | ||||
| 
 | ||||
| 	<!--	Paths that work on all google.* domains | ||||
| 							--> | ||||
| 	<rule from="^http://(?:www\.)?google\.((?:com?\.)?\w{2,3})/(accounts|adplanner|ads|adsense|adwords|analytics|bookmarks|chrome|contacts|coop|cse|css|culturalinstitute|doodles|earth|favicon\.ico|finance|get|goodtoknow|googleblogs|grants|green|hostednews|images|intl|js|landing|logos|mapmaker|newproducts|news|nexus|patents|policies|prdhp|profiles|products|reader|s2|settings|shopping|support|tisp|tools|transparencyreport|trends|urchin|webmasters)(?=$|[?/])" | ||||
| 		 to="https://www.google.$1/$2" /> | ||||
| 
 | ||||
| 	<!--	Paths that 404 on ccTLDs, but work on .com: | ||||
| 								--> | ||||
| 	<rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/(?=calendar|dictionary|doubleclick|help|ideas|pacman|postini|powermeter|url)" | ||||
| 		 to="https://www.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.(?:com?\.)?\w{2,3}/custom" | ||||
| 		 to="https://www.google.com/cse" /> | ||||
| 
 | ||||
| 	<!--	Paths that only exist/work on .com | ||||
| 							--> | ||||
| 	<rule from="^http://(?:www\.)?google\.com/(\+|appsstatus|books|buzz|extern_js|glass|googlebooks|ig|insights|moderator|phone|safebrowsing|videotargetting|webfonts)(?=$|[?/])" | ||||
| 		to="https://www.google.com/$1" /> | ||||
| 
 | ||||
| 	<!--	Subdomains that work on all google.* domains | ||||
| 							--> | ||||
| 	<rule from="^http://(accounts|adwords|finance|groups|id|picasaweb|)\.google\.((?:com?\.)?\w{2,3})/" | ||||
| 		to="https://$1.google.$2/" /> | ||||
| 
 | ||||
| 	<!--	Subdomains that only exist/work on .com | ||||
| 							--> | ||||
| 	<rule from="^http://(apis|appengine|books|calendar|cbks0|chat|checkout|chrome|clients[12]|code|[\w-]+\.corp|developers|dl|docs\d?|\d\.docs|drive|encrypted|encrypted-tbn[123]|feedburner|fiber|fonts|gg|glass||health|helpouts|history|(?:hosted)?talkgadget|investor|lh\d|(?:chatenabled\.)?mail|pack|pki|play|plus(?:\.sandbox)?|plusone|productforums|profiles|safebrowsing-cache|cert-test\.sandbox|sb-ssl|script|security|services|servicessites|sites|spreadsheets\d?|support|talk|tools)\.google\.com/" | ||||
| 		to="https://$1.google.com/" /> | ||||
| 
 | ||||
| 	<exclusion pattern="^http://clients[0-9]\.google\.com/ocsp"/> | ||||
| 
 | ||||
| 	<rule from="^http://earth\.google\.com/" | ||||
| 		to="https://www.google.com/earth/" /> | ||||
| 
 | ||||
| 	<rule from="^http://scholar\.google\.((?:com?\.)?\w{2,3})/intl/" | ||||
| 		to="https://www.google.$1/intl/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:encrypted-)?tbn2\.google\.com/" | ||||
| 		to="https://encrypted-tbn2.google.com/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://knoll?\.google\.com/" | ||||
| 		to="https://knol.google.com/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://news\.google\.(?:com?\.)?\w{2,3}/(?:$|news|newshp)" | ||||
| 		to="https://www.google.com/news" /> | ||||
| 
 | ||||
| 	<rule from="^http://trends\.google\.com/" | ||||
| 		 to="https://www.google.com/trends" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://([^/:@\.]+\.)?googlecode\.com/" | ||||
| 		 to="https://$1googlecode.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://([^\./]\.)?googlesource\.com/" | ||||
| 		to="https://$1googlesource.com/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://partner\.googleadservices\.com/" | ||||
| 		 to="https://partner.googleadservices.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(pagead2|tpc)\.googlesyndication\.com/" | ||||
| 		 to="https://$1.googlesyndication.com/" /> | ||||
| 
 | ||||
| 	<!--	The non-www host (googletagservices.com) doesn't exist. | ||||
| 					--> | ||||
| 	<rule from="^http://www\.googletagservices\.com/tag/js/" | ||||
| 		to="https://www.googletagservices.com/tag/js/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://([^@:\./]+)\.googleusercontent\.com/" | ||||
| 		to="https://$1.googleusercontent.com/" /> | ||||
| 	 | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										28
									
								
								searx/https_rules/GoogleShopping.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								searx/https_rules/GoogleShopping.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | ||||
| <!-- | ||||
| 	For other Google coverage, see GoogleServices.xml. | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google Shopping"> | ||||
| 
 | ||||
| 	<target host="google.*" /> | ||||
| 	<target host="www.google.*" /> | ||||
| 	<target host="google.co.*" /> | ||||
| 	<target host="www.google.co.*" /> | ||||
| 	<target host="*.google.com" /> | ||||
| 	<target host="google.com.*" /> | ||||
| 	<target host="www.google.com.*" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://encrypted\.google\.com/(prdhp|shopping)"  | ||||
| 		to="https://www.google.com/$1" /> | ||||
| 
 | ||||
| 	<rule from="^http://shopping\.google\.com/" | ||||
| 		to="https://shopping.google.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:encrypted|www)\.google\.com/(.*tbm=shop)" | ||||
| 		to="https://www.google.com/$1" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?google\.((?:com?\.)?(?:ae|ar|at|au|bg|bh|bo|br|ca|ch|cl|cr|co|cu|de|ec|eg|es|fi|fr|gh|gt|hr|id|ie|il|in|it|jm|jo|jp|ke|kr|kw|kz|lb|lk|ly|mx|my|na|ng|nl|no|nz|om|pa|pe|pk|pl|pt|py|qa|ro|ru|rw|sa|sg|sl|se|sv|th|tr|ug|uk|uy|ve|vn|za|zw))/(?=prdhp|shopping)" | ||||
| 		to="https://www.google.com/$1" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										7
									
								
								searx/https_rules/GoogleSorry.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								searx/https_rules/GoogleSorry.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,7 @@ | ||||
| <ruleset name="GoogleSorry"> | ||||
|   <target host="sorry.google.com" /> | ||||
|   <target host="www.google.com" /> | ||||
|   <target host="google.com" /> | ||||
| 
 | ||||
|   <rule from="^http://((sorry|www)\.)?google\.com/sorry/" to="https://sorry.google.com/sorry/" /> | ||||
| </ruleset> | ||||
							
								
								
									
										8
									
								
								searx/https_rules/GoogleTranslate.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								searx/https_rules/GoogleTranslate.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,8 @@ | ||||
| <ruleset name="Google Translate (broken)" default_off="redirect loops"> | ||||
|   <target host="translate.googleapis.com" /> | ||||
|   <target host="translate.google.com" /> | ||||
| 
 | ||||
|   <rule from="^http://translate\.googleapis\.com/" to="https://translate.googleapis.com/"/> | ||||
|   <rule from="^http://translate\.google\.com/" | ||||
|       to="https://translate.google.com/" /> | ||||
| </ruleset> | ||||
							
								
								
									
										83
									
								
								searx/https_rules/GoogleVideos.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								searx/https_rules/GoogleVideos.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,83 @@ | ||||
| <ruleset name="Google Videos"> | ||||
|   <target host="*.google.com" /> | ||||
|   <target host="google.com" /> | ||||
|   <target host="www.google.com.*" /> | ||||
|   <target host="google.com.*" /> | ||||
|   <target host="www.google.co.*" /> | ||||
|   <target host="google.co.*" /> | ||||
|   <target host="www.google.*" /> | ||||
|   <target host="google.*" /> | ||||
| 
 | ||||
|   <rule from="^http://encrypted\.google\.com/videohp"  | ||||
|           to="https://encrypted.google.com/videohp" /> | ||||
| 
 | ||||
|   <!-- https://videos.google.com is currently broken; work around that... --> | ||||
|   <rule from="^https?://videos?\.google\.com/$" | ||||
|           to="https://encrypted.google.com/videohp" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com/videohp" | ||||
| 	  to="https://encrypted.google.com/videohp" /> | ||||
|   <rule from="^http://(?:images|www|encrypted)\.google\.com/(.*tbm=isch)" | ||||
|           to="https://encrypted.google.com/$1" /> | ||||
| 
 | ||||
|   <rule | ||||
|    from="^http://(?:www\.)?google\.(?:com?\.)?(?:au|ca|gh|ie|in|jm|ke|lk|my|na|ng|nz|pk|rw|sl|sg|ug|uk|za|zw)/videohp" | ||||
|      to="https://encrypted.google.com/videohp" /> | ||||
|   <rule | ||||
|    from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp$" | ||||
|     to="https://encrypted.google.com/videohp?hl=es" /> | ||||
|   <rule | ||||
|    from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp$" | ||||
|      to="https://encrypted.google.com/videohp?hl=ar" /> | ||||
|   <rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=de" /> | ||||
|   <rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=$1" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=$1" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.il/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=he" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.kr/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=ko" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.kz/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=kk" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.jp/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=ja" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.vn/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=vi" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.br/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=pt-BR" /> | ||||
|   <rule from="^http://(?:www\.)?google\.se/videohp$" | ||||
|           to="https://encrypted.google.com/videohp?hl=sv" /> | ||||
| 
 | ||||
| <!-- If there are URL parameters, keep them. --> | ||||
|   <rule | ||||
|    from="^http://(?:www\.)?google\.(?:com?\.)?(?:ar|bo|cl|co|cu|cr|ec|es|gt|mx|pa|pe|py|sv|uy|ve)/videohp\?" | ||||
|     to="https://encrypted.google.com/videohp?hl=es&amp;" /> | ||||
|   <rule | ||||
|    from="^http://(?:www\.)?google\.(?:com\.)?(?:ae|bh|eg|jo|kw|lb|ly|om|qa|sa)/videohp\?" | ||||
|      to="https://encrypted.google.com/videohp?hl=ar&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.(?:at|ch|de)/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=de&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.(fr|nl|it|pl|ru|bg|pt|ro|hr|fi|no)/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=$1&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com?\.(id|th|tr)/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=$1&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.il/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=he&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.kr/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=ko&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.kz/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=kk&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.jp/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=ja&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.vn/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=vi&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.com\.br/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=pt-BR&amp;" /> | ||||
|   <rule from="^http://(?:www\.)?google\.se/videohp\?" | ||||
|           to="https://encrypted.google.com/videohp?hl=sv&amp;" /> | ||||
| 
 | ||||
| 	<rule from="^http://video\.google\.com/ThumbnailServer2" | ||||
| 		to="https://video.google.com/ThumbnailServer2" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										17
									
								
								searx/https_rules/GoogleWatchBlog.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								searx/https_rules/GoogleWatchBlog.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,17 @@ | ||||
| <!-- | ||||
| 	gwbhrd.appspot.com | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="GoogleWatchBlog"> | ||||
| 
 | ||||
| 	<target host="googlewatchblog.de" /> | ||||
| 	<target host="*.googlewatchblog.de" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<securecookie host="^(?:www)?\.googlewatchblog\.de$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(static\.|www\.)?googlewatchblog\.de/" | ||||
| 		to="https://$1googlewatchblog.de/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										21
									
								
								searx/https_rules/Google_App_Engine.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										21
									
								
								searx/https_rules/Google_App_Engine.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,21 @@ | ||||
| <!-- | ||||
| 	For other Google coverage, see GoogleServices.xml. | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Google App Engine"> | ||||
| 
 | ||||
| 	<target host="appspot.com" /> | ||||
| 	<target host="*.appspot.com" /> | ||||
| 		<!-- | ||||
| 			Redirects to http for some reason. | ||||
| 								--> | ||||
| 		<exclusion pattern="^http://photomunchers\.appspot\.com/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<securecookie host="^.+\.appspot\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://([^@:\./]+\.)?appspot\.com/" | ||||
| 		 to="https://$1appspot.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										16
									
								
								searx/https_rules/Googleplex.com.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										16
									
								
								searx/https_rules/Googleplex.com.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,16 @@ | ||||
| <!-- This rule was automatically generated based on an HSTS | ||||
|      preload rule in the Chromium browser.  See  | ||||
|      https://src.chromium.org/viewvc/chrome/trunk/src/net/base/transport_security_state.cc | ||||
|      for the list of preloads.  Sites are added to the Chromium HSTS | ||||
|      preload list on request from their administrators, so HTTPS should | ||||
|      work properly everywhere on this site. | ||||
|   | ||||
|      Because Chromium and derived browsers automatically force HTTPS for | ||||
|      every access to this site, this rule applies only to Firefox. --> | ||||
| <ruleset name="Googleplex.com (default off)" platform="firefox" default_off="Certificate error"> | ||||
|   <target host="googleplex.com" /> | ||||
| 
 | ||||
|   <securecookie host="^googleplex\.com$" name=".+" /> | ||||
| 
 | ||||
|   <rule from="^http://googleplex\.com/" to="https://googleplex.com/" /> | ||||
| </ruleset> | ||||
							
								
								
									
										15
									
								
								searx/https_rules/OpenStreetMap.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								searx/https_rules/OpenStreetMap.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,15 @@ | ||||
| <ruleset name="OpenStreetMap"> | ||||
| 
 | ||||
| 	<target host="openstreetmap.org"/> | ||||
| 	<target host="*.openstreetmap.org"/> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?openstreetmap\.org/" | ||||
| 		to="https://www.openstreetmap.org/"/> | ||||
| 
 | ||||
| 	<rule from="^http://tile\.openstreetmap\.org/" | ||||
| 		to="https://a.tile.openstreetmap.org/"/> | ||||
| 
 | ||||
| 	<rule from="^http://(blog|help|lists|nominatim|piwik|taginfo|[abc]\.tile|trac|wiki)\.openstreetmap\.org/" | ||||
| 		to="https://$1.openstreetmap.org/"/> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										14
									
								
								searx/https_rules/Rawgithub.com.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								searx/https_rules/Rawgithub.com.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| <!-- | ||||
| 	www: cert only matches ^rawgithub.com | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="rawgithub.com"> | ||||
| 
 | ||||
| 	<target host="rawgithub.com" /> | ||||
| 	<target host="www.rawgithub.com" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?rawgithub\.com/" | ||||
| 		to="https://rawgithub.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										101
									
								
								searx/https_rules/Soundcloud.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								searx/https_rules/Soundcloud.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,101 @@ | ||||
| <!-- | ||||
| 
 | ||||
| 	CDN buckets: | ||||
| 
 | ||||
| 		- akmedia-a.akamaihd.net | ||||
| 
 | ||||
| 		- soundcloud.assistly.com | ||||
| 
 | ||||
| 			- help.soundcloud.com | ||||
| 
 | ||||
| 		- cs70.wac.edgecastcdn.net | ||||
| 
 | ||||
| 			- a1.sndcdn.com | ||||
| 			- i1.sndcdn.com | ||||
| 			- w1.sndcdn.com | ||||
| 
 | ||||
| 		- wpc.658D.edgecastcdn.net | ||||
| 		- m-a.sndcdn.com.edgesuite.net  | ||||
| 		- soundcloud.gettyimages.com | ||||
| 
 | ||||
| 		- scbackstage.wpengine.netdna-cdn.com | ||||
| 
 | ||||
| 			- ssl doesn't exist | ||||
| 			- backstage.soundcloud.com | ||||
| 
 | ||||
| 		- soundcloud.wpengine.netdna-cdn.com | ||||
| 
 | ||||
| 			- -ssl doesn't exist | ||||
| 			- blog.soundcloud.com | ||||
| 
 | ||||
| 		- gs1.wpc.v2cdn.netcdn.net | ||||
| 		- gs1.wpc.v2cdn.net | ||||
| 
 | ||||
| 			- ec-media.soundcloud.com | ||||
| 
 | ||||
| 	Nonfunctional soundcloud.com subdomains: | ||||
| 
 | ||||
| 		- help		(redirects to http, mismatched, CN: *.assistly.com) | ||||
| 		- m		(redirects to http) | ||||
| 		- media | ||||
| 		- status	(times out) | ||||
| 
 | ||||
| 
 | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- m-a.sndcdn.com	(works, akamai) | ||||
| 
 | ||||
| 
 | ||||
| 	Partially covered domains: | ||||
| 
 | ||||
| 		- backstage.soundcloud.com | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- sndcdn.com subdomains: | ||||
| 
 | ||||
| 			- a[12] | ||||
| 			- api | ||||
| 			- i[1-4] | ||||
| 			- w[12] | ||||
| 			- wis | ||||
| 
 | ||||
| 		- soundcloud.com subdomains: | ||||
| 
 | ||||
| 			- (www.) | ||||
| 			- api | ||||
| 			- blog | ||||
| 			- connect | ||||
| 			- developers | ||||
| 			- ec-media | ||||
| 			- eventlogger | ||||
| 			- help-assets | ||||
| 			- media | ||||
| 			- visuals | ||||
| 			- w | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Soundcloud (partial)"> | ||||
| 
 | ||||
| 	<target host="scbackstage.wpengine.netdna-cdn.com" /> | ||||
| 	<target host="soundcloud.wpengine.netdna-cdn.com" /> | ||||
| 	<target host="*.sndcdn.com" /> | ||||
| 	<target host="soundcloud.com" /> | ||||
| 	<target host="*.soundcloud.com" /> | ||||
| 		<exclusion pattern="^https?://(?:scbackstage\.wpengine\.netdna-cdn|backstage\.soundcloud)\.com/(?!wp-content/)" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://([aiw]\d|api|wis)\.sndcdn\.com/" | ||||
| 		to="https://$1.sndcdn.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://((?:api|backstage|blog|connect|developers|ec-media|eventlogger|help-assets|media|visuals|w|www)\.)?soundcloud\.com/" | ||||
| 		to="https://$1soundcloud.com/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://scbackstage\.wpengine\.netdna-cdn\.com/" | ||||
| 		to="https://backstage.soundcloud.com/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://soundcloud\.wpengine\.netdna-cdn\.com/" | ||||
| 		to="https://blog.soundcloud.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										36
									
								
								searx/https_rules/ThePirateBay.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										36
									
								
								searx/https_rules/ThePirateBay.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,36 @@ | ||||
| <!-- | ||||
|   Nonfunctional: | ||||
| 
 | ||||
|     - image.bayimg.com | ||||
|     - (www.)thepiratebay.sx		(http reply) | ||||
| 
 | ||||
| 
 | ||||
|   For problematic rules, see ThePirateBay-mismatches.xml. | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="The Pirate Bay (partial)"> | ||||
| 
 | ||||
|   <target host="suprbay.org" /> | ||||
|   <target host="*.suprbay.org" /> | ||||
|   <!--	* for cross-domain cookie	--> | ||||
|   <target host="*.forum.suprbay.org" /> | ||||
|   <target host="thepiratebay.org"/> | ||||
|   <target host="*.thepiratebay.org"/> | ||||
|   <target host="thepiratebay.se"/> | ||||
|   <target host="*.thepiratebay.se"/> | ||||
| 
 | ||||
|   <securecookie host="^.*\.suprbay\.org$" name=".*" /> | ||||
|   <securecookie host="^(.*\.)?thepiratebay\.se$" name=".*"/> | ||||
| 
 | ||||
| 
 | ||||
|   <!--	Cert doesn't match (www.), redirects like so.	--> | ||||
|   <rule from="^https?://(?:forum\.|www\.)?suprbay\.org/" | ||||
|     to="https://forum.suprbay.org/" /> | ||||
| 
 | ||||
|   <rule from="^http://(?:www\.)?thepiratebay\.(?:org|se)/" | ||||
|     to="https://thepiratebay.se/"/> | ||||
| 
 | ||||
|   <rule from="^http://(rss|static|torrents)\.thepiratebay\.(?:org|se)/" | ||||
|     to="https://$1.thepiratebay.se/"/> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										18
									
								
								searx/https_rules/Torproject.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								searx/https_rules/Torproject.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| <ruleset name="Tor Project"> | ||||
| 
 | ||||
| 	<target host="torproject.org" /> | ||||
| 	<target host="*.torproject.org" /> | ||||
| 		<exclusion pattern="^http://torperf\.torproject\.org/" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Not secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^\.blog\.torproject\.org$" name="^SESS[0-9a-f]{32}$" /--> | ||||
| 
 | ||||
| 	<securecookie host="^(?:.*\.)?torproject\.org$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://([^/:@\.]+\.)?torproject\.org/" | ||||
| 		 to="https://$1torproject.org/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										169
									
								
								searx/https_rules/Twitter.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										169
									
								
								searx/https_rules/Twitter.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,169 @@ | ||||
| <!-- | ||||
| 	Other Twitter rulesets: | ||||
| 
 | ||||
| 		- Twitter_Community.com.xml | ||||
| 
 | ||||
| 
 | ||||
| 	Nonfunctional domains: | ||||
| 
 | ||||
| 		- status.twitter.com * | ||||
| 		- status.twitter.jp * | ||||
| 
 | ||||
| 	* Tumblr | ||||
| 
 | ||||
| 
 | ||||
| 	CDN buckets: | ||||
| 
 | ||||
| 		- a1095.g.akamai.net/=/1095/134446/1d/platform.twitter.com/ | platform2.twitter.com.edgesuite.net | ||||
| 
 | ||||
| 			- platform2.twitter.com | ||||
| 
 | ||||
| 		- twitter-any.s3.amazonaws.com | ||||
| 		- twitter-blog.s3.amazonaws.com | ||||
| 
 | ||||
| 		- d2rdfnizen5apl.cloudfront.net | ||||
| 
 | ||||
| 			- s.twimg.com | ||||
| 
 | ||||
| 		- ssl2.twitter.com.edgekey.net | ||||
| 		- twitter.github.com | ||||
| 
 | ||||
| 
 | ||||
| 	Problematic domains: | ||||
| 
 | ||||
| 		- twimg.com subdomains: | ||||
| 
 | ||||
| 			- a5 * | ||||
| 			- s		(cloudfront) | ||||
| 
 | ||||
| 		- twitter.com subdomains: | ||||
| 
 | ||||
| 			- platform[0-3]		(403, akamai) | ||||
| 
 | ||||
| 	* akamai | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered domains: | ||||
| 
 | ||||
| 		- (www.)t.co		(www → ^) | ||||
| 
 | ||||
| 		- twimg.com subdomains: | ||||
| 
 | ||||
| 			- a[5-9]	(→ si0) | ||||
| 			- a\d | ||||
| 			- abs | ||||
| 			- dnt | ||||
| 			- ea | ||||
| 			- g | ||||
| 			- g2 | ||||
| 			- gu | ||||
| 			- hca | ||||
| 			- jp | ||||
| 			- ma | ||||
| 			- ma[0123] | ||||
| 			- o | ||||
| 			- p | ||||
| 			- pbs | ||||
| 			- r | ||||
| 			- s		(→ d2rdfnizen5apl.cloudfront.net) | ||||
| 			- si[0-5] | ||||
| 			- syndication | ||||
| 			- cdn.syndication | ||||
| 			- tailfeather | ||||
| 			- ton | ||||
| 			- v | ||||
| 			- widgets | ||||
| 
 | ||||
| 		- twitter.com subdomains: | ||||
| 
 | ||||
| 			- (www.) | ||||
| 			- 201[012] | ||||
| 			- about | ||||
| 			- ads | ||||
| 			- analytics | ||||
| 			- api | ||||
| 			- cdn.api | ||||
| 			- urls.api | ||||
| 			- blog | ||||
| 			- business | ||||
| 			- preview.cdn | ||||
| 			- preview-dev.cdn | ||||
| 			- preview-stage.cdn | ||||
| 			- de | ||||
| 			- dev | ||||
| 			- en | ||||
| 			- engineering | ||||
| 			- es | ||||
| 			- firefox | ||||
| 			- fr | ||||
| 			- it | ||||
| 			- ja | ||||
| 			- jp | ||||
| 			- m | ||||
| 			- media | ||||
| 			- mobile | ||||
| 			- music | ||||
| 			- oauth | ||||
| 			- p | ||||
| 			- pic | ||||
| 			- platform | ||||
| 			- platform[0-3]		(→ platform) | ||||
| 			- widgets.platform | ||||
| 			- search | ||||
| 			- static | ||||
| 			- support | ||||
| 			- transparency | ||||
| 			- upload | ||||
| 
 | ||||
| 
 | ||||
| 	These altnames don't exist: | ||||
| 
 | ||||
| 		- i3.twimg.com | ||||
| 		- p-dev.twimg.com | ||||
| 		- vmtc.twimg.com | ||||
| 
 | ||||
| 		- cdn-dev.api.twitter.com | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Twitter"> | ||||
| 
 | ||||
| 	<target host="t.co" /> | ||||
| 	<target host="*.t.co" /> | ||||
| 	<target host="*.twimg.com" /> | ||||
| 	<target host="twitter.com" /> | ||||
| 	<target host="*.twitter.com" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^\.twitter\.com$" name="^_twitter_sess$" /--> | ||||
| 	<!--securecookie host="^support\.twitter\.com$" name="^_help_center_session$" /--> | ||||
| 	<!-- | ||||
| 		Not secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^\.t\.co$" name="^muc$" /--> | ||||
| 	<!--securecookie host="^\.twitter\.com$" name="^guest_id$" /--> | ||||
| 
 | ||||
| 	<securecookie host="^\.t\.co$" name=".+" /> | ||||
| 	<securecookie host="^(?:.*\.)?twitter\.com$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?t\.co/" | ||||
| 		to="https://t.co/" /> | ||||
| 
 | ||||
| 	<rule from="^http://a[5-9]\.twimg\.com/" | ||||
| 		to="https://si0.twimg.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(abs|a\d|dnt|ea|g[2u]?|hca|jp|ma\d?|o|p|pbs|r|si\d|(?:cdn\.)?syndication|tailfeather|ton|v|widgets)\.twimg\.com/" | ||||
| 		to="https://$1.twimg.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://s\.twimg\.com/" | ||||
| 		to="https://d2rdfnizen5apl.cloudfront.net/" /> | ||||
| 
 | ||||
| 	<rule from="^http://((?:201\d|about|ads|analytics|blog|(?:cdn\.|urls\.)?api|business|preview(?:-dev|-stage)?\.cdn|de|dev|engineering|en|es|firefox|fr|it|ja|jp|m|media|mobile|music|oauth|p|pic|platform|widgets\.platform|search|static|support|transparency|upload|www)\.)?twitter\.com/" | ||||
| 		to="https://$1twitter.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://platform\d\.twitter\.com/" | ||||
| 		to="https://platform.twitter.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										75
									
								
								searx/https_rules/Vimeo.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										75
									
								
								searx/https_rules/Vimeo.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,75 @@ | ||||
| <!-- | ||||
| 	CDN buckets: | ||||
| 
 | ||||
| 		- av.vimeo.com.edgesuite.net | ||||
| 
 | ||||
| 			- a808.g.akamai.net | ||||
| 
 | ||||
| 		- pdl.vimeocdn.com.edgesuite.net | ||||
| 
 | ||||
| 			- a1189.g.akamai.net | ||||
| 
 | ||||
| 
 | ||||
| 	Problematic subdomains: | ||||
| 
 | ||||
| 		- av	(pdl.../crossdomain.xml restricts to port 80) | ||||
| 		- pdl	(works, akamai) | ||||
| 
 | ||||
| 
 | ||||
| 	Partially covered subdomains: | ||||
| 
 | ||||
| 		- developer	(some pages redirect to http) | ||||
| 		- pdl		(→ akamai) | ||||
| 
 | ||||
| 
 | ||||
| 	Fully covered subdomains: | ||||
| 
 | ||||
| 		- (www.) | ||||
| 		- secure | ||||
| 
 | ||||
| 
 | ||||
| Default off per https://trac.torproject.org/projects/tor/ticket/7569 --> | ||||
| <ruleset name="Vimeo (default off)" default_off="breaks some video embedding"> | ||||
| 
 | ||||
| 	<target host="vimeo.com" /> | ||||
| 	<target host="*.vimeo.com" /> | ||||
| 		<exclusion pattern="^http://av\.vimeo\.com/crossdomain\.xml" /> | ||||
| 		<!--exclusion pattern="^http://developer\.vimeo\.com/($|\?|(apps|guidelines|help|player)($|[?/]))" /--> | ||||
| 		<exclusion pattern="^http://developer\.vimeo\.com/(?!apis(?:$|[?/])|favicon\.ico)" /> | ||||
| 	<target host="*.vimeocdn.com" /> | ||||
| 		<!-- | ||||
| 			Uses crossdomain.xml from s3.amazonaws.com, which sets secure="false" | ||||
| 
 | ||||
| 				https://mail1.eff.org/pipermail/https-everywhere/2012-October/001583.html | ||||
| 			--> | ||||
| 		<exclusion pattern="^http://a\.vimeocdn\.com/p/flash/moogaloop/" /> | ||||
| 
 | ||||
| 		<!--	We cannot secure streams because crossdomain.xml | ||||
| 			restricts to port 80 :( | ||||
| 						--> | ||||
| 		<exclusion pattern="^http://pdl\.vimeocdn\.com/(?!crossdomain\.xml)" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Tracking cookies: | ||||
| 					--> | ||||
| 	<securecookie host="^\.(?:player\.)?vimeo\.com$" name="^__utm\w$" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://((?:developer|player|secure|www)\.)?vimeo\.com/" | ||||
| 		to="https://$1vimeo.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://av\.vimeo\.com/" | ||||
| 		to="https://a248.e.akamai.net/f/808/9207/8m/av.vimeo.com/" /> | ||||
| 
 | ||||
| 	<!--	a & b: Akamai	--> | ||||
| 	<rule from="^http://(?:secure-)?([ab])\.vimeocdn\.com/" | ||||
| 		to="https://secure-$1.vimeocdn.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://i\.vimeocdn\.com/" | ||||
| 		to="https://i.vimeocdn.com/" /> | ||||
| 
 | ||||
| 	<rule from="^http://pdl\.vimeocdn\.com/" | ||||
| 		to="https://a248.e.akamai.net/f/1189/4415/8d/pdl.vimeocdn.com/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
| 
 | ||||
							
								
								
									
										13
									
								
								searx/https_rules/WikiLeaks.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								searx/https_rules/WikiLeaks.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,13 @@ | ||||
| <ruleset name="WikiLeaks"> | ||||
| 
 | ||||
| 	<target host="wikileaks.org" /> | ||||
| 	<target host="*.wikileaks.org" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<securecookie host="^(?:w*\.)?wikileaks\.org$" name=".+" /> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://((?:chat|search|shop|www)\.)?wikileaks\.org/" | ||||
| 		to="https://$1wikileaks.org/" /> | ||||
| 
 | ||||
| </ruleset> | ||||
							
								
								
									
										107
									
								
								searx/https_rules/Wikimedia.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								searx/https_rules/Wikimedia.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,107 @@ | ||||
| <!-- | ||||
| 	Wikipedia and other Wikimedia Foundation wikis previously had no real HTTPS support, and | ||||
| 	URLs had to be rewritten to https://secure.wikimedia.org/$wikitype/$language/ . This is no | ||||
| 	longer the case, see https://blog.wikimedia.org/2011/10/03/native-https-support-enabled-for-all-wikimedia-foundation-wikis/ , | ||||
| 	so this file is a lot simpler these days. | ||||
| 
 | ||||
| 
 | ||||
| 	Mixed content: | ||||
| 
 | ||||
| 		- Images, on: | ||||
| 
 | ||||
| 			- stats.wikimedia.org from upload.wikimedia.org * | ||||
| 			- stats.wikimedia.org from wikimediafoundation.org * | ||||
| 
 | ||||
| 	* Secured by us | ||||
| 
 | ||||
| --> | ||||
| <ruleset name="Wikimedia"> | ||||
| 
 | ||||
| 	<target host="enwp.org" /> | ||||
| 	<target host="frwp.org" /> | ||||
| 
 | ||||
| 	<target host="mediawiki.org" /> | ||||
| 	<target host="www.mediawiki.org" /> | ||||
| 	<target host="wikimedia.org" /> | ||||
| 	<target host="*.wikimedia.org" /> | ||||
| 		<exclusion pattern="^http://(?:apt|cs|cz|parsoid-lb\.eqiad|status|torrus|ubuntu)\.wikimedia\.org" /> | ||||
| 		<!-- https://mail1.eff.org/pipermail/https-everywhere-rules/2012-June/001189.html --> | ||||
| 		<exclusion pattern="^http://lists\.wikimedia\.org/pipermail(?:$|/)" /> | ||||
| 	<target host="wikimediafoundation.org" /> | ||||
| 	<target host="www.wikimediafoundation.org" /> | ||||
| 
 | ||||
| 	<!-- Wikimedia projects (also some wikimedia.org subdomains) --> | ||||
| 	<target host="wikibooks.org" /> | ||||
| 	<target host="*.wikibooks.org" /> | ||||
| 	<target host="wikidata.org" /> | ||||
| 	<target host="*.wikidata.org" /> | ||||
| 	<target host="wikinews.org" /> | ||||
| 	<target host="*.wikinews.org" /> | ||||
| 	<target host="wikipedia.org" /> | ||||
| 	<target host="*.wikipedia.org" /> | ||||
| 	<target host="wikiquote.org" /> | ||||
| 	<target host="*.wikiquote.org" /> | ||||
| 	<target host="wikisource.org" /> | ||||
| 	<target host="*.wikisource.org" /> | ||||
| 	<target host="wikiversity.org" /> | ||||
| 	<target host="*.wikiversity.org" /> | ||||
| 	<target host="wikivoyage.org" /> | ||||
| 	<target host="*.wikivoyage.org" /> | ||||
| 	<target host="wiktionary.org" /> | ||||
| 	<target host="*.wiktionary.org" /> | ||||
| 
 | ||||
| 	<!-- Wikimedia chapters --> | ||||
| 	<target host="wikimedia.ca" /> | ||||
| 	<target host="www.wikimedia.ca" /> | ||||
| 
 | ||||
| 	<!-- Wikimedia Tool Labs --> | ||||
| 	<target host="tools.wmflabs.org" /> | ||||
| 	<target host="icinga.wmflabs.org" /> | ||||
| 	<target host="ganglia.wmflabs.org" /> | ||||
| 
 | ||||
| 	<!--	Not secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^\.wiki(books|ipedia)\.org$" name="^GeoIP$" /--> | ||||
| 
 | ||||
| 	<securecookie host="^\.wik(?:ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name="^GeoIP$" /> | ||||
| 	<securecookie host="^([^@:/]+\.)?wik(ibooks|idata|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org$" name=".*" /> | ||||
| 	<securecookie host="^(species|commons|meta|incubator|wikitech)\.wikimedia\.org$" name=".*" /> | ||||
| 	<securecookie host="^(?:www\.)?mediawiki\.org$" name=".*" /> | ||||
| 	<securecookie host="^wikimediafoundation\.org$" name=".*" /> | ||||
| 
 | ||||
| 	<rule from="^http://(en|fr)wp\.org/" | ||||
| 		to="https://$1.wikipedia.org/wiki/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?mediawiki\.org/" | ||||
| 		to="https://www.mediawiki.org/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://download\.wikipedia\.org/" | ||||
| 		to="https://dumps.wikimedia.org/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://(download|dataset2|sitemap)\.wikimedia\.org/" | ||||
| 		to="https://dumps.wikimedia.org/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://(labs-ns[01]|virt0)\.wikimedia\.org/" | ||||
| 		to="https://wikitech.wikimedia.org/" />	 | ||||
| 
 | ||||
| 	<rule from="^https?://noboard\.chapters\.wikimedia\.org/" | ||||
| 		to="https://noboard-chapters.wikimedia.org/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://wg\.en\.wikipedia\.org/" | ||||
| 		to="https://wg-en.wikipedia.org/" /> | ||||
| 
 | ||||
| 	<rule from="^https?://arbcom\.(de|en|fi|nl)\.wikipedia\.org/" | ||||
| 		to="https://arbcom-$1.wikipedia.org/" /> | ||||
| 
 | ||||
| 	<rule from="^http://([^@:/]+\.)?wik(ibooks|idata|imedia|inews|ipedia|iquote|isource|iversity|ivoyage|tionary)\.org/" | ||||
| 		to="https://$1wik$2.org/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(www\.)?wikimediafoundation\.org/" | ||||
| 		to="https://$1wikimediafoundation.org/" /> | ||||
| 
 | ||||
| 	<rule from="^http://(www\.)?wikimedia\.ca/" | ||||
| 		to="https://wikimedia.ca/" /> | ||||
| 
 | ||||
| 	<rule from="^http://([^@:/]+)\.wmflabs\.org/" | ||||
| 		to="https://$1.wmflabs.org/" /> | ||||
| </ruleset> | ||||
							
								
								
									
										2450
									
								
								searx/https_rules/Yahoo.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2450
									
								
								searx/https_rules/Yahoo.xml
									
									
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										46
									
								
								searx/https_rules/YouTube.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								searx/https_rules/YouTube.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,46 @@ | ||||
| <ruleset name="YouTube (partial)"> | ||||
| 
 | ||||
| 	<target host="youtube.com" /> | ||||
| 	<target host="*.youtube.com" /> | ||||
| 		<exclusion pattern="^http://(?:www\.)?youtube\.com/crossdomain\.xml"/> | ||||
| 		<exclusion pattern="^http://(?:www\.)?youtube\.com/(?:apiplayer|api_video_info)"/> | ||||
|         <exclusion pattern="^http://(?:[^/@:\.]+\.)?ytimg\.com/.*apiplayer[0-9]*\.swf"/> | ||||
| 	<target host="*.ytimg.com" /> | ||||
| 	<target host="youtu.be" /> | ||||
| 	<target host="youtube-nocookie.com"/> | ||||
| 	<target host="www.youtube-nocookie.com"/> | ||||
| 	<target host="*.googlevideo.com"/> | ||||
|                 <exclusion pattern="^http://([^/@:\.]+)\.googlevideo\.com/crossdomain\.xml"/> | ||||
| 
 | ||||
| 
 | ||||
| 	<!--	Not secured by server: | ||||
| 					--> | ||||
| 	<!--securecookie host="^\.youtube\.com$" name="^(GEUP|PREF|VISITOR_INFO1_LIVE|YSC)$" /--> | ||||
| 
 | ||||
| 	<!--	observed ^. cookies: | ||||
| 			- use_hitbox | ||||
| 			- VISITOR_INFO1_LIVE | ||||
| 			- recently_watched_video_id_list | ||||
| 			- .youtube.com		--> | ||||
| 	<securecookie host="^\.youtube\.com" name=".*"/> | ||||
| 
 | ||||
| 
 | ||||
| 	<rule from="^http://(www\.)?youtube\.com/" | ||||
| 		to="https://$1youtube.com/"/> | ||||
| 
 | ||||
| 	<rule from="^http://(br|de|es|fr|il|img|insight|jp|m|nl|uk)\.youtube\.com/" | ||||
| 		to="https://$1.youtube.com/"/> | ||||
| 
 | ||||
| 	<rule from="^http://([^/@:\.]+)\.ytimg\.com/" | ||||
| 		to="https://$1.ytimg.com/"/> | ||||
| 
 | ||||
| 	<rule from="^http://youtu\.be/" | ||||
| 		to="https://youtu.be/"/> | ||||
| 
 | ||||
| 	<rule from="^http://(?:www\.)?youtube-nocookie\.com/" | ||||
| 		to="https://www.youtube-nocookie.com/"/> | ||||
| 
 | ||||
| 	<rule from="^http://([^/@:\.]+)\.googlevideo\.com/" | ||||
| 	        to="https://$1.googlevideo.com/"/> | ||||
| 
 | ||||
| </ruleset> | ||||
| @ -4,6 +4,9 @@ server: | ||||
|     debug : False | ||||
|     request_timeout : 3.0 # seconds | ||||
|     base_url: False | ||||
|     themes_path : "" | ||||
|     default_theme : default | ||||
|     https_rewrite : True | ||||
| 
 | ||||
| engines: | ||||
|   - name : general_dummy | ||||
|  | ||||
| @ -50,6 +50,9 @@ from searx.search import Search | ||||
| from searx.query import Query | ||||
| from searx.autocomplete import backends as autocomplete_backends | ||||
| 
 | ||||
| from urlparse import urlparse | ||||
| import re | ||||
| 
 | ||||
| 
 | ||||
| static_path, templates_path, themes =\ | ||||
|     get_themes(settings['themes_path'] | ||||
| @ -206,16 +209,60 @@ def index(): | ||||
|         if not search.paging and engines[result['engine']].paging: | ||||
|             search.paging = True | ||||
| 
 | ||||
|         # check if HTTPS rewrite is required  | ||||
|         if settings['server']['https_rewrite']\ | ||||
|            and result['parsed_url'].scheme == 'http': | ||||
| 
 | ||||
|             for http_regex, https_url in https_rules: | ||||
|                 if http_regex.match(result['url']): | ||||
|                     result['url'] = http_regex.sub(https_url, result['url']) | ||||
|                     # TODO result['parsed_url'].scheme | ||||
|             skip_https_rewrite = False | ||||
| 
 | ||||
|             # check if HTTPS rewrite is possible | ||||
|             for target, rules, exclusions in https_rules: | ||||
| 
 | ||||
|                 # check if target regex match with url | ||||
|                 if target.match(result['url']): | ||||
|                     # process exclusions | ||||
|                     for exclusion in exclusions: | ||||
|                         # check if exclusion match with url | ||||
|                         if exclusion.match(result['url']): | ||||
|                             skip_https_rewrite = True | ||||
|                             break | ||||
| 
 | ||||
|                     # skip https rewrite if required | ||||
|                     if skip_https_rewrite: | ||||
|                         break | ||||
| 
 | ||||
|                     # process rules | ||||
|                     for rule in rules: | ||||
|                         try: | ||||
|                             # TODO, precompile rule | ||||
|                             p = re.compile(rule[0]) | ||||
|                              | ||||
|                             # rewrite url if possible | ||||
|                             new_result_url = p.sub(rule[1], result['url']) | ||||
|                         except: | ||||
|                             break | ||||
| 
 | ||||
|                         # parse new url | ||||
|                         new_parsed_url = urlparse(new_result_url) | ||||
| 
 | ||||
|                         # continue if nothing was rewritten | ||||
|                         if result['url'] == new_result_url: | ||||
|                             continue | ||||
| 
 | ||||
|                         # get domainname from result | ||||
|                         # TODO, this only works correctly with TLDs like asdf.com, not with ones like asdf.com.de | ||||
|                         # TODO, use publicsuffix instead of this rewrite rule | ||||
|                         old_result_domainname = '.'.join(result['parsed_url'].hostname.split('.')[-2:]) | ||||
|                         new_result_domainname = '.'.join(new_parsed_url.hostname.split('.')[-2:]) | ||||
| 
 | ||||
|                         # check if rewritten hostname is the same, to protect against wrong or malicious rewrite rules | ||||
|                         if old_result_domainname == new_result_domainname: | ||||
|                             # set new url | ||||
|                             result['url'] = new_result_url | ||||
| 
 | ||||
|                     # target has matched, do not search over the other rules  | ||||
|                     break | ||||
| 
 | ||||
|         # HTTPS rewrite | ||||
|         if search.request_data.get('format', 'html') == 'html': | ||||
|             if 'content' in result: | ||||
|                 result['content'] = highlight_content(result['content'], | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user