"""
 Gigablast (Web)

 @website     https://gigablast.com
 @provide-api yes (https://gigablast.com/api.html)

 @using-api   yes
 @results     JSON (requested via the format=json URL parameter)
 @stable      yes
 @parse       url, title, content
"""
 | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-31 16:11:04 +02:00
										 |  |  | import random | 
					
						
							| 
									
										
										
										
											2016-01-31 13:24:09 +01:00
										 |  |  | from json import loads | 
					
						
							| 
									
										
										
										
											2015-10-16 12:05:50 +02:00
										 |  |  | from time import time | 
					
						
							| 
									
										
										
										
											2016-11-06 03:51:38 +01:00
										 |  |  | from lxml.html import fromstring | 
					
						
							| 
									
										
										
										
											2019-12-21 20:51:30 +01:00
										 |  |  | from searx.poolrequests import get | 
					
						
							| 
									
										
										
										
											2016-11-30 18:43:03 +01:00
										 |  |  | from searx.url_utils import urlencode | 
					
						
							| 
									
										
										
										
											2019-11-15 09:31:37 +01:00
										 |  |  | from searx.utils import eval_xpath | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
# engine dependent config
categories = ['general']
paging = True
number_of_results = 10  # results per page, interpolated into the 'n' URL parameter
language_support = True
safesearch = True

# search-url
base_url = 'https://gigablast.com/'
# query template; filled in by request() — note format=json, so response()
# parses JSON even though older engine versions used XML
search_string = 'search?{query}'\
    '&n={number_of_results}'\
    '&c=main'\
    '&s={offset}'\
    '&format=json'\
    '&langcountry={lang}'\
    '&ff={safesearch}'\
    '&rand={rxikd}'
# specific xpath variables
# NOTE(review): these xpath variables appear unused by response(), which reads
# the JSON fields directly — confirm no external caller relies on them before removing
results_xpath = '//response//result'
url_xpath = './/url'
title_xpath = './/title'
content_xpath = './/sum'

supported_languages_url = 'https://gigablast.com/search?&rxikd=1'

extra_param = ''  # gigablast requires a random extra parameter
# which can be extracted from the source code of the search page
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def parse_extra_param(text): | 
					
						
							|  |  |  |     global extra_param | 
					
						
							|  |  |  |     param_lines = [x for x in text.splitlines() if x.startswith('var url=') or x.startswith('url=url+')] | 
					
						
							|  |  |  |     extra_param = '' | 
					
						
							|  |  |  |     for l in param_lines: | 
					
						
							|  |  |  |         extra_param += l.split("'")[1] | 
					
						
							|  |  |  |     extra_param = extra_param.split('&')[-1] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def init(engine_settings=None): | 
					
						
							|  |  |  |     parse_extra_param(get('http://gigablast.com/search?c=main&qlangcountry=en-us&q=south&s=10').text) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | # do search-request | 
					
						
							|  |  |  | def request(query, params): | 
					
						
							| 
									
										
										
										
											2019-12-21 20:51:30 +01:00
										 |  |  |     print("EXTRAPARAM:", extra_param) | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  |     offset = (params['pageno'] - 1) * number_of_results | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-01-06 15:27:46 +01:00
										 |  |  |     if params['language'] == 'all': | 
					
						
							|  |  |  |         language = 'xx' | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         language = params['language'].replace('-', '_').lower() | 
					
						
							|  |  |  |         if language.split('-')[0] != 'zh': | 
					
						
							|  |  |  |             language = language.split('-')[0] | 
					
						
							| 
									
										
										
										
											2015-12-23 18:43:35 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     if params['safesearch'] >= 1: | 
					
						
							|  |  |  |         safesearch = 1 | 
					
						
							|  |  |  |     else: | 
					
						
							|  |  |  |         safesearch = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2017-08-31 16:11:04 +02:00
										 |  |  |     # rxieu is some kind of hash from the search query, but accepts random atm | 
					
						
							| 
									
										
										
										
											2015-12-23 18:43:35 +01:00
										 |  |  |     search_path = search_string.format(query=urlencode({'q': query}), | 
					
						
							|  |  |  |                                        offset=offset, | 
					
						
							|  |  |  |                                        number_of_results=number_of_results, | 
					
						
							|  |  |  |                                        lang=language, | 
					
						
							| 
									
										
										
										
											2019-12-21 20:51:30 +01:00
										 |  |  |                                        rxikd=int(time() * 1000), | 
					
						
							|  |  |  |                                        safesearch=safesearch) | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2019-12-21 20:51:30 +01:00
										 |  |  |     params['url'] = base_url + search_path + '&' + extra_param | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return params | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get response from search-request | 
					
						
							|  |  |  | def response(resp): | 
					
						
							|  |  |  |     results = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # parse results | 
					
						
							| 
									
										
										
										
											2019-12-21 20:51:30 +01:00
										 |  |  |     try: | 
					
						
							|  |  |  |         response_json = loads(resp.text) | 
					
						
							|  |  |  |     except: | 
					
						
							|  |  |  |         parse_extra_param(resp.text) | 
					
						
							| 
									
										
										
										
											2019-12-21 21:25:50 +01:00
										 |  |  |         raise Exception('extra param expired, please reload') | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2016-01-31 13:24:09 +01:00
										 |  |  |     for result in response_json['results']: | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  |         # append result | 
					
						
							| 
									
										
										
										
											2016-01-31 13:24:09 +01:00
										 |  |  |         results.append({'url': result['url'], | 
					
						
							| 
									
										
										
										
											2016-12-09 11:44:24 +01:00
										 |  |  |                         'title': result['title'], | 
					
						
							|  |  |  |                         'content': result['sum']}) | 
					
						
							| 
									
										
										
										
											2015-02-08 14:12:14 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # return results | 
					
						
							|  |  |  |     return results | 
					
						
							| 
									
										
										
										
											2016-11-06 03:51:38 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # get supported languages from their site | 
					
						
							| 
									
										
										
										
											2016-12-15 07:34:43 +01:00
										 |  |  | def _fetch_supported_languages(resp): | 
					
						
							| 
									
										
										
										
											2016-11-06 03:51:38 +01:00
										 |  |  |     supported_languages = [] | 
					
						
							| 
									
										
										
										
											2016-12-15 07:34:43 +01:00
										 |  |  |     dom = fromstring(resp.text) | 
					
						
							| 
									
										
										
										
											2019-11-15 09:31:37 +01:00
										 |  |  |     links = eval_xpath(dom, '//span[@id="menu2"]/a') | 
					
						
							| 
									
										
										
										
											2016-11-06 03:51:38 +01:00
										 |  |  |     for link in links: | 
					
						
							| 
									
										
										
										
											2019-11-15 09:31:37 +01:00
										 |  |  |         href = eval_xpath(link, './@href')[0].split('lang%3A') | 
					
						
							| 
									
										
										
										
											2016-12-29 06:24:56 +01:00
										 |  |  |         if len(href) == 2: | 
					
						
							|  |  |  |             code = href[1].split('_') | 
					
						
							|  |  |  |             if len(code) == 2: | 
					
						
							|  |  |  |                 code = code[0] + '-' + code[1].upper() | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 code = code[0] | 
					
						
							| 
									
										
										
										
											2016-11-06 03:51:38 +01:00
										 |  |  |             supported_languages.append(code) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return supported_languages |