commit 469e08881e
@@ -81,7 +81,7 @@ def load_engine(engine_data):
         if engine_attr.startswith('_'):
             continue
         if getattr(engine, engine_attr) is None:
-            print('[E] Engine config error: Missing attribute "{0}.{1}"'\
+            print('[E] Engine config error: Missing attribute "{0}.{1}"'
                   .format(engine.name, engine_attr))
             sys.exit(1)
 
@@ -102,7 +102,7 @@ def load_engine(engine_data):
     if engine.shortcut:
         # TODO check duplications
         if engine.shortcut in engine_shortcuts:
-            print('[E] Engine config error: ambigious shortcut: {0}'\
+            print('[E] Engine config error: ambigious shortcut: {0}'
                   .format(engine.shortcut))
             sys.exit(1)
         engine_shortcuts[engine.shortcut] = engine.name
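Note: the pattern in this hunk repeats throughout the commit — trailing backslashes are dropped wherever the expression already sits inside brackets, since Python then continues the line implicitly (the style PEP 8 recommends over backslash continuation). A minimal sketch of the equivalence, with stand-in argument values:

```python
# Inside the parentheses of the call, Python joins these lines
# implicitly, so no continuation character is needed:
print('[E] Engine config error: Missing attribute "{0}.{1}"'
      .format('example_engine', 'example_attr'))

# The removed form relied on an explicit (and here redundant) backslash;
# both statements print exactly the same string:
print('[E] Engine config error: Missing attribute "{0}.{1}"'\
      .format('example_engine', 'example_attr'))
```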
							
								
								
									
searx/engines/digg.py (new file, 67 lines)

@@ -0,0 +1,67 @@
+## Digg (News, Social media)
+#
+# @website     https://digg.com/
+# @provide-api no
+#
+# @using-api   no
+# @results     HTML (using search portal)
+# @stable      no (HTML can change)
+# @parse       url, title, content, publishedDate, thumbnail
+
+from urllib import quote_plus
+from json import loads
+from lxml import html
+from cgi import escape
+from dateutil import parser
+
+# engine dependent config
+categories = ['news', 'social media']
+paging = True
+
+# search-url
+base_url = 'https://digg.com/'
+search_url = base_url+'api/search/{query}.json?position={position}&format=html'
+
+# specific xpath variables
+results_xpath = '//article'
+link_xpath = './/small[@class="time"]//a'
+title_xpath = './/h2//a//text()'
+content_xpath = './/p//text()'
+pubdate_xpath = './/time'
+
+
+# do search-request
+def request(query, params):
+    offset = (params['pageno'] - 1) * 10
+    params['url'] = search_url.format(position=offset,
+                                      query=quote_plus(query))
+    return params
+
+
+# get response from search-request
+def response(resp):
+    results = []
+
+    search_result = loads(resp.text)
+
+    dom = html.fromstring(search_result['html'])
+
+    # parse results
+    for result in dom.xpath(results_xpath):
+        url = result.attrib.get('data-contenturl')
+        thumbnail = result.xpath('.//img')[0].attrib.get('src')
+        title = ''.join(result.xpath(title_xpath))
+        content = escape(''.join(result.xpath(content_xpath)))
+        pubdate = result.xpath(pubdate_xpath)[0].attrib.get('datetime')
+        publishedDate = parser.parse(pubdate)
+
+        # append result
+        results.append({'url': url,
+                        'title': title,
+                        'content': content,
+                        'template': 'videos.html',
+                        'publishedDate': publishedDate,
+                        'thumbnail': thumbnail})
+
+    # return results
+    return results
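Note: a rough sketch of how searx drives this new engine (Python 2, as the codebase used at the time; the params dict below only approximates what searx's default_request_params() provides):

```python
from searx.engines import digg

# approximate request parameters; searx normally builds these itself
params = {'method': 'GET', 'headers': {}, 'data': {},
          'url': '', 'cookies': {}, 'verify': True,
          'pageno': 1}

params = digg.request('free software', params)
print(params['url'])
# https://digg.com/api/search/free+software.json?position=0&format=html

# searx then fetches that URL and hands the HTTP response object to
# digg.response(resp), which returns the list of result dicts built above.
```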
@@ -53,7 +53,8 @@ def response(resp):
 
     for photo in photos:
 
-        # In paged configuration, the first pages' photos are represented by a None object
+        # In paged configuration, the first pages' photos
+        # are represented by a None object
         if photo is None:
             continue
 
@@ -74,10 +75,15 @@ def response(resp):
 
         title = photo['title']
 
-        content = '<span class="photo-author">' + photo['owner']['username'] + '</span><br />'
+        content = '<span class="photo-author">' +\
+                  photo['owner']['username'] +\
+                  '</span><br />'
 
         if 'description' in photo:
-            content = content + '<span class="description">' + photo['description'] + '</span>'
+            content = content +\
+                      '<span class="description">' +\
+                      photo['description'] +\
+                      '</span>'
 
         # append result
         results.append({'url': url,
@@ -21,11 +21,15 @@ paging = True
 api_key = None
 
 
-url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={api_key}&{text}&sort=relevance&extras=description%2C+owner_name%2C+url_o%2C+url_z&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
+url = 'https://api.flickr.com/services/rest/?method=flickr.photos.search' +\
+      '&api_key={api_key}&{text}&sort=relevance' +\
+      '&extras=description%2C+owner_name%2C+url_o%2C+url_z' +\
+      '&per_page={nb_per_page}&format=json&nojsoncallback=1&page={page}'
 photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'
 
 paging = True
 
 
 def build_flickr_url(user_id, photo_id):
     return photo_url.format(userid=user_id, photoid=photo_id)
 
@@ -65,9 +69,12 @@ def response(resp):
 
         title = photo['title']
 
-        content = '<span class="photo-author">'+ photo['ownername'] +'</span><br />'
-
-        content = content + '<span class="description">' + photo['description']['_content'] + '</span>'
+        content = '<span class="photo-author">' +\
+                  photo['ownername'] +\
+                  '</span><br />' +\
+                  '<span class="description">' +\
+                  photo['description']['_content'] +\
+                  '</span>'
 
         # append result
         results.append({'url': url,
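For illustration, the small build_flickr_url helper kept as context in this hunk simply fills the photo_url template (the ids below are made up):

```python
photo_url = 'https://www.flickr.com/photos/{userid}/{photoid}'

def build_flickr_url(user_id, photo_id):
    # joins a Flickr user id and photo id into a canonical photo page URL
    return photo_url.format(userid=user_id, photoid=photo_id)

print(build_flickr_url('12345678@N00', '9876543210'))
# https://www.flickr.com/photos/12345678@N00/9876543210
```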
@@ -24,7 +24,7 @@ search_url = url + 'search/{search_term}/{pageno}/'
 
 # specific xpath variables
 magnet_xpath = './/a[@title="Torrent magnet link"]'
-#content_xpath = './/font[@class="detDesc"]//text()'
+content_xpath = './/span[@class="font11px lightgrey block"]'
 
 
 # do search-request
@@ -56,7 +56,8 @@ def response(resp):
         link = result.xpath('.//a[@class="cellMainLink"]')[0]
         href = urljoin(url, link.attrib['href'])
         title = ' '.join(link.xpath('.//text()'))
-        content = escape(html.tostring(result.xpath('.//span[@class="font11px lightgrey block"]')[0], method="text"))
+        content = escape(html.tostring(result.xpath(content_xpath)[0],
+                                       method="text"))
         seed = result.xpath('.//td[contains(@class, "green")]/text()')[0]
         leech = result.xpath('.//td[contains(@class, "red")]/text()')[0]
@@ -11,7 +11,6 @@
 from urllib import urlencode
 from json import loads
 import cgi
-import re
 
 # engine dependent config
 categories = ['it']
@@ -50,7 +49,8 @@ def response(resp):
         for line, code in sorted(lines.items()):
             content = content + '<tr><td class="line-number" style="padding-right:5px;">'
             content = content + str(line) + '</td><td class="code-snippet">'
-            # Replace every two spaces with ' &nbps;' to keep formatting while allowing the browser to break the line if necessary
+            # Replace every two spaces with ' &nbps;' to keep formatting
+            # while allowing the browser to break the line if necessary
             content = content + cgi.escape(code).replace('\t', '    ').replace('  ', ' &nbsp;').replace('  ', ' &nbsp;')
             content = content + "</td></tr>"
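Note: the HTML entities on the replace line were eaten by the rendered view; the reconstruction above assumes each two-space run becomes ' &nbsp;', as the (verbatim, typo included) comment describes. A standalone sketch of the escaping step:

```python
import cgi  # Python 2; cgi.escape HTML-escapes <, > and &

code = 'if x:\n\treturn  x'  # sample snippet with a tab and a double space
rendered = (cgi.escape(code)
            .replace('\t', '    ')      # tabs -> four spaces
            .replace('  ', ' &nbsp;'))  # space pairs -> space + non-breaking space
print(rendered)
# Alternating normal and non-breaking spaces preserves the indentation
# width while still giving the browser places to wrap long lines.
```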
@@ -37,8 +37,15 @@ def response(resp):
     # parse results
     for result in search_results['results']:
         href = result['url']
-        title = "[" + result['type'] + "] " + result['namespace'] + " " + result['name']
-        content = '<span class="highlight">[' + result['type'] + "] " + result['name'] + " " + result['synopsis'] + "</span><br />" + result['description']
+        title = "[" + result['type'] + "] " +\
+                result['namespace'] +\
+                " " + result['name']
+        content = '<span class="highlight">[' +\
+                  result['type'] + "] " +\
+                  result['name'] + " " +\
+                  result['synopsis'] +\
+                  "</span><br />" +\
+                  result['description']
 
         # append result
         results.append({'url': href,
@@ -60,10 +60,14 @@ def response(resp):
 
         content = result.xpath('.//div[contains(@class,"red")]//text()')[0]
         content = content + " - "
-        content = content + html.tostring(result.xpath('.//div[contains(@class,"grey-web")]')[0], method='text')
+        text = result.xpath('.//div[contains(@class,"grey-web")]')[0]
+        content = content + html.tostring(text, method='text')
 
         if result.xpath(".//span") != []:
-            content = content + " - (" + result.xpath(".//span//text()")[0].strip() + ")"
+            content = content +\
+                      " - (" +\
+                      result.xpath(".//span//text()")[0].strip() +\
+                      ")"
 
         # append result
         results.append({'url': href,
@@ -1,6 +1,6 @@
 ## Twitter (Social media)
 #
-# @website     https://www.bing.com/news
+# @website     https://twitter.com/
 # @provide-api yes (https://dev.twitter.com/docs/using-search)
 #
 # @using-api   no
@@ -14,6 +14,7 @@ from urlparse import urljoin
 from urllib import urlencode
 from lxml import html
 from cgi import escape
+from datetime import datetime
 
 # engine dependent config
 categories = ['social media']
@@ -27,7 +28,8 @@ search_url = base_url+'search?'
 results_xpath = '//li[@data-item-type="tweet"]'
 link_xpath = './/small[@class="time"]//a'
 title_xpath = './/span[@class="username js-action-profile-name"]//text()'
-content_xpath = './/p[@class="js-tweet-text tweet-text"]//text()'
+content_xpath = './/p[@class="js-tweet-text tweet-text"]'
+timestamp_xpath = './/span[contains(@class,"_timestamp")]'
 
 
 # do search-request
@@ -52,8 +54,17 @@ def response(resp):
         link = tweet.xpath(link_xpath)[0]
         url = urljoin(base_url, link.attrib.get('href'))
         title = ''.join(tweet.xpath(title_xpath))
-        content = escape(''.join(tweet.xpath(content_xpath)))
-
+        content = escape(html.tostring(tweet.xpath(content_xpath)[0], method='text', encoding='UTF-8').decode("utf-8"))
+        pubdate = tweet.xpath(timestamp_xpath)
+        if len(pubdate) > 0:
+            timestamp = float(pubdate[0].attrib.get('data-time'))
+            publishedDate = datetime.fromtimestamp(timestamp, None)
+            # append result
+            results.append({'url': url,
+                            'title': title,
+                            'content': content,
+                            'publishedDate': publishedDate})
+        else:
+            # append result
+            results.append({'url': url,
+                            'title': title,
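The new timestamp handling boils down to reading a unix epoch from the tweet markup; a self-contained sketch (the attribute value is invented for illustration):

```python
from datetime import datetime

# e.g. <span class="_timestamp" data-time="1409443574">...</span>
data_time = '1409443574'  # hypothetical value of the data-time attribute

timestamp = float(data_time)
# tz=None yields a naive datetime in local time, matching the hunk above
publishedDate = datetime.fromtimestamp(timestamp, None)
print(publishedDate)
```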
@@ -154,7 +154,6 @@ def load_https_rules(rules_path):
     print(' * {n} https-rules loaded'.format(n=len(https_rules)))
 
 
-
 def https_url_rewrite(result):
     skip_https_rewrite = False
     # check if HTTPS rewrite is possible
@@ -69,11 +69,16 @@ def threaded_requests(requests):
                 print('engine timeout: {0}'.format(th._engine_name))
 
 
-
 # get default reqest parameter
 def default_request_params():
     return {
-        'method': 'GET', 'headers': {}, 'data': {}, 'url': '', 'cookies': {}, 'verify': True}
+        'method': 'GET',
+        'headers': {},
+        'data': {},
+        'url': '',
+        'cookies': {},
+        'verify': True
+    }
 
 
 # create a callback wrapper for the search engine results
@@ -487,14 +492,15 @@ class Search(object):
                 continue
 
             # append request to list
-            requests.append((req, request_params['url'], request_args, selected_engine['name']))
+            requests.append((req, request_params['url'],
+                             request_args,
+                             selected_engine['name']))
 
         if not requests:
             return results, suggestions, answers, infoboxes
         # send all search-request
         threaded_requests(requests)
 
-
         while not results_queue.empty():
             engine_name, engine_results = results_queue.get_nowait()
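For context, every engine request starts from the dict reformatted above; a simplified sketch of how it is consumed (the engine's request() call is indicated in a comment because the engine module here is hypothetical):

```python
def default_request_params():
    # baseline parameters shared by all engine requests
    return {
        'method': 'GET',
        'headers': {},
        'data': {},
        'url': '',
        'cookies': {},
        'verify': True
    }

request_params = default_request_params()
request_params['pageno'] = 1
# an engine's request() then fills in request_params['url'] (and headers
# or cookies if it needs them) before the request is queued for
# threaded_requests():
# request_params = engine_module.request('some query', request_params)
print(request_params['method'], request_params['verify'])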
@@ -45,6 +45,10 @@ engines:
     engine : duckduckgo_definitions
     shortcut : ddd
 
+  - name : digg
+    engine : digg
+    shortcut : dg
+
   - name : wikidata
     engine : wikidata
     shortcut : wd
@@ -99,6 +103,33 @@ engines:
     engine : google_news
     shortcut : gon
 
+  - name : google play apps
+    engine        : xpath
+    search_url    : https://play.google.com/store/search?q={query}&c=apps
+    url_xpath     : //a[@class="title"]/@href
+    title_xpath   : //a[@class="title"]
+    content_xpath : //a[@class="subtitle"]
+    categories : files
+    shortcut : gpa
+
+  - name : google play movies
+    engine        : xpath
+    search_url    : https://play.google.com/store/search?q={query}&c=movies
+    url_xpath     : //a[@class="title"]/@href
+    title_xpath   : //a[@class="title"]
+    content_xpath : //a[@class="subtitle"]
+    categories : videos
+    shortcut : gpm
+
+  - name : google play music
+    engine        : xpath
+    search_url    : https://play.google.com/store/search?q={query}&c=music
+    url_xpath     : //a[@class="title"]/@href
+    title_xpath   : //a[@class="title"]
+    content_xpath : //a[@class="subtitle"]
+    categories : music
+    shortcut : gps
+
   - name : openstreetmap
     engine : openstreetmap
     shortcut : osm
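The three google play entries all use the generic xpath engine, which applies the configured expressions to the fetched page. Roughly along these lines (a simplified sketch, not the actual searx/engines/xpath.py, which also resolves relative URLs; the HTML is a stand-in for a play.google.com results page):

```python
from lxml import html

page = '''<div>
  <a class="title" href="/store/apps/details?id=org.example">Example App</a>
  <a class="subtitle">Example Publisher</a>
</div>'''

dom = html.fromstring(page)
# the three expressions from the settings.yml entries above
urls = dom.xpath('//a[@class="title"]/@href')
titles = [e.text_content() for e in dom.xpath('//a[@class="title"]')]
contents = [e.text_content() for e in dom.xpath('//a[@class="subtitle"]')]

results = [{'url': u, 'title': t, 'content': c}
           for u, t, c in zip(urls, titles, contents)]
print(results)
```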
@@ -30,7 +30,8 @@ def gen_useragent():
 
 
 def searx_useragent():
-    return 'searx/{searx_version} {suffix}'.format(searx_version=VERSION_STRING,
+    return 'searx/{searx_version} {suffix}'.format(
+           searx_version=VERSION_STRING,
            suffix=settings['server'].get('useragent_suffix', ''))