Merge pull request #781 from return42/fix-google
[fix] google engine: remove ads and fix mobile_ui selector
This commit is contained in:
		
						commit
						db6f617c0f
					
				| @ -112,7 +112,8 @@ filter_mapping = {0: 'off', 1: 'medium', 2: 'high'} | |||||||
| # ------------------------ | # ------------------------ | ||||||
| 
 | 
 | ||||||
| # google results are grouped into <div class="g ..." ../> | # google results are grouped into <div class="g ..." ../> | ||||||
| results_xpath = '//div[contains(@class, "g")]' | results_xpath = '//div[@id="search"]//div[contains(@class, "g ")]' | ||||||
|  | results_xpath_mobile_ui = '//div[contains(@class, "g ")]' | ||||||
| 
 | 
 | ||||||
| # google *sections* are not usual *results*, we ignore them | # google *sections* are not usual *results*, we ignore them | ||||||
| g_section_with_header = './g-section-with-header' | g_section_with_header = './g-section-with-header' | ||||||
| @ -336,7 +337,12 @@ def response(resp): | |||||||
|                 logger.error(e, exc_info=True) |                 logger.error(e, exc_info=True) | ||||||
| 
 | 
 | ||||||
|     # parse results |     # parse results | ||||||
|     for result in eval_xpath_list(dom, results_xpath): | 
 | ||||||
|  |     _results_xpath = results_xpath | ||||||
|  |     if use_mobile_ui: | ||||||
|  |         _results_xpath = results_xpath_mobile_ui | ||||||
|  | 
 | ||||||
|  |     for result in eval_xpath_list(dom, _results_xpath): | ||||||
| 
 | 
 | ||||||
|         # google *sections* |         # google *sections* | ||||||
|         if extract_text(eval_xpath(result, g_section_with_header)): |         if extract_text(eval_xpath(result, g_section_with_header)): | ||||||
| @ -347,20 +353,22 @@ def response(resp): | |||||||
|             title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None) |             title_tag = eval_xpath_getindex(result, title_xpath, 0, default=None) | ||||||
|             if title_tag is None: |             if title_tag is None: | ||||||
|                 # this is not one of the common google results *section* |                 # this is not one of the common google results *section* | ||||||
|                 logger.debug('ingoring <div class="g" ../> section: missing title') |                 logger.debug('ingoring item from the result_xpath list: missing title') | ||||||
|                 continue |                 continue | ||||||
|             title = extract_text(title_tag) |             title = extract_text(title_tag) | ||||||
|             url = eval_xpath_getindex(result, href_xpath, 0, None) |             url = eval_xpath_getindex(result, href_xpath, 0, None) | ||||||
|             if url is None: |             if url is None: | ||||||
|                 continue |                 continue | ||||||
|             content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) |             content = extract_text(eval_xpath_getindex(result, content_xpath, 0, default=None), allow_none=True) | ||||||
|  |             if content is None: | ||||||
|  |                 logger.debug('ingoring item from the result_xpath list: missing content of title "%s"', title) | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             logger.debug('add link to results: %s', title) | ||||||
|             results.append({'url': url, 'title': title, 'content': content}) |             results.append({'url': url, 'title': title, 'content': content}) | ||||||
|  | 
 | ||||||
|         except Exception as e:  # pylint: disable=broad-except |         except Exception as e:  # pylint: disable=broad-except | ||||||
|             logger.error(e, exc_info=True) |             logger.error(e, exc_info=True) | ||||||
|             # from lxml import etree |  | ||||||
|             # logger.debug(etree.tostring(result, pretty_print=True)) |  | ||||||
|             # import pdb |  | ||||||
|             # pdb.set_trace() |  | ||||||
|             continue |             continue | ||||||
| 
 | 
 | ||||||
|     # parse suggestion |     # parse suggestion | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user