Fix the Stack Overflow engine and add comments
This commit is contained in:
		
							parent
							
								
									80f98d6041
								
							
						
					
					
						commit
						a46bbb4042
					
				| @ -1,30 +1,58 @@ | ||||
| ## Stackoverflow (It) | ||||
| #  | ||||
| # @website     https://stackoverflow.com/ | ||||
| # @provide-api unclear (see https://api.stackexchange.com/docs/advanced-search) | ||||
| #  | ||||
| # @using-api   no | ||||
| # @results     HTML | ||||
| # @stable      no (HTML can change) | ||||
| # @parse       url, title, content | ||||
| 
 | ||||
| from urlparse import urljoin | ||||
| from cgi import escape | ||||
| from urllib import urlencode | ||||
| from lxml import html | ||||
| 
 | ||||
| # engine dependent config | ||||
| categories = ['it'] | ||||
| 
 | ||||
| url = 'http://stackoverflow.com/' | ||||
| search_url = url+'search?{query}&page={pageno}' | ||||
| result_xpath = './/div[@class="excerpt"]//text()' | ||||
| 
 | ||||
| paging = True | ||||
| 
 | ||||
| # search-url | ||||
| url = 'http://stackoverflow.com/' | ||||
| search_url = url+'search?{query}&page={pageno}' | ||||
| 
 | ||||
| # specific xpath variables | ||||
| results_xpath = '//div[contains(@class,"question-summary")]' | ||||
| link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a' | ||||
| title_xpath = './/text()' | ||||
| content_xpath = './/div[@class="excerpt"]//text()' | ||||
| 
 | ||||
| 
 | ||||
| # do search-request | ||||
| def request(query, params): | ||||
|     params['url'] = search_url.format(query=urlencode({'q': query}), | ||||
|                                       pageno=params['pageno']) | ||||
| 
 | ||||
|     return params | ||||
| 
 | ||||
| 
 | ||||
| # get response from search-request | ||||
| def response(resp): | ||||
|     results = [] | ||||
| 
 | ||||
|     dom = html.fromstring(resp.text) | ||||
|     for result in dom.xpath('//div[@class="question-summary search-result"]'): | ||||
|         link = result.xpath('.//div[@class="result-link"]//a')[0] | ||||
| 
 | ||||
|     # parse results | ||||
|     for result in dom.xpath(results_xpath): | ||||
|         link = result.xpath(link_xpath)[0] | ||||
|         href = urljoin(url, link.attrib.get('href')) | ||||
|         title = escape(' '.join(link.xpath('.//text()'))) | ||||
|         content = escape(' '.join(result.xpath(result_xpath))) | ||||
|         results.append({'url': href, 'title': title, 'content': content}) | ||||
|         title = escape(' '.join(link.xpath(title_xpath))) | ||||
|         content = escape(' '.join(result.xpath(content_xpath))) | ||||
| 
 | ||||
|         # append result | ||||
|         results.append({'url': href,  | ||||
|                         'title': title,  | ||||
|                         'content': content}) | ||||
| 
 | ||||
|     # return results | ||||
|     return results | ||||
|  | ||||
| @ -90,7 +90,6 @@ engines: | ||||
| 
 | ||||
|   - name : stackoverflow | ||||
|     engine : stackoverflow | ||||
|     categories : it | ||||
|     shortcut : st | ||||
| 
 | ||||
|   - name : startpage | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user