[fix] indexing ++ url extraction
This commit is contained in:
		
							parent
							
								
									3854703d95
								
							
						
					
					
						commit
						01c2eeb8ff
					
				| @ -47,8 +47,11 @@ def response(resp): | |||||||
|     dom = html.fromstring(resp.text) |     dom = html.fromstring(resp.text) | ||||||
| 
 | 
 | ||||||
|     for result in dom.xpath(results_xpath): |     for result in dom.xpath(results_xpath): | ||||||
|         url = parse_url(extract_url(result.xpath(url_xpath), search_url)) |         try: | ||||||
|         title = extract_text(result.xpath(title_xpath)[0]) |             url = parse_url(extract_url(result.xpath(url_xpath), search_url)) | ||||||
|  |             title = extract_text(result.xpath(title_xpath)[0]) | ||||||
|  |         except: | ||||||
|  |             continue | ||||||
|         content = extract_text(result.xpath(content_xpath)[0]) |         content = extract_text(result.xpath(content_xpath)[0]) | ||||||
|         results.append({'url': url, 'title': title, 'content': content}) |         results.append({'url': url, 'title': title, 'content': content}) | ||||||
| 
 | 
 | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user