68 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			68 lines
		
	
	
		
			1.9 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								# SPDX-License-Identifier: AGPL-3.0-or-later
							 | 
						||
| 
								 | 
							
								"""pypi.org
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								"""
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from urllib.parse import urlencode
							 | 
						||
| 
								 | 
							
								from dateutil import parser
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								from lxml import html
							 | 
						||
| 
								 | 
							
								from searx.utils import (
							 | 
						||
| 
								 | 
							
								    eval_xpath_getindex,
							 | 
						||
| 
								 | 
							
								    eval_xpath_list,
							 | 
						||
| 
								 | 
							
								    extract_text,
							 | 
						||
| 
								 | 
							
								)
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# about
							 | 
						||
| 
								 | 
							
								about = {
							 | 
						||
| 
								 | 
							
								    "website": "https://pypi.org",
							 | 
						||
| 
								 | 
							
								    "wikidata_id": "Q2984686",
							 | 
						||
| 
								 | 
							
								    "official_api_documentation": "https://warehouse.readthedocs.io/api-reference/index.html",
							 | 
						||
| 
								 | 
							
								    "use_official_api": False,
							 | 
						||
| 
								 | 
							
								    "require_api_key": False,
							 | 
						||
| 
								 | 
							
								    "results": "HTML",
							 | 
						||
| 
								 | 
							
								}
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								categories = ['it', 'packages']
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# engine dependent config
							 | 
						||
| 
								 | 
							
								first_page_num = 1
							 | 
						||
| 
								 | 
							
								base_url = "https://pypi.org"
							 | 
						||
| 
								 | 
							
								search_url = base_url + '/search/?{query}'
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def request(query, params):
							 | 
						||
| 
								 | 
							
								    args = {
							 | 
						||
| 
								 | 
							
								        "q": query,
							 | 
						||
| 
								 | 
							
								        "page": params['pageno'],
							 | 
						||
| 
								 | 
							
								    }
							 | 
						||
| 
								 | 
							
								    params['url'] = search_url.format(query=urlencode(args))
							 | 
						||
| 
								 | 
							
								    return params
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								def response(resp):
							 | 
						||
| 
								 | 
							
								    results = []
							 | 
						||
| 
								 | 
							
								    dom = html.fromstring(resp.text)
							 | 
						||
| 
								 | 
							
								    for entry in eval_xpath_list(dom, '/html/body/main/div/div/div/form/div/ul/li/a[@class="package-snippet"]'):
							 | 
						||
| 
								 | 
							
								        url = base_url + extract_text(eval_xpath_getindex(entry, './@href', 0))  # type: ignore
							 | 
						||
| 
								 | 
							
								        title = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__name"]', 0))
							 | 
						||
| 
								 | 
							
								        version = extract_text(eval_xpath_getindex(entry, './h3/span[@class="package-snippet__version"]', 0))
							 | 
						||
| 
								 | 
							
								        created_at = extract_text(
							 | 
						||
| 
								 | 
							
								            eval_xpath_getindex(entry, './h3/span[@class="package-snippet__created"]/time/@datetime', 0)
							 | 
						||
| 
								 | 
							
								        )
							 | 
						||
| 
								 | 
							
								        content = extract_text(eval_xpath_getindex(entry, './p', 0))
							 | 
						||
| 
								 | 
							
								        results.append(
							 | 
						||
| 
								 | 
							
								            {
							 | 
						||
| 
								 | 
							
								                "template": "packages.html",
							 | 
						||
| 
								 | 
							
								                "url": url,
							 | 
						||
| 
								 | 
							
								                "title": title,
							 | 
						||
| 
								 | 
							
								                'package_name': title,
							 | 
						||
| 
								 | 
							
								                "content": content,
							 | 
						||
| 
								 | 
							
								                "version": version,
							 | 
						||
| 
								 | 
							
								                'publishedDate': parser.parse(created_at),  # type: ignore
							 | 
						||
| 
								 | 
							
								            }
							 | 
						||
| 
								 | 
							
								        )
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								    return results
							 |