|
import json
|
|
import logging
|
|
import random
|
|
from typing import Dict, Any, Optional
|
|
|
|
import requests
|
|
|
|
# Module-level logger, registered under the fixed name "rp".
logger = logging.getLogger("rp")
|
|
|
|
# Pool of browser User-Agent strings; one is picked at random for each
# generated header set.
USER_AGENTS = [
    # --- Desktop: Chrome 91 (Windows, macOS, Linux) ---
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    # --- Desktop: Firefox 89 on Windows ---
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    # --- Desktop: Safari 14.1 on macOS ---
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
    # --- Desktop: Firefox 89 on Ubuntu ---
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    # --- Desktop: Edge on Windows ---
    # NOTE(review): this entry carries an "Edge/91..." token; Chromium-based
    # Edge normally sends "Chrome/... Safari/... Edg/..." - confirm whether
    # this form is intentional.
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59",
    # --- Mobile: Safari on iPhone and iPad (iOS 14.6) ---
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
    # --- Mobile: Firefox on Android 11 ---
    "Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/88.0",
]
|
|
|
|
|
|
def get_realistic_headers(
    additional_headers: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
    """Generate browser-like HTTP headers with randomized variations.

    A User-Agent is drawn at random from ``USER_AGENTS`` and an
    Accept-Language value from a small fixed set. ``Cache-Control: no-cache``
    is added with probability 0.3 and a Google ``Referer`` with probability
    0.2.

    Args:
        additional_headers: Optional headers applied last, so they override
            any generated header with the same key.

    Returns:
        A new dict of header names to values.
    """
    # Language ranges are comma-separated; each may carry its own ";q=" weight.
    accept_languages = (
        "en-US,en;q=0.5",
        "en-US,en;q=0.9",
        "en-GB,en;q=0.5",
        "en-US,en;q=0.5,fr;q=0.3",
    )

    # The order of the random draws below (User-Agent, Accept-Language,
    # Cache-Control, Referer) is kept stable so seeded runs stay reproducible.
    headers = {
        "User-Agent": random.choice(USER_AGENTS),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": random.choice(accept_languages),
        # NOTE(review): advertising "br" assumes the HTTP stack in use can
        # decode Brotli responses - confirm.
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }

    # Sometimes add Cache-Control
    if random.random() < 0.3:
        headers["Cache-Control"] = "no-cache"

    # Sometimes add Referer
    if random.random() < 0.2:
        headers["Referer"] = "https://www.google.com/"

    # Caller-supplied headers win over the generated ones.
    if additional_headers:
        headers.update(additional_headers)

    return headers
|
|
|
|
|
|
class SyncHTTPClient:
    """Blocking HTTP client that wraps ``requests`` and returns plain dicts.

    Requests are sent with browser-like headers from
    ``get_realistic_headers``. HTTP error statuses and transport failures are
    reported through the returned dict instead of being raised.
    """

    def __init__(self) -> None:
        # Headers applied to every request; extended via set_default_headers().
        self.default_headers: Dict[str, str] = {}

    @staticmethod
    def _body_for_log(data: Any, json_data: Optional[Dict[str, Any]]) -> str:
        """Render the outgoing request body as text for the request log."""
        if json_data is not None:
            return json.dumps(json_data)
        if data is None:
            return ""
        if isinstance(data, bytes):
            # Binary payloads must not abort the request just because they
            # are not valid UTF-8; undecodable bytes become U+FFFD.
            return data.decode("utf-8", errors="replace")
        return str(data)

    @staticmethod
    def _log_exchange(
        method: str,
        url: str,
        request_body: str,
        response_body: str,
        status_code: int,
        db_conn: Any,
    ) -> None:
        """Record one request/response pair in the database; warn on failure."""
        from rp.tools.database import log_http_request

        log_result = log_http_request(
            method,
            url,
            request_body,
            response_body,
            status_code,
            db_conn,
        )
        if log_result.get("status") != "success":
            logger.warning("Failed to log HTTP request: %s", log_result.get("error"))

    def request(
        self,
        method: str,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        data: Optional[bytes] = None,
        json_data: Optional[Dict[str, Any]] = None,
        timeout: float = 30.0,
        db_conn=None,
    ) -> Dict[str, Any]:
        """Send an HTTP request and describe the outcome as a dict.

        Header precedence, lowest to highest: generated realistic headers,
        ``self.default_headers``, then the per-call ``headers``.

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            url: Target URL.
            headers: Extra headers for this call only.
            data: Raw request body.
            json_data: Object sent as a JSON body.
            timeout: Timeout in seconds passed to ``requests``.
            db_conn: When truthy, the exchange is logged through
                ``rp.tools.database.log_http_request``.

        Returns:
            When a response was received (any status code): a dict with
            ``"status"``, ``"headers"``, ``"text"`` and ``"json"``, where
            ``"json"`` is a zero-argument callable. For error statuses it
            returns ``None`` if the body is empty.
            When the request itself failed: ``{"error": True, "exception":
            <message>, "status": 0, "text": ""}``.
        """
        combined_headers = {**self.default_headers, **(headers or {})}
        request_headers = get_realistic_headers(combined_headers or None)
        request_body_for_log = self._body_for_log(data, json_data)

        try:
            response = requests.request(
                method,
                url,
                headers=request_headers,
                data=data,
                json=json_data,
                timeout=timeout,
                allow_redirects=True,
            )
            response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
        except requests.exceptions.HTTPError as exc:
            # Bind the response to a local name: ``exc`` is unbound as soon as
            # this handler exits, so a closure over it would fail when the
            # caller later invokes the "json" callable.
            failed = exc.response
            # Compare against None explicitly: a Response is falsy for every
            # 4xx/5xx status, so a plain truth test would discard it.
            has_response = failed is not None
            response_data = failed.text if has_response else ""
            response_headers = dict(failed.headers) if has_response else {}
            status_code = failed.status_code if has_response else 0

            if db_conn:
                self._log_exchange(
                    method, url, request_body_for_log, response_data, status_code, db_conn
                )

            def error_json():
                return failed.json() if has_response and response_data else None

            return {
                "status": status_code,
                "headers": response_headers,
                "text": response_data,
                "json": error_json,
            }
        except requests.exceptions.RequestException as exc:
            logger.error("Request failed: %s", exc)
            return {"error": True, "exception": str(exc), "status": 0, "text": ""}

        response_data = response.text
        response_headers = dict(response.headers)

        if db_conn:
            self._log_exchange(
                method,
                url,
                request_body_for_log,
                response_data,
                response.status_code,
                db_conn,
            )

        return {
            "status": response.status_code,
            "headers": response_headers,
            "text": response_data,
            # Bound method, not a parsed value: callers invoke it on demand.
            "json": response.json,
        }

    def get(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        timeout: float = 30.0,
        db_conn=None,
    ) -> Dict[str, Any]:
        """Send a GET request; see ``request`` for the return format."""
        return self.request("GET", url, headers=headers, timeout=timeout, db_conn=db_conn)

    def post(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        data: Optional[bytes] = None,
        json_data: Optional[Dict[str, Any]] = None,
        timeout: float = 30.0,
        db_conn=None,
    ) -> Dict[str, Any]:
        """Send a POST request; see ``request`` for the return format."""
        return self.request(
            "POST",
            url,
            headers=headers,
            data=data,
            json_data=json_data,
            timeout=timeout,
            db_conn=db_conn,
        )

    def set_default_headers(self, headers: Dict[str, str]) -> None:
        """Merge ``headers`` into the defaults sent with every request."""
        self.default_headers.update(headers)
|
|
|
|
|
|
# Module-level client instance, created when this module is imported.
http_client = SyncHTTPClient()
|