import json
import logging
import random
from typing import Dict, Any, Optional
import requests
# Logger for this module; the "rp" name is kept as-is so existing logging
# configuration keyed on it continues to apply.
logger = logging.getLogger("rp")

# Pool of browser User-Agent strings; one is picked at random per request.
# Every entry must be a string that a real browser actually sends: a token mix
# that matches no browser defeats the purpose of the pool.
USER_AGENTS = [
    # Chrome 91 on Windows / macOS / Linux
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    # Firefox 89 on Windows
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0",
    # Safari 14.1 on macOS
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
    # Firefox 89 on Ubuntu
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0",
    # Edge 91 (Chromium): carries the Chrome/Safari tokens and uses the "Edg/"
    # product token; "Edge/" belongs to legacy EdgeHTML and never had a 91.x
    # version.
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 Edg/91.0.864.59",
    # Safari on iPhone / iPad (iOS 14.6)
    "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
    "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1",
    # Firefox 88 on Android: rv:, Gecko/ and Firefox/ all carry the same version.
    "Mozilla/5.0 (Android 11; Mobile; rv:88.0) Gecko/88.0 Firefox/88.0",
]
def get_realistic_headers(
    additional_headers: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
    """Build a browser-like set of HTTP request headers.

    A User-Agent is drawn at random from ``USER_AGENTS`` and an Accept-Language
    value from a small fixed list. ``Cache-Control: no-cache`` is added with
    probability 0.3 and a Google ``Referer`` with probability 0.2.

    Args:
        additional_headers: Optional headers applied last, so they override
            any generated header with the same key. ``None`` or an empty
            mapping adds nothing.

    Returns:
        A new dict of header names to values.
    """
    accept_languages = [
        "en-US,en;q=0.5",
        "en-US,en;q=0.9",
        "en-GB,en;q=0.5",
        # Language ranges are separated by commas; ";" only introduces the
        # q-weight of the range it follows.
        "en-US,en;q=0.5,fr;q=0.3",
    ]

    headers = {
        "User-Agent": random.choice(USER_AGENTS),
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
        "Accept-Language": random.choice(accept_languages),
        "Accept-Encoding": "gzip, deflate, br",
        "DNT": "1",
        "Connection": "keep-alive",
        "Upgrade-Insecure-Requests": "1",
    }

    # Optional headers are included only some of the time so that successive
    # requests do not all carry an identical header set.
    if random.random() < 0.3:
        headers["Cache-Control"] = "no-cache"
    if random.random() < 0.2:
        headers["Referer"] = "https://www.google.com/"

    # Caller-supplied headers are applied last and therefore take precedence.
    if additional_headers:
        headers.update(additional_headers)

    return headers
class SyncHTTPClient:
    """Blocking HTTP client built on ``requests`` with browser-like headers.

    Every request is sent with headers produced by ``get_realistic_headers``.
    Headers registered through ``set_default_headers`` are merged into each
    request; headers passed to an individual call override them.

    Requests never raise ``requests`` exceptions to the caller. The outcome is
    always returned as a dict:

    * HTTP response received (any status code):
      ``{"status": int, "headers": dict, "text": str, "json": callable}``.
      ``"json"`` is a zero-argument callable that parses the body on demand.
    * No response (connection error, timeout, ...):
      ``{"error": True, "exception": str, "status": 0, "text": ""}``.
    """

    def __init__(self):
        # Headers merged into every request; see set_default_headers().
        self.default_headers = {}

    @staticmethod
    def _body_for_log(data, json_data) -> str:
        """Return a text rendering of the request body for the request log."""
        if json_data is not None:
            return json.dumps(json_data)
        if data is None:
            return ""
        if isinstance(data, bytes):
            # The body may be arbitrary binary; a logging-only conversion must
            # not abort the request with UnicodeDecodeError.
            return data.decode("utf-8", errors="replace")
        return str(data)

    @staticmethod
    def _log_exchange(method, url, request_body, response_text, status_code, db_conn) -> None:
        """Record the request/response pair through ``db_conn`` when one is given."""
        if not db_conn:
            return
        # Imported lazily, as in the original code, so the database module is
        # only loaded when logging is actually requested.
        from rp.tools.database import log_http_request

        log_result = log_http_request(
            method,
            url,
            request_body,
            response_text,
            status_code,
            db_conn,
        )
        if log_result.get("status") != "success":
            logger.warning("Failed to log HTTP request: %s", log_result.get("error"))

    def request(
        self,
        method: str,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        data: Optional[bytes] = None,
        json_data: Optional[Dict[str, Any]] = None,
        timeout: float = 30.0,
        db_conn=None,
    ) -> Dict[str, Any]:
        """Send an HTTP request and return a result dict (see class docstring).

        Args:
            method: HTTP method name, e.g. ``"GET"``.
            url: Target URL.
            headers: Per-call headers; override default and generated headers.
            data: Raw request body.
            json_data: Object sent as a JSON body; also used for the log text
                in preference to ``data``.
            timeout: Timeout in seconds passed to ``requests``.
            db_conn: Optional connection handed to ``log_http_request`` to
                record the exchange. Falsy means no logging.

        Returns:
            The result dict described in the class docstring. For 4xx/5xx
            responses the ``"json"`` callable returns ``None`` when the body
            is empty.
        """
        # Instance defaults first, per-call headers on top; the combined
        # mapping then overrides the generated browser headers.
        caller_headers = {**self.default_headers, **(headers or {})}
        request_headers = get_realistic_headers(caller_headers)
        request_body_for_log = self._body_for_log(data, json_data)

        try:
            response = requests.request(
                method,
                url,
                headers=request_headers,
                data=data,
                json=json_data,
                timeout=timeout,
                allow_redirects=True,
            )
            response.raise_for_status()  # Raises HTTPError for 4xx / 5xx
        except requests.exceptions.HTTPError as exc:
            # Bind the response to a local: ``exc`` is unbound when this
            # clause exits, so a closure over it would fail when called later.
            failed = exc.response
            # Compare against None explicitly: a Response is falsy whenever
            # its status is 4xx/5xx, which is exactly the case handled here.
            has_response = failed is not None
            response_data = failed.text if has_response else ""
            response_headers = dict(failed.headers) if has_response else {}
            status_code = failed.status_code if has_response else 0

            self._log_exchange(
                method, url, request_body_for_log, response_data, status_code, db_conn
            )

            def load_error_json():
                if has_response and response_data:
                    return failed.json()
                return None

            return {
                "status": status_code,
                "headers": response_headers,
                "text": response_data,
                "json": load_error_json,
            }
        except requests.exceptions.RequestException as exc:
            logger.error("Request failed: %s", exc)
            return {"error": True, "exception": str(exc), "status": 0, "text": ""}

        response_data = response.text
        self._log_exchange(
            method, url, request_body_for_log, response_data, response.status_code, db_conn
        )
        return {
            "status": response.status_code,
            "headers": dict(response.headers),
            "text": response_data,
            "json": response.json,
        }

    def get(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        timeout: float = 30.0,
        db_conn=None,
    ) -> Dict[str, Any]:
        """Send a GET request; see ``request`` for arguments and result."""
        return self.request("GET", url, headers=headers, timeout=timeout, db_conn=db_conn)

    def post(
        self,
        url: str,
        headers: Optional[Dict[str, str]] = None,
        data: Optional[bytes] = None,
        json_data: Optional[Dict[str, Any]] = None,
        timeout: float = 30.0,
        db_conn=None,
    ) -> Dict[str, Any]:
        """Send a POST request; see ``request`` for arguments and result."""
        return self.request(
            "POST",
            url,
            headers=headers,
            data=data,
            json_data=json_data,
            timeout=timeout,
            db_conn=db_conn,
        )

    def set_default_headers(self, headers: Dict[str, str]):
        """Add or overwrite headers that are sent with every later request."""
        self.default_headers.update(headers)
# Module-level SyncHTTPClient instance, created when this module is imported.
http_client = SyncHTTPClient()