diff --git a/CHANGELOG.md b/CHANGELOG.md index b7ff1f5..cb435d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,6 +23,14 @@ + + +## Version 1.27.0 - 2025-11-08 + +The project has been renamed to "Reetor's Guide to Modern Python" and now includes a comprehensive tutorial. The README has been significantly updated with installation instructions, a quick start guide, and information on modern Python features and aiohttp. + +**Changes:** 3 files, 2728 lines +**Languages:** Markdown (2726 lines), TOML (2 lines) ## Version 1.26.0 - 2025-11-08 diff --git a/pyproject.toml b/pyproject.toml index 60657aa..027e27b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "rp" -version = "1.26.0" +version = "1.27.0" description = "R python edition. The ultimate autonomous AI CLI." readme = "README.md" requires-python = ">=3.10" diff --git a/requirements.txt b/requirements.txt index 6c9c33b..5c12dde 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,5 +6,4 @@ gitpython==3.1.43 websockets==13.0.1 pytest==8.3.2 bcrypt==4.1.3 -python-slugify==8.0.4 -requests>=2.31.0 \ No newline at end of file +python-slugify==8.0.4 \ No newline at end of file diff --git a/rp/autonomous/mode.py b/rp/autonomous/mode.py index 24e1c8d..080c1c8 100644 --- a/rp/autonomous/mode.py +++ b/rp/autonomous/mode.py @@ -102,7 +102,9 @@ def process_response_autonomous(assistant, response): input_tokens = usage.get("prompt_tokens", 0) output_tokens = usage.get("completion_tokens", 0) assistant.usage_tracker.track_request(assistant.model, input_tokens, output_tokens) - cost = assistant.usage_tracker._calculate_cost(assistant.model, input_tokens, output_tokens) + cost = assistant.usage_tracker._calculate_cost( + assistant.model, input_tokens, output_tokens + ) total_cost = assistant.usage_tracker.session_usage["estimated_cost"] print(f"{Colors.YELLOW}💰 Cost: ${cost:.4f} | Total: ${total_cost:.4f}{Colors.RESET}") return process_response_autonomous(assistant, follow_up) diff --git a/rp/core/api.py b/rp/core/api.py index 9367fb5..d36cc3f 100644 --- a/rp/core/api.py +++ b/rp/core/api.py @@ -7,7 +7,9 @@ from rp.core.http_client import http_client logger = logging.getLogger("rp") -def call_api(messages, model, api_url, api_key, use_tools, tools_definition, verbose=False, db_conn=None): +def call_api( + messages, model, api_url, api_key, use_tools, tools_definition, verbose=False, db_conn=None +): try: messages = auto_slim_messages(messages, verbose=verbose) logger.debug(f"=== API CALL START ===") @@ -38,11 +40,14 @@ def call_api(messages, model, api_url, api_key, use_tools, tools_definition, ver if db_conn: from rp.tools.database import log_api_request + log_result = log_api_request(model, api_url, request_json, db_conn) if log_result.get("status") != "success": logger.warning(f"Failed to log API request: {log_result.get('error')}") logger.debug("Sending HTTP request...") - response = http_client.post(api_url, headers=headers, json_data=request_json, db_conn=db_conn) + response = http_client.post( + api_url, headers=headers, json_data=request_json, db_conn=db_conn + ) if response.get("error"): if "status" in response: logger.error(f"API HTTP Error: {response['status']} - {response.get('text', '')}") diff --git a/rp/core/assistant.py b/rp/core/assistant.py index 142b85d..508e93e 100644 --- a/rp/core/assistant.py +++ b/rp/core/assistant.py @@ -32,7 +32,7 @@ from rp.tools.agents import ( remove_agent, ) from rp.tools.command import kill_process, run_command, tail_process -from rp.tools.database import db_get, db_query, db_set, log_api_request +from rp.tools.database import db_get, db_query, db_set from rp.tools.filesystem import ( chdir, clear_edit_tracker, @@ -431,7 +431,7 @@ class Assistant: process_message(self, message) def run_autonomous(self): - + if self.args.message: task = self.args.message else: @@ -441,6 +441,7 @@ class Assistant: print("No task provided. Exiting.") return from rp.autonomous import run_autonomous_mode + run_autonomous_mode(self, task) def cleanup(self): @@ -467,7 +468,7 @@ class Assistant: def run(self): try: if self.args.autonomous: - self.run_autonomous() + self.run_autonomous() elif self.args.interactive or (not self.args.message and sys.stdin.isatty()): self.run_repl() else: diff --git a/rp/core/http_client.py b/rp/core/http_client.py index 435563a..c34425e 100644 --- a/rp/core/http_client.py +++ b/rp/core/http_client.py @@ -1,16 +1,60 @@ import json import logging +import random import time -import requests +import urllib.error +import urllib.parse +import urllib.request from typing import Dict, Any, Optional logger = logging.getLogger("rp") +# Realistic User-Agents and headers +USER_AGENTS = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15", + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59", + "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/88.0", +] + +def get_realistic_headers(additional_headers=None): + """Generate realistic HTTP headers with random User-Agent and variations.""" + accept_languages = [ + "en-US,en;q=0.5", + "en-US,en;q=0.9", + "en-GB,en;q=0.5", + "en-US,en;q=0.5;fr;q=0.3", + ] + headers = { + "User-Agent": random.choice(USER_AGENTS), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": random.choice(accept_languages), + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + } + # Sometimes add Cache-Control + if random.random() < 0.3: + headers["Cache-Control"] = "no-cache" + # Sometimes add Referer + if random.random() < 0.2: + headers["Referer"] = "https://www.google.com/" + if additional_headers: + headers.update(additional_headers) + return headers + class SyncHTTPClient: def __init__(self): - self.session = requests.Session() + self.default_headers = {} def request( self, @@ -22,58 +66,101 @@ class SyncHTTPClient: timeout: float = 30.0, db_conn=None, ) -> Dict[str, Any]: - """Make a sync HTTP request using requests with retry logic.""" + if headers is None: + headers = get_realistic_headers() + else: + headers = get_realistic_headers(headers) + + # Handle JSON data + if json_data is not None: + data = json.dumps(json_data).encode('utf-8') + headers["Content-Type"] = "application/json" + + # Prepare request body for logging + if json_data is not None: + request_body = json.dumps(json_data) + elif data is not None: + request_body = data.decode("utf-8") if isinstance(data, bytes) else str(data) + else: + request_body = "" + + """Make a sync HTTP request using urllib with retry logic.""" attempt = 0 start_time = time.time() while True: attempt += 1 try: - response = self.session.request( - method, - url, - headers=headers, - data=data, - json=json_data, - timeout=timeout, - ) - response.raise_for_status() # Raise an exception for bad status codes - # Prepare request body for logging - if json_data is not None: - request_body = json.dumps(json_data) - elif data is not None: - request_body = data.decode('utf-8') if isinstance(data, bytes) else str(data) - else: - request_body = "" - # Log the request + req = urllib.request.Request(url, data=data, headers=headers, method=method) + with urllib.request.urlopen(req, timeout=timeout) as response: + response_data = response.read().decode('utf-8') + response_headers = dict(response.headers) + + # Create json method + def json_method(): + return json.loads(response_data) + + # Log the request + if db_conn: + from rp.tools.database import log_http_request + + log_result = log_http_request( + method, url, request_body, response_data, response.status, db_conn + ) + if log_result.get("status") != "success": + logger.warning(f"Failed to log HTTP request: {log_result.get('error')}") + + return { + "status": response.status, + "headers": response_headers, + "text": response_data, + "json": json_method, + } + except urllib.error.HTTPError as e: + # For HTTP errors, still try to read the response + try: + response_data = e.read().decode('utf-8') + except: + response_data = "" + + # Log the request even on error if db_conn: from rp.tools.database import log_http_request - log_result = log_http_request(method, url, request_body, response.text, response.status_code, db_conn) + + log_result = log_http_request( + method, url, request_body, response_data, e.code, db_conn + ) if log_result.get("status") != "success": logger.warning(f"Failed to log HTTP request: {log_result.get('error')}") + return { - "status": response.status_code, - "headers": dict(response.headers), - "text": response.text, - "json": response.json, + "status": e.code, + "headers": dict(e.headers) if e.headers else {}, + "text": response_data, + "json": lambda: json.loads(response_data) if response_data else None, } - except requests.exceptions.Timeout: - elapsed = time.time() - start_time - elapsed_minutes = int(elapsed // 60) - elapsed_seconds = elapsed % 60 - duration_str = ( - f"{elapsed_minutes}m {elapsed_seconds:.1f}s" - if elapsed_minutes > 0 - else f"{elapsed_seconds:.1f}s" - ) - logger.warning( - f"Request timed out (attempt {attempt}, duration: {duration_str}). Retrying in {attempt} second(s)..." - ) - time.sleep(attempt) - except requests.exceptions.RequestException as e: - return {"error": True, "exception": str(e)} + except urllib.error.URLError as e: + if isinstance(e.reason, TimeoutError) or "timeout" in str(e.reason).lower(): + elapsed = time.time() - start_time + elapsed_minutes = int(elapsed // 60) + elapsed_seconds = elapsed % 60 + duration_str = ( + f"{elapsed_minutes}m {elapsed_seconds:.1f}s" + if elapsed_minutes > 0 + else f"{elapsed_seconds:.1f}s" + ) + logger.warning( + f"Request timed out (attempt {attempt}, duration: {duration_str}). Retrying in {attempt} second(s)..." + ) + time.sleep(attempt) + else: + return {"error": True, "exception": str(e)} def get( - self, url: str, headers: Optional[Dict[str, str]] = None, timeout: float = 30.0, db_conn=None + self, + url: str, + headers: Optional[Dict[str, str]] = None, + timeout: float = 30.0, + db_conn=None, ) -> Dict[str, Any]: return self.request("GET", url, headers=headers, timeout=timeout, db_conn=db_conn) @@ -87,11 +174,17 @@ class SyncHTTPClient: db_conn=None, ) -> Dict[str, Any]: return self.request( - "POST", url, headers=headers, data=data, json_data=json_data, timeout=timeout, db_conn=db_conn + "POST", + url, + headers=headers, + data=data, + json_data=json_data, + timeout=timeout, + db_conn=db_conn, ) def set_default_headers(self, headers: Dict[str, str]): - self.session.headers.update(headers) + self.default_headers.update(headers) http_client = SyncHTTPClient() diff --git a/rp/editor.py b/rp/editor.py index 888309e..13769e2 100644 --- a/rp/editor.py +++ b/rp/editor.py @@ -350,7 +350,9 @@ class RPEditor: height, _ = self.stdscr.getmaxyx() page_size = height - 2 self.cursor_y = min(len(self.lines) - 1, self.cursor_y + page_size) - self.scroll_y = min(max(0, len(self.lines) - height + 1), self.scroll_y + page_size) + self.scroll_y = min( + max(0, len(self.lines) - height + 1), self.scroll_y + page_size + ) self.prev_key = key except Exception: pass diff --git a/rp/tools/__init__.py b/rp/tools/__init__.py index f6c8534..c0c3925 100644 --- a/rp/tools/__init__.py +++ b/rp/tools/__init__.py @@ -17,7 +17,27 @@ from rp.tools.editor import ( open_editor, ) from rp.tools.filesystem import ( - get_uid, read_specific_lines, replace_specific_line, insert_line_at_position, delete_specific_line, read_file, write_file, list_directory, mkdir, chdir, getpwd, index_source_directory, search_replace, get_editor, close_editor, open_editor, editor_insert_text, editor_replace_text, display_edit_summary, display_edit_timeline, clear_edit_tracker + get_uid, + read_specific_lines, + replace_specific_line, + insert_line_at_position, + delete_specific_line, + read_file, + write_file, + list_directory, + mkdir, + chdir, + getpwd, + index_source_directory, + search_replace, + get_editor, + close_editor, + open_editor, + editor_insert_text, + editor_replace_text, + display_edit_summary, + display_edit_timeline, + clear_edit_tracker, ) from rp.tools.lsp import get_diagnostics from rp.tools.memory import ( @@ -32,7 +52,7 @@ from rp.tools.memory import ( from rp.tools.patch import apply_patch, create_diff from rp.tools.python_exec import python_exec from rp.tools.search import glob_files, grep -from rp.tools.web import http_fetch, web_search, web_search_news +from rp.tools.web import http_fetch, web_search, web_search_news, download_to_file # Aliases for user-requested tool names view = read_file @@ -61,6 +81,7 @@ __all__ = [ "db_set", "delete_knowledge_entry", "delete_specific_line", + "download_to_file", "diagnostics", "display_edit_summary", "display_edit_timeline", @@ -107,4 +128,3 @@ __all__ = [ "write", "write_file", ] - diff --git a/rp/tools/database.py b/rp/tools/database.py index c8ab43a..cdfe78a 100644 --- a/rp/tools/database.py +++ b/rp/tools/database.py @@ -76,6 +76,7 @@ def db_query(query, db_conn): except Exception as e: return {"status": "error", "error": str(e)} + def log_api_request(model, api_url, request_payload, db_conn): """Log an API request to the database. @@ -101,6 +102,7 @@ def log_api_request(model, api_url, request_payload, db_conn): except Exception as e: return {"status": "error", "error": str(e)} + def log_http_request(method, url, request_body, response_body, status_code, db_conn): """Log an HTTP request to the database. diff --git a/rp/tools/filesystem.py b/rp/tools/filesystem.py index d26832c..d66afe1 100644 --- a/rp/tools/filesystem.py +++ b/rp/tools/filesystem.py @@ -16,7 +16,9 @@ def get_uid(): return _id -def read_specific_lines(filepath: str, start_line: int, end_line: Optional[int] = None, db_conn: Optional[Any] = None) -> dict: +def read_specific_lines( + filepath: str, start_line: int, end_line: Optional[int] = None, db_conn: Optional[Any] = None +) -> dict: """ Read specific lines or a range of lines from a file. @@ -40,32 +42,45 @@ def read_specific_lines(filepath: str, start_line: int, end_line: Optional[int] Examples: # Read line 5 only result = read_specific_lines("example.txt", 5) - + # Read lines 10 to 20 result = read_specific_lines("example.txt", 10, 20) """ try: path = os.path.expanduser(filepath) - with open(path, 'r') as file: + with open(path, "r") as file: lines = file.readlines() total_lines = len(lines) if start_line < 1 or start_line > total_lines: - return {"status": "error", "error": f"Start line {start_line} is out of range. File has {total_lines} lines."} + return { + "status": "error", + "error": f"Start line {start_line} is out of range. File has {total_lines} lines.", + } if end_line is None: end_line = start_line if end_line < start_line or end_line > total_lines: - return {"status": "error", "error": f"End line {end_line} is out of range. File has {total_lines} lines."} - selected_lines = lines[start_line - 1:end_line] - content = ''.join(selected_lines) + return { + "status": "error", + "error": f"End line {end_line} is out of range. File has {total_lines} lines.", + } + selected_lines = lines[start_line - 1 : end_line] + content = "".join(selected_lines) if db_conn: from rp.tools.database import db_set + db_set("read:" + path, "true", db_conn) return {"status": "success", "content": content} except Exception as e: return {"status": "error", "error": str(e)} -def replace_specific_line(filepath: str, line_number: int, new_content: str, db_conn: Optional[Any] = None, show_diff: bool = True) -> dict: +def replace_specific_line( + filepath: str, + line_number: int, + new_content: str, + db_conn: Optional[Any] = None, + show_diff: bool = True, +) -> dict: """ Replace the content of a specific line in a file. @@ -97,18 +112,27 @@ def replace_specific_line(filepath: str, line_number: int, new_content: str, db_ return {"status": "error", "error": "File does not exist"} if db_conn: from rp.tools.database import db_get + read_status = db_get("read:" + path, db_conn) if read_status.get("status") != "success" or read_status.get("value") != "true": - return {"status": "error", "error": "File must be read before writing. Please read the file first."} - with open(path, 'r') as file: + return { + "status": "error", + "error": "File must be read before writing. Please read the file first.", + } + with open(path, "r") as file: lines = file.readlines() total_lines = len(lines) if line_number < 1 or line_number > total_lines: - return {"status": "error", "error": f"Line number {line_number} is out of range. File has {total_lines} lines."} - old_content = ''.join(lines) - lines[line_number - 1] = new_content + '\n' if not new_content.endswith('\n') else new_content - new_full_content = ''.join(lines) - with open(path, 'w') as file: + return { + "status": "error", + "error": f"Line number {line_number} is out of range. File has {total_lines} lines.", + } + old_content = "".join(lines) + lines[line_number - 1] = ( + new_content + "\n" if not new_content.endswith("\n") else new_content + ) + new_full_content = "".join(lines) + with open(path, "w") as file: file.writelines(lines) if show_diff: diff_result = display_content_diff(old_content, new_full_content, filepath) @@ -119,7 +143,13 @@ def replace_specific_line(filepath: str, line_number: int, new_content: str, db_ return {"status": "error", "error": str(e)} -def insert_line_at_position(filepath: str, line_number: int, new_content: str, db_conn: Optional[Any] = None, show_diff: bool = True) -> dict: +def insert_line_at_position( + filepath: str, + line_number: int, + new_content: str, + db_conn: Optional[Any] = None, + show_diff: bool = True, +) -> dict: """ Insert a new line at a specific position in a file. @@ -148,27 +178,38 @@ def insert_line_at_position(filepath: str, line_number: int, new_content: str, d return {"status": "error", "error": "File does not exist"} if db_conn: from rp.tools.database import db_get + read_status = db_get("read:" + path, db_conn) if read_status.get("status") != "success" or read_status.get("value") != "true": - return {"status": "error", "error": "File must be read before writing. Please read the file first."} - with open(path, 'r') as file: + return { + "status": "error", + "error": "File must be read before writing. Please read the file first.", + } + with open(path, "r") as file: lines = file.readlines() - old_content = ''.join(lines) + old_content = "".join(lines) insert_index = min(line_number - 1, len(lines)) - lines.insert(insert_index, new_content + '\n' if not new_content.endswith('\n') else new_content) - new_full_content = ''.join(lines) - with open(path, 'w') as file: + lines.insert( + insert_index, new_content + "\n" if not new_content.endswith("\n") else new_content + ) + new_full_content = "".join(lines) + with open(path, "w") as file: file.writelines(lines) if show_diff: diff_result = display_content_diff(old_content, new_full_content, filepath) if diff_result["status"] == "success": print(diff_result["visual_diff"]) - return {"status": "success", "message": f"Inserted line at position {line_number} in {path}"} + return { + "status": "success", + "message": f"Inserted line at position {line_number} in {path}", + } except Exception as e: return {"status": "error", "error": str(e)} -def delete_specific_line(filepath: str, line_number: int, db_conn: Optional[Any] = None, show_diff: bool = True) -> dict: +def delete_specific_line( + filepath: str, line_number: int, db_conn: Optional[Any] = None, show_diff: bool = True +) -> dict: """ Delete a specific line from a file. @@ -195,18 +236,25 @@ def delete_specific_line(filepath: str, line_number: int, db_conn: Optional[Any] return {"status": "error", "error": "File does not exist"} if db_conn: from rp.tools.database import db_get + read_status = db_get("read:" + path, db_conn) if read_status.get("status") != "success" or read_status.get("value") != "true": - return {"status": "error", "error": "File must be read before writing. Please read the file first."} - with open(path, 'r') as file: + return { + "status": "error", + "error": "File must be read before writing. Please read the file first.", + } + with open(path, "r") as file: lines = file.readlines() total_lines = len(lines) if line_number < 1 or line_number > total_lines: - return {"status": "error", "error": f"Line number {line_number} is out of range. File has {total_lines} lines."} - old_content = ''.join(lines) + return { + "status": "error", + "error": f"Line number {line_number} is out of range. File has {total_lines} lines.", + } + old_content = "".join(lines) del lines[line_number - 1] - new_full_content = ''.join(lines) - with open(path, 'w') as file: + new_full_content = "".join(lines) + with open(path, "w") as file: file.writelines(lines) if show_diff: diff_result = display_content_diff(old_content, new_full_content, filepath) @@ -582,4 +630,3 @@ def clear_edit_tracker(): clear_tracker() return {"status": "success", "message": "Edit tracker cleared"} - diff --git a/rp/tools/web.py b/rp/tools/web.py index fdb0620..eb14cee 100644 --- a/rp/tools/web.py +++ b/rp/tools/web.py @@ -1,4 +1,6 @@ +import imghdr import json +import random import urllib.error import urllib.parse import urllib.request @@ -8,6 +10,45 @@ import json import urllib.parse import urllib.request +# Realistic User-Agents +USER_AGENTS = [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15", + "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/91.0.864.59", + "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1", + "Mozilla/5.0 (Android 11; Mobile; rv:68.0) Gecko/68.0 Firefox/88.0", +] + +def get_default_headers(): + """Get default realistic headers with variations.""" + accept_languages = [ + "en-US,en;q=0.5", + "en-US,en;q=0.9", + "en-GB,en;q=0.5", + "en-US,en;q=0.5;fr;q=0.3", + ] + headers = { + "User-Agent": random.choice(USER_AGENTS), + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", + "Accept-Language": random.choice(accept_languages), + "Accept-Encoding": "gzip, deflate, br", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + } + # Sometimes add Cache-Control + if random.random() < 0.3: + headers["Cache-Control"] = "no-cache" + # Sometimes add Referer + if random.random() < 0.2: + headers["Referer"] = "https://www.google.com/" + return headers + def http_fetch(url, headers=None): """Fetch content from an HTTP URL. @@ -21,21 +62,63 @@ def http_fetch(url, headers=None): """ try: request = urllib.request.Request(url) + default_headers = get_default_headers() if headers: - for header_key, header_value in headers.items(): - request.add_header(header_key, header_value) + default_headers.update(headers) + for header_key, header_value in default_headers.items(): + request.add_header(header_key, header_value) with urllib.request.urlopen(request) as response: content = response.read().decode("utf-8") return {"status": "success", "content": content[:10000]} except Exception as exception: return {"status": "error", "error": str(exception)} +def download_to_file(source_url, destination_path, headers=None): + """Download content from an HTTP URL to a file. + + Args: + source_url: The URL to download from. + destination_path: The path to save the downloaded content. + headers: Optional HTTP headers. + + Returns: + Dict with status, downloaded_from, and downloaded_to on success, or status and error on failure. + + This function can be used for binary files like images as well. + """ + try: + request = urllib.request.Request(source_url) + default_headers = get_default_headers() + if headers: + default_headers.update(headers) + for header_key, header_value in default_headers.items(): + request.add_header(header_key, header_value) + with urllib.request.urlopen(request) as response: + content = response.read() + with open(destination_path, 'wb') as file: + file.write(content) + content_type = response.headers.get('Content-Type', '').lower() + if content_type.startswith('image/'): + img_type = imghdr.what(destination_path) + if img_type is None: + return {"status": "success", "downloaded_from": source_url, "downloaded_to": destination_path, "is_valid_image": False, "warning": "Downloaded content is not a valid image, consider finding a different source."} + else: + return {"status": "success", "downloaded_from": source_url, "downloaded_to": destination_path, "is_valid_image": True} + else: + return {"status": "success", "downloaded_from": source_url, "downloaded_to": destination_path} + except Exception as exception: + return {"status": "error", "error": str(exception)} + def _perform_search(base_url, query, params=None): try: encoded_query = urllib.parse.quote(query) full_url = f"{base_url}?query={encoded_query}" - with urllib.request.urlopen(full_url) as response: + request = urllib.request.Request(full_url) + default_headers = get_default_headers() + for header_key, header_value in default_headers.items(): + request.add_header(header_key, header_value) + with urllib.request.urlopen(request) as response: content = response.read().decode("utf-8") return {"status": "success", "content": json.loads(content)} except Exception as exception: @@ -66,4 +149,3 @@ def web_search_news(query): """ base_url = "https://search.molodetz.nl/search" return _perform_search(base_url, query) -