244 lines
8.2 KiB
Python
244 lines
8.2 KiB
Python
|
import asyncio
|
||
|
import websockets
|
||
|
import json
|
||
|
import base64
|
||
|
from typing import Optional, Dict, Any
|
||
|
|
||
|
class BrowserClient:
    """Client for controlling remote browser instances via WebSocket.

    Commands are sent as JSON objects carrying a ``command`` name and a
    generated ``request_id``. A background task reads incoming messages and
    resolves the future registered under the matching ``request_id``;
    messages of type ``event`` are printed.
    """

    def __init__(self, uri: str = "ws://localhost:8765", command_timeout: float = 10.0):
        """Store connection settings; no network activity happens here.

        Args:
            uri: WebSocket URI of the browser server.
            command_timeout: Seconds to wait for the response to a command.
        """
        self.uri = uri
        self.command_timeout = command_timeout
        self.websocket = None
        self.connection_id = None
        self.request_counter = 0
        # Maps request_id -> Future that _response_handler resolves.
        self.pending_responses = {}
        # Strong reference to the reader task: the event loop only keeps weak
        # references to tasks, so an unreferenced task may be collected.
        self._handler_task = None

    async def connect(self):
        """Connect to the browser server and start the response reader."""
        self.websocket = await websockets.connect(self.uri)

        # The first message received is read as the connection confirmation.
        response = await self.websocket.recv()
        data = json.loads(response)
        self.connection_id = data.get("connection_id")
        print(f"Connected to browser: {self.connection_id}")

        # Start response handler
        self._handler_task = asyncio.create_task(self._response_handler())

    async def _response_handler(self):
        """Handle responses and events from the server.

        Runs until the connection ends. Whatever the reason for stopping,
        every still-pending request is failed with ``ConnectionError`` so
        callers do not have to wait for their timeout.
        """
        try:
            async for message in self.websocket:
                try:
                    data = json.loads(message)
                except ValueError:
                    # One malformed frame must not kill the reader.
                    print("Ignoring malformed message")
                    continue
                if not isinstance(data, dict):
                    continue

                message_type = data.get("type")
                if message_type == "response":
                    future = self.pending_responses.get(data.get("request_id"))
                    # The future may already be done (timed out, duplicate
                    # response); set_result would raise InvalidStateError.
                    if future is not None and not future.done():
                        future.set_result(data)
                elif message_type == "event":
                    print(f"Event: {data.get('event')} - {data.get('data')}")
        except websockets.exceptions.ConnectionClosed:
            print("Connection closed")
        finally:
            for future in self.pending_responses.values():
                if not future.done():
                    future.set_exception(ConnectionError("Connection closed"))

    async def _send_command(self, command: str, **kwargs) -> Dict[str, Any]:
        """Send a command and wait for its response.

        Args:
            command: Command name understood by the server.
            **kwargs: Extra fields merged into the JSON payload.

        Returns:
            The ``result`` field of the response (``None`` if absent).

        Raises:
            RuntimeError: If not connected, or the response has an ``error``.
            TimeoutError: If no response arrives within ``command_timeout``.
        """
        if self.websocket is None:
            raise RuntimeError("Not connected; call connect() first")

        self.request_counter += 1
        request_id = f"req_{self.request_counter}"

        # Register the future before sending so a fast reply cannot be missed.
        future = asyncio.get_running_loop().create_future()
        self.pending_responses[request_id] = future
        try:
            await self.websocket.send(json.dumps({
                "command": command,
                "request_id": request_id,
                **kwargs,
            }))
            response = await asyncio.wait_for(future, timeout=self.command_timeout)
        except asyncio.TimeoutError:
            raise TimeoutError("Command timeout") from None
        finally:
            # Always unregister, including when send() itself fails.
            self.pending_responses.pop(request_id, None)

        if response.get("error"):
            raise RuntimeError(response["error"])
        return response.get("result")

    async def navigate(self, url: str) -> Dict[str, Any]:
        """Navigate to a URL."""
        return await self._send_command("navigate", url=url)

    async def execute_js(self, script: str) -> Any:
        """Execute JavaScript and return result."""
        return await self._send_command("execute_js", script=script)

    async def go_back(self):
        """Go back in history."""
        return await self._send_command("go_back")

    async def go_forward(self):
        """Go forward in history."""
        return await self._send_command("go_forward")

    async def reload(self):
        """Reload the current page."""
        return await self._send_command("reload")

    async def stop(self):
        """Stop loading."""
        return await self._send_command("stop")

    async def get_info(self) -> Dict[str, Any]:
        """Get browser information."""
        return await self._send_command("get_info")

    async def screenshot(self, save_path: Optional[str] = None) -> Optional[str]:
        """Take a screenshot.

        Args:
            save_path: If given and the server returned image data, the
                decoded bytes are written to this path.

        Returns:
            ``save_path`` when a file was written, otherwise the base64
            string from the response (``None`` if the server sent none).
        """
        result = await self._send_command("screenshot")
        screenshot_b64 = result.get("screenshot") if isinstance(result, dict) else None

        if save_path and screenshot_b64:
            with open(save_path, "wb") as f:
                f.write(base64.b64decode(screenshot_b64))
            return save_path

        return screenshot_b64

    async def set_html(self, html: str, base_uri: Optional[str] = None):
        """Load custom HTML content."""
        return await self._send_command("set_html", html=html, base_uri=base_uri)

    async def close(self):
        """Close the connection and stop the response reader."""
        task, self._handler_task = self._handler_task, None
        try:
            if self.websocket:
                await self.websocket.close()
        finally:
            if task is not None and not task.done():
                task.cancel()
|
||
|
|
||
|
|
||
|
# Example automation functions
|
||
|
|
||
|
async def scrape_page_title(client: "BrowserClient", url: str, load_wait: float = 2.0) -> str:
    """Example: Scrape page title.

    Args:
        client: Connected client providing ``navigate`` and ``execute_js``.
        url: Page to open.
        load_wait: Seconds to sleep after navigating, giving the page time
            to load before the title is read.

    Returns:
        Whatever evaluating ``document.title`` yields through the client.
    """
    await client.navigate(url)
    await asyncio.sleep(load_wait)  # Wait for page load
    return await client.execute_js("document.title")
|
||
|
|
||
|
async def fill_and_submit_form(
    client: "BrowserClient",
    url: str,
    fields: Optional[Dict[str, str]] = None,
    submit_selector: str = "#submit-button",
    load_wait: float = 2.0,
):
    """Example: Fill and submit a form.

    Args:
        client: Connected client providing ``navigate`` and ``execute_js``.
        url: Page containing the form.
        fields: Mapping of CSS selector -> value to assign. Defaults to the
            demo values ``#username`` / ``#email``.
        submit_selector: CSS selector of the element to click.
        load_wait: Seconds to sleep after navigating, before touching the page.
    """
    if fields is None:
        fields = {"#username": "testuser", "#email": "test@example.com"}

    await client.navigate(url)
    await asyncio.sleep(load_wait)

    # Fill form fields. json.dumps renders each selector/value as a quoted,
    # escaped string literal, so quotes in the input cannot break the script.
    if fields:
        fill_script = "\n".join(
            f"document.querySelector({json.dumps(selector)}).value = {json.dumps(value)};"
            for selector, value in fields.items()
        )
        await client.execute_js(fill_script)

    # Submit form
    await client.execute_js(f"document.querySelector({json.dumps(submit_selector)}).click()")
|
||
|
|
||
|
async def extract_all_links(client: "BrowserClient", url: str, load_wait: float = 2.0) -> list:
    """Example: Extract all links from a page.

    Args:
        client: Connected client providing ``navigate`` and ``execute_js``.
        url: Page to open.
        load_wait: Seconds to sleep after navigating, before querying links.

    Returns:
        The value produced by the script (built as objects with ``text`` and
        ``href`` per ``a[href]`` element), or an empty list when the client
        returns nothing.
    """
    await client.navigate(url)
    await asyncio.sleep(load_wait)

    links = await client.execute_js("""
        Array.from(document.querySelectorAll('a[href]')).map(a => ({
            text: a.textContent.trim(),
            href: a.href
        }))
    """)
    # Callers treat the result as a list (len(), slicing); never hand back None.
    return links or []
|
||
|
|
||
|
async def monitor_page_changes(
    client: "BrowserClient",
    url: str,
    selector: str,
    interval: int = 5,
    load_wait: float = 2.0,
):
    """Example: Monitor a page element for changes.

    Polls the text content of the first element matching ``selector`` and
    prints a line whenever it differs from the previously seen value. The
    loop ends when a poll raises an exception, which is printed.

    Args:
        client: Connected client providing ``navigate`` and ``execute_js``.
        url: Page to open.
        selector: CSS selector of the element to watch.
        interval: Seconds to sleep between polls.
        load_wait: Seconds to sleep after navigating, before the first poll.
    """
    await client.navigate(url)
    await asyncio.sleep(load_wait)

    # Built once: json.dumps emits a quoted, escaped string literal, so a
    # selector containing quotes (e.g. a[title='x']) stays valid JavaScript.
    script = f"document.querySelector({json.dumps(selector)})?.textContent"

    last_value = None
    while True:
        try:
            current_value = await client.execute_js(script)

            if current_value != last_value:
                print(f"Change detected: {last_value} -> {current_value}")
                last_value = current_value

            await asyncio.sleep(interval)
        except Exception as e:
            # Deliberate catch-all boundary for this example loop: report the
            # failure and stop monitoring.
            print(f"Monitoring error: {e}")
            break
|
||
|
|
||
|
|
||
|
# Main example
|
||
|
async def main():
    """Example usage of the browser client.

    Connects with the default URI, runs five short demonstrations
    (navigation, browser info, custom HTML, screenshot, link extraction),
    waits briefly for events, and always closes the connection.
    """
    client = BrowserClient()

    try:
        # Connect to server
        await client.connect()

        # Example 1: Basic navigation and JS execution
        print("\n1. Basic navigation:")
        await client.navigate("https://www.example.com")
        await asyncio.sleep(2)

        title = await client.execute_js("document.title")
        print(f"Page title: {title}")

        # Example 2: Get page info
        print("\n2. Browser info:")
        # Fall back to an empty dict so a missing result does not abort the demo.
        info = await client.get_info() or {}
        print(f"Current URL: {info.get('url')}")
        print(f"Can go back: {info.get('can_go_back')}")

        # Example 3: Custom HTML
        print("\n3. Loading custom HTML:")
        await client.set_html("""
            <html>
            <head><title>Test Page</title></head>
            <body>
                <h1>WebSocket Browser Control</h1>
                <p id="content">This page was loaded via WebSocket!</p>
                <button onclick="alert('Clicked!')">Click Me</button>
            </body>
            </html>
        """)

        await asyncio.sleep(1)
        content = await client.execute_js("document.getElementById('content').textContent")
        print(f"Content: {content}")

        # Example 4: Screenshot
        print("\n4. Taking screenshot:")
        await client.screenshot("screenshot.png")
        print("Screenshot saved to screenshot.png")

        # Example 5: Extract links from a real page
        print("\n5. Extracting links:")
        # Guard against a None result before calling len() and slicing.
        links = await extract_all_links(client, "https://www.python.org") or []
        print(f"Found {len(links)} links")
        for link in links[:5]:  # Show first 5
            print(f"  - {link['text']}: {link['href']}")

        # Keep connection open for a bit to see any events
        print("\nWaiting for events...")
        await asyncio.sleep(5)

    finally:
        await client.close()
|
||
|
|
||
|
|
||
|
# Run the example only when executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())
|