This commit is contained in:
retoor 2025-08-11 13:28:18 +02:00
parent 563728ce4e
commit 6789a4d467
4 changed files with 3059 additions and 718 deletions

View File

@ -1,2 +1,6 @@
install:
sudo apt install python3-gi python3-gi-cairo gir1.2-gtk-3.0 gir1.2-webkit2-4.0
sudo apt install python3-gi python3-gi-cairo gir1.2-gtk-3.0
sudo apt install libgirepository1.0-dev
sudo apt install libgirepository1.0-dev gir1.2-gtk-3.0 libcairo2-dev pkg-config python3-dev

836
client.py
View File

@ -1,243 +1,701 @@
#!/usr/bin/env python3
"""
Playwright-compatible WebSocket Browser Client
Provides a Playwright-like API for controlling remote browsers via WebSocket.
"""
import asyncio
import websockets
import json
import uuid
import time
import base64
from typing import Optional, Dict, Any
import logging
from typing import Optional, Dict, Any, List, Callable, Union
from dataclasses import dataclass
from contextlib import asynccontextmanager
import weakref
class BrowserClient:
"""Client for controlling remote browser instances via WebSocket."""
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
def __init__(self, uri: str = "ws://localhost:8765"):
self.uri = uri
self.websocket = None
self.connection_id = None
self.request_counter = 0
self.pending_responses = {}
class TimeoutError(Exception):
    """Signals that an operation did not finish within its time limit.

    Defined at module level, so inside this module the name refers to this
    class (a direct ``Exception`` subclass) rather than the built-in.
    """
async def connect(self):
"""Connect to the browser server."""
self.websocket = await websockets.connect(self.uri)
class BrowserError(Exception):
    """Signals that a browser operation failed."""
# Get connection confirmation
response = await self.websocket.recv()
data = json.loads(response)
self.connection_id = data.get("connection_id")
print(f"Connected to browser: {self.connection_id}")
@dataclass
class ElementHandle:
"""Represents a handle to a DOM element"""
page: 'Page'
selector: str
# Start response handler
asyncio.create_task(self._response_handler())
async def click(self, **options):
    """Send a "click" action for this handle's selector.

    Keyword options are forwarded unchanged under the ``"options"`` key.
    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector, "options": options}
    return await self.page._playwright_action("click", payload)
async def _response_handler(self):
"""Handle responses and events from the server."""
async def fill(self, value: str):
    """Send a "fill" action that sets this handle's selector to *value*.

    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector, "value": value}
    return await self.page._playwright_action("fill", payload)
async def type(self, text: str, delay: int = 0):
    """Type *text* into the element by delegating to ``page.type``.

    *delay* is passed through as the ``delay`` keyword argument.
    """
    target = self.selector
    return await self.page.type(target, text, delay=delay)
async def press(self, key: str):
    """Send a "press" action with *key* for this handle's selector.

    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector, "key": key}
    return await self.page._playwright_action("press", payload)
async def hover(self):
    """Send a "hover" action for this handle's selector.

    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector}
    return await self.page._playwright_action("hover", payload)
async def focus(self):
    """Send a "focus" action for this handle's selector.

    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector}
    return await self.page._playwright_action("focus", payload)
async def get_attribute(self, name: str) -> Optional[str]:
    """Fetch attribute *name* of the element via a "get_attribute" action.

    Returns the reply's ``"value"`` entry when the reply reports success,
    otherwise ``None``.
    """
    reply = await self.page._playwright_action(
        "get_attribute", {"selector": self.selector, "name": name}
    )
    if not reply.get("success"):
        return None
    return reply.get("value")
async def inner_text(self) -> str:
    """Fetch the element's inner text via an "inner_text" action.

    Returns the reply's ``"text"`` entry (default ``""``) when the reply
    reports success, otherwise ``""``.
    """
    reply = await self.page._playwright_action(
        "inner_text", {"selector": self.selector}
    )
    if not reply.get("success"):
        return ""
    return reply.get("text", "")
async def inner_html(self) -> str:
    """Fetch the element's inner HTML via an "inner_html" action.

    Returns the reply's ``"html"`` entry (default ``""``) when the reply
    reports success, otherwise ``""``.
    """
    reply = await self.page._playwright_action(
        "inner_html", {"selector": self.selector}
    )
    if not reply.get("success"):
        return ""
    return reply.get("html", "")
async def is_visible(self) -> bool:
    """Ask whether the element is visible via an "is_visible" action.

    Returns the reply's ``"visible"`` entry (default ``False``) when the
    reply reports success, otherwise ``False``.
    """
    reply = await self.page._playwright_action(
        "is_visible", {"selector": self.selector}
    )
    if not reply.get("success"):
        return False
    return reply.get("visible", False)
async def is_enabled(self) -> bool:
    """Ask whether the element is enabled via an "is_enabled" action.

    Returns the reply's ``"enabled"`` entry (default ``False``) when the
    reply reports success, otherwise ``False``.
    """
    reply = await self.page._playwright_action(
        "is_enabled", {"selector": self.selector}
    )
    if not reply.get("success"):
        return False
    return reply.get("enabled", False)
async def is_checked(self) -> bool:
    """Ask whether the element is checked via an "is_checked" action.

    Returns the reply's ``"checked"`` entry (default ``False``) when the
    reply reports success, otherwise ``False``.
    """
    reply = await self.page._playwright_action(
        "is_checked", {"selector": self.selector}
    )
    if not reply.get("success"):
        return False
    return reply.get("checked", False)
async def check(self):
    """Send a "check" action for this handle's selector.

    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector}
    return await self.page._playwright_action("check", payload)
async def uncheck(self):
    """Send an "uncheck" action for this handle's selector.

    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector}
    return await self.page._playwright_action("uncheck", payload)
async def select_option(self, values: Union[str, List[str]]):
    """Send a "select_option" action carrying *values* for this selector.

    *values* is forwarded as given (a single string or a list of strings).
    Returns whatever ``page._playwright_action`` returns.
    """
    payload = {"selector": self.selector, "values": values}
    return await self.page._playwright_action("select_option", payload)
class Locator:
    """Playwright-style locator: a page paired with a selector string.

    Action methods (click, fill, type, ...) delegate to the same-named method
    on the page. Query methods (get_attribute, inner_text, is_visible, ...)
    delegate to an ``ElementHandle`` built for the same page and selector.
    The locator keeps no state beyond ``page`` and ``selector``.
    """

    def __init__(self, page: 'Page', selector: str):
        self.page = page
        self.selector = selector

    def _element(self) -> 'ElementHandle':
        """Build a fresh ElementHandle for this locator's page and selector."""
        return ElementHandle(self.page, self.selector)

    async def click(self, **options):
        """Click via ``page.click``, forwarding keyword options."""
        return await self.page.click(self.selector, **options)

    async def fill(self, value: str):
        """Fill via ``page.fill`` with *value*."""
        return await self.page.fill(self.selector, value)

    async def type(self, text: str, delay: int = 0):
        """Type *text* via ``page.type``, passing *delay* through."""
        return await self.page.type(self.selector, text, delay=delay)

    async def press(self, key: str):
        """Press *key* via ``page.press``."""
        return await self.page.press(self.selector, key)

    async def hover(self):
        """Hover via ``page.hover``."""
        return await self.page.hover(self.selector)

    async def focus(self):
        """Focus via ``page.focus``."""
        return await self.page.focus(self.selector)

    async def get_attribute(self, name: str) -> Optional[str]:
        """Return attribute *name* as reported by ``ElementHandle.get_attribute``."""
        return await self._element().get_attribute(name)

    async def inner_text(self) -> str:
        """Return the inner text as reported by ``ElementHandle.inner_text``."""
        return await self._element().inner_text()

    async def inner_html(self) -> str:
        """Return the inner HTML as reported by ``ElementHandle.inner_html``."""
        return await self._element().inner_html()

    async def is_visible(self) -> bool:
        """Return the visibility flag from ``ElementHandle.is_visible``."""
        return await self._element().is_visible()

    async def is_enabled(self) -> bool:
        """Return the enabled flag from ``ElementHandle.is_enabled``."""
        return await self._element().is_enabled()

    async def is_checked(self) -> bool:
        """Return the checked flag from ``ElementHandle.is_checked``."""
        return await self._element().is_checked()

    async def check(self):
        """Check via ``page.check``."""
        return await self.page.check(self.selector)

    async def uncheck(self):
        """Uncheck via ``page.uncheck``."""
        return await self.page.uncheck(self.selector)

    async def select_option(self, values: Union[str, List[str]]):
        """Select *values* via ``page.select_option``."""
        return await self.page.select_option(self.selector, values)

    async def count(self) -> int:
        """Return the match count from a "query_selector_all" action.

        Yields the reply's ``"count"`` entry (default 0) when the reply
        reports success, otherwise 0.
        """
        result = await self.page._playwright_action(
            "query_selector_all", {"selector": self.selector}
        )
        return result.get("count", 0) if result.get("success") else 0

    async def first(self) -> 'ElementHandle':
        """Return an ElementHandle for this selector (a coroutine, not a property)."""
        return self._element()

    async def wait_for(self, **options):
        """Wait via ``page.wait_for_selector``, forwarding keyword options."""
        return await self.page.wait_for_selector(self.selector, **options)
class Page:
    """Browser page with a Playwright-like API, backed by a Browser connection.

    Each public coroutine builds a command dict and hands it to
    ``browser._send_command``. Element-level operations are routed through
    ``_playwright_action``, which wraps them in a ``"playwright"`` command.
    Event listeners are stored per page and invoked synchronously by ``emit``.
    """

    def __init__(self, browser: 'Browser', connection_id: str):
        self.browser = browser
        self.connection_id = connection_id
        self._closed = False
        # Event name -> registered callbacks, in registration order.
        self._event_listeners: Dict[str, List[Callable]] = {}

    async def goto(self, url: str, **options) -> Dict[str, Any]:
        """Send a "goto" command for *url*, forwarding *options*."""
        return await self.browser._send_command({
            "command": "goto",
            "url": url,
            "options": options
        })

    async def go_back(self, **options) -> Dict[str, Any]:
        """Send a "go_back" command; *options* are accepted but not sent."""
        return await self.browser._send_command({"command": "go_back"})

    async def go_forward(self, **options) -> Dict[str, Any]:
        """Send a "go_forward" command; *options* are accepted but not sent."""
        return await self.browser._send_command({"command": "go_forward"})

    async def reload(self, **options) -> Dict[str, Any]:
        """Send a "reload" command; *options* are accepted but not sent."""
        return await self.browser._send_command({"command": "reload"})

    async def set_content(self, html: str, **options) -> Dict[str, Any]:
        """Send a "set_content" command carrying *html* and *options*."""
        return await self.browser._send_command({
            "command": "set_content",
            "html": html,
            "options": options
        })

    async def content(self) -> str:
        """Return the "content" command result if it is a string, else ``""``."""
        result = await self.browser._send_command({"command": "content"})
        return result if isinstance(result, str) else ""

    async def title(self) -> str:
        """Return the "title" command result if it is a string, else ``""``."""
        result = await self.browser._send_command({"command": "title"})
        return result if isinstance(result, str) else ""

    async def url(self) -> str:
        """Return the "url" command result if it is a string, else ``""``."""
        result = await self.browser._send_command({"command": "url"})
        return result if isinstance(result, str) else ""

    async def evaluate(self, expression: str, arg=None) -> Any:
        """Evaluate JavaScript in the page and return the command result.

        When *arg* is not ``None``, *expression* is treated as function source
        and rewritten to ``(expression)(<arg serialized as JSON>)``.
        """
        if arg is not None:
            expression = f"({expression})({json.dumps(arg)})"
        return await self.browser._send_command({
            "command": "evaluate",
            "expression": expression
        })

    async def evaluate_handle(self, expression: str, arg=None) -> Any:
        """Simplified variant: identical to ``evaluate`` (no handle object)."""
        return await self.evaluate(expression, arg)

    async def screenshot(self, **options) -> bytes:
        """Capture a screenshot and return the decoded image bytes.

        Raises:
            BrowserError: if the result is not a dict with a ``"screenshot"``
                entry (which is expected to hold base64 text).
        """
        result = await self.browser._send_command({
            "command": "screenshot",
            "options": options
        })
        if isinstance(result, dict) and "screenshot" in result:
            return base64.b64decode(result["screenshot"])
        raise BrowserError("Failed to capture screenshot")

    async def click(self, selector: str, **options) -> Dict[str, Any]:
        """Send a "click" action for *selector*, forwarding *options*."""
        return await self._playwright_action("click", {"selector": selector, "options": options})

    async def dblclick(self, selector: str, **options) -> Dict[str, Any]:
        """Approximate a double-click with two consecutive clicks.

        Returns the result of the second click.
        """
        await self.click(selector, **options)
        return await self.click(selector, **options)

    async def fill(self, selector: str, value: str, **options) -> Dict[str, Any]:
        """Send a "fill" action; *options* are accepted but not sent."""
        return await self._playwright_action("fill", {"selector": selector, "value": value})

    async def type(self, selector: str, text: str, delay: int = 0) -> Dict[str, Any]:
        """Send a top-level "type" command with *delay* under ``"options"``."""
        return await self.browser._send_command({
            "command": "type",
            "selector": selector,
            "text": text,
            "options": {"delay": delay}
        })

    async def press(self, selector: str, key: str, **options) -> Dict[str, Any]:
        """Send a "press" action; *options* are accepted but not sent."""
        return await self._playwright_action("press", {"selector": selector, "key": key})

    async def check(self, selector: str, **options) -> Dict[str, Any]:
        """Send a "check" action; *options* are accepted but not sent."""
        return await self._playwright_action("check", {"selector": selector})

    async def uncheck(self, selector: str, **options) -> Dict[str, Any]:
        """Send an "uncheck" action; *options* are accepted but not sent."""
        return await self._playwright_action("uncheck", {"selector": selector})

    async def select_option(self, selector: str, values: Union[str, List[str]], **options) -> Dict[str, Any]:
        """Send a "select_option" action; *options* are accepted but not sent."""
        return await self._playwright_action("select_option", {"selector": selector, "values": values})

    async def hover(self, selector: str, **options) -> Dict[str, Any]:
        """Send a "hover" action; *options* are accepted but not sent."""
        return await self._playwright_action("hover", {"selector": selector})

    async def focus(self, selector: str, **options) -> Dict[str, Any]:
        """Send a "focus" action; *options* are accepted but not sent."""
        return await self._playwright_action("focus", {"selector": selector})

    async def wait_for_selector(self, selector: str, **options) -> Optional['ElementHandle']:
        """Send a "wait_for_selector" action, forwarding *options*.

        Returns an ElementHandle for *selector* when the reply reports
        success, otherwise ``None``.
        """
        result = await self._playwright_action("wait_for_selector", {
            "selector": selector,
            "options": options
        })
        if result.get("success"):
            return ElementHandle(self, selector)
        return None

    async def wait_for_load_state(self, state: str = "load", **options) -> None:
        """Send a "wait_for_load_state" action; the reply is discarded.

        Only the ``timeout`` option is forwarded (default 30000).
        """
        await self._playwright_action("wait_for_load_state", {
            "state": state,
            "timeout": options.get("timeout", 30000)
        })

    async def wait_for_timeout(self, timeout: int) -> None:
        """Send a "wait_for_timeout" action; the reply is discarded."""
        await self._playwright_action("wait_for_timeout", {"timeout": timeout})

    async def wait_for_function(self, expression: str, **options) -> Any:
        """Poll ``evaluate(expression)`` until it returns a truthy value.

        Options:
            timeout: overall limit in milliseconds (default 30000).
            polling: pause between evaluations in milliseconds (default 100).

        Returns the first truthy result.

        Raises:
            TimeoutError: if no truthy result arrives before the deadline.
        """
        timeout_ms = options.get("timeout", 30000)
        polling_ms = options.get("polling", 100)
        # Monotonic clock: the deadline must not move when the wall clock is
        # adjusted while we are waiting.
        deadline = time.monotonic() + timeout_ms / 1000
        while time.monotonic() < deadline:
            result = await self.evaluate(expression)
            if result:
                return result
            await asyncio.sleep(polling_ms / 1000)
        raise TimeoutError(f"Timeout waiting for function: {expression}")

    def locator(self, selector: str) -> 'Locator':
        """Return a Locator bound to this page and *selector*."""
        return Locator(self, selector)

    async def query_selector(self, selector: str) -> Optional['ElementHandle']:
        """Return a handle when the reply reports both success and found."""
        result = await self._playwright_action("query_selector", {"selector": selector})
        if result.get("success") and result.get("found"):
            return ElementHandle(self, selector)
        return None

    async def query_selector_all(self, selector: str) -> List['ElementHandle']:
        """Return one handle per reported match, or ``[]`` on failure.

        Handles are addressed as ``{selector}:nth-of-type(i)`` for i from 1
        to the reported count.
        """
        result = await self._playwright_action("query_selector_all", {"selector": selector})
        if result.get("success"):
            count = result.get("count", 0)
            return [ElementHandle(self, f"{selector}:nth-of-type({i+1})") for i in range(count)]
        return []

    async def _playwright_action(self, action: str, params: Dict[str, Any]) -> Dict[str, Any]:
        """Send a "playwright" command and normalize the reply to a dict.

        String replies are parsed as JSON. A falsy reply becomes ``{}``. Any
        other non-dict reply (or unparsable string) becomes a failure dict,
        so callers can always use ``.get`` on the return value.
        """
        result = await self.browser._send_command({
            "command": "playwright",
            "action": action,
            "params": params
        })
        if isinstance(result, str):
            try:
                result = json.loads(result)
            except json.JSONDecodeError:
                return {"success": False, "error": f"Invalid response: {result}"}
        if not result:
            return {}
        if not isinstance(result, dict):
            # e.g. JSON "true" or a list: callers expect a mapping.
            return {"success": False, "error": f"Invalid response: {result}"}
        return result

    def on(self, event: str, callback: Callable) -> None:
        """Register *callback* for *event*."""
        self._event_listeners.setdefault(event, []).append(callback)

    def once(self, event: str, callback: Callable) -> None:
        """Register *callback* so that it runs for the next *event* only."""
        def wrapper(*args, **kwargs):
            # Unregister first so the callback cannot be re-entered.
            self.remove_listener(event, wrapper)
            return callback(*args, **kwargs)
        self.on(event, wrapper)

    def remove_listener(self, event: str, callback: Callable) -> None:
        """Remove every registration of *callback* for *event*."""
        if event in self._event_listeners:
            self._event_listeners[event] = [
                cb for cb in self._event_listeners[event] if cb != callback
            ]

    def emit(self, event: str, data: Any) -> None:
        """Call each listener of *event* with *data*.

        Iterates over a copy so listeners may unregister themselves. A
        listener that raises is logged and does not stop the others.
        """
        if event in self._event_listeners:
            for callback in self._event_listeners[event][:]:
                try:
                    callback(data)
                except Exception as e:
                    logger.error(f"Error in event listener for {event}: {e}")

    async def close(self) -> None:
        """Send a "close" command once; later calls do nothing."""
        if not self._closed:
            self._closed = True
            await self.browser._send_command({"command": "close"})
class BrowserContext:
    """Container for the pages of one browser; holds at most one page."""

    def __init__(self, browser: 'Browser'):
        self.pages: List[Page] = []
        self.browser = browser

    async def new_page(self) -> Page:
        """Create the context's page and register it.

        Raises:
            BrowserError: if this context already holds a page.
        """
        if len(self.pages) > 0:
            raise BrowserError("Multiple pages not supported in this implementation")
        created = Page(self.browser, self.browser.connection_id)
        self.pages.append(created)
        return created

    async def close(self) -> None:
        """Close every registered page in order, then empty the page list."""
        # Walk a snapshot so the list can be cleared safely afterwards.
        snapshot = list(self.pages)
        for open_page in snapshot:
            await open_page.close()
        self.pages.clear()
class Browser:
"""Represents a browser instance with Playwright-compatible API"""
def __init__(self, ws_url: str = "ws://localhost:8765"):
self.ws_url = ws_url
self.websocket: Optional[websockets.WebSocketClientProtocol] = None
self.connection_id: Optional[str] = None
self._response_futures: Dict[str, asyncio.Future] = {}
self._event_listeners: Dict[str, List[Callable]] = {}
self._receive_task: Optional[asyncio.Task] = None
self._closed = False
self.contexts: List[BrowserContext] = []
async def connect(self) -> None:
"""Connect to the browser server"""
self.websocket = await websockets.connect(self.ws_url)
self._receive_task = asyncio.create_task(self._receive_messages())
# Wait for connection confirmation
timeout = 15
start_time = time.time()
while not self.connection_id and time.time() - start_time < timeout:
await asyncio.sleep(0.1)
if not self.connection_id:
raise TimeoutError("Failed to establish browser connection")
async def _receive_messages(self) -> None:
"""Receive messages from the WebSocket"""
try:
async for message in self.websocket:
try:
data = json.loads(message)
msg_type = data.get("type")
if data["type"] == "response":
if msg_type == "connected":
self.connection_id = data.get("connection_id")
logger.info(f"Connected with ID: {self.connection_id}")
elif msg_type == "response":
request_id = data.get("request_id")
if request_id in self.pending_responses:
self.pending_responses[request_id].set_result(data)
if request_id in self._response_futures:
future = self._response_futures.pop(request_id)
if data.get("error"):
future.set_exception(BrowserError(data["error"]))
else:
future.set_result(data.get("result"))
elif data["type"] == "event":
print(f"Event: {data['event']} - {data['data']}")
elif msg_type == "event":
event_type = data.get("event")
event_data = data.get("data", {})
# Emit to browser-level listeners
self._emit_event(event_type, event_data)
# Emit to page-level listeners if applicable
for context in self.contexts:
for page in context.pages:
page.emit(event_type, event_data)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse message: {e}")
except Exception as e:
logger.error(f"Error handling message: {e}")
except websockets.exceptions.ConnectionClosed:
print("Connection closed")
logger.info("WebSocket connection closed")
except Exception as e:
logger.error(f"Error in receive loop: {e}")
finally:
# Clean up pending futures
for future in self._response_futures.values():
if not future.done():
future.set_exception(BrowserError("Connection closed"))
self._response_futures.clear()
async def _send_command(self, command: str, **kwargs) -> Dict[str, Any]:
"""Send a command and wait for response."""
self.request_counter += 1
request_id = f"req_{self.request_counter}"
async def _send_command(self, command: Dict[str, Any]) -> Any:
"""Send a command and wait for response"""
if not self.websocket or self._closed:
raise BrowserError("WebSocket connection is closed")
request_id = str(uuid.uuid4())
command["request_id"] = request_id
# Create future for response
future = asyncio.Future()
self.pending_responses[request_id] = future
self._response_futures[request_id] = future
# Send command
await self.websocket.send(json.dumps({
"command": command,
"request_id": request_id,
**kwargs
}))
# Wait for response
try:
response = await asyncio.wait_for(future, timeout=10.0)
del self.pending_responses[request_id]
await self.websocket.send(json.dumps(command))
if response.get("error"):
raise Exception(response["error"])
# Wait for response with timeout
timeout = command.get("timeout", 30)
return await asyncio.wait_for(future, timeout=timeout)
return response.get("result")
except asyncio.TimeoutError:
del self.pending_responses[request_id]
raise Exception("Command timeout")
self._response_futures.pop(request_id, None)
raise TimeoutError(f"Command timeout: {command.get('command')}")
except Exception as e:
self._response_futures.pop(request_id, None)
raise BrowserError(f"Command failed: {e}")
async def navigate(self, url: str) -> Dict[str, Any]:
"""Navigate to a URL."""
return await self._send_command("navigate", url=url)
def _emit_event(self, event: str, data: Any) -> None:
"""Emit an event to all browser-level listeners"""
if event in self._event_listeners:
for callback in self._event_listeners[event][:]:
try:
callback(data)
except Exception as e:
logger.error(f"Error in browser event listener for {event}: {e}")
async def execute_js(self, script: str) -> Any:
"""Execute JavaScript and return result."""
return await self._send_command("execute_js", script=script)
def on(self, event: str, callback: Callable) -> None:
"""Register a browser-level event listener"""
if event not in self._event_listeners:
self._event_listeners[event] = []
self._event_listeners[event].append(callback)
async def go_back(self):
"""Go back in history."""
return await self._send_command("go_back")
def once(self, event: str, callback: Callable) -> None:
"""Register a one-time browser-level event listener"""
def wrapper(*args, **kwargs):
self.remove_listener(event, wrapper)
return callback(*args, **kwargs)
self.on(event, wrapper)
async def go_forward(self):
"""Go forward in history."""
return await self._send_command("go_forward")
def remove_listener(self, event: str, callback: Callable) -> None:
"""Remove a browser-level event listener"""
if event in self._event_listeners:
self._event_listeners[event] = [
cb for cb in self._event_listeners[event] if cb != callback
]
async def reload(self):
"""Reload the current page."""
return await self._send_command("reload")
async def new_context(self, **options) -> BrowserContext:
"""Create a new browser context"""
context = BrowserContext(self)
self.contexts.append(context)
return context
async def stop(self):
"""Stop loading."""
return await self._send_command("stop")
async def new_page(self) -> Page:
"""Create a new page in the default context"""
if not self.contexts:
context = await self.new_context()
else:
context = self.contexts[0]
return await context.new_page()
async def get_info(self) -> Dict[str, Any]:
"""Get browser information."""
return await self._send_command("get_info")
async def close(self) -> None:
"""Close the browser"""
if self._closed:
return
async def screenshot(self, save_path: Optional[str] = None) -> str:
"""Take a screenshot. Returns base64 data or saves to file."""
result = await self._send_command("screenshot")
screenshot_b64 = result.get("screenshot")
self._closed = True
if save_path and screenshot_b64:
with open(save_path, "wb") as f:
f.write(base64.b64decode(screenshot_b64))
return save_path
# Close all contexts
for context in self.contexts[:]:
await context.close()
return screenshot_b64
# Cancel receive task
if self._receive_task:
self._receive_task.cancel()
try:
await self._receive_task
except asyncio.CancelledError:
pass
async def set_html(self, html: str, base_uri: Optional[str] = None):
"""Load custom HTML content."""
return await self._send_command("set_html", html=html, base_uri=base_uri)
async def close(self):
"""Close the connection."""
# Close WebSocket
if self.websocket:
await self.websocket.close()
logger.info("Browser closed")
# Example automation functions
@property
def is_connected(self) -> bool:
"""Check if browser is connected"""
return bool(self.websocket and not self.websocket.closed and self.connection_id)
async def scrape_page_title(client: BrowserClient, url: str) -> str:
"""Example: Scrape page title."""
await client.navigate(url)
await asyncio.sleep(2) # Wait for page load
title = await client.execute_js("document.title")
return title
class Playwright:
"""Main Playwright-compatible API entry point"""
async def fill_and_submit_form(client: BrowserClient, url: str):
"""Example: Fill and submit a form."""
await client.navigate(url)
await asyncio.sleep(2)
def __init__(self):
self.browsers: List[Browser] = []
# Fill form fields
await client.execute_js("""
document.querySelector('#username').value = 'testuser';
document.querySelector('#email').value = 'test@example.com';
""")
class chromium:
"""Chromium browser launcher (uses WebKit in this implementation)"""
# Submit form
await client.execute_js("document.querySelector('#submit-button').click()")
@staticmethod
async def launch(**options) -> Browser:
"""Launch a browser instance"""
ws_url = options.get("ws_endpoint", "ws://localhost:8765")
browser = Browser(ws_url)
await browser.connect()
return browser
async def extract_all_links(client: BrowserClient, url: str) -> list:
"""Example: Extract all links from a page."""
await client.navigate(url)
await asyncio.sleep(2)
class firefox:
"""Firefox browser launcher (uses WebKit in this implementation)"""
links = await client.execute_js("""
Array.from(document.querySelectorAll('a[href]')).map(a => ({
text: a.textContent.trim(),
href: a.href
}))
""")
return links
@staticmethod
async def launch(**options) -> Browser:
"""Launch a browser instance"""
return await Playwright.chromium.launch(**options)
async def monitor_page_changes(client: BrowserClient, url: str, selector: str, interval: int = 5):
"""Example: Monitor a page element for changes."""
await client.navigate(url)
await asyncio.sleep(2)
class webkit:
"""WebKit browser launcher"""
last_value = None
while True:
@staticmethod
async def launch(**options) -> Browser:
"""Launch a browser instance"""
return await Playwright.chromium.launch(**options)
# Convenience function for async context manager
@asynccontextmanager
async def browser(**options):
"""Async context manager for browser"""
playwright = Playwright()
browser_instance = await playwright.chromium.launch(**options)
try:
current_value = await client.execute_js(f"document.querySelector('{selector}')?.textContent")
if current_value != last_value:
print(f"Change detected: {last_value} -> {current_value}")
last_value = current_value
await asyncio.sleep(interval)
except Exception as e:
print(f"Monitoring error: {e}")
break
# Main example
async def main():
"""Example usage of the browser client."""
client = BrowserClient()
try:
# Connect to server
await client.connect()
# Example 1: Basic navigation and JS execution
print("\n1. Basic navigation:")
await client.navigate("https://www.example.com")
await asyncio.sleep(2)
title = await client.execute_js("document.title")
print(f"Page title: {title}")
# Example 2: Get page info
print("\n2. Browser info:")
info = await client.get_info()
print(f"Current URL: {info['url']}")
print(f"Can go back: {info['can_go_back']}")
# Example 3: Custom HTML
print("\n3. Loading custom HTML:")
await client.set_html("""
<html>
<head><title>Test Page</title></head>
<body>
<h1>WebSocket Browser Control</h1>
<p id="content">This page was loaded via WebSocket!</p>
<button onclick="alert('Clicked!')">Click Me</button>
</body>
</html>
""")
await asyncio.sleep(1)
content = await client.execute_js("document.getElementById('content').textContent")
print(f"Content: {content}")
# Example 4: Screenshot
print("\n4. Taking screenshot:")
await client.screenshot("screenshot.png")
print("Screenshot saved to screenshot.png")
# Example 5: Extract links from a real page
print("\n5. Extracting links:")
links = await extract_all_links(client, "https://www.python.org")
print(f"Found {len(links)} links")
for link in links[:5]: # Show first 5
print(f" - {link['text']}: {link['href']}")
# Keep connection open for a bit to see any events
print("\nWaiting for events...")
await asyncio.sleep(5)
yield browser_instance
finally:
await client.close()
await browser_instance.close()
# Convenience function for creating a browser and page
async def launch(**options) -> Browser:
    """Create a Playwright object and launch through its chromium launcher.

    All keyword options are forwarded to ``chromium.launch``.
    """
    launcher = Playwright().chromium
    return await launcher.launch(**options)
if __name__ == "__main__":
asyncio.run(main())
# Example usage functions for compatibility
async def goto(page: Page, url: str, **options):
    """Module-level shortcut for ``page.goto(url, **options)``."""
    outcome = await page.goto(url, **options)
    return outcome
async def screenshot(page: Page, path: str = None, **options):
    """Capture a screenshot of *page* and optionally save it.

    The bytes returned by ``page.screenshot(**options)`` are written to
    *path* in binary mode when *path* is truthy, and are returned in either
    case.
    """
    image_bytes = await page.screenshot(**options)
    if not path:
        return image_bytes
    with open(path, 'wb') as out_file:
        out_file.write(image_bytes)
    return image_bytes
async def evaluate(page: Page, expression: str, arg=None):
    """Module-level shortcut for ``page.evaluate(expression, arg)``."""
    outcome = await page.evaluate(expression, arg)
    return outcome
# Selector engine helpers
def css(selector: str) -> str:
    """Return *selector* unchanged; it only labels the string as CSS."""
    css_selector = selector
    return css_selector
def xpath(selector: str) -> str:
    """Return an XPath selector unchanged.

    No XPath-to-CSS conversion is performed: every input, whether or not it
    starts with ``//``, is returned as-is, so whatever consumes the selector
    must handle XPath itself.
    """
    return selector
def _xpath_literal(value: str) -> str:
    """Render *value* as an XPath 1.0 string literal.

    XPath 1.0 has no escape character, so the quote style is chosen to avoid
    the quotes in *value*; when both kinds occur, ``concat()`` joins
    single-quoted pieces with a double-quoted apostrophe.
    """
    if "'" not in value:
        return f"'{value}'"
    if '"' not in value:
        return f'"{value}"'
    pieces = (f"'{piece}'" for piece in value.split("'"))
    return "concat(" + ", \"'\", ".join(pieces) + ")"


def text(text_content: str) -> str:
    """Build an XPath selector matching elements whose text contains a string.

    The result has the form ``//*[contains(text(), <literal>)]``. Input
    without apostrophes yields the single-quoted form; input containing
    quotes is encoded so the expression stays well-formed.
    """
    return f"//*[contains(text(), {_xpath_literal(text_content)})]"

801
demo.py
View File

@ -1,232 +1,679 @@
#!/usr/bin/env python3
"""
Comprehensive Demo of Playwright-style WebSocket Browser Control
Shows both backward-compatible and new Playwright-style APIs.
"""
import asyncio
import websockets
import json
import logging
import sys
import os
from datetime import datetime
from client import Browser, Playwright, browser
async def create_browser_window(window_id: int, url: str):
"""Create and control a single browser window."""
uri = "ws://localhost:8765"
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
async with websockets.connect(uri) as websocket:
async def demo_basic_navigation():
"""Demo basic navigation features"""
print("\n=== Basic Navigation Demo ===")
response = await websocket.recv()
conn_data = json.loads(response)
print(f"Window {window_id} connected: {conn_data['connection_id'][:8]}")
# Create browser instance
browser_instance = Browser("ws://localhost:8765")
await browser_instance.connect()
try:
# Create a page
page = await browser_instance.new_page()
await websocket.send(json.dumps({
"command": "navigate",
"url": url,
"request_id": f"nav_{window_id}"
}))
await websocket.recv()
# Navigate to a website
print("Navigating to example.com...")
await page.goto("https://example.com")
await asyncio.sleep(2)
# Get page info
title = await page.title()
url = await page.url()
print(f"Title: {title}")
print(f"URL: {url}")
await websocket.send(json.dumps({
"command": "execute_js",
"script": f"document.body.style.backgroundColor = '#{window_id:02x}0000'; document.title",
"request_id": f"js_{window_id}"
}))
# Navigate to another page
print("\nNavigating to Python.org...")
await page.goto("https://www.python.org")
await asyncio.sleep(2)
response = await websocket.recv()
data = json.loads(response)
print(f"Window {window_id} - Title: {data.get('result')}")
# Go back
print("Going back...")
await page.go_back()
await asyncio.sleep(2)
# Go forward
print("Going forward...")
await page.go_forward()
await asyncio.sleep(2)
await asyncio.sleep(10)
# Reload
print("Reloading page...")
await page.reload()
await asyncio.sleep(2)
print(f"Window {window_id} closing...")
finally:
await browser_instance.close()
async def demo_playwright_style():
"""Demo Playwright-style API usage"""
print("\n=== Playwright-Style API Demo ===")
async def parallel_browser_demo():
"""Demo: Open multiple browser windows in parallel."""
urls = [
"https://www.python.org",
"https://www.github.com",
"https://www.example.com",
"https://www.wikipedia.org"
]
# Using Playwright launcher
playwright = Playwright()
browser_instance = await playwright.chromium.launch(ws_endpoint="ws://localhost:8765")
try:
# Create context and page
context = await browser_instance.new_context()
page = await context.new_page()
tasks = []
for i, url in enumerate(urls):
task = asyncio.create_task(create_browser_window(i + 1, url))
tasks.append(task)
await asyncio.sleep(0.5)
# Navigate and wait for load
print("Navigating with Playwright-style API...")
await page.goto("https://www.wikipedia.org")
await page.wait_for_load_state("load")
# Use locators
print("Using locators to find elements...")
search_box = page.locator('input[name="search"]')
await asyncio.gather(*tasks)
# Type into search box
print("Typing 'Python programming' into search...")
await search_box.type("Python programming", delay=50)
await asyncio.sleep(1)
print("All browser windows closed.")
# Press Enter
print("Pressing Enter...")
await search_box.press("Enter")
await page.wait_for_load_state("load")
await asyncio.sleep(2)
# Take screenshot
print("Taking screenshot...")
screenshot_data = await page.screenshot()
with open("wikipedia_search.png", "wb") as f:
f.write(screenshot_data)
print("Screenshot saved as wikipedia_search.png")
async def automated_testing_demo():
"""Demo: Automated testing across multiple sites."""
test_sites = [
{"url": "https://www.example.com", "selector": "h1"},
{"url": "https://www.python.org", "selector": ".introduction h1"},
{"url": "https://httpbin.org/html", "selector": "h1"},
]
finally:
await browser_instance.close()
async def test_site(site_info):
uri = "ws://localhost:8765"
async def demo_form_interaction():
"""Demo form interaction capabilities"""
print("\n=== Form Interaction Demo ===")
async with websockets.connect(uri) as websocket:
async with browser(ws_endpoint="ws://localhost:8765") as browser_instance:
page = await browser_instance.new_page()
await websocket.recv()
await websocket.send(json.dumps({
"command": "navigate",
"url": site_info["url"]
}))
await websocket.recv()
await asyncio.sleep(3)
await websocket.send(json.dumps({
"command": "execute_js",
"script": f"document.querySelector('{site_info['selector']}')?.textContent || 'Not found'"
}))
response = await websocket.recv()
data = json.loads(response)
heading = data.get("result", "Error")
await websocket.send(json.dumps({
"command": "screenshot"
}))
screenshot_response = await websocket.recv()
screenshot_data = json.loads(screenshot_response)
print(f"Site: {site_info['url']}")
print(f" Heading: {heading}")
print(f" Screenshot: {'' if screenshot_data.get('result') else ''}")
print()
tasks = [test_site(site) for site in test_sites]
await asyncio.gather(*tasks)
async def form_automation_demo():
"""Demo: Fill forms in multiple windows."""
uri = "ws://localhost:8765"
async with websockets.connect(uri) as websocket:
await websocket.recv()
html = """
# Create a test form
html_content = """
<!DOCTYPE html>
<html>
<head>
<title>Form Automation Demo</title>
<title>Test Form</title>
<style>
body { font-family: Arial; padding: 20px; }
input, select { margin: 5px; padding: 5px; }
#result { margin-top: 20px; color: green; }
body { font-family: Arial, sans-serif; margin: 40px; }
.form-group { margin-bottom: 15px; }
label { display: block; margin-bottom: 5px; }
input, select, textarea { padding: 8px; width: 300px; }
button { padding: 10px 20px; background: #007bff; color: white; border: none; cursor: pointer; }
button:hover { background: #0056b3; }
</style>
</head>
<body>
<h1>Automated Form Demo</h1>
<form id="demo-form">
<input type="text" id="name" placeholder="Name"><br>
<input type="email" id="email" placeholder="Email"><br>
<select id="country">
<option value="">Select Country</option>
<option value="US">United States</option>
<option value="UK">United Kingdom</option>
<option value="CA">Canada</option>
</select><br>
<button type="button" onclick="submitForm()">Submit</button>
<h1>Test Form</h1>
<form id="testForm">
<div class="form-group">
<label for="name">Name:</label>
<input type="text" id="name" name="name" required>
</div>
<div class="form-group">
<label for="email">Email:</label>
<input type="email" id="email" name="email" required>
</div>
<div class="form-group">
<label for="country">Country:</label>
<select id="country" name="country">
<option value="">Select a country</option>
<option value="us">United States</option>
<option value="uk">United Kingdom</option>
<option value="ca">Canada</option>
<option value="au">Australia</option>
</select>
</div>
<div class="form-group">
<label>
<input type="checkbox" id="newsletter" name="newsletter">
Subscribe to newsletter
</label>
</div>
<div class="form-group">
<label>Gender:</label>
<label><input type="radio" name="gender" value="male"> Male</label>
<label><input type="radio" name="gender" value="female"> Female</label>
<label><input type="radio" name="gender" value="other"> Other</label>
</div>
<div class="form-group">
<label for="comments">Comments:</label>
<textarea id="comments" name="comments" rows="4"></textarea>
</div>
<button type="submit">Submit</button>
</form>
<div id="result"></div>
<div id="result" style="margin-top: 20px; padding: 20px; background: #f0f0f0; display: none;">
<h2>Form Submitted!</h2>
<div id="resultContent"></div>
</div>
<script>
function submitForm() {
const name = document.getElementById('name').value;
const email = document.getElementById('email').value;
const country = document.getElementById('country').value;
document.getElementById('result').innerHTML =
`Submitted: ${name} (${email}) from ${country}`;
}
document.getElementById('testForm').addEventListener('submit', function(e) {
e.preventDefault();
const formData = new FormData(e.target);
const data = Object.fromEntries(formData.entries());
document.getElementById('resultContent').innerHTML =
'<pre>' + JSON.stringify(data, null, 2) + '</pre>';
document.getElementById('result').style.display = 'block';
});
</script>
</body>
</html>
"""
await websocket.send(json.dumps({
"command": "set_html",
"html": html
}))
await websocket.recv()
# Set the content
print("Loading test form...")
await page.set_content(html_content)
print("Form loaded. Automating form filling...")
# Fill the form using different methods
print("\nFilling form fields...")
# Method 1: Using fill()
await page.fill("#name", "John Doe")
# Method 2: Using type() for more realistic typing
await page.type("#email", "john.doe@example.com", delay=50)
# Method 3: Using locator
country_select = page.locator("#country")
await country_select.select_option("us")
# Check checkbox
print("Checking newsletter checkbox...")
await page.check("#newsletter")
# Select radio button
print("Selecting gender radio button...")
await page.click('input[name="gender"][value="male"]')
# Fill textarea
print("Adding comments...")
comments_field = page.locator("#comments")
await comments_field.fill("This is a test comment\nWith multiple lines\nUsing Playwright-style API")
# Submit form
print("Submitting form...")
await page.click('button[type="submit"]')
await asyncio.sleep(1)
# Check if result is visible
result_div = page.locator("#result")
is_visible = await result_div.is_visible()
print(f"Result visible: {is_visible}")
fields = [
("document.getElementById('name').value = 'John Doe'", "Filled name"),
("document.getElementById('email').value = 'john@example.com'", "Filled email"),
("document.getElementById('country').value = 'US'", "Selected country"),
("submitForm()", "Submitted form")
# Get the result content
result_text = await page.inner_text("#resultContent")
print("Form submission result:")
print(result_text)
# Take a screenshot of the filled form
await page.screenshot()
print("Screenshot taken of submitted form")
async def demo_element_queries():
    """Show how to look up, inspect and interact with DOM elements.

    Connects to the browser server at ws://localhost:8765, loads an in-memory
    test page (four ``.item`` divs, one of them hidden, plus two buttons),
    then queries the items, reports their visibility and ``data-id``
    attributes, clicks both buttons and counts the highlighted items.
    The browser is closed in a ``finally`` block once it has connected.
    """
    # Markup of the throw-away test page.
    markup = """
<!DOCTYPE html>
<html>
<head>
<title>Element Query Test</title>
<style>
.item { padding: 10px; margin: 5px; background: #f0f0f0; }
.hidden { display: none; }
.highlight { background: yellow; }
</style>
</head>
<body>
<h1>Element Query Test</h1>
<div class="container">
<div class="item" data-id="1">Item 1</div>
<div class="item" data-id="2">Item 2</div>
<div class="item hidden" data-id="3">Item 3 (Hidden)</div>
<div class="item" data-id="4">Item 4</div>
<button id="toggleBtn">Toggle Item 3</button>
<button id="highlightBtn">Highlight All</button>
</div>
<script>
document.getElementById('toggleBtn').addEventListener('click', () => {
const item3 = document.querySelector('[data-id="3"]');
item3.classList.toggle('hidden');
});
document.getElementById('highlightBtn').addEventListener('click', () => {
document.querySelectorAll('.item').forEach(item => {
item.classList.add('highlight');
});
});
</script>
</body>
</html>
"""
    # Page-side function returning how many items carry the highlight class.
    count_highlighted_js = """
() => {
const items = document.querySelectorAll('.item.highlight');
return items.length;
}
"""

    print("\n=== Element Query Demo ===")
    session = Browser("ws://localhost:8765")
    await session.connect()
    try:
        page = await session.new_page()
        await page.set_content(markup)
        print("Test page loaded")

        # Single-element lookup.
        print("\nQuerying single element...")
        lead = await page.query_selector(".item")
        if lead:
            lead_text = await lead.inner_text()
            print(f"First item text: {lead_text}")

        # Multi-element lookup.
        print("\nQuerying all items...")
        nodes = await page.query_selector_all(".item")
        print(f"Found {len(nodes)} items")

        # Visibility report, numbered from 1.
        print("\nChecking visibility of items...")
        for position, node in enumerate(nodes, start=1):
            shown = await node.is_visible()
            print(f"Item {position} visible: {shown}")

        # Attribute report.
        print("\nGetting data attributes...")
        for node in nodes:
            identifier = await node.get_attribute("data-id")
            print(f"Item has data-id: {identifier}")

        # The toggle button flips the 'hidden' class on item 3.
        print("\nClicking toggle button...")
        await page.click("#toggleBtn")
        await asyncio.sleep(0.5)

        third = page.locator('[data-id="3"]')
        shown_after_toggle = await third.is_visible()
        print(f"Hidden item visible after toggle: {shown_after_toggle}")

        # The highlight button adds the 'highlight' class to every item.
        print("\nClicking highlight button...")
        await page.click("#highlightBtn")
        await asyncio.sleep(0.5)

        highlighted = await page.evaluate(count_highlighted_js)
        print(f"Number of highlighted items: {highlighted}")
    finally:
        await session.close()
async def demo_wait_conditions():
    """Demonstrate waiting on a predicate, on a selector and on a fixed delay.

    Loads an in-memory page whose button handler replaces the content after
    2000 ms and appends an ``#extraContent`` node 1000 ms later, then waits
    for each of those changes in turn before pausing for two more seconds.
    """
    # Page with a button that triggers delayed DOM updates via setTimeout.
    page_source = """
<!DOCTYPE html>
<html>
<head>
<title>Dynamic Content Test</title>
<style>
.content { padding: 20px; margin: 20px; background: #f0f0f0; }
.loading { color: #999; }
.loaded { color: #333; font-weight: bold; }
</style>
</head>
<body>
<h1>Dynamic Content Test</h1>
<button id="loadBtn">Load Content</button>
<div id="content" class="content loading">Content will appear here...</div>
<script>
document.getElementById('loadBtn').addEventListener('click', () => {
const content = document.getElementById('content');
content.textContent = 'Loading...';
content.className = 'content loading';
// Simulate async loading
setTimeout(() => {
content.textContent = 'Content loaded successfully!';
content.className = 'content loaded';
// Add more content after another delay
setTimeout(() => {
const extra = document.createElement('div');
extra.id = 'extraContent';
extra.textContent = 'Extra content added!';
content.appendChild(extra);
}, 1000);
}, 2000);
});
</script>
</body>
</html>
"""
    # Predicate that becomes truthy once #content carries the 'loaded' class.
    content_loaded_js = """
() => {
const content = document.getElementById('content');
return content && content.classList.contains('loaded');
}
"""

    print("\n=== Wait Conditions Demo ===")
    async with browser(ws_endpoint="ws://localhost:8765") as session:
        page = await session.new_page()
        await page.set_content(page_source)
        print("Dynamic page loaded")

        # Kick off the delayed updates.
        print("\nClicking load button...")
        await page.click("#loadBtn")

        # First wait: predicate evaluated in the page.
        print("Waiting for content to load...")
        await page.wait_for_function(content_loaded_js, timeout=5000)
        print("Content loaded!")

        # Second wait: element matching a selector.
        print("Waiting for extra content...")
        appended = await page.wait_for_selector("#extraContent", timeout=5000)
        if appended:
            appended_text = await appended.inner_text()
            print(f"Extra content appeared: {appended_text}")

        # Third wait: plain timeout.
        print("\nWaiting for 2 seconds...")
        await page.wait_for_timeout(2000)
        print("Wait completed")
async def demo_javascript_execution():
    """Run a series of JavaScript snippets against https://example.com.

    Each snippet is evaluated inside its own ``try``/``except`` so that one
    failing step is reported and the remaining steps still run. Any other
    error is reported by the outer handler, and the browser is closed in the
    ``finally`` block.
    """
    # Snippets evaluated in the page, in the order they are used below.
    dimensions_js = """
() => {
return {
width: window.innerWidth,
height: window.innerHeight,
devicePixelRatio: window.devicePixelRatio || 1
}
}
"""
    restyle_heading_js = """
() => {
const h1 = document.querySelector('h1');
if (h1) {
h1.style.color = 'red';
h1.textContent = 'Modified by Playwright-style API!';
}
}
"""
    add_banner_js = """
() => {
const div = document.createElement('div');
div.id = 'custom-div';
div.style.cssText = 'position: fixed; top: 10px; right: 10px; ' +
'background: yellow; padding: 20px; ' +
'border: 2px solid black; z-index: 9999;';
div.textContent = 'Created via JavaScript!';
document.body.appendChild(div);
}
"""
    greet_js = "(name) => `Hello, ${name}!`"

    print("\n=== JavaScript Execution Demo ===")
    session = Browser("ws://localhost:8765")
    await session.connect()
    try:
        page = await session.new_page()

        await page.goto("https://example.com")
        await asyncio.sleep(2)  # Wait for page to load

        # Step 1: plain expression.
        print("\nExecuting simple JavaScript...")
        try:
            total = await page.evaluate("1 + 2")
            print(f"1 + 2 = {total}")
        except Exception as exc:
            print(f"Error executing simple math: {exc}")

        # Step 2: function returning an object with the viewport metrics.
        try:
            viewport = await page.evaluate(dimensions_js)
            print(f"Page dimensions: {viewport}")
        except Exception as exc:
            print(f"Error getting dimensions: {exc}")

        # Step 3: mutate the existing heading.
        print("\nModifying page content...")
        try:
            await page.evaluate(restyle_heading_js)
            print("Page modified successfully")
        except Exception as exc:
            print(f"Error modifying page: {exc}")

        # Step 4: append a new element to the document body.
        try:
            await page.evaluate(add_banner_js)
            print("New element created")
        except Exception as exc:
            print(f"Error creating element: {exc}")
        await asyncio.sleep(2)

        # Step 5: function that receives an argument from Python.
        print("\nPassing arguments to JavaScript...")
        try:
            message = await page.evaluate(greet_js, "Playwright User")
            print(f"Greeting: {message}")
        except Exception as exc:
            print(f"Error with argument passing: {exc}")
    except Exception as exc:
        print(f"Error in demo: {exc}")
    finally:
        await session.close()
async def demo_event_handling():
    """Register browser event callbacks and trigger them by navigating.

    Four callbacks are attached to the browser object; each appends an
    ``(event name, payload)`` tuple to a shared list and prints a line.
    Two navigations follow, with the running event count printed after each.
    The browser is closed in a ``finally`` block.
    """
    print("\n=== Event Handling Demo ===")
    session = Browser("ws://localhost:8765")
    await session.connect()

    # Log of everything the callbacks have seen so far.
    seen = []

    def on_browser_ready(data):
        seen.append(("browser_ready", data))
        print(f"Event: Browser ready - {data}")

    def on_load_started(data):
        seen.append(("load_started", data))
        print(f"Event: Load started - {data.get('url', 'unknown')}")

    def on_load_finished(data):
        seen.append(("load_finished", data))
        print(f"Event: Load finished - {data.get('url', 'unknown')}")

    def on_title_changed(data):
        seen.append(("title_changed", data))
        print(f"Event: Title changed - {data.get('title', 'unknown')}")

    # Attach the callbacks; dict order keeps the registration order fixed.
    callbacks = {
        "browser_ready": on_browser_ready,
        "load_started": on_load_started,
        "load_finished": on_load_finished,
        "title_changed": on_title_changed,
    }
    for event_name, callback in callbacks.items():
        session.on(event_name, callback)

    try:
        page = await session.new_page()

        print("\nNavigating to trigger events...")
        await page.goto("https://www.python.org")
        await asyncio.sleep(2)
        print(f"\nTotal events received: {len(seen)}")

        print("\nNavigating to another page...")
        await page.goto("https://example.com")
        await asyncio.sleep(2)
        print(f"Total events received: {len(seen)}")
    finally:
        await session.close()
async def demo_backward_compatibility():
    """Send the legacy raw command dictionaries through the browser object.

    Issues ``navigate``, ``execute_js``, ``simulate_typing`` and ``get_info``
    via ``_send_command`` and prints each reply. The browser is closed in a
    ``finally`` block.
    """
    # Legacy payloads, in the order they are sent.
    navigate_cmd = {
        "command": "navigate",
        "url": "https://example.com"
    }
    title_cmd = {
        "command": "execute_js",
        "script": "document.title"
    }
    typing_cmd = {
        "command": "simulate_typing",
        "selector": "h1",
        "text": "Hello",
        "delay": 0.1
    }
    info_cmd = {
        "command": "get_info"
    }

    print("\n=== Backward Compatibility Demo ===")
    session = Browser("ws://localhost:8765")
    await session.connect()
    try:
        # The page handle is not used below; commands go through the browser object.
        await session.new_page()

        print("Using backward-compatible commands...")

        reply = await session._send_command(navigate_cmd)
        print(f"Navigate result: {reply}")
        await asyncio.sleep(2)

        reply = await session._send_command(title_cmd)
        print(f"Page title via execute_js: {reply}")

        reply = await session._send_command(typing_cmd)
        print(f"Simulate typing result: {reply}")

        details = await session._send_command(info_cmd)
        print(f"Browser info: {details}")
    finally:
        await session.close()
async def run_all_demos():
"""Run all demo functions"""
demos = [
demo_basic_navigation,
demo_playwright_style,
demo_form_interaction,
demo_element_queries,
demo_wait_conditions,
demo_javascript_execution,
demo_event_handling,
demo_backward_compatibility
]
for script, message in fields:
await websocket.send(json.dumps({
"command": "execute_js",
"script": script
}))
await websocket.recv()
print(f"{message}")
await asyncio.sleep(1)
print("=" * 60)
print("Playwright-style WebSocket Browser Control Demo")
print("=" * 60)
print("\nMake sure the browser server is running on ws://localhost:8765")
print("Start it with: python browser_server_playwright.py")
input("\nPress Enter to start demos...")
await websocket.send(json.dumps({
"command": "execute_js",
"script": "document.getElementById('result').textContent"
}))
response = await websocket.recv()
data = json.loads(response)
print(f"\nForm result: {data.get('result')}")
await asyncio.sleep(5)
for demo in demos:
try:
await demo()
await asyncio.sleep(2) # Pause between demos
except Exception as e:
logger.error(f"Error in {demo.__name__}: {e}")
import traceback
traceback.print_exc()
print("\n" + "=" * 60)
print("All demos completed!")
print("=" * 60)
# Individual demo runners for testing
async def main():
print("WebSocket Browser Control Demos")
print("=" * 40)
print("1. Parallel Browsers - Open 4 sites simultaneously")
print("2. Automated Testing - Test multiple sites")
print("3. Form Automation - Fill and submit forms")
print("4. Run All Demos")
"""Main entry point"""
if len(sys.argv) > 1:
demo_name = sys.argv[1]
demos = {
"navigation": demo_basic_navigation,
"playwright": demo_playwright_style,
"form": demo_form_interaction,
"elements": demo_element_queries,
"wait": demo_wait_conditions,
"javascript": demo_javascript_execution,
"events": demo_event_handling,
"compatibility": demo_backward_compatibility,
"all": run_all_demos
}
choice = input("\nSelect demo (1-4): ")
if choice == "1":
await parallel_browser_demo()
elif choice == "2":
await automated_testing_demo()
elif choice == "3":
await form_automation_demo()
elif choice == "4":
print("\n--- Running Parallel Browsers Demo ---")
await parallel_browser_demo()
print("\n--- Running Automated Testing Demo ---")
await automated_testing_demo()
print("\n--- Running Form Automation Demo ---")
await form_automation_demo()
if demo_name in demos:
await demos[demo_name]()
else:
print("Invalid choice")
print(f"Unknown demo: {demo_name}")
print(f"Available demos: {', '.join(demos.keys())}")
else:
await run_all_demos()
if __name__ == "__main__":
    # Script entry point: drive main() on a fresh event loop.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to stop the demos; report it without a traceback.
        print("\nDemo interrupted by user")
    except Exception as e:
        # logger.exception logs at ERROR level and appends the active traceback,
        # so the failure goes through the configured logging handlers in one record.
        logger.exception("Fatal error: %s", e)
        # Signal the failure to the calling shell instead of exiting with status 0.
        raise SystemExit(1)

1882
server.py

File diff suppressed because it is too large Load Diff