|
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<title>API Examples - DWN Documentation</title>
|
|
<link rel="stylesheet" href="css/style.css">
|
|
</head>
|
|
<body>
|
|
<button class="mobile-menu-btn">Menu</button>
|
|
|
|
<div class="layout">
|
|
<aside class="sidebar">
|
|
<div class="sidebar-header">
|
|
<h1>DWN</h1>
|
|
<span class="version">v1.0.0</span>
|
|
</div>
|
|
|
|
<div class="search-box">
|
|
<input type="text" class="search-input" placeholder="Search docs...">
|
|
</div>
|
|
|
|
<nav class="sidebar-nav">
|
|
<div class="nav-section">
|
|
<div class="nav-section-title">Getting Started</div>
|
|
<a href="index.html" class="nav-link">Introduction</a>
|
|
<a href="installation.html" class="nav-link">Installation</a>
|
|
<a href="quickstart.html" class="nav-link">Quick Start</a>
|
|
</div>
|
|
|
|
<div class="nav-section">
|
|
<div class="nav-section-title">User Guide</div>
|
|
<a href="features.html" class="nav-link">Features</a>
|
|
<a href="shortcuts.html" class="nav-link">Keyboard Shortcuts</a>
|
|
<a href="configuration.html" class="nav-link">Configuration</a>
|
|
<a href="layouts.html" class="nav-link">Layouts</a>
|
|
<a href="ai-features.html" class="nav-link">AI Integration</a>
|
|
</div>
|
|
|
|
<div class="nav-section">
|
|
<div class="nav-section-title">API Reference</div>
|
|
<a href="api-overview.html" class="nav-link">API Overview</a>
|
|
<a href="api-reference.html" class="nav-link">API Reference</a>
|
|
<a href="api-examples.html" class="nav-link active">API Examples</a>
|
|
</div>
|
|
|
|
<div class="nav-section">
|
|
<div class="nav-section-title">Advanced</div>
|
|
<a href="architecture.html" class="nav-link">Architecture</a>
|
|
<a href="building.html" class="nav-link">Building from Source</a>
|
|
</div>
|
|
</nav>
|
|
</aside>
|
|
|
|
<main class="main-content">
|
|
<div class="content">
|
|
<div class="page-header">
|
|
<h1>API Examples</h1>
|
|
<p class="lead">Code examples for common automation tasks</p>
|
|
</div>
|
|
|
|
<div class="toc">
|
|
<div class="toc-title">On this page</div>
|
|
<ul class="toc-list">
|
|
<li><a href="#python">Python Examples</a></li>
|
|
<li><a href="#javascript">JavaScript Examples</a></li>
|
|
<li><a href="#bash">Bash Examples</a></li>
|
|
<li><a href="#events">Event Subscription</a></li>
|
|
<li><a href="#automation">Automation Recipes</a></li>
|
|
<li><a href="#browser-ocr">Browser Automation with OCR</a></li>
|
|
</ul>
|
|
</div>
|
|
|
|
<h2 id="python">Python Examples</h2>
|
|
|
|
<h3>Basic Connection</h3>
|
|
<div class="code-block">
|
|
<pre><code>import json
|
|
import websocket
|
|
|
|
ws = websocket.create_connection("ws://localhost:8777")
|
|
|
|
def send_command(command, **params):
|
|
request = {"command": command, **params}
|
|
ws.send(json.dumps(request))
|
|
return json.loads(ws.recv())
|
|
|
|
# List all windows
|
|
result = send_command("list_windows")
|
|
for window in result["windows"]:
|
|
print(f"{window['title']} ({window['class']})")
|
|
|
|
ws.close()</code></pre>
|
|
</div>
|
|
|
|
<h3>Using the Client Library</h3>
|
|
<div class="code-block">
|
|
<pre><code>from dwn_api_client import DWNClient
|
|
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
# Get all windows
|
|
windows = client.list_windows()
|
|
print(f"Found {len(windows)} windows")
|
|
|
|
# Focus window by title
|
|
for w in windows:
|
|
if "Firefox" in w["title"]:
|
|
client.focus_window(w["id"])
|
|
break
|
|
|
|
# Switch to workspace 3 (workspace indices are zero-based)
|
|
client.switch_workspace(2)
|
|
|
|
# Type some text
|
|
client.type_text("Hello from Python!")
|
|
|
|
client.disconnect()</code></pre>
|
|
</div>
|
|
|
|
<h3>Screenshot and OCR</h3>
|
|
<div class="code-block">
|
|
<pre><code>import base64
|
|
from dwn_api_client import DWNClient
|
|
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
# Take fullscreen screenshot
|
|
result = client.screenshot("fullscreen")
|
|
|
|
# Save to file
|
|
png_data = base64.b64decode(result["data"])
|
|
with open("screenshot.png", "wb") as f:
|
|
f.write(png_data)
|
|
|
|
print(f"Saved {result['width']}x{result['height']} screenshot")
|
|
|
|
# Extract text with OCR
|
|
ocr_result = client.ocr(result["data"])
|
|
print(f"Extracted text (confidence: {ocr_result['confidence']:.0%}):")
|
|
print(ocr_result["text"])
|
|
|
|
client.disconnect()</code></pre>
|
|
</div>
|
|
|
|
<h3>Window Arrangement Script</h3>
|
|
<div class="code-block">
|
|
<pre><code>from dwn_api_client import DWNClient
|
|
|
|
def arrange_coding_setup(client):
|
|
"""Arrange windows for coding: editor left, terminal right"""
|
|
windows = client.list_windows()
|
|
|
|
# Find VS Code and terminal
|
|
vscode = None
|
|
terminal = None
|
|
for w in windows:
|
|
if "code" in w["class"].lower():
|
|
vscode = w
|
|
elif "terminal" in w["class"].lower():
|
|
terminal = w
|
|
|
|
if vscode:
|
|
client.move_window(vscode["id"], 0, 32)
|
|
client.resize_window(vscode["id"], 960, 1048)
|
|
|
|
if terminal:
|
|
client.move_window(terminal["id"], 960, 32)
|
|
client.resize_window(terminal["id"], 960, 1048)
|
|
|
|
client = DWNClient()
|
|
client.connect()
|
|
arrange_coding_setup(client)
|
|
client.disconnect()</code></pre>
|
|
</div>
|
|
|
|
<h3>Async Client</h3>
|
|
<div class="code-block">
|
|
<pre><code>import asyncio
|
|
import json
|
|
import websockets
|
|
|
|
async def main():
|
|
async with websockets.connect("ws://localhost:8777") as ws:
|
|
# Send command
|
|
await ws.send(json.dumps({"command": "list_windows"}))
|
|
|
|
# Receive response
|
|
response = json.loads(await ws.recv())
|
|
|
|
for window in response["windows"]:
|
|
print(f"Window: {window['title']}")
|
|
|
|
asyncio.run(main())</code></pre>
|
|
</div>
|
|
|
|
<h2 id="javascript">JavaScript Examples</h2>
|
|
|
|
<h3>Browser WebSocket</h3>
|
|
<div class="code-block">
|
|
<pre><code>class DWNClient {
|
|
constructor(url = 'ws://localhost:8777') {
|
|
this.url = url;
|
|
this.ws = null;
|
|
this.pending = new Map();
|
|
this.requestId = 0;
|
|
}
|
|
|
|
connect() {
|
|
return new Promise((resolve, reject) => {
|
|
this.ws = new WebSocket(this.url);
|
|
this.ws.onopen = () => resolve();
|
|
this.ws.onerror = (e) => reject(e);
|
|
this.ws.onmessage = (e) => this.handleMessage(e);
|
|
});
|
|
}
|
|
|
|
handleMessage(event) {
|
|
const response = JSON.parse(event.data);
|
|
// Handle response
|
|
console.log('Received:', response);
|
|
}
|
|
|
|
send(command, params = {}) {
|
|
const request = { command, ...params };
|
|
this.ws.send(JSON.stringify(request));
|
|
}
|
|
|
|
async listWindows() {
|
|
this.send('list_windows');
|
|
}
|
|
|
|
async focusWindow(windowId) {
|
|
this.send('focus_window', { window: windowId });
|
|
}
|
|
|
|
async typeText(text) {
|
|
this.send('type_text', { text });
|
|
}
|
|
|
|
async screenshot(mode = 'fullscreen') {
|
|
this.send('screenshot', { mode });
|
|
}
|
|
}
|
|
|
|
// Usage
|
|
const client = new DWNClient();
|
|
await client.connect();
|
|
await client.listWindows();</code></pre>
|
|
</div>
|
|
|
|
<h3>Node.js Client</h3>
|
|
<div class="code-block">
|
|
<pre><code>const WebSocket = require('ws');
|
|
|
|
const ws = new WebSocket('ws://localhost:8777');
|
|
|
|
ws.on('open', () => {
|
|
console.log('Connected to DWN');
|
|
|
|
// List windows
|
|
ws.send(JSON.stringify({ command: 'list_windows' }));
|
|
});
|
|
|
|
ws.on('message', (data) => {
|
|
const response = JSON.parse(data);
|
|
|
|
if (response.windows) {
|
|
response.windows.forEach(w => {
|
|
console.log(`${w.title} - ${w.class}`);
|
|
});
|
|
}
|
|
|
|
ws.close();
|
|
});
|
|
|
|
ws.on('error', (err) => {
|
|
console.error('Error:', err.message);
|
|
});</code></pre>
|
|
</div>
|
|
|
|
<h3>Screenshot to Canvas</h3>
|
|
<div class="code-block">
|
|
<pre><code>async function captureToCanvas(client, canvasId) {
|
|
return new Promise((resolve) => {
|
|
client.ws.onmessage = (event) => {
|
|
const response = JSON.parse(event.data);
|
|
if (response.data) {
|
|
const img = new Image();
|
|
img.onload = () => {
|
|
const canvas = document.getElementById(canvasId);
|
|
const ctx = canvas.getContext('2d');
|
|
canvas.width = response.width;
|
|
canvas.height = response.height;
|
|
ctx.drawImage(img, 0, 0);
|
|
resolve();
|
|
};
|
|
img.src = 'data:image/png;base64,' + response.data;
|
|
}
|
|
};
|
|
client.send('screenshot', { mode: 'fullscreen' });
|
|
});
|
|
}</code></pre>
|
|
</div>
|
|
|
|
<h2 id="bash">Bash Examples</h2>
|
|
|
|
<h3>Using websocat</h3>
|
|
<div class="code-block">
|
|
<pre><code>#!/bin/bash
|
|
|
|
# List windows
|
|
echo '{"command": "list_windows"}' | websocat ws://localhost:8777
|
|
|
|
# Focus window by ID
|
|
echo '{"command": "focus_window", "window": 12345678}' | websocat ws://localhost:8777
|
|
|
|
# Switch workspace
|
|
echo '{"command": "switch_workspace", "workspace": 2}' | websocat ws://localhost:8777
|
|
|
|
# Take screenshot and save
|
|
echo '{"command": "screenshot", "mode": "fullscreen"}' | \
|
|
websocat ws://localhost:8777 | \
|
|
jq -r '.data' | \
|
|
base64 -d > screenshot.png</code></pre>
|
|
</div>
|
|
|
|
<h3>Using wscat</h3>
|
|
<div class="code-block">
|
|
<pre><code>#!/bin/bash
|
|
|
|
# One-liner command
|
|
echo '{"command": "list_windows"}' | wscat -c ws://localhost:8777 -w 1
|
|
|
|
# Interactive session
|
|
wscat -c ws://localhost:8777
|
|
# Then type commands manually</code></pre>
|
|
</div>
|
|
|
|
<h3>Using a websocat Helper Function</h3>
|
|
<div class="code-block">
|
|
<pre><code>#!/bin/bash
|
|
|
|
dwn_command() {
|
|
echo "$1" | websocat -n1 ws://localhost:8777
|
|
}
|
|
|
|
# Get focused window
|
|
dwn_command '{"command": "get_focused"}' | jq '.window.title'
|
|
|
|
# Type text
|
|
dwn_command '{"command": "type_text", "text": "Hello!"}'
|
|
|
|
# Launch application
|
|
dwn_command '{"command": "spawn", "program": "firefox"}'</code></pre>
|
|
</div>
|
|
|
|
<h2 id="events">Event Subscription</h2>
|
|
|
|
<h3>Using dwn_automation_demo.py</h3>
|
|
<p>The included automation demo script provides ready-to-use event monitoring:</p>
|
|
<div class="code-block">
|
|
<pre><code># List available events
|
|
python3 examples/dwn_automation_demo.py events --list
|
|
|
|
# Monitor all events (Ctrl+C to stop)
|
|
python3 examples/dwn_automation_demo.py events --monitor
|
|
|
|
# Monitor specific events
|
|
python3 examples/dwn_automation_demo.py events -e window_focused -e workspace_switched
|
|
|
|
# Run event demo (10 seconds)
|
|
python3 examples/dwn_automation_demo.py demo events
|
|
|
|
# Full event stream demo with summary
|
|
python3 examples/dwn_automation_demo.py demo events-all</code></pre>
|
|
</div>
|
|
|
|
<h3>Python Event Listener</h3>
|
|
<div class="code-block">
|
|
<pre><code>from dwn_api_client import DWNClient
|
|
|
|
def on_event(event):
|
|
event_name = event.get("event", "unknown")
|
|
data = event.get("data", {})
|
|
|
|
if event_name == "window_focused":
|
|
print(f"Focus: {data.get('title')}")
|
|
elif event_name == "workspace_switched":
|
|
print(f"Workspace: {data.get('new_workspace') + 1}")
|
|
elif event_name == "shortcut_triggered":
|
|
print(f"Shortcut: {data.get('description')}")
|
|
|
|
return True # Continue listening
|
|
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
# Subscribe and listen
|
|
client.listen_events(on_event, events=[
|
|
"window_focused",
|
|
"workspace_switched",
|
|
"shortcut_triggered"
|
|
])</code></pre>
|
|
</div>
|
|
|
|
<h3>Window Activity Logger</h3>
|
|
<div class="code-block">
|
|
<pre><code>import json
|
|
from datetime import datetime
|
|
from dwn_api_client import DWNClient
|
|
|
|
def activity_logger():
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
client.subscribe(events=[
|
|
"window_focused",
|
|
"window_created",
|
|
"window_destroyed"
|
|
])
|
|
|
|
print("Logging window activity (Ctrl+C to stop)...")
|
|
|
|
try:
|
|
with open("activity.log", "a") as f:
|
|
while True:
|
|
event = client.receive_event(timeout=1.0)
|
|
if event and event.get("type") == "event":
|
|
timestamp = datetime.now().isoformat()
|
|
ev_name = event.get("event")
|
|
data = event.get("data", {})
|
|
|
|
log_entry = {
|
|
"time": timestamp,
|
|
"event": ev_name,
|
|
"data": data
|
|
}
|
|
f.write(json.dumps(log_entry) + "\n")
|
|
f.flush()
|
|
|
|
print(f"[{timestamp}] {ev_name}")
|
|
except KeyboardInterrupt:
|
|
print("\nStopped logging")
|
|
finally:
|
|
client.disconnect()
|
|
|
|
activity_logger()</code></pre>
|
|
</div>
|
|
|
|
<h3>JavaScript Event Listener</h3>
|
|
<div class="code-block">
|
|
<pre><code>const ws = new WebSocket('ws://localhost:8777');
|
|
|
|
ws.onopen = () => {
|
|
console.log('Connected to DWN');
|
|
|
|
// Subscribe to events
|
|
ws.send(JSON.stringify({
|
|
command: 'subscribe',
|
|
events: ['window_focused', 'workspace_switched']
|
|
}));
|
|
};
|
|
|
|
ws.onmessage = (event) => {
|
|
const msg = JSON.parse(event.data);
|
|
|
|
if (msg.type === 'event') {
|
|
console.log(`Event: ${msg.event}`, msg.data);
|
|
|
|
// Handle specific events
|
|
switch (msg.event) {
|
|
case 'window_focused':
|
|
document.title = msg.data.title || 'DWN';
|
|
break;
|
|
case 'workspace_switched':
|
|
updateWorkspaceIndicator(msg.data.new_workspace);
|
|
break;
|
|
}
|
|
}
|
|
};
|
|
|
|
ws.onerror = (err) => {
|
|
console.error('WebSocket error:', err);
|
|
};
|
|
|
|
ws.onclose = () => {
|
|
console.log('Disconnected from DWN');
|
|
};</code></pre>
|
|
</div>
|
|
|
|
<h3>Reactive Window Arrangement</h3>
|
|
<div class="code-block">
|
|
<pre><code>from dwn_api_client import DWNClient
|
|
|
|
RULES = {
|
|
"code": {"floating": False, "workspace": 0},
|
|
"firefox": {"floating": False, "workspace": 1},
|
|
"slack": {"floating": True, "workspace": 2},
|
|
"telegram": {"floating": True, "workspace": 2},
|
|
}
|
|
|
|
def auto_arrange():
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
client.subscribe(events=["window_created"])
|
|
|
|
print("Auto-arranging windows...")
|
|
|
|
try:
|
|
while True:
|
|
event = client.receive_event(timeout=1.0)
|
|
if event and event.get("event") == "window_created":
|
|
data = event.get("data", {})
|
|
window_id = data.get("window")
|
|
wm_class = data.get("class", "").lower()
|
|
|
|
for pattern, rules in RULES.items():
|
|
if pattern in wm_class:
|
|
print(f"Applying rules to {wm_class}")
|
|
|
|
if "workspace" in rules:
|
|
client.move_client_to_workspace(
|
|
window_id, rules["workspace"]
|
|
)
|
|
if "floating" in rules:
|
|
client.float_client(
|
|
window_id, rules["floating"]
|
|
)
|
|
break
|
|
except KeyboardInterrupt:
|
|
pass
|
|
finally:
|
|
client.disconnect()
|
|
|
|
auto_arrange()</code></pre>
|
|
</div>
|
|
|
|
<h2 id="automation">Automation Recipes</h2>
|
|
|
|
<h3>Auto-Arrange Windows by Class</h3>
|
|
<div class="code-block">
|
|
<pre><code>from dwn_api_client import DWNClient
|
|
|
|
LAYOUT_RULES = {
|
|
"code": {"workspace": 0, "floating": False},
|
|
"firefox": {"workspace": 1, "floating": False},
|
|
"telegram": {"workspace": 2, "floating": True},
|
|
"slack": {"workspace": 2, "floating": True},
|
|
}
|
|
|
|
def auto_arrange():
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
windows = client.list_windows()
|
|
for w in windows:
|
|
win_class = w["class"].lower()
|
|
for pattern, rules in LAYOUT_RULES.items():
|
|
if pattern in win_class:
|
|
if w["workspace"] != rules["workspace"]:
|
|
client.move_window_to_workspace(
|
|
w["id"], rules["workspace"]
|
|
)
|
|
if w["floating"] != rules["floating"]:
|
|
client.set_floating(w["id"], rules["floating"])
|
|
break
|
|
|
|
client.disconnect()
|
|
|
|
auto_arrange()</code></pre>
|
|
</div>
|
|
|
|
<h3>Screenshot Monitor</h3>
|
|
<div class="code-block">
|
|
<pre><code>import time
|
|
import base64
|
|
from datetime import datetime
|
|
from dwn_api_client import DWNClient
|
|
|
|
def screenshot_monitor(interval=60, output_dir="screenshots"):
|
|
"""Take periodic screenshots"""
|
|
import os
|
|
os.makedirs(output_dir, exist_ok=True)
|
|
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
try:
|
|
while True:
|
|
result = client.screenshot("fullscreen")
|
|
|
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
filename = f"{output_dir}/screen_{timestamp}.png"
|
|
|
|
with open(filename, "wb") as f:
|
|
f.write(base64.b64decode(result["data"]))
|
|
|
|
print(f"Saved: {filename}")
|
|
time.sleep(interval)
|
|
except KeyboardInterrupt:
|
|
print("Stopped")
|
|
finally:
|
|
client.disconnect()
|
|
|
|
screenshot_monitor(interval=300) # Every 5 minutes</code></pre>
|
|
</div>
|
|
|
|
<h3>Window Focus Logger</h3>
|
|
<div class="code-block">
|
|
<pre><code>import time
|
|
from datetime import datetime
|
|
from dwn_api_client import DWNClient
|
|
|
|
def focus_logger(log_file="focus_log.txt"):
|
|
"""Log window focus changes"""
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
last_focused = None
|
|
|
|
try:
|
|
with open(log_file, "a") as f:
|
|
while True:
|
|
windows = client.list_windows()
|
|
focused = next((w for w in windows if w["focused"]), None)
|
|
|
|
if focused and focused["id"] != last_focused:
|
|
timestamp = datetime.now().isoformat()
|
|
entry = f"{timestamp} | {focused['title']} ({focused['class']})\n"
|
|
f.write(entry)
|
|
f.flush()
|
|
print(entry.strip())
|
|
last_focused = focused["id"]
|
|
|
|
time.sleep(1)
|
|
except KeyboardInterrupt:
|
|
print("Stopped")
|
|
finally:
|
|
client.disconnect()
|
|
|
|
focus_logger()</code></pre>
|
|
</div>
|
|
|
|
<h3>Keyboard Macro</h3>
|
|
<div class="code-block">
|
|
<pre><code>from dwn_api_client import DWNClient
|
|
import time
|
|
|
|
def run_macro(client, actions, delay=0.1):
|
|
"""Execute a sequence of actions"""
|
|
for action in actions:
|
|
if action["type"] == "key":
|
|
client.key_press(action["key"], action.get("modifiers", []))
|
|
elif action["type"] == "type":
|
|
client.type_text(action["text"])
|
|
elif action["type"] == "click":
|
|
client.mouse_click(action.get("button", 1),
|
|
action.get("x"), action.get("y"))
|
|
elif action["type"] == "wait":
|
|
time.sleep(action["seconds"])
|
|
time.sleep(delay)
|
|
|
|
# Example: Open terminal and run command
|
|
macro = [
|
|
{"type": "key", "key": "t", "modifiers": ["ctrl", "alt"]},
|
|
{"type": "wait", "seconds": 1},
|
|
{"type": "type", "text": "ls -la"},
|
|
{"type": "key", "key": "Return"},
|
|
]
|
|
|
|
client = DWNClient()
|
|
client.connect()
|
|
run_macro(client, macro)
|
|
client.disconnect()</code></pre>
|
|
</div>
|
|
|
|
<h3>OCR Screen Reader</h3>
|
|
<div class="code-block">
|
|
<pre><code>from dwn_api_client import DWNClient
|
|
|
|
def read_active_window():
|
|
"""Extract and print text from active window"""
|
|
client = DWNClient()
|
|
client.connect()
|
|
|
|
# Capture active window
|
|
screenshot = client.screenshot("active")
|
|
|
|
# Extract text
|
|
ocr_result = client.ocr(screenshot["data"])
|
|
|
|
print(f"Text from active window (confidence: {ocr_result['confidence']:.0%}):")
|
|
print("-" * 40)
|
|
print(ocr_result["text"])
|
|
|
|
client.disconnect()
|
|
|
|
read_active_window()</code></pre>
|
|
</div>
|
|
|
|
<h3 id="browser-ocr">Browser Automation with OCR</h3>
|
|
<p>Complete example that opens a browser, performs a Google search, scrolls through results, and extracts text using OCR. See <code>examples/browser_ocr_demo.py</code> for the full script.</p>
|
|
<div class="code-block">
|
|
<pre><code>#!/usr/bin/env python3
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
import base64
|
|
from typing import List
|
|
|
|
class DWNAutomation:
|
|
def __init__(self, host: str = "localhost", port: int = 8777):
|
|
self.uri = f"ws://{host}:{port}/ws"
|
|
self.ws = None
|
|
|
|
async def connect(self):
|
|
import websockets
|
|
self.ws = await websockets.connect(self.uri)
|
|
|
|
async def disconnect(self):
|
|
if self.ws:
|
|
await self.ws.close()
|
|
|
|
async def send_command(self, command: dict) -> dict:
|
|
await self.ws.send(json.dumps(command))
|
|
return json.loads(await self.ws.recv())
|
|
|
|
async def run_command(self, exec_cmd: str) -> dict:
|
|
return await self.send_command({"command": "run_command", "exec": exec_cmd})
|
|
|
|
async def get_focused_client(self) -> dict:
|
|
return await self.send_command({"command": "get_focused_client"})
|
|
|
|
async def focus_client(self, window_id: int) -> dict:
|
|
return await self.send_command({"command": "focus_client", "window": window_id})
|
|
|
|
async def key_tap(self, keysym: str, modifiers: List[str] = None) -> dict:
|
|
cmd = {"command": "key_tap", "keysym": keysym}
|
|
if modifiers:
|
|
cmd["modifiers"] = modifiers
|
|
return await self.send_command(cmd)
|
|
|
|
async def key_type(self, text: str) -> dict:
|
|
return await self.send_command({"command": "key_type", "text": text})
|
|
|
|
async def mouse_scroll(self, direction: str, amount: int = 1) -> dict:
|
|
return await self.send_command({
|
|
"command": "mouse_scroll", "direction": direction, "amount": amount
|
|
})
|
|
|
|
async def screenshot(self, mode: str = "active") -> dict:
|
|
return await self.send_command({"command": "screenshot", "mode": mode})
|
|
|
|
async def ocr(self, image_base64: str) -> dict:
|
|
return await self.send_command({"command": "ocr", "image": image_base64})
|
|
|
|
|
|
async def main():
|
|
automation = DWNAutomation()
|
|
await automation.connect()
|
|
|
|
# Open default browser with Google
|
|
await automation.run_command("xdg-open https://www.google.nl")
|
|
await asyncio.sleep(5.0)
|
|
|
|
# Get focused browser window
|
|
result = await automation.get_focused_client()
|
|
window_id = int(result.get("client", {}).get("window", 0))
|
|
|
|
# Search for something
|
|
await automation.key_type("ponies")
|
|
await automation.key_tap("Return")
|
|
await asyncio.sleep(4.0)
|
|
|
|
# Scroll and collect OCR text from multiple pages
|
|
all_text = []
|
|
for i in range(4):
|
|
# Take screenshot and run OCR
|
|
screenshot = await automation.screenshot("active")
|
|
image_data = screenshot.get("data", "")
|
|
|
|
# Save screenshot
|
|
with open(f"screenshot_{i+1}.png", "wb") as f:
|
|
f.write(base64.b64decode(image_data))
|
|
|
|
# Extract text
|
|
ocr_result = await automation.ocr(image_data)
|
|
text = ocr_result.get("text", "").strip()
|
|
if text:
|
|
all_text.append(f"--- Page {i+1} ---\n{text}")
|
|
|
|
# Scroll down for next page
|
|
if i &lt; 3:
|
|
await automation.mouse_scroll("down", 5)
|
|
await asyncio.sleep(1.5)
|
|
|
|
# Print combined results
|
|
print("EXTRACTED TEXT:")
|
|
print("\n\n".join(all_text))
|
|
|
|
await automation.disconnect()
|
|
|
|
asyncio.run(main())</code></pre>
|
|
</div>
|
|
|
|
<p>Run the included demo script:</p>
|
|
<div class="code-block">
|
|
<pre><code># Install dependency
|
|
pip install websockets
|
|
|
|
# Run the demo
|
|
python3 examples/browser_ocr_demo.py</code></pre>
|
|
</div>
|
|
|
|
<footer>
|
|
<p>DWN Window Manager - retoor &lt;retoor@molodetz.nl&gt;</p>
|
|
</footer>
|
|
</div>
|
|
</main>
|
|
</div>
|
|
|
|
<script src="js/main.js"></script>
|
|
</body>
|
|
</html>
|