2026-01-24 16:17:47 +01:00
<!DOCTYPE html>
< html lang = "en" >
< head >
< meta charset = "UTF-8" >
< meta name = "viewport" content = "width=device-width, initial-scale=1.0" >
< title > API Examples - DWN Documentation< / title >
< link rel = "stylesheet" href = "css/style.css" >
< / head >
< body >
<button class="mobile-menu-btn" type="button">Menu</button>
< div class = "layout" >
< aside class = "sidebar" >
< div class = "sidebar-header" >
< h1 > DWN< / h1 >
2026-02-07 13:04:52 +01:00
< span class = "version" > v2.0.0< / span >
2026-01-24 16:17:47 +01:00
< / div >
< div class = "search-box" >
<input class="search-input" type="text" placeholder="Search docs..." aria-label="Search documentation">
< / div >
< nav class = "sidebar-nav" >
< div class = "nav-section" >
< div class = "nav-section-title" > Getting Started< / div >
< a href = "index.html" class = "nav-link" > Introduction< / a >
< a href = "installation.html" class = "nav-link" > Installation< / a >
< a href = "quickstart.html" class = "nav-link" > Quick Start< / a >
< / div >
< div class = "nav-section" >
< div class = "nav-section-title" > User Guide< / div >
< a href = "features.html" class = "nav-link" > Features< / a >
< a href = "shortcuts.html" class = "nav-link" > Keyboard Shortcuts< / a >
< a href = "configuration.html" class = "nav-link" > Configuration< / a >
< a href = "layouts.html" class = "nav-link" > Layouts< / a >
< a href = "ai-features.html" class = "nav-link" > AI Integration< / a >
< / div >
< div class = "nav-section" >
< div class = "nav-section-title" > API Reference< / div >
< a href = "api-overview.html" class = "nav-link" > API Overview< / a >
< a href = "api-reference.html" class = "nav-link" > API Reference< / a >
< a href = "api-examples.html" class = "nav-link active" > API Examples< / a >
< / div >
< div class = "nav-section" >
< div class = "nav-section-title" > Advanced< / div >
< a href = "architecture.html" class = "nav-link" > Architecture< / a >
2026-02-07 13:04:52 +01:00
< a href = "abstraction-layer.html" class = "nav-link" > Abstraction Layer< / a >
< a href = "plugin-development.html" class = "nav-link" > Plugin Development< / a >
2026-01-24 16:17:47 +01:00
< a href = "building.html" class = "nav-link" > Building from Source< / a >
< / div >
< / nav >
< / aside >
< main class = "main-content" >
< div class = "content" >
< div class = "page-header" >
< h1 > API Examples< / h1 >
< p class = "lead" > Code examples for common automation tasks< / p >
< / div >
< div class = "toc" >
< div class = "toc-title" > On this page< / div >
< ul class = "toc-list" >
< li > < a href = "#python" > Python Examples< / a > < / li >
< li > < a href = "#javascript" > JavaScript Examples< / a > < / li >
< li > < a href = "#bash" > Bash Examples< / a > < / li >
2026-02-07 13:04:52 +01:00
< li > < a href = "#fade-control" > Fade Effect Control< / a > < / li >
2026-01-24 16:17:47 +01:00
< li > < a href = "#events" > Event Subscription< / a > < / li >
< li > < a href = "#automation" > Automation Recipes< / a > < / li >
< li > < a href = "#browser-ocr" > Browser Automation with OCR< / a > < / li >
< / ul >
< / div >
< h2 id = "python" > Python Examples< / h2 >
< h3 > Basic Connection< / h3 >
< div class = "code-block" >
< pre > < code > import json
import websocket
2026-02-07 13:04:52 +01:00
ws = websocket.create_connection("ws://localhost:8777/ws")
2026-01-24 16:17:47 +01:00
def send_command(command, **params):
    """Send one command over the open ``ws`` connection and return the reply.

    Args:
        command: Command name placed under the ``"command"`` key.
        **params: Extra fields merged into the same JSON object.

    Returns:
        The next message received on ``ws``, decoded from JSON.
    """
    request = {"command": command, **params}
    ws.send(json.dumps(request))
    return json.loads(ws.recv())


# List all clients
result = send_command("get_clients")
for client in result["clients"]:
    print(f"{client['title']} ({client['class']})")
2026-01-24 16:17:47 +01:00
ws.close()< / code > < / pre >
< / div >
< h3 > Using the Client Library< / h3 >
< div class = "code-block" >
< pre > < code > from dwn_api_client import DWNClient
client = DWNClient()
client.connect()
2026-02-07 13:04:52 +01:00
# Get all clients
clients = client.get_clients()
print(f"Found {len(clients)} clients")
2026-01-24 16:17:47 +01:00
2026-02-07 13:04:52 +01:00
# Focus client by title (stops at the first client whose title contains "Firefox")
for c in clients:
    if "Firefox" in c["title"]:
        client.focus_client(c["window"])
        break
# Switch to workspace 3 (workspace indices are zero-based)
client.switch_workspace(2)
# Type some text
2026-02-07 13:04:52 +01:00
client.key_type("Hello from Python!")
2026-01-24 16:17:47 +01:00
client.disconnect()< / code > < / pre >
< / div >
< h3 > Screenshot and OCR< / h3 >
< div class = "code-block" >
< pre > < code > import base64
from dwn_api_client import DWNClient
client = DWNClient()
client.connect()
# Take fullscreen screenshot
result = client.screenshot("fullscreen")
# Save to file (the screenshot payload is base64-encoded PNG data)
png_data = base64.b64decode(result["data"])
with open("screenshot.png", "wb") as f:
    f.write(png_data)
print(f"Saved {result['width']}x{result['height']} screenshot")
# Extract text with OCR
ocr_result = client.ocr(result["data"])
print(f"Extracted text (confidence: {ocr_result['confidence']:.0%}):")
print(ocr_result["text"])
client.disconnect()< / code > < / pre >
< / div >
2026-02-07 13:04:52 +01:00
< h3 > Client Arrangement Script< / h3 >
2026-01-24 16:17:47 +01:00
< div class = "code-block" >
< pre > < code > from dwn_api_client import DWNClient
def arrange_coding_setup(client):
    """Arrange clients for coding: editor left, terminal right.

    Args:
        client: Connected client object providing ``get_clients()``,
            ``move_client(window, x, y)`` and
            ``resize_client(window, width, height)``.

    If several clients match a pattern, the last match is the one arranged.
    """
    clients = client.get_clients()

    # Find VS Code and terminal by a case-insensitive match on the class name
    vscode = None
    terminal = None
    for c in clients:
        if "code" in c["class"].lower():
            vscode = c
        elif "terminal" in c["class"].lower():
            terminal = c

    # NOTE(review): the geometry below tiles two 960x1048 halves starting at
    # y=32, i.e. it presumably targets a 1920x1080 screen with a 32px panel.
    if vscode:
        client.move_client(vscode["window"], 0, 32)
        client.resize_client(vscode["window"], 960, 1048)
    if terminal:
        client.move_client(terminal["window"], 960, 32)
        client.resize_client(terminal["window"], 960, 1048)
2026-01-24 16:17:47 +01:00
client = DWNClient()
client.connect()
arrange_coding_setup(client)
client.disconnect()< / code > < / pre >
< / div >
< h3 > Async Client< / h3 >
< div class = "code-block" >
< pre > < code > import asyncio
import json
import websockets
async def main():
    """Connect, request the client list once, and print every client title."""
    async with websockets.connect("ws://localhost:8777/ws") as ws:
        # Send command
        await ws.send(json.dumps({"command": "get_clients"}))
        # Receive response
        response = json.loads(await ws.recv())
        for client in response["clients"]:
            print(f"Client: {client['title']}")
2026-01-24 16:17:47 +01:00
asyncio.run(main())< / code > < / pre >
< / div >
< h2 id = "javascript" > JavaScript Examples< / h2 >
< h3 > Browser WebSocket< / h3 >
< div class = "code-block" >
< pre > < code > class DWNClient {
2026-02-07 13:04:52 +01:00
constructor(url = 'ws://localhost:8777/ws') {
2026-01-24 16:17:47 +01:00
this.url = url;
this.ws = null;
this.pending = new Map();
this.requestId = 0;
}
connect() {
return new Promise((resolve, reject) => {
this.ws = new WebSocket(this.url);
this.ws.onopen = () => resolve();
this.ws.onerror = (e) => reject(e);
this.ws.onmessage = (e) => this.handleMessage(e);
});
}
handleMessage(event) {
const response = JSON.parse(event.data);
// Handle response
console.log('Received:', response);
}
send(command, params = {}) {
const request = { command, ...params };
this.ws.send(JSON.stringify(request));
}
2026-02-07 13:04:52 +01:00
async getClients() {
this.send('get_clients');
2026-01-24 16:17:47 +01:00
}
2026-02-07 13:04:52 +01:00
async focusClient(windowId) {
this.send('focus_client', { window: windowId });
2026-01-24 16:17:47 +01:00
}
2026-02-07 13:04:52 +01:00
async keyType(text) {
this.send('key_type', { text });
2026-01-24 16:17:47 +01:00
}
async screenshot(mode = 'fullscreen') {
this.send('screenshot', { mode });
}
}
// Usage
const client = new DWNClient();
await client.connect();
2026-02-07 13:04:52 +01:00
await client.getClients();< / code > < / pre >
2026-01-24 16:17:47 +01:00
< / div >
< h3 > Node.js Client< / h3 >
< div class = "code-block" >
< pre > < code > const WebSocket = require('ws');
2026-02-07 13:04:52 +01:00
const ws = new WebSocket('ws://localhost:8777/ws');
2026-01-24 16:17:47 +01:00
ws.on('open', () => {
console.log('Connected to DWN');
2026-02-07 13:04:52 +01:00
// List clients
ws.send(JSON.stringify({ command: 'get_clients' }));
2026-01-24 16:17:47 +01:00
});
ws.on('message', (data) => {
const response = JSON.parse(data);
2026-02-07 13:04:52 +01:00
if (response.clients) {
response.clients.forEach(c => {
console.log(`${c.title} - ${c.class}`);
2026-01-24 16:17:47 +01:00
});
}
ws.close();
});
ws.on('error', (err) => {
console.error('Error:', err.message);
});< / code > < / pre >
< / div >
< h3 > Screenshot to Canvas< / h3 >
< div class = "code-block" >
< pre > < code > async function captureToCanvas(client, canvasId) {
return new Promise((resolve) => {
client.ws.onmessage = (event) => {
const response = JSON.parse(event.data);
if (response.data) {
const img = new Image();
img.onload = () => {
const canvas = document.getElementById(canvasId);
const ctx = canvas.getContext('2d');
canvas.width = response.width;
canvas.height = response.height;
ctx.drawImage(img, 0, 0);
resolve();
};
img.src = 'data:image/png;base64,' + response.data;
}
};
client.send('screenshot', { mode: 'fullscreen' });
});
}< / code > < / pre >
< / div >
< h2 id = "bash" > Bash Examples< / h2 >
< h3 > Using websocat< / h3 >
< div class = "code-block" >
< pre > < code > #!/bin/bash
2026-02-07 13:04:52 +01:00
# List clients
echo '{"command": "get_clients"}' | websocat ws://localhost:8777/ws
2026-01-24 16:17:47 +01:00
2026-02-07 13:04:52 +01:00
# Focus client by ID
echo '{"command": "focus_client", "window": 12345678}' | websocat ws://localhost:8777/ws
2026-01-24 16:17:47 +01:00
# Switch workspace
2026-02-07 13:04:52 +01:00
echo '{"command": "switch_workspace", "workspace": 2}' | websocat ws://localhost:8777/ws
2026-01-24 16:17:47 +01:00
# Take screenshot and save
echo '{"command": "screenshot", "mode": "fullscreen"}' | \
2026-02-07 13:04:52 +01:00
websocat ws://localhost:8777/ws | \
2026-01-24 16:17:47 +01:00
jq -r '.data' | \
base64 -d > screenshot.png< / code > < / pre >
< / div >
< h3 > Using wscat< / h3 >
< div class = "code-block" >
< pre > < code > #!/bin/bash
# One-liner command
2026-02-07 13:04:52 +01:00
echo '{"command": "get_clients"}' | wscat -c ws://localhost:8777/ws -w 1
2026-01-24 16:17:47 +01:00
# Interactive session
2026-02-07 13:04:52 +01:00
wscat -c ws://localhost:8777/ws
2026-01-24 16:17:47 +01:00
# Then type commands manually< / code > < / pre >
< / div >
<h3>Using a websocat Helper Function</h3>
< div class = "code-block" >
< pre > < code > #!/bin/bash
# Send one JSON command ($1) to DWN and print the reply.
# printf is used instead of echo so backslashes in the payload pass through unchanged.
dwn_command() {
    printf '%s\n' "$1" | websocat -n1 ws://localhost:8777/ws
}
2026-02-07 13:04:52 +01:00
# Get focused client
dwn_command '{"command": "get_focused_client"}' | jq '.client.title'
2026-01-24 16:17:47 +01:00
# Type text
2026-02-07 13:04:52 +01:00
dwn_command '{"command": "key_type", "text": "Hello!"}'
2026-01-24 16:17:47 +01:00
# Launch application
2026-02-07 13:04:52 +01:00
dwn_command '{"command": "run_command", "exec": "firefox"}'< / code > < / pre >
< / div >
<h3 id="fade-control">Fade Effect Control</h3>
< p > Control DWN's fade animation effects via API:< / p >
< div class = "code-block" >
< pre > < code > import json
import websocket
ws = websocket.create_connection("ws://localhost:8777/ws")
# Get current fade settings
ws.send(json.dumps({"command": "get_fade_settings"}))
response = json.loads(ws.recv())
print(f"Speed: {response['fade_speed']}, Intensity: {response['fade_intensity']}")
# Set fade animation speed (0.1 - 3.0)
ws.send(json.dumps({"command": "set_fade_speed", "speed": 1.5}))
# Set fade glow intensity (0.0 - 1.0)
ws.send(json.dumps({"command": "set_fade_intensity", "intensity": 0.8}))
# Subscribe to fade change events
ws.send(json.dumps({
"command": "subscribe",
"events": ["fade_speed_changed", "fade_intensity_changed"]
}))
# Listen for events
while True:
event = json.loads(ws.recv())
if event.get("type") == "event":
print(f"Event: {event['event']}")
print(f"Data: {event['data']}")< / code > < / pre >
< / div >
< p > Using the provided fade control demo script:< / p >
< div class = "code-block" >
< pre > < code > # Get current fade settings
python3 examples/fade_control_demo.py
# Set fade speed
python3 examples/fade_control_demo.py --speed 1.5
# Set fade intensity
python3 examples/fade_control_demo.py --intensity 0.8
# Listen for fade events
python3 examples/fade_control_demo.py --listen --duration 30< / code > < / pre >
2026-01-24 16:17:47 +01:00
< / div >
< h2 id = "events" > Event Subscription< / h2 >
< h3 > Using dwn_automation_demo.py< / h3 >
< p > The included automation demo script provides ready-to-use event monitoring:< / p >
< div class = "code-block" >
< pre > < code > # List available events
python3 examples/dwn_automation_demo.py events --list
# Monitor all events (Ctrl+C to stop)
python3 examples/dwn_automation_demo.py events --monitor
# Monitor specific events
python3 examples/dwn_automation_demo.py events -e window_focused -e workspace_switched
# Run event demo (10 seconds)
python3 examples/dwn_automation_demo.py demo events
# Full event stream demo with summary
python3 examples/dwn_automation_demo.py demo events-all< / code > < / pre >
< / div >
< h3 > Python Event Listener< / h3 >
< div class = "code-block" >
< pre > < code > from dwn_api_client import DWNClient
def on_event(event):
    """Print a one-line summary for the event types this example handles.

    Args:
        event: Decoded event message; ``"event"`` holds the name and
            ``"data"`` the payload. Both keys are optional.

    Returns:
        Always ``True`` so the caller keeps listening.
    """
    event_name = event.get("event", "unknown")
    data = event.get("data", {})
    if event_name == "window_focused":
        print(f"Focus: {data.get('title')}")
    elif event_name == "workspace_switched":
        workspace = data.get("new_workspace")
        # Skip the line instead of raising when the payload has no index.
        if workspace is not None:
            print(f"Workspace: {workspace + 1}")
    elif event_name == "shortcut_triggered":
        print(f"Shortcut: {data.get('description')}")
    return True  # Continue listening
client = DWNClient()
client.connect()
# Subscribe and listen
client.listen_events(on_event, events=[
"window_focused",
"workspace_switched",
"shortcut_triggered"
])< / code > < / pre >
< / div >
< h3 > Window Activity Logger< / h3 >
< div class = "code-block" >
< pre > < code > import json
from datetime import datetime
from dwn_api_client import DWNClient
def activity_logger():
    """Append window focus/create/destroy events to ``activity.log``.

    Each event is written as one JSON object per line with the keys
    ``time``, ``event`` and ``data``. Runs until interrupted with Ctrl+C
    and always disconnects the client on exit.
    """
    client = DWNClient()
    client.connect()
    client.subscribe(events=[
        "window_focused",
        "window_created",
        "window_destroyed"
    ])
    print("Logging window activity (Ctrl+C to stop)...")
    try:
        with open("activity.log", "a") as f:
            while True:
                event = client.receive_event(timeout=1.0)
                if event and event.get("type") == "event":
                    timestamp = datetime.now().isoformat()
                    ev_name = event.get("event")
                    data = event.get("data", {})
                    log_entry = {
                        "time": timestamp,
                        "event": ev_name,
                        "data": data
                    }
                    # A real newline keeps the log at one JSON object per line.
                    f.write(json.dumps(log_entry) + "\n")
                    f.flush()
                    print(f"[{timestamp}] {ev_name}")
    except KeyboardInterrupt:
        print("\nStopped logging")
    finally:
        client.disconnect()
activity_logger()< / code > < / pre >
< / div >
< h3 > JavaScript Event Listener< / h3 >
< div class = "code-block" >
2026-02-07 13:04:52 +01:00
< pre > < code > const ws = new WebSocket('ws://localhost:8777/ws');
2026-01-24 16:17:47 +01:00
ws.onopen = () => {
console.log('Connected to DWN');
// Subscribe to events
ws.send(JSON.stringify({
command: 'subscribe',
events: ['window_focused', 'workspace_switched']
}));
};
ws.onmessage = (event) => {
const msg = JSON.parse(event.data);
if (msg.type === 'event') {
console.log(`Event: ${msg.event}`, msg.data);
// Handle specific events
switch (msg.event) {
case 'window_focused':
document.title = msg.data.title || 'DWN';
break;
case 'workspace_switched':
updateWorkspaceIndicator(msg.data.new_workspace);
break;
}
}
};
ws.onerror = (err) => {
console.error('WebSocket error:', err);
};
ws.onclose = () => {
console.log('Disconnected from DWN');
};< / code > < / pre >
< / div >
2026-02-07 13:04:52 +01:00
< h3 > Reactive Client Arrangement< / h3 >
2026-01-24 16:17:47 +01:00
< div class = "code-block" >
< pre > < code > from dwn_api_client import DWNClient
RULES = {
"code": {"floating": False, "workspace": 0},
"firefox": {"floating": False, "workspace": 1},
"slack": {"floating": True, "workspace": 2},
"telegram": {"floating": True, "workspace": 2},
}
def auto_arrange():
    """Apply ``RULES`` to each newly created window until interrupted.

    For every ``window_created`` event, the first rule whose pattern is a
    substring of the lower-cased window class is applied; later rules are
    not checked. The client is always disconnected on exit.
    """
    client = DWNClient()
    client.connect()
    client.subscribe(events=["window_created"])
    print("Auto-arranging clients...")
    try:
        while True:
            event = client.receive_event(timeout=1.0)
            if event and event.get("event") == "window_created":
                data = event.get("data", {})
                window_id = data.get("window")
                wm_class = data.get("class", "").lower()
                for pattern, rules in RULES.items():
                    if pattern in wm_class:
                        print(f"Applying rules to {wm_class}")
                        if "workspace" in rules:
                            client.move_client_to_workspace(
                                window_id, rules["workspace"]
                            )
                        if "floating" in rules:
                            client.float_client(
                                window_id, rules["floating"]
                            )
                        break
    except KeyboardInterrupt:
        pass
    finally:
        client.disconnect()
auto_arrange()< / code > < / pre >
< / div >
< h2 id = "automation" > Automation Recipes< / h2 >
2026-02-07 13:04:52 +01:00
< h3 > Auto-Arrange Clients by Class< / h3 >
2026-01-24 16:17:47 +01:00
< div class = "code-block" >
< pre > < code > from dwn_api_client import DWNClient
LAYOUT_RULES = {
"code": {"workspace": 0, "floating": False},
"firefox": {"workspace": 1, "floating": False},
"telegram": {"workspace": 2, "floating": True},
"slack": {"workspace": 2, "floating": True},
}
def auto_arrange():
    """Apply ``LAYOUT_RULES`` once to every existing client.

    For each client, the first rule whose pattern is a substring of the
    lower-cased class is used. A client is only moved or re-floated when
    its current state differs from the rule.
    """
    client = DWNClient()
    client.connect()
    try:
        clients = client.get_clients()
        for c in clients:
            wm_class = c["class"].lower()
            for pattern, rules in LAYOUT_RULES.items():
                if pattern in wm_class:
                    if c["workspace"] != rules["workspace"]:
                        client.move_client_to_workspace(
                            c["window"], rules["workspace"]
                        )
                    if c["floating"] != rules["floating"]:
                        client.float_client(c["window"], rules["floating"])
                    break
    finally:
        # Disconnect even if one of the calls above raises.
        client.disconnect()
auto_arrange()< / code > < / pre >
< / div >
< h3 > Screenshot Monitor< / h3 >
< div class = "code-block" >
< pre > < code > import time
import base64
from datetime import datetime
from dwn_api_client import DWNClient
def screenshot_monitor(interval=60, output_dir="screenshots"):
    """Take periodic fullscreen screenshots until interrupted with Ctrl+C.

    Args:
        interval: Seconds to sleep between captures.
        output_dir: Directory for the PNG files; created if missing.
    """
    import os
    os.makedirs(output_dir, exist_ok=True)
    client = DWNClient()
    client.connect()
    try:
        while True:
            result = client.screenshot("fullscreen")
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            filename = f"{output_dir}/screen_{timestamp}.png"
            with open(filename, "wb") as f:
                f.write(base64.b64decode(result["data"]))
            print(f"Saved: {filename}")
            time.sleep(interval)
    except KeyboardInterrupt:
        print("Stopped")
    finally:
        client.disconnect()
screenshot_monitor(interval=300) # Every 5 minutes< / code > < / pre >
< / div >
2026-02-07 13:04:52 +01:00
< h3 > Client Focus Logger< / h3 >
2026-01-24 16:17:47 +01:00
< div class = "code-block" >
< pre > < code > import time
from datetime import datetime
from dwn_api_client import DWNClient
def focus_logger(log_file="focus_log.txt"):
    """Log client focus changes by polling the client list once per second.

    Args:
        log_file: File that receives one appended line per focus change,
            formatted as ``<ISO timestamp> | <title> (<class>)``.
    """
    client = DWNClient()
    client.connect()
    last_focused = None
    try:
        with open(log_file, "a") as f:
            while True:
                clients = client.get_clients()
                focused = next((c for c in clients if c["focused"]), None)

                # Only write when focus moved to a different window.
                if focused and focused["window"] != last_focused:
                    timestamp = datetime.now().isoformat()
                    entry = f"{timestamp} | {focused['title']} ({focused['class']})\n"
                    f.write(entry)
                    f.flush()
                    print(entry.strip())
                    last_focused = focused["window"]
                time.sleep(1)
    except KeyboardInterrupt:
        print("Stopped")
    finally:
        client.disconnect()
focus_logger()< / code > < / pre >
< / div >
< h3 > Keyboard Macro< / h3 >
< div class = "code-block" >
< pre > < code > from dwn_api_client import DWNClient
import time
def run_macro(client, actions, delay=0.1):
    """Execute a sequence of actions.

    Args:
        client: Connected client providing ``key_press``, ``key_type`` and
            ``mouse_click``.
        actions: List of dicts, each with a ``"type"`` of ``"key"``,
            ``"type"``, ``"click"`` or ``"wait"`` plus that type's fields.
            Unknown types are ignored.
        delay: Seconds to sleep after every action, including waits.
    """
    for action in actions:
        if action["type"] == "key":
            client.key_press(action["key"], action.get("modifiers", []))
        elif action["type"] == "type":
            # key_type matches the text-input call used by the other examples.
            client.key_type(action["text"])
        elif action["type"] == "click":
            client.mouse_click(action.get("button", 1),
                               action.get("x"), action.get("y"))
        elif action["type"] == "wait":
            time.sleep(action["seconds"])
        time.sleep(delay)
# Example: Open terminal and run command
macro = [
{"type": "key", "key": "t", "modifiers": ["ctrl", "alt"]},
{"type": "wait", "seconds": 1},
{"type": "type", "text": "ls -la"},
{"type": "key", "key": "Return"},
]
client = DWNClient()
client.connect()
run_macro(client, macro)
client.disconnect()< / code > < / pre >
< / div >
< h3 > OCR Screen Reader< / h3 >
< div class = "code-block" >
< pre > < code > from dwn_api_client import DWNClient
2026-02-07 13:04:52 +01:00
def read_active_client():
    """Extract and print text from the active client."""
    client = DWNClient()
    client.connect()
    try:
        # Capture active client
        screenshot = client.screenshot("active")
        # Extract text
        ocr_result = client.ocr(screenshot["data"])
        print(f"Text from active client (confidence: {ocr_result['confidence']:.0%}):")
        print("-" * 40)
        print(ocr_result["text"])
    finally:
        # Disconnect even if the capture or OCR call raises.
        client.disconnect()
2026-02-07 13:04:52 +01:00
read_active_client()< / code > < / pre >
2026-01-24 16:17:47 +01:00
< / div >
< h3 id = "browser-ocr" > Browser Automation with OCR< / h3 >
< p > Complete example that opens a browser, performs a Google search, scrolls through results, and extracts text using OCR. See < code > examples/browser_ocr_demo.py< / code > for the full script.< / p >
< div class = "code-block" >
< pre > < code > #!/usr/bin/env python3
import asyncio
import json
import sys
import base64
from typing import List
class DWNAutomation:
    """Small asyncio wrapper that sends JSON commands over one WebSocket."""

    def __init__(self, host: str = "localhost", port: int = 8777):
        self.uri = f"ws://{host}:{port}/ws"
        self.ws = None  # set by connect()

    async def connect(self):
        """Open the WebSocket connection to ``self.uri``."""
        # Imported here so the class can be defined without the package installed.
        import websockets
        self.ws = await websockets.connect(self.uri)

    async def disconnect(self):
        """Close the connection if one was opened."""
        if self.ws:
            await self.ws.close()

    async def send_command(self, command: dict) -> dict:
        """Send ``command`` as JSON and return the next message, decoded."""
        await self.ws.send(json.dumps(command))
        return json.loads(await self.ws.recv())

    async def run_command(self, exec_cmd: str) -> dict:
        return await self.send_command({"command": "run_command", "exec": exec_cmd})

    async def get_focused_client(self) -> dict:
        return await self.send_command({"command": "get_focused_client"})

    async def focus_client(self, window_id: int) -> dict:
        return await self.send_command({"command": "focus_client", "window": window_id})

    async def key_tap(self, keysym: str, modifiers: List[str] = None) -> dict:
        cmd = {"command": "key_tap", "keysym": keysym}
        # The "modifiers" field is only sent when at least one is given.
        if modifiers:
            cmd["modifiers"] = modifiers
        return await self.send_command(cmd)

    async def key_type(self, text: str) -> dict:
        return await self.send_command({"command": "key_type", "text": text})

    async def mouse_scroll(self, direction: str, amount: int = 1) -> dict:
        return await self.send_command({
            "command": "mouse_scroll", "direction": direction, "amount": amount
        })

    async def screenshot(self, mode: str = "active") -> dict:
        return await self.send_command({"command": "screenshot", "mode": mode})

    async def ocr(self, image_base64: str) -> dict:
        return await self.send_command({"command": "ocr", "image": image_base64})
async def main():
    """Open a browser, run a search, then OCR four screens of results.

    Saves each capture as ``screenshot_<n>.png`` and prints the combined
    text. The fixed sleeps give the browser time to load; adjust as needed.
    """
    automation = DWNAutomation()
    await automation.connect()
    try:
        # Open default browser with Google
        await automation.run_command("xdg-open https://www.google.nl")
        await asyncio.sleep(5.0)

        # Get focused browser window and make sure it receives the keystrokes
        result = await automation.get_focused_client()
        window_id = int(result.get("client", {}).get("window", 0))
        if window_id:
            await automation.focus_client(window_id)

        # Search for something
        await automation.key_type("ponies")
        await automation.key_tap("Return")
        await asyncio.sleep(4.0)

        # Scroll and collect OCR text from multiple pages
        all_text = []
        for i in range(4):
            # Take screenshot and run OCR
            screenshot = await automation.screenshot("active")
            image_data = screenshot.get("data", "")
            # Save screenshot
            with open(f"screenshot_{i+1}.png", "wb") as f:
                f.write(base64.b64decode(image_data))
            # Extract text
            ocr_result = await automation.ocr(image_data)
            text = ocr_result.get("text", "").strip()
            if text:
                all_text.append(f"--- Page {i+1} ---\n{text}")
            # Scroll down for next page (not needed after the last capture)
            if i < 3:
                await automation.mouse_scroll("down", 5)
                await asyncio.sleep(1.5)

        # Print combined results
        print("EXTRACTED TEXT:")
        print("\n\n".join(all_text))
    finally:
        # Close the connection even if a step above fails.
        await automation.disconnect()
asyncio.run(main())< / code > < / pre >
< / div >
< p > Run the included demo script:< / p >
< div class = "code-block" >
< pre > < code > # Install dependency
pip install websockets
# Run the demo
python3 examples/browser_ocr_demo.py< / code > < / pre >
< / div >
< footer >
<p>DWN Window Manager - retoor &lt;retoor@molodetz.nl&gt;</p>
< / footer >
< / div >
< / main >
< / div >
< script src = "js/main.js" > < / script >
< / body >
< / html >