// retoor <retoor@molodetz.nl>
import "websocket" for WebSocket
import "json" for Json
import "timer" for Timer
// Print the demo banner, then open the WebSocket connection that every
// later command in this script is sent over.
System.print("Browser Automation OCR Demo")
System.print("=" * 50)
// Endpoint of the DWN API server; a local address is hard-coded here.
var host = "ws://127.0.0.1:8777/ws"
System.print("")
System.print("Connecting to DWN API at %(host)...")
// NOTE(review): "Connected" is printed unconditionally after connect()
// returns; presumably connect() aborts the fiber on failure - confirm.
var ws = WebSocket.connect(host)
System.print("Connected")
// Sends one command to the server and waits for a single reply.
//
// command: a value that Json.stringify can encode (the callers pass maps).
// Returns the JSON-decoded reply, or null when no message arrived or the
// message that arrived was not a text frame.
var sendCommand = Fn.new { |command|
  var payload = Json.stringify(command)
  ws.send(payload)

  var reply = ws.receive()
  if (reply != null && reply.isText) {
    return Json.parse(reply.text)
  }
  return null
}
// Steps 1-2: launch the default browser through the server's run_command,
// wait for it to come up, then ask the server which client has focus.
System.print("")
System.print("1. Opening default browser with google.nl...")
sendCommand.call({"command": "run_command", "exec": "xdg-open https://www.google.nl"})
System.print("2. Waiting for browser to open...")
Timer.sleep(5000)
var result = sendCommand.call({"command": "get_focused_client"})
// sendCommand returns null when no text reply arrives, so only index the
// reply when it really is a map; anything else counts as "no client".
var client = (result is Map) ? result["client"] : null
if (!(client is Map)) {
  System.print(" Error: No focused window found")
  ws.close()
  Fiber.abort("No focused window")
}
var windowId = client["window"]
// The id is formatted as hexadecimal further down; stop here with a clear
// message rather than failing inside the formatter on a missing value.
if (!(windowId is Num)) {
  System.print(" Error: Focused window has no numeric id")
  ws.close()
  Fiber.abort("No window id")
}
// Formats a number as lowercase hexadecimal text (no "0x" prefix).
//
// num: the Num to format. Any fractional part is discarded first, because a
//      string subscript must be an integer. Negative values are rendered
//      with a leading "-" instead of collapsing to "0".
// Returns "0" when the truncated value is zero.
var toHex = Fn.new { |num|
  var digits = "0123456789abcdef"
  var remaining = num.truncate
  var sign = ""
  if (remaining < 0) {
    sign = "-"
    remaining = -remaining
  }

  var hex = ""
  while (remaining > 0) {
    // Peel off the least-significant hex digit and prepend it.
    hex = digits[remaining % 16] + hex
    remaining = (remaining / 16).floor
  }
  if (hex.count == 0) return "0"
  return sign + hex
}
// Steps 3-5: report the window that was found, then drive the search by
// sending keystrokes through the server. The statements are order- and
// timing-sensitive: each pause gives the browser time to react before the
// next command is sent.
System.print(" Browser window found: 0x%(toHex.call(windowId))")
System.print("3. Waiting for page to load...")
Timer.sleep(2000)
System.print("4. Searching for 'ponies'...")
// NOTE(review): no window id is passed with these commands; presumably the
// keystrokes land in whichever window has focus - confirm against the API.
sendCommand.call({"command": "key_type", "text": "ponies"})
// Short pause between typing the query and submitting it.
Timer.sleep(300)
sendCommand.call({"command": "key_tap", "keysym": "Return"})
System.print("5. Waiting for search results...")
Timer.sleep(4000)
// Step 6: for each of scrollCount pages, take a screenshot, run OCR on it
// and collect any recognised text, scrolling down between pages.
//
// sendCommand may return null (no text reply), so every reply is checked
// to be a map before it is indexed. A failed screenshot or OCR no longer
// skips the scroll step: otherwise the next iteration would capture the
// same viewport again while labelling it as the following page.
var scrollCount = 4
var allText = []
for (i in 0...scrollCount) {
  var page = i + 1
  System.print("")
  System.print("6.%(page). Taking screenshot (page %(page)/%(scrollCount))...")
  var screenshotResult = sendCommand.call({"command": "screenshot", "mode": "active"})
  if (!(screenshotResult is Map)) {
    System.print(" Screenshot error: no usable response")
  } else if (screenshotResult["status"] != "ok") {
    System.print(" Screenshot error: %(screenshotResult["message"])")
  } else {
    var width = screenshotResult["width"]
    var height = screenshotResult["height"]
    System.print(" Screenshot captured: %(width)x%(height)")
    // The image payload is passed straight back to the server for OCR.
    var imageData = screenshotResult["data"]
    System.print(" Running OCR...")
    var ocrResult = sendCommand.call({"command": "ocr", "image": imageData})
    if (!(ocrResult is Map)) {
      System.print(" OCR error: no usable response")
    } else if (ocrResult["status"] != "ok") {
      System.print(" OCR error: %(ocrResult["message"])")
    } else {
      var rawConfidence = ocrResult["confidence"]
      if (rawConfidence is Num) {
        // Scaled by 100 and floored for display as a whole percentage.
        var confidence = (rawConfidence * 100).floor
        System.print(" OCR Confidence: %(confidence) percent")
      } else {
        System.print(" OCR Confidence: unknown")
      }
      var text = ocrResult["text"]
      if (text is String && text.count > 0) {
        allText.add("--- Page %(page) ---\n%(text)")
      }
    }
  }
  // Move on to the next page, except after the last one.
  if (i < scrollCount - 1) {
    System.print(" Scrolling down...")
    sendCommand.call({"command": "mouse_scroll", "direction": "down", "amount": 5})
    Timer.sleep(1500)
  }
}
// Final report: print every collected page of text between separator
// rules (or a placeholder when nothing was collected), then close the
// WebSocket connection.
var reportSeparator = "=" * 50
System.print("")
System.print(reportSeparator)
System.print("COMBINED EXTRACTED TEXT:")
System.print(reportSeparator)
if (allText.isEmpty) {
  System.print("(No text detected)")
} else {
  for (pageText in allText) {
    System.print("")
    System.print(pageText)
  }
}
System.print("")
System.print(reportSeparator)
System.print("Demo complete - extracted text from %(allText.count) pages")
ws.close()