From 854045ceded6124aae73410301f09990376a2108 Mon Sep 17 00:00:00 2001 From: retoor Date: Tue, 3 Mar 2026 20:17:52 +0100 Subject: [PATCH] Initial. --- Makefile | 25 ++ README.md | 78 ++++ apidocs.md | 844 ++++++++++++++++++++++++++++++++++++++++++++ config.json.example | 5 + sneknim.nimble | 11 + src/nim.cfg | 1 + src/sneknim.nim | 171 +++++++++ 7 files changed, 1135 insertions(+) create mode 100644 Makefile create mode 100644 README.md create mode 100644 apidocs.md create mode 100644 config.json.example create mode 100644 sneknim.nimble create mode 100644 src/nim.cfg create mode 100644 src/sneknim.nim diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..b081d37 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +# retoor + +NIMBLE_BIN := $(HOME)/.nimble/bin +NIM := $(NIMBLE_BIN)/nim +NIMBLE := $(NIMBLE_BIN)/nimble +BIN := sneknim + +export PATH := $(NIMBLE_BIN):$(PATH) + +.PHONY: all build run clean deps + +all: build + +deps: + $(NIMBLE) install -d -y + +build: deps + $(NIMBLE) build + +run: build + ./$(BIN) nim.json + +clean: + rm -f $(BIN) + rm -rf src/nimcache diff --git a/README.md b/README.md new file mode 100644 index 0000000..2bc92e2 --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ +# Sneknim + +retoor + +Snek chat bot written in Nim. Connects to the Snek platform via WebSocket, uses DeepSeek for LLM responses, supports tool calling with web search and deep research capabilities. + +## Requirements + +- Nim >= 2.0.0 +- `DEEPSEEK_API_KEY` environment variable + +## Build + +```sh +nimble install +nimble build +``` + +## Configuration + +Copy `config.json.example` to `config.json` and fill in your Snek credentials: + +```json +{ + "username": "botname", + "password": "botpassword", + "system_message": "You are a helpful assistant." 
+} +``` + +## Usage + +```sh +export DEEPSEEK_API_KEY=your_key_here +./sneknim config.json +``` + +Or with the `--config` flag: + +```sh +./sneknim --config config.json +``` + +## Features + +- WebSocket RPC connection with automatic reconnection and exponential backoff +- DeepSeek LLM integration with tool calling support +- Per-channel conversation context with token budgeting +- Web search via rsearch API +- Deep research with iterative search, synthesis, and validation +- Live progress updates via streaming messages +- Typing indicators with random bright colors +- Bot name sanitization to prevent cross-triggering +- Ping/pong auto-response +- Channel join/leave commands via mentions + +## Architecture + +Single-threaded async via `std/asyncdispatch`. Each incoming message is handled concurrently across channels. Per-channel conversation contexts are stored in a table and trimmed to fit within the 120K token budget. + +## Project Structure + +``` +src/ +├── sneknim.nim Entry point, message handling loop +└── sneknim/ + ├── constants.nim Protocol and timing constants + ├── types.nim Shared type definitions + ├── config.nim JSON config loading + ├── color.nim HSL bright color generation + ├── rpc.nim Snek WebSocket RPC client + ├── router.nim Message classification and dispatch + ├── deepseek.nim DeepSeek API client + ├── context.nim Conversation context management + ├── tools.nim Tool registry and execution + ├── research.nim Deep research state machine + └── search.nim rsearch API client +``` diff --git a/apidocs.md b/apidocs.md new file mode 100644 index 0000000..a998327 --- /dev/null +++ b/apidocs.md @@ -0,0 +1,844 @@ +# Snek RPC Protocol API Specification + +retoor + +Version 1.0 — February 2026 + +--- + +## Table of Contents + +1. [Protocol Overview](#1-protocol-overview) +2. [Connection](#2-connection) +3. [Request/Response Format](#3-requestresponse-format) +4. [Authentication](#4-authentication) +5. [Channel Operations](#5-channel-operations) +6. 
[Messaging](#6-messaging) +7. [Events](#7-events) +8. [Visual Feedback](#8-visual-feedback) +9. [Message Routing](#9-message-routing) +10. [Streaming](#10-streaming) +11. [Image Handling](#11-image-handling) +12. [Tool System](#12-tool-system) +13. [Error Handling](#13-error-handling) +14. [Constants Reference](#14-constants-reference) + +--- + +## 1. Protocol Overview + +The Snek RPC protocol is a JSON-based remote procedure call protocol transported over WebSocket. Clients send method invocations as JSON objects and receive corresponding responses matched by a unique call identifier. The server also pushes unsolicited events (messages, typing indicators, joins/leaves) over the same connection. + +Key characteristics: + +- **Transport**: WebSocket (RFC 6455) +- **Encoding**: JSON over WebSocket text frames +- **Concurrency model**: Multiplexed — multiple outstanding calls share one connection, matched by `callId` +- **Direction**: Bidirectional — client sends requests, server sends responses and pushes events + +--- + +## 2. Connection + +### Endpoint + +``` +wss://snek.molodetz.nl/rpc.ws +``` + +Plaintext variant (development only): + +``` +ws://snek.molodetz.nl/rpc.ws +``` + +### WebSocket Parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| Heartbeat interval | 30 seconds | WebSocket ping/pong keepalive (`WS_HEARTBEAT`) | +| Request timeout | 190 seconds | Maximum wait for an RPC response (`DEFAULT_REQUEST_TIMEOUT`) | +| Receive retry delay | 1.0 seconds | Delay before retrying after a receive error (`WS_RECEIVE_RETRY_DELAY`) | + +### Connection Lifecycle + +``` +1. Open WebSocket to wss://snek.molodetz.nl/rpc.ws +2. Call login(username, password) +3. Call get_user(null) to retrieve authenticated user info +4. Call get_channels() to enumerate available channels +5. Enter receive loop — process events and messages +6. 
On disconnect — reconnect with exponential backoff +``` + +### Reconnection Strategy + +| Parameter | Value | +|-----------|-------| +| Max retries | 3 (`RECONNECT_MAX_RETRIES`) | +| Initial delay | 1.0 seconds (`RECONNECT_INITIAL_DELAY`) | +| Backoff factor | 2.0x (`RECONNECT_BACKOFF_FACTOR`) | + +Delays follow: 1s → 2s → 4s. After exhausting retries the client should re-establish the full connection from step 1. + +--- + +## 3. Request/Response Format + +### Client Request + +Every RPC call from client to server uses the following JSON structure: + +```json +{ + "method": "<method_name>", + "args": [<arg1>, <arg2>, ...], + "kwargs": {}, + "callId": "<unique_id>" +} +``` + +| Field | Type | Required | Description | +|-------|------|----------|-------------| +| `method` | string | yes | RPC method name | +| `args` | array | yes | Positional arguments (may be empty `[]`) | +| `kwargs` | object | yes | Keyword arguments (may be empty `{}`) | +| `callId` | string | yes | Client-generated unique ID to correlate response. Recommended: 16-character hex string (e.g., `os.urandom(8).hex()`) or UUID v4. | + +### Server Response + +The server responds with a JSON object containing the same `callId`: + +```json +{ + "callId": "<unique_id>", + "data": <return_value> +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `callId` | string | Matches the request `callId` | +| `data` | any | Return value — object, array, string, or null depending on the method | + +Additional top-level fields may be present depending on context (e.g., `event`, `message`, `username`). + +### Fire-and-Forget Requests + +Some calls do not require a response. The client sends the request normally but does not wait for a matching `callId` response. This is a client-side optimization — the server may still send a response. + +--- + +## 4. Authentication + +### login + +Authenticate with the Snek platform. 
+ +**Request:** + +```json +{ + "method": "login", + "args": ["", ""], + "kwargs": {}, + "callId": "" +} +``` + +**Response:** + +```json +{ + "callId": "", + "data": { ... } +} +``` + +Must be the first call after establishing the WebSocket connection. + +### get_user + +Retrieve user information. + +**Request:** + +```json +{ + "method": "get_user", + "args": [null], + "kwargs": {}, + "callId": "" +} +``` + +Pass `null` as the sole argument to retrieve the authenticated user. + +**Response:** + +```json +{ + "callId": "", + "data": { + "username": "botname", + "nick": "BotNick" + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `username` | string | Login username (unique identifier) | +| `nick` | string | Display name | + +--- + +## 5. Channel Operations + +### get_channels + +Retrieve all channels visible to the authenticated user. + +**Request:** + +```json +{ + "method": "get_channels", + "args": [], + "kwargs": {}, + "callId": "" +} +``` + +**Response:** + +```json +{ + "callId": "", + "data": [ + { + "uid": "channel_unique_id", + "name": "Channel Name", + "tag": "dm" + }, + ... + ] +} +``` + +### Channel Object + +| Field | Type | Description | +|-------|------|-------------| +| `uid` | string | Unique channel identifier. Matches pattern `^[a-zA-Z0-9_-]+$` | +| `name` | string | Human-readable channel name | +| `tag` | string | Channel type. `"dm"` indicates a direct message channel | + +### Channel Types + +| Tag | Behavior | +|-----|----------| +| `"dm"` | Direct message — bot always responds to messages | +| other / none | Public channel — bot responds only if explicitly joined or triggered | + +--- + +## 6. Messaging + +### send_message + +Send a message to a channel. 
+ +**Request:** + +```json +{ + "method": "send_message", + "args": ["<channel_uid>", "<message_text>", <is_final>], + "kwargs": {}, + "callId": "<unique_id>" +} +``` + +| Argument | Type | Required | Description | +|----------|------|----------|-------------| +| `channel_uid` | string | yes | Target channel UID | +| `message_text` | string | yes | Message content (Markdown supported) | +| `is_final` | boolean | yes | `true` for complete messages, `false` for streaming partial updates | + +**Response:** + +```json +{ + "callId": "<unique_id>", + "data": { ... } +} +``` + +### Drive URL Rewriting + +Relative drive paths are rewritten before processing: + +``` +(/drive.bin → (https://snek.molodetz.nl/drive.bin +``` + +--- + +## 7. Events + +The server pushes unsolicited events over the WebSocket connection. Events do not have a `callId` matching any pending request. + +### Event Structure + +```json +{ + "event": "<event_name>", + "data": { ... } +} +``` + +### Message Event + +Messages appear as a special case — they carry top-level fields instead of being nested under `data`: + +```json +{ + "event": "message", + "message": "Hello world", + "username": "sender_username", + "user_nick": "SenderNick", + "channel_uid": "target_channel_uid", + "is_final": true +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `event` | string | `"message"` | +| `message` | string | Message content | +| `username` | string | Sender's username | +| `user_nick` | string | Sender's display name | +| `channel_uid` | string | Channel where the message was sent | +| `is_final` | boolean | `true` for complete messages, `false` for streaming updates | + +Clients should ignore messages where `is_final` is `false` unless implementing a streaming display. + +### set_typing Event + +Server-pushed event indicating a user is typing or providing visual feedback. + +```json +{ + "event": "set_typing", + "data": { ... 
} +} +``` + +### join / leave Events + +```json +{ + "event": "join", + "data": { + "channel_uid": "<channel_uid>" + } +} +``` + +```json +{ + "event": "leave", + "data": { + "channel_uid": "<channel_uid>" + } +} +``` + +### Event Handling Pattern + +Events are dispatched to handler methods named `on_<event_name>`. The `data` object is unpacked as keyword arguments: + +``` +Event: {"event": "join", "data": {"channel_uid": "abc123"}} +Handler: on_join(channel_uid="abc123") +``` + +Unknown events are silently ignored. + +--- + +## 8. Visual Feedback + +### set_typing + +Set a colored typing indicator for the bot in a channel. Used for visual "thinking" feedback. + +**Request:** + +```json +{ + "method": "set_typing", + "args": ["<channel_uid>", "<html_color_code>"], + "kwargs": {}, + "callId": "<unique_id>" +} +``` + +| Argument | Type | Description | +|----------|------|-------------| +| `channel_uid` | string | Target channel | +| `html_color_code` | string | HTML hex color code (e.g., `"#FF0000"`) | + +### Color Generation + +Random bright colors are generated using HSL with these ranges: + +| Component | Min | Max | +|-----------|-----|-----| +| Hue | 0.0 | 1.0 | +| Saturation | 0.7 | 1.0 | +| Lightness | 0.5 | 0.7 | + +The color must match the pattern `^#[0-9A-Fa-f]{6}$`. + +--- + +## 9. 
Message Routing + +Incoming messages are classified and routed in the following priority order: + +| Priority | Condition | Handler | +|----------|-----------|---------| +| 1 | `username == self.username` | `on_own_message(channel_uid, message)` | +| 2 | Message starts with `"ping"` | `on_ping(username, user_nick, channel_uid, message_after_ping)` | +| 3 | Message contains `@nick join` or `@username join` | `on_join(channel_uid)` | +| 4 | Message contains `@nick leave` or `@username leave` | `on_leave(channel_uid)` | +| 5 | Message contains `@nick` or `@username` | `on_mention(username, user_nick, channel_uid, message)` | +| 6 | Default | `on_message(username, user_nick, channel_uid, message)` | + +### Channel Permission Rules + +For `on_message` processing, the bot applies these rules: + +1. **DM channels** (`tag == "dm"`): Always respond. +2. **Public channels**: Respond only if: + - The bot has been explicitly joined to the channel, OR + - The message matches a configured trigger pattern, OR + - The bot's username appears in the message text. + +### Ping/Pong + +Messages starting with `"ping"` trigger an automatic `"pong"` response: + +``` +Incoming: "ping hello" +Response: "pong hello" +``` + +--- + +## 10. Streaming + +The protocol supports incremental message delivery using the `is_final` field on `send_message`. + +### Sending Streaming Messages + +``` +1. send_message(channel_uid, "Partial cont...", false) ← partial update +2. send_message(channel_uid, "Partial content h...", false) ← partial update +3. send_message(channel_uid, "Partial content here.", true) ← final message +``` + +Each partial message replaces the previous one in the UI. The final message (`is_final=true`) marks the message as complete. + +### Receiving Streaming Messages + +When receiving events, messages with `is_final=false` represent in-progress content from another user or bot. 
Standard bot implementations skip non-final messages and only process the final version: + +``` +if not data.is_final: + continue +``` + +### Streaming Update Interval + +The minimum interval between streaming updates is `0.0` seconds (`STREAMING_UPDATE_INTERVAL`), meaning updates are sent as fast as they are generated. + +--- + +## 11. Image Handling + +### URL Extraction + +Image URLs are extracted from message text using the pattern: + +``` +https?://\S+\.(?:png|jpg|jpeg|gif|bmp|webp|svg)(?:\?\S*)? +``` + +Matched URLs are stripped of trailing `.`, `'`, and `"` characters. + +### Image Formats + +Two encoding formats are supported: + +#### OpenAI Format (default) + +Images are sent as separate content blocks in the OpenAI multi-modal message format: + +```json +{ + "role": "user", + "content": [ + { + "type": "text", + "text": "message text with URL removed" + }, + { + "type": "image_url", + "image_url": { + "url": "data:image/png;base64," + } + } + ] +} +``` + +#### DeepSeek Format + +Images are inlined into the text content as tagged references: + +```json +{ + "role": "user", + "content": [ + { + "type": "text", + "text": "message with [image: data:image/png;base64,] inline" + } + ] +} +``` + +### MIME Type Detection + +MIME types are resolved in order: + +1. File extension via `mimetypes.guess_type()` +2. Magic bytes detection: + +| Bytes | MIME Type | +|-------|-----------| +| `\x89PNG\r\n\x1a\n` | `image/png` | +| `\xff\xd8` | `image/jpeg` | +| `GIF87a` or `GIF89a` | `image/gif` | +| `RIFF....WEBP` | `image/webp` | + +3. Fallback: `image/png` + +--- + +## 12. Tool System + +The tool system follows the OpenAI function calling specification. Tools are serialized as function definitions, sent alongside the chat completion request, and executed when the LLM returns `tool_calls`. 
+ +### Tool Definition Schema + +Each tool is serialized as: + +```json +{ + "type": "function", + "function": { + "name": "", + "description": "", + "parameters": { + "type": "object", + "properties": { + "": { + "type": "", + "default": "" + } + }, + "required": [""] + } + } +} +``` + +### Type Mapping + +| Python Type | JSON Schema Type | +|-------------|-----------------| +| `str` | `"string"` | +| `int` | `"integer"` | +| `bool` | `"boolean"` | +| `list` | `"array"` | +| `dict` | `"object"` | +| `None` | `"null"` | +| (default) | `"string"` | + +Parameters with default values are optional; parameters without defaults are listed in `required`. + +### Tool Call Flow + +``` +1. Client sends chat completion request with tools array +2. LLM returns response with tool_calls array +3. Client executes each tool call +4. Client sends tool results back as role:"tool" messages +5. Client sends another chat completion request with updated context +6. Repeat until LLM returns a text response without tool_calls +``` + +### Tool Call Response (from LLM) + +```json +{ + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "database_find", + "arguments": "{\"table\": \"users\", \"query\": {\"active\": true}}" + } + } + ] +} +``` + +### Tool Result Message + +```json +{ + "role": "tool", + "tool_call_id": "call_abc123", + "content": "" +} +``` + +### Tool Call Limits + +| Parameter | Value | Description | +|-----------|-------|-------------| +| Max depth | 25 | Maximum consecutive tool call rounds (`MAX_TOOL_CALL_DEPTH`) | +| Max repeated calls | 5 | Maximum identical tool invocations (`MAX_REPEATED_TOOL_CALLS`) | +| Max consecutive errors | 3 | Abort tool loop after 3 consecutive errors | + +### Method Exclusions + +Methods are excluded from tool serialization if: + +- The name starts with `_` +- The name is `serialize` or `handle` +- The attribute is not callable + +--- + +## 13. 
Error Handling + +### Exception Hierarchy + +``` +BotError +├── ConnectionError Connection/WebSocket failures +├── AuthenticationError Login failures +├── RPCError RPC call failures, timeouts +├── ToolError Tool execution failures +├── DatabaseError Database operation failures +├── ValidationError Input validation failures +└── CircuitBreakerOpenError Circuit breaker tripped +``` + +### RPC Error Conditions + +| Condition | Behavior | +|-----------|----------| +| WebSocket closed/closing | `RPCError` raised | +| WebSocket error frame | `RPCError` raised | +| Response timeout (190s) | `RPCError` raised | +| Invalid JSON in response | Logged as warning, skipped | +| Connection closed during call | `RPCError` raised | + +### Reconnection on Send Failure + +When `send_message` fails, the client retries with exponential backoff: + +``` +Attempt 1: send → fail → wait 1s +Attempt 2: reconnect + send → fail → wait 2s +Attempt 3: reconnect + send → fail → raise ConnectionError +``` + +Each reconnection attempt performs a full login sequence on a new WebSocket. + +### Circuit Breaker + +Tool execution is protected by a circuit breaker pattern: + +| Parameter | Value | +|-----------|-------| +| Failure threshold | 5 consecutive failures | +| Recovery timeout | 60 seconds | +| Half-open max calls | 3 | + +States: `CLOSED` → (threshold exceeded) → `OPEN` → (timeout elapsed) → `HALF_OPEN` → (success) → `CLOSED` + +When the circuit breaker is open, tool calls fail immediately with `CircuitBreakerOpenError`. + +--- + +## 14. 
Constants Reference + +### Connection + +| Constant | Value | Description | +|----------|-------|-------------| +| `DEFAULT_WS_URL` | `wss://snek.molodetz.nl/rpc.ws` | Default WebSocket endpoint | +| `DEFAULT_OPENAI_URL` | `https://api.openai.com` | Default LLM API base URL | +| `DEFAULT_SEARCH_API_BASE` | `https://search.molodetz.nl` | Default search API endpoint | +| `WS_HEARTBEAT` | 30.0s | WebSocket heartbeat interval | +| `DEFAULT_REQUEST_TIMEOUT` | 190s | RPC call timeout | +| `HTTP_CONNECT_TIMEOUT` | 90.0s | HTTP TCP connect timeout | + +### Reconnection + +| Constant | Value | Description | +|----------|-------|-------------| +| `RECONNECT_MAX_RETRIES` | 3 | Maximum reconnection attempts | +| `RECONNECT_INITIAL_DELAY` | 1.0s | First retry delay | +| `RECONNECT_BACKOFF_FACTOR` | 2.0 | Delay multiplier per retry | +| `WS_RECEIVE_RETRY_DELAY` | 1.0s | Delay after receive error | + +### Concurrency + +| Constant | Value | Description | +|----------|-------|-------------| +| `MAX_CONCURRENT_OPERATIONS` | 100 | WebSocket semaphore limit | +| `MAX_CONCURRENT_REQUESTS` | 10 | HTTP connection pool limit | +| `THREAD_POOL_WORKERS` | 8 | Thread pool for blocking operations | +| `PROCESS_POOL_WORKERS` | 4 | Process pool for CPU-intensive tasks | +| `EXECUTOR_TIMEOUT` | 30.0s | Thread/process pool call timeout | + +### Message Handling + +| Constant | Value | Description | +|----------|-------|-------------| +| `MAX_MESSAGE_HISTORY` | 50 | Recent messages retained in memory | +| `DEFAULT_OPENAI_LIMIT` | 200 | Max messages sent to LLM per session | +| `CONTEXT_WINDOW_TOKEN_LIMIT` | 120000 | Token budget for LLM context | +| `STREAMING_UPDATE_INTERVAL` | 0.0s | Min interval between streaming updates | + +### Tool Limits + +| Constant | Value | Description | +|----------|-------|-------------| +| `MAX_TOOL_CALL_DEPTH` | 25 | Max consecutive tool call rounds | +| `MAX_REPEATED_TOOL_CALLS` | 5 | Max identical tool invocations | +| `MAX_RETRY_ATTEMPTS` | 3 | General retry 
limit | + +### Timing + +| Constant | Value | Description | +|----------|-------|-------------| +| `TASK_CHECK_INTERVAL` | 0.5s | Service loop tick interval | +| `THINKING_TASK_INTERVAL` | 1.0s | Typing indicator refresh interval | +| `SERVICE_CLEANUP_INTERVAL` | 3600s | Periodic cleanup cycle | +| `SHUTDOWN_TIMEOUT` | 5.0s | Max wait for graceful shutdown | +| `MIN_COUNTER_UPDATE_INTERVAL` | 240s | Debounce for counter updates | + +### Agentic + +| Constant | Value | Description | +|----------|-------|-------------| +| `AGENTIC_MAX_RESEARCH_QUERIES` | 8 | Max search queries per research task | +| `AGENTIC_MAX_PLAN_STEPS` | 10 | Max steps in a generated plan | +| `AGENTIC_SCRATCHPAD_SIZE` | 50 | Scratchpad buffer size | + +### Visual + +| Constant | Value | Description | +|----------|-------|-------------| +| `COLOR_HUE_MIN` | 0.0 | HSL hue range start | +| `COLOR_HUE_MAX` | 1.0 | HSL hue range end | +| `COLOR_SATURATION_MIN` | 0.7 | HSL saturation range start | +| `COLOR_SATURATION_MAX` | 1.0 | HSL saturation range end | +| `COLOR_LIGHTNESS_MIN` | 0.5 | HSL lightness range start | +| `COLOR_LIGHTNESS_MAX` | 0.7 | HSL lightness range end | + +### Media + +| Constant | Value | Description | +|----------|-------|-------------| +| `MEDIA_HUNT_DEDUP_MAX_SIZE` | 10000 | Dedup cache max entries | +| `MEDIA_HUNT_DEDUP_TTL` | 86400s | Dedup cache TTL (24 hours) | +| `MEDIA_HUNT_HEAD_TIMEOUT` | 5.0s | HEAD request timeout | +| `MEDIA_HUNT_FETCH_TIMEOUT` | 30.0s | Full fetch timeout | + +### Bot Name Sanitization + +The following bot names are sanitized in outgoing messages to prevent cross-triggering: + +``` +snek, grok, snik, lisa, gemma, joanne, ira, thomas +``` + +Sanitization inserts a hyphen after the first character (e.g., `snek` → `s-nek`). 
+ +--- + +## Appendix A: Complete Connection Example + +``` +CLIENT → WebSocket CONNECT wss://snek.molodetz.nl/rpc.ws +SERVER ← 101 Switching Protocols + +CLIENT → {"method":"login","args":["mybot","mypassword"],"kwargs":{},"callId":"a1b2c3d4e5f6g7h8"} +SERVER ← {"callId":"a1b2c3d4e5f6g7h8","data":{...}} + +CLIENT → {"method":"get_user","args":[null],"kwargs":{},"callId":"b2c3d4e5f6g7h8a1"} +SERVER ← {"callId":"b2c3d4e5f6g7h8a1","data":{"username":"mybot","nick":"MyBot"}} + +CLIENT → {"method":"get_channels","args":[],"kwargs":{},"callId":"c3d4e5f6g7h8a1b2"} +SERVER ← {"callId":"c3d4e5f6g7h8a1b2","data":[{"uid":"ch1","name":"general","tag":"public"},{"uid":"ch2","name":"DM","tag":"dm"}]} + +--- receive loop --- + +SERVER ← {"event":"message","message":"hello @MyBot","username":"alice","user_nick":"Alice","channel_uid":"ch1","is_final":true} + +CLIENT → {"method":"send_message","args":["ch1","hey, what's up?",true],"kwargs":{},"callId":"d4e5f6g7h8a1b2c3"} +SERVER ← {"callId":"d4e5f6g7h8a1b2c3","data":{...}} +``` + +## Appendix B: Streaming Example + +``` +CLIENT → {"method":"set_typing","args":["ch1","#FF6B35"],"kwargs":{},"callId":"e5f6g7h8a1b2c3d4"} + +CLIENT → {"method":"send_message","args":["ch1","Working on",false],"kwargs":{},"callId":"f6g7h8a1b2c3d4e5"} +CLIENT → {"method":"send_message","args":["ch1","Working on it...",false],"kwargs":{},"callId":"g7h8a1b2c3d4e5f6"} +CLIENT → {"method":"send_message","args":["ch1","Working on it... 
done!",true],"kwargs":{},"callId":"h8a1b2c3d4e5f6g7"} +``` + +## Appendix C: Bot State Machine + +``` +INITIALIZING → INITIALIZED → CONNECTING → CONNECTED → RUNNING → SHUTTING_DOWN → SHUTDOWN + ↓ + ERROR +``` + +| State | Description | +|-------|-------------| +| `INITIALIZING` | HTTP sessions, signal handlers, and background tasks being set up | +| `INITIALIZED` | Setup complete, ready to connect | +| `CONNECTING` | WebSocket connection in progress | +| `CONNECTED` | Authenticated and channels enumerated | +| `RUNNING` | Processing messages in the receive loop | +| `SHUTTING_DOWN` | Graceful shutdown initiated, cancelling background tasks | +| `SHUTDOWN` | All resources released | +| `ERROR` | Initialization or fatal runtime failure | diff --git a/config.json.example b/config.json.example new file mode 100644 index 0000000..585da2b --- /dev/null +++ b/config.json.example @@ -0,0 +1,5 @@ +{ + "username": "botname", + "password": "botpassword", + "system_message": "You are a helpful assistant on the Snek chat platform. Be concise and accurate." 
+}
diff --git a/sneknim.nimble b/sneknim.nimble
new file mode 100644
index 0000000..5bd0e74
--- /dev/null
+++ b/sneknim.nimble
@@ -0,0 +1,11 @@
+# retoor
+
+version = "1.0.0"
+author = "retoor"
+description = "Snek chat bot with DeepSeek LLM and tool support"
+license = "MIT"
+srcDir = "src"
+bin = @["sneknim"]
+
+requires "nim >= 2.0.0"
+requires "ws >= 0.5.0"
diff --git a/src/nim.cfg b/src/nim.cfg
new file mode 100644
index 0000000..521e21d
--- /dev/null
+++ b/src/nim.cfg
@@ -0,0 +1 @@
+-d:ssl
diff --git a/src/sneknim.nim b/src/sneknim.nim
new file mode 100644
index 0000000..3d428d1
--- /dev/null
+++ b/src/sneknim.nim
@@ -0,0 +1,211 @@
+# retoor
+
+import std/[asyncdispatch, json, os, strutils, sets, tables, random, strformat, logging]
+import sneknim/[constants, types, config, color, rpc, router, deepseek, context, tools]
+
+var
+  botConfig: BotConfig
+  rpcClient: SnekRpc
+  toolRegistry: ToolRegistry
+  contexts: Table[string, ChannelContext]
+  joinedChannels: HashSet[string]
+  channelMap: Table[string, ChatChannel]
+
+proc getChannelTag(channelUid: string): string =
+  ## Returns the tag stored in `channelMap` for `channelUid`, or "" when the
+  ## channel is not in the map.
+  if channelUid in channelMap:
+    return channelMap[channelUid].tag
+  return ""
+
+proc getOrCreateContext(channelUid: string): ChannelContext =
+  ## Returns the conversation context for `channelUid`, creating it with the
+  ## configured system message on first use.
+  if channelUid notin contexts:
+    contexts[channelUid] = newChannelContext(channelUid, botConfig.systemMessage)
+  return contexts[channelUid]
+
+proc sendProgress(channelUid, text: string, isFinal: bool): Future[void] {.async.} =
+  ## Progress callback installed on the tool registry; forwards to `sendMessage`.
+  await rpcClient.sendMessage(channelUid, text, isFinal)
+
+proc showTyping(channelUid: string) {.async.} =
+  ## Sets a random-colour typing indicator. Failures are logged, never raised,
+  ## so `asyncCheck showTyping(...)` cannot re-raise inside the dispatcher.
+  try:
+    let fut = rpcClient.setTyping(channelUid, randomBrightColor())
+    yield fut
+    if fut.failed:
+      debug("setTyping failed in " & channelUid & ": " & fut.readError().msg)
+  except CatchableError as e:
+    debug("setTyping failed in " & channelUid & ": " & e.msg)
+
+proc handleMessage(username, userNick, channelUid, message: string) {.async.} =
+  ## Classifies one incoming message and performs the resulting action:
+  ## ignore, pong, join/leave bookkeeping, or an LLM reply with tool calls.
+  let action = classifyWithJoined(
+    username, message,
+    rpcClient.username, rpcClient.nick,
+    channelUid, joinedChannels,
+    getChannelTag(channelUid)
+  )
+
+  debug("Message action: " & $action & " from " & username & " in " & channelUid)
+
+  case action
+  of Ignore:
+    return
+
+  of RespondPing:
+    # Echo whatever follows the 4-character prefix; a bare "ping" gets "pong".
+    let rest = if message.len > 4: message[4 .. ^1].strip() else: ""
+    let reply = if rest.len > 0: "pong " & rest else: "pong"
+    await rpcClient.sendMessage(channelUid, reply, true)
+
+  of HandleJoin:
+    joinedChannels.incl(channelUid)
+    info("Joined channel: " & channelUid)
+    await rpcClient.sendMessage(channelUid, "Joined channel.", true)
+
+  of HandleLeave:
+    joinedChannels.excl(channelUid)
+    info("Left channel: " & channelUid)
+    await rpcClient.sendMessage(channelUid, "Left channel.", true)
+
+  of RespondChat:
+    try:
+      asyncCheck showTyping(channelUid)
+      let ctx = getOrCreateContext(channelUid)
+      ctx.addUserMessage(username, message)
+      var depth = 0
+      var repeatedCalls: Table[string, int]
+      var consecutiveErrors = 0
+      var answered = false
+
+      while depth < MaxToolCallDepth:
+        inc depth
+        debug(fmt"Tool call iteration {depth}/{MaxToolCallDepth} in {channelUid}")
+        asyncCheck showTyping(channelUid)
+        let payload = ctx.buildPayload()
+        let toolDefs = toolRegistry.getDefinitions()
+        let resp = await chatCompletion(payload, toolDefs)
+
+        if resp.hasKey("tool_calls") and resp["tool_calls"].kind == JArray and resp["tool_calls"].len > 0:
+          let assistantContent = resp{"content"}.getStr()
+          ctx.addAssistantToolCalls(assistantContent, resp["tool_calls"])
+          let toolCallCount = resp["tool_calls"].len
+          debug(fmt"Received {toolCallCount} tool calls")
+
+          for tc in resp["tool_calls"]:
+            let tcId = tc["id"].getStr()
+            let funcName = tc["function"]["name"].getStr()
+
+            # Once the error limit is hit, still answer the remaining calls so
+            # the context never keeps a tool_call without a tool result.
+            if consecutiveErrors >= MaxConsecutiveErrors:
+              ctx.addToolResult(tcId, funcName, "Error: skipped after repeated tool failures")
+              continue
+
+            let funcArgs =
+              try: parseJson(tc["function"]["arguments"].getStr())
+              except CatchableError: newJObject()
+
+            let callKey = funcName & ":" & $funcArgs
+            inc repeatedCalls.mgetOrPut(callKey, 0)
+            if repeatedCalls[callKey] > MaxRepeatedToolCalls:
+              warn("Tool " & funcName & " exceeded max repeated calls")
+              ctx.addToolResult(tcId, funcName, "Error: tool called too many times with same arguments")
+              continue
+
+            try:
+              let toolResult = await toolRegistry.execute(funcName, funcArgs, channelUid)
+              debug("Tool " & funcName & " result length: " & $toolResult.len)
+              ctx.addToolResult(tcId, funcName, toolResult)
+              consecutiveErrors = 0
+            except CatchableError as e:
+              error("Tool " & funcName & " failed: " & e.msg)
+              ctx.addToolResult(tcId, funcName, "Error: " & e.msg)
+              inc consecutiveErrors
+
+          if consecutiveErrors >= MaxConsecutiveErrors:
+            error("Max consecutive tool errors reached in " & channelUid)
+            break
+          continue
+
+        let content = resp{"content"}.getStr()
+        if content.len > 0:
+          let sanitized = sanitizeBotNames(content)
+          ctx.addAssistantMessage(sanitized)
+          await rpcClient.sendMessage(channelUid, sanitized, true)
+          answered = true
+        break
+
+      if not answered:
+        # Error limit, depth limit, or an empty completion: tell the user
+        # instead of going silent.
+        await rpcClient.sendMessage(channelUid, "I could not produce a response to your message.", true)
+
+    except CatchableError as e:
+      error("handleMessage failed in " & channelUid & ": " & e.msg)
+      try:
+        await rpcClient.sendMessage(channelUid, "An error occurred while processing your message.", true)
+      except CatchableError:
+        discard
+
+proc main() {.async.} =
+  ## Loads the config, connects and authenticates, installs the message
+  ## handler, then idles while the receive loop runs.
+  randomize()
+
+  let logger = newConsoleLogger(lvlDebug, "[$datetime] $levelname: ")
+  addHandler(logger)
+
+  # Accept `sneknim <path>` or `sneknim --config <path>`; default config.json.
+  let configPath =
+    if paramCount() == 0:
+      "config.json"
+    elif paramStr(1) != "--config":
+      paramStr(1)
+    elif paramCount() >= 2:
+      paramStr(2)
+    else:
+      quit("Usage: sneknim [--config] <config.json>", QuitFailure)
+
+  botConfig = loadConfig(configPath)
+  initDeepseek()
+
+  rpcClient = newSnekRpc()
+  toolRegistry = newToolRegistry()
+  toolRegistry.sendProgress = sendProgress
+  toolRegistry.registerDefaultTools()
+  contexts = initTable[string, ChannelContext]()
+  joinedChannels = initHashSet[string]()
+  channelMap = initTable[string, ChatChannel]()
+
+  info("Connecting to Snek...")
+  await rpcClient.connect()
+
+  asyncCheck rpcClient.receiveLoop(botConfig.username, botConfig.password)
+  asyncCheck rpcClient.startHeartbeat()
+
+  info("Logging in as " & botConfig.username & "...")
+  await rpcClient.login(botConfig.username, botConfig.password)
+  await rpcClient.getUser()
+  info("Authenticated as " & rpcClient.username & " (" & rpcClient.nick & ")")
+
+  await rpcClient.getChannels()
+  for ch in rpcClient.channels:
+    channelMap[ch.uid] = ch
+  info(fmt"Loaded {rpcClient.channels.len} channels")
+
+  # NOTE(review): receiveLoop is already running here; confirm it tolerates
+  # events that arrive before onMessage is assigned.
+  rpcClient.onMessage = proc(username, userNick, channelUid, message: string): Future[void] {.async.} =
+    await handleMessage(username, userNick, channelUid, message)
+
+  info("Bot is running. Listening for messages...")
+  while true:
+    await sleepAsync(60_000)
+
+when isMainModule:
+  waitFor main()