Initial commit.

This commit is contained in:
retoor 2025-10-04 13:15:17 +02:00
commit 2db324edf7
3 changed files with 1579 additions and 0 deletions

4
.gitignore vendored Normal file
View File

@ -0,0 +1,4 @@
chroma_db
*.db
uploads
*.bak

638
app.py Normal file
View File

@ -0,0 +1,638 @@
import asyncio
import hashlib
import io
import json
import os
import re
import uuid
from datetime import datetime
from typing import Optional, List
from contextlib import asynccontextmanager
from fastapi import FastAPI, UploadFile, File, Form, WebSocket, WebSocketDisconnect, HTTPException, BackgroundTasks
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from sqlmodel import Field, SQLModel, create_engine, Session, select
import chromadb
from chromadb.config import Settings
import tiktoken
import openai
import pypdf
# Configuration
# Storage locations: uploaded originals, the SQLite database and the Chroma index.
UPLOAD_DIR = "uploads"
DB_URL = "sqlite:///./database.db"
CHROMA_DIR = "./chroma_db"

# Falls back to a placeholder string when the environment variable is unset.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "your-api-key-here")

# Defaults for chunk_text(): chunk length and overlap, both counted in tokens.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

# Both storage directories are created at import time if they are missing.
for _storage_dir in (UPLOAD_DIR, CHROMA_DIR):
    os.makedirs(_storage_dir, exist_ok=True)
# Database Models
class Document(SQLModel, table=True):
    """An uploaded file together with the markdown text extracted from it."""

    # Primary key; the upload endpoint assigns a UUID4 string.
    id: Optional[str] = Field(default=None, primary_key=True)
    # File name as sent by the client.
    name: str
    # Name of the stored copy inside UPLOAD_DIR (document id + original extension).
    filename: str
    # Filled in by process_document() once conversion has run.
    markdown_content: str = Field(default="")
    upload_time: datetime
    # Defaults to "pending"; this file also writes "processing", "completed" and "failed".
    status: str = "pending"
    # Incremented by the download endpoint.
    downloads: int = 0
class SearchResult(SQLModel, table=True):
    """Cached outcome of one search query, looked up by slug."""

    id: Optional[int] = Field(default=None, primary_key=True)
    # The query text exactly as submitted.
    query: str
    # Cache key produced by generate_slug(query).
    slug: str = Field(index=True)
    # JSON object with the keys "results" and "total".
    results_json: str
    created_at: datetime
    # Tokens consumed when embedding the query.
    tokens_used: int = 0
    cost_eur: float = 0.0
class PromptResult(SQLModel, table=True):
    """Cached model answer for one query, looked up by slug."""

    id: Optional[int] = Field(default=None, primary_key=True)
    # The query text exactly as submitted.
    query: str
    # Cache key produced by generate_slug(query).
    slug: str = Field(index=True)
    # Text returned by the chat completion.
    prompt_response: str
    # JSON list of the search hits that were handed to the prompt endpoint.
    search_results_json: str
    created_at: datetime
    input_tokens: int = 0
    output_tokens: int = 0
    # Chat-completion cost plus the cost reported by the preceding search.
    cost_eur: float = 0.0
# Database Setup
# check_same_thread=False is passed straight through to the SQLite driver.
engine = create_engine(DB_URL, connect_args=dict(check_same_thread=False))


def init_db():
    """Create the tables declared on the SQLModel metadata using the module engine."""
    metadata = SQLModel.metadata
    metadata.create_all(engine)
# ChromaDB Setup
chroma_client = chromadb.PersistentClient(path=CHROMA_DIR, settings=Settings(anonymized_telemetry=False))
# get_or_create_collection replaces a try/bare-except around get_collection:
# the bare except treated *every* failure (not just "collection missing") as a
# reason to create the collection, hiding the real error.
collection = chroma_client.get_or_create_collection("documents")
# OpenAI Setup
# Configure the OpenAI module-level client with the key read from the environment.
openai.api_key = OPENAI_API_KEY
# Tokenizer used by chunk_text() to split documents by token count.
tokenizer = tiktoken.get_encoding("cl100k_base")
# WebSocket Manager
class ConnectionManager:
    """Keep one WebSocket per document id and push JSON messages to it."""

    def __init__(self):
        # document_id -> the socket most recently registered for that document
        self.active_connections: dict[str, WebSocket] = {}

    async def connect(self, document_id: str, websocket: WebSocket):
        """Accept the socket and register it, replacing any earlier socket for the id."""
        await websocket.accept()
        self.active_connections[document_id] = websocket

    def disconnect(self, document_id: str):
        """Forget the socket registered for document_id; no-op when there is none."""
        self.active_connections.pop(document_id, None)

    async def send_message(self, document_id: str, message: dict):
        """Send message as JSON to the document's socket, if one is registered.

        Delivery is best-effort: when sending fails the socket is dropped and
        the error is not raised to the caller.
        """
        websocket = self.active_connections.get(document_id)
        if websocket is None:
            return
        try:
            await websocket.send_json(message)
        except Exception:
            # Deliberately not a bare `except:` — task cancellation and
            # KeyboardInterrupt must keep propagating.
            self.disconnect(document_id)


manager = ConnectionManager()
# Lifespan
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: create the database tables before serving.

    Nothing runs after the yield, so there is no shutdown work.
    """
    init_db()
    yield
# FastAPI App
app = FastAPI(lifespan=lifespan)
# NOTE(review): every origin, method and header is allowed *and* credentials
# are enabled — confirm this is intended before exposing the service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# Helper Functions
def generate_slug(text: str) -> str:
    """Build a URL-friendly slug for text.

    The slug is the lower-cased text with punctuation removed and runs of
    whitespace/hyphens collapsed to single hyphens, cut to 50 characters,
    followed by "-" and the first 8 hex digits of the MD5 of the original text.
    """
    lowered = text.lower()
    without_punctuation = re.sub(r'[^\w\s-]', '', lowered)
    hyphenated = re.sub(r'[-\s]+', '-', without_punctuation)
    digest = hashlib.md5(text.encode()).hexdigest()
    return f"{hyphenated[:50]}-{digest[:8]}"
def convert_to_markdown(filepath: str) -> str:
    """Return the content of the file at filepath as markdown text.

    - ``.pdf``: the text of every page, each preceded by a ``## Page N`` heading.
    - ``.md``: the file content unchanged.
    - anything else: read as text, with whitespace-only lines emptied.

    Text files are decoded as UTF-8 and undecodable bytes are dropped.
    """
    extension = os.path.splitext(filepath)[1].lower()

    if extension == '.pdf':
        with open(filepath, 'rb') as pdf_file:
            reader = pypdf.PdfReader(pdf_file)
            sections = [
                f"\n\n## Page {number}\n\n{page.extract_text()}"
                for number, page in enumerate(reader.pages, start=1)
            ]
        return "".join(sections)

    with open(filepath, 'r', encoding='utf-8', errors='ignore') as text_file:
        raw = text_file.read()
    if extension == '.md':
        return raw
    # Lines holding only whitespace become empty lines; all others are kept as-is.
    return '\n'.join(line if line.strip() else '' for line in raw.split('\n'))
def chunk_text(text: str, chunk_size: int = CHUNK_SIZE, overlap: int = CHUNK_OVERLAP) -> List[dict]:
    """Split text into overlapping token windows tagged with a page number.

    Args:
        text: Markdown text; ``## Page N`` headings are used to derive page numbers.
        chunk_size: Maximum number of tokens per chunk; must be positive.
        overlap: Number of tokens shared by consecutive chunks; must satisfy
            ``0 <= overlap < chunk_size``.

    Returns:
        A list of ``{"text": str, "page": int}`` dicts. A chunk's page is the
        first page heading found inside it, otherwise the most recent heading
        seen in earlier chunks (1 before any heading has been seen).

    Raises:
        ValueError: If chunk_size or overlap is out of range. Previously such
            values made the loop run forever or skip tokens.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must be non-negative and smaller than chunk_size")

    tokens = tokenizer.encode(text)
    # Extract page numbers from markdown headers
    page_pattern = re.compile(r'##\s*Page\s+(\d+)', re.IGNORECASE)
    stride = chunk_size - overlap
    chunks = []
    current_page = 1
    for start in range(0, len(tokens), stride):
        end = start + chunk_size
        piece = tokenizer.decode(tokens[start:end])
        pages_in_piece = [int(number) for number in page_pattern.findall(piece)]
        chunks.append({
            "text": piece,
            "page": pages_in_piece[0] if pages_in_piece else current_page
        })
        if pages_in_piece:
            # Later chunks without a heading still belong to this page.
            current_page = pages_in_piece[-1]
        if end >= len(tokens):
            # This window already reached the end; another one would only
            # repeat the overlap tokens.
            break
    return chunks
async def get_embedding(text: str) -> tuple[List[float], int]:
    """Embed text with the "text-embedding-3-small" model.

    The blocking client call runs in a worker thread. Returns the embedding
    vector of the first result and the total token count reported for the call.
    """
    result = await asyncio.to_thread(
        openai.embeddings.create,
        input=text,
        model="text-embedding-3-small",
    )
    vector = result.data[0].embedding
    return vector, result.usage.total_tokens
async def detect_prompt_intent(query: str) -> bool:
    """Tell whether query looks like an instruction/question rather than a plain search.

    True when the lower-cased query contains any of the trigger phrases (plain
    substring match), or when it ends with "?" and has more than three
    whitespace-separated words.
    """
    prompt_keywords = (
        'make', 'create', 'list', 'summarize', 'explain', 'compare', 'analyze',
        'generate', 'write', 'show me', 'give me', 'find all', 'extract',
        'what are', 'how many',
    )
    lowered = query.lower()
    has_keyword = any(keyword in lowered for keyword in prompt_keywords)
    is_long_question = lowered.endswith('?') and len(query.split()) > 3
    return has_keyword or is_long_question
async def execute_prompt(query: str, search_results: List[dict]) -> tuple[str, int, int, float]:
    """Answer query with the "gpt-4o-mini" chat model, using search hits as context.

    Only the first ten hits are included in the context. Returns
    ``(answer_text, input_tokens, output_tokens, cost_eur)``.
    """
    top_hits = search_results[:10]  # Use top 10 results
    context_parts = []
    for r in top_hits:
        context_parts.append(
            f"Document: {r['name']}\nPage: {r.get('page', 'N/A')}\nContent: {r['snippet']}"
        )
    context = "\n\n".join(context_parts)

    system_message = {"role": "system", "content": "You are a helpful assistant that answers questions based on the provided document context. Be concise and accurate."}
    user_message = {"role": "user", "content": f"Context from documents:\n\n{context}\n\nUser query: {query}\n\nPlease answer based on the context provided."}

    completion = await asyncio.to_thread(
        openai.chat.completions.create,
        model="gpt-4o-mini",
        messages=[system_message, user_message],
        temperature=0.7,
        max_tokens=1000,
    )

    usage = completion.usage
    input_tokens = usage.prompt_tokens
    output_tokens = usage.completion_tokens
    # Pricing used here: $0.150 per 1M input tokens, $0.600 per 1M output tokens,
    # converted to EUR with a fixed 0.92 factor.
    cost_usd = (input_tokens / 1_000_000 * 0.150) + (output_tokens / 1_000_000 * 0.600)
    cost_eur = cost_usd * 0.92
    answer = completion.choices[0].message.content
    return answer, input_tokens, output_tokens, cost_eur
def _embedding_cost_eur(tokens: int) -> float:
    """Convert an embedding token count to EUR using the rates hard-coded in this module."""
    return (tokens / 1000) * 0.00002 * 0.92


def _set_document_status(document_id: str, status: str) -> None:
    """Persist a new status on the document row; does nothing if the row is missing."""
    with Session(engine) as session:
        doc = session.get(Document, document_id)
        if doc:
            doc.status = status
            session.add(doc)
            session.commit()


async def process_document(document_id: str, filepath: str):
    """Convert, chunk, embed and index one uploaded file, reporting progress.

    Progress is pushed through ``manager.send_message`` as dicts with the keys
    "step", "progress", "message", "tokens" and "cost_eur". On success the
    document status becomes "completed"; on any exception it becomes "failed",
    the traceback is printed, and an "error" message is sent instead of raising.
    """
    total_tokens = 0

    async def report(step: str, progress: int, message: str) -> None:
        # tokens/cost always reflect what has been spent so far (0 / 0.0 early on).
        await manager.send_message(document_id, {
            "step": step,
            "progress": progress,
            "message": message,
            "tokens": total_tokens,
            "cost_eur": _embedding_cost_eur(total_tokens)
        })

    await report("reading", 5, "Reading file...")
    try:
        await report("converting", 15, "Converting to markdown...")
        content = await asyncio.to_thread(convert_to_markdown, filepath)
        with Session(engine) as session:
            doc = session.get(Document, document_id)
            if doc:
                doc.markdown_content = content
                session.add(doc)
                session.commit()

        await report("chunking", 25, "Splitting text into chunks...")
        chunks = chunk_text(content)

        await report("embedding", 35, f"Processing {len(chunks)} chunks...")
        for i, chunk_data in enumerate(chunks):
            embedding, tokens = await get_embedding(chunk_data["text"])
            total_tokens += tokens
            collection.add(
                ids=[f"{document_id}_chunk_{i}"],
                embeddings=[embedding],
                documents=[chunk_data["text"]],
                metadatas=[{
                    "document_id": document_id,
                    "chunk_index": i,
                    "total_chunks": len(chunks),
                    "page": chunk_data["page"]
                }]
            )
            # The embedding phase spans 35% -> 90% of the progress bar.
            progress = 35 + int((i + 1) / len(chunks) * 55)
            await report("embedding", progress, f"Embedded chunk {i + 1}/{len(chunks)}")

        await report("indexing", 95, "Finalizing index...")
        _set_document_status(document_id, "completed")
        await report("completed", 100, "Processing complete!")
    except Exception as e:
        # Runs as a background task, so nothing upstream would log the failure.
        import traceback
        print(f"Processing error for document {document_id}: {str(e)}")
        traceback.print_exc()
        _set_document_status(document_id, "failed")
        # Report the real cost so far: embeddings already made were still billed.
        await report("error", 0, f"Error: {str(e)}")
# API Endpoints
@app.post("/api/upload")
async def upload_documents(
    background_tasks: BackgroundTasks,
    files: List[UploadFile] = File(...)
):
    """Store each uploaded file, register it and schedule its processing.

    Every file is saved in UPLOAD_DIR under a fresh UUID (keeping the original
    extension), recorded as a Document with status "processing", and handed to
    process_document as a background task.

    Returns ``{"documents": [{"document_id", "name", "status"}, ...]}``.
    """
    uploaded_docs = []
    for file in files:
        doc_id = str(uuid.uuid4())
        # The client may omit the file name; fall back to a placeholder so the
        # extension lookup and the `name` column do not fail.
        original_name = file.filename or "unnamed"
        file_ext = os.path.splitext(original_name)[1]
        filename = f"{doc_id}{file_ext}"
        filepath = os.path.join(UPLOAD_DIR, filename)
        # Copy in 1 MiB pieces instead of holding the whole upload in memory.
        with open(filepath, 'wb') as f:
            while chunk := await file.read(1024 * 1024):
                f.write(chunk)
        document = Document(
            id=doc_id,
            name=original_name,
            filename=filename,
            upload_time=datetime.now(),
            status="processing"
        )
        with Session(engine) as session:
            session.add(document)
            session.commit()
        background_tasks.add_task(process_document, doc_id, filepath)
        uploaded_docs.append({
            "document_id": doc_id,
            "name": original_name,
            "status": "processing"
        })
    return {"documents": uploaded_docs}
@app.get("/api/search")
async def search_documents(query: str, page: int = 1, page_size: int = 10):
    """Semantic search over the indexed chunks, cached per query slug.

    A cached SearchResult for the query's slug is returned without calling the
    embedding API. Otherwise the query is embedded, up to 50 nearest chunks are
    fetched from the vector collection, and the full result list is cached.
    Both paths return only the requested page of results.

    NOTE(review): cached entries are never invalidated in this function, so
    documents uploaded after a query was first run will not appear for it.

    Raises:
        HTTPException: 500 with the error text when anything fails.
    """
    try:
        slug = generate_slug(query)
        start_idx = (page - 1) * page_size
        end_idx = page * page_size

        # Check cache first
        with Session(engine) as session:
            cached = session.exec(select(SearchResult).where(SearchResult.slug == slug)).first()
            if cached:
                results_data = json.loads(cached.results_json)
                return {
                    # Slice like the uncached path does; the cache stores every hit.
                    "results": results_data["results"][start_idx:end_idx],
                    "total": results_data["total"],
                    "page": page,
                    "page_size": page_size,
                    "slug": slug,
                    "tokens": cached.tokens_used,
                    "cost_eur": cached.cost_eur,
                    "cached": True
                }

        collection_count = collection.count()
        if collection_count == 0:
            return {
                "results": [],
                "total": 0,
                "page": page,
                "page_size": page_size,
                "slug": slug,
                "tokens": 0,
                "cost_eur": 0.0
            }

        query_embedding, tokens = await get_embedding(query)
        cost_eur = (tokens / 1000) * 0.00002 * 0.92
        results = collection.query(
            query_embeddings=[query_embedding],
            n_results=min(50, collection_count)
        )

        search_results = []
        ids = results['ids'][0] if results['ids'] else []
        if ids:
            metadatas = results['metadatas'][0]
            snippets = results['documents'][0]
            raw_distances = results.get('distances')
            distances = raw_distances[0] if raw_distances else [0] * len(ids)
            # One session for all hits instead of one per hit.
            with Session(engine) as session:
                for metadata, snippet, distance in zip(metadatas, snippets, distances):
                    document_id = metadata['document_id']
                    doc = session.get(Document, document_id)
                    if not doc:
                        # Chunk whose document row is missing: leave it out.
                        continue
                    search_results.append({
                        "document_id": document_id,
                        "name": doc.name,
                        "snippet": snippet[:300] + "..." if len(snippet) > 300 else snippet,
                        "chunk_index": metadata['chunk_index'],
                        "page": metadata.get('page', 1),
                        "upload_date": doc.upload_time.isoformat(),
                        "score": 1 - distance
                    })

        # Cache results
        with Session(engine) as session:
            search_cache = SearchResult(
                query=query,
                slug=slug,
                results_json=json.dumps({"results": search_results, "total": len(search_results)}),
                created_at=datetime.now(),
                tokens_used=tokens,
                cost_eur=cost_eur
            )
            session.add(search_cache)
            session.commit()

        return {
            "results": search_results[start_idx:end_idx],
            "total": len(search_results),
            "page": page,
            "page_size": page_size,
            "slug": slug,
            "tokens": tokens,
            "cost_eur": cost_eur
        }
    except Exception as e:
        print(f"Search error: {str(e)}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/search/{slug}")
async def get_cached_search(slug: str):
    """Return the cached search stored under slug, or respond 404 if there is none."""
    lookup = select(SearchResult).where(SearchResult.slug == slug)
    with Session(engine) as session:
        cached = session.exec(lookup).first()
        if cached is None:
            raise HTTPException(status_code=404, detail="Search not found")
        payload = json.loads(cached.results_json)
        response = {"query": cached.query}
        response["results"] = payload["results"]
        response["total"] = payload["total"]
        response["slug"] = slug
        response["tokens"] = cached.tokens_used
        response["cost_eur"] = cached.cost_eur
        return response
@app.post("/api/prompt")
async def execute_prompt_endpoint(query: str):
    """Answer query with the chat model, grounded in search results; cached per slug.

    A cached PromptResult for the query's slug is returned as-is. Otherwise the
    search endpoint is called for the first 20 hits, the model is asked, and
    the answer is cached together with the hits and the combined cost.

    Raises:
        HTTPException: errors raised by the nested search are passed through
            unchanged; any other failure becomes a 500 with the error text.
    """
    try:
        slug = generate_slug(query)
        # Check cache
        with Session(engine) as session:
            cached = session.exec(select(PromptResult).where(PromptResult.slug == slug)).first()
            if cached:
                return {
                    "response": cached.prompt_response,
                    "search_results": json.loads(cached.search_results_json),
                    "slug": slug,
                    "input_tokens": cached.input_tokens,
                    "output_tokens": cached.output_tokens,
                    "cost_eur": cached.cost_eur,
                    "cached": True
                }

        # First get search results
        search_response = await search_documents(query, page=1, page_size=20)
        # Execute prompt with results
        response_text, input_tokens, output_tokens, cost_eur = await execute_prompt(
            query,
            search_response["results"]
        )
        # Add search cost to total
        total_cost = cost_eur + search_response.get("cost_eur", 0)

        # Cache prompt result
        with Session(engine) as session:
            prompt_cache = PromptResult(
                query=query,
                slug=slug,
                prompt_response=response_text,
                search_results_json=json.dumps(search_response["results"]),
                created_at=datetime.now(),
                input_tokens=input_tokens,
                output_tokens=output_tokens,
                cost_eur=total_cost
            )
            session.add(prompt_cache)
            session.commit()

        return {
            "response": response_text,
            "search_results": search_response["results"],
            "slug": slug,
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost_eur": total_cost
        }
    except HTTPException:
        # Already a proper HTTP error (e.g. from search_documents): do not
        # re-wrap it, which would garble its detail text.
        raise
    except Exception as e:
        print(f"Prompt error: {str(e)}")
        import traceback
        traceback.print_exc()
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/prompt/{slug}")
async def get_cached_prompt(slug: str):
    """Return the cached prompt answer stored under slug, or respond 404 if there is none."""
    lookup = select(PromptResult).where(PromptResult.slug == slug)
    with Session(engine) as session:
        cached = session.exec(lookup).first()
        if cached is None:
            raise HTTPException(status_code=404, detail="Prompt result not found")
        hits = json.loads(cached.search_results_json)
        return dict(
            query=cached.query,
            response=cached.prompt_response,
            search_results=hits,
            slug=slug,
            input_tokens=cached.input_tokens,
            output_tokens=cached.output_tokens,
            cost_eur=cached.cost_eur,
        )
@app.get("/api/documents")
async def list_documents():
    """List every stored document with its id, name, upload time, status and download count."""
    summaries = []
    with Session(engine) as session:
        for doc in session.exec(select(Document)).all():
            summaries.append({
                "id": doc.id,
                "name": doc.name,
                "upload_time": doc.upload_time.isoformat(),
                "status": doc.status,
                "downloads": doc.downloads,
            })
    return summaries
@app.get("/api/document/{document_id}")
async def get_document(document_id: str):
    """Return one document including its markdown content, or respond 404 if unknown."""
    with Session(engine) as session:
        doc = session.get(Document, document_id)
        if doc is None:
            raise HTTPException(status_code=404, detail="Document not found")
        uploaded_at = doc.upload_time.isoformat()
        return dict(
            id=doc.id,
            name=doc.name,
            markdown_content=doc.markdown_content,
            upload_time=uploaded_at,
            status=doc.status,
        )
@app.get("/api/download/{document_id}")
async def download_document(document_id: str):
    """Send the stored original of a document and count the download.

    Responds 404 when the document row or the stored file is missing. The
    download counter is only incremented when the file can actually be served.
    """
    with Session(engine) as session:
        doc = session.get(Document, document_id)
        if not doc:
            raise HTTPException(status_code=404, detail="Document not found")
        # Copy what the response needs before commit(): committing expires the
        # instance's loaded attributes.
        stored_name, display_name = doc.filename, doc.name
        filepath = os.path.join(UPLOAD_DIR, stored_name)
        if not os.path.exists(filepath):
            raise HTTPException(status_code=404, detail="File not found")
        doc.downloads += 1
        session.add(doc)
        session.commit()
    return FileResponse(filepath, filename=display_name)
@app.websocket("/ws/status/{document_id}")
async def websocket_endpoint(websocket: WebSocket, document_id: str):
    """Register a status socket for document_id and hold it open until the client leaves."""
    await manager.connect(document_id, websocket)
    try:
        # Incoming text is read and discarded; the loop only exists to notice
        # the disconnect.
        while True:
            await websocket.receive_text()
    except WebSocketDisconnect:
        manager.disconnect(document_id)
@app.get("/")
async def read_root():
    """Serve the front-end page; the path is relative to the working directory."""
    return FileResponse("index.html")
# Script entry point: run the app with uvicorn on all interfaces, port 9900.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=9900)

937
index.html Normal file
View File

@ -0,0 +1,937 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document Search</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/marked/9.1.6/marked.min.js"></script>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css">
<script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Courier New', monospace;
background: #000;
color: #fff;
}
header {
padding: 20px 40px;
border-bottom: 1px solid #333;
display: flex;
justify-content: space-between;
align-items: center;
}
.header-left {
display: flex;
align-items: center;
gap: 40px;
}
.logo {
font-size: 20px;
font-weight: normal;
color: #fff;
cursor: pointer;
}
.main-nav {
display: flex;
gap: 30px;
}
.nav-link {
color: #888;
text-decoration: none;
font-size: 14px;
transition: color 0.2s;
}
.nav-link:hover {
color: #fff;
}
.cost-display {
font-size: 12px;
color: #888;
display: flex;
gap: 15px;
}
.cost-item {
display: flex;
align-items: center;
gap: 5px;
}
.cost-value {
font-weight: 400;
color: #fff;
}
.container {
max-width: 1200px;
margin: 0 auto;
padding: 20px;
}
.tabs {
display: flex;
gap: 20px;
margin: 30px 0;
border-bottom: 1px solid #333;
}
.tab {
padding: 12px 0;
cursor: pointer;
color: #888;
font-size: 14px;
border-bottom: 2px solid transparent;
}
.tab.active {
color: #fff;
border-bottom-color: #fff;
}
.search-page {
text-align: center;
padding: 100px 0 40px;
}
.search-box {
max-width: 584px;
margin: 0 auto 30px;
position: relative;
}
.search-input {
width: 100%;
padding: 14px 50px 14px 20px;
border: 1px solid #333;
border-radius: 24px;
font-size: 16px;
outline: none;
background: #0a0a0a;
color: #fff;
font-family: 'Courier New', monospace;
}
.search-input::placeholder {
color: #555;
}
.search-input:hover, .search-input:focus {
box-shadow: 0 0 0 1px #555;
border-color: #555;
}
.search-btn {
position: absolute;
right: 15px;
top: 50%;
transform: translateY(-50%);
background: none;
border: none;
cursor: pointer;
color: #fff;
font-size: 20px;
}
.ai-response {
max-width: 652px;
margin: 30px auto;
padding: 20px;
background: #0a0a0a;
border-radius: 0;
border-left: 2px solid #fff;
display: none;
}
.ai-response.visible {
display: block;
}
.ai-response.loading {
text-align: center;
color: #888;
}
.ai-title {
font-size: 14px;
font-weight: 400;
color: #fff;
margin-bottom: 12px;
}
.ai-content {
color: #fff;
line-height: 1.6;
}
.ai-meta {
margin-top: 12px;
padding-top: 12px;
border-top: 1px solid #333;
font-size: 11px;
color: #888;
display: flex;
gap: 15px;
}
.results-container {
max-width: 652px;
margin: 0 auto;
text-align: left;
}
.result-item {
padding: 18px 0;
border-bottom: 1px solid #333;
cursor: pointer;
transition: background 0.2s;
}
.result-item:hover {
background: #0a0a0a;
margin: 0 -10px;
padding: 18px 10px;
}
.result-title {
color: #fff;
font-size: 18px;
margin-bottom: 4px;
}
.result-snippet {
color: #ccc;
font-size: 14px;
line-height: 1.58;
}
.result-snippet h1, .result-snippet h2, .result-snippet h3 {
margin: 8px 0 4px 0;
color: #fff;
}
.result-snippet h1 { font-size: 18px; }
.result-snippet h2 { font-size: 16px; }
.result-snippet h3 { font-size: 15px; }
.result-snippet p { margin: 4px 0; }
.result-snippet code {
background: #1a1a1a;
padding: 2px 4px;
border-radius: 3px;
font-family: 'Courier New', monospace;
font-size: 13px;
color: #0f0;
}
.result-snippet pre {
background: #0a0a0a;
padding: 8px;
border-radius: 0;
overflow-x: auto;
margin: 4px 0;
border: 1px solid #333;
}
.result-meta {
color: #666;
font-size: 12px;
margin-top: 4px;
}
.upload-section {
max-width: 600px;
margin: 40px auto;
}
.upload-zone {
border: 2px dashed #333;
border-radius: 0;
padding: 60px 20px;
text-align: center;
background: #0a0a0a;
cursor: pointer;
transition: all 0.3s;
}
.upload-zone:hover, .upload-zone.dragover {
border-color: #fff;
background: #111;
}
.upload-icon {
font-size: 48px;
color: #fff;
margin-bottom: 16px;
}
.upload-text {
color: #888;
font-size: 14px;
}
.file-input {
display: none;
}
.btn-primary {
background: #fff;
color: #000;
border: none;
padding: 12px 24px;
border-radius: 0;
cursor: pointer;
font-size: 14px;
font-weight: 400;
margin-top: 20px;
font-family: 'Courier New', monospace;
}
.btn-primary:hover {
background: #ccc;
}
.selected-files {
margin-top: 20px;
text-align: left;
}
.selected-file {
padding: 8px 12px;
background: #0a0a0a;
border-radius: 0;
margin-bottom: 8px;
font-size: 14px;
border: 1px solid #333;
color: #fff;
}
.processing-container {
max-width: 600px;
margin: 20px auto;
}
.processing-item {
background: #0a0a0a;
border: 1px solid #333;
border-radius: 0;
padding: 16px;
margin-bottom: 16px;
}
.processing-title {
font-size: 14px;
font-weight: 400;
margin-bottom: 8px;
color: #fff;
}
.progress-bar {
width: 100%;
height: 8px;
background: #1a1a1a;
border-radius: 0;
overflow: hidden;
margin-bottom: 8px;
}
.progress-fill {
height: 100%;
background: #fff;
transition: width 0.3s;
}
.processing-message {
font-size: 12px;
color: #888;
}
.cost-info {
margin-top: 8px;
padding: 8px;
background: #000;
border-radius: 0;
font-size: 12px;
color: #fff;
display: flex;
justify-content: space-between;
border: 1px solid #333;
}
.documents-list {
max-width: 800px;
margin: 40px auto;
}
.document-item {
background: #0a0a0a;
border: 1px solid #333;
border-radius: 0;
padding: 20px;
margin-bottom: 16px;
display: flex;
justify-content: space-between;
align-items: center;
}
.document-info {
flex: 1;
}
.document-name {
font-size: 16px;
font-weight: 400;
color: #fff;
margin-bottom: 4px;
}
.document-meta {
font-size: 12px;
color: #666;
}
.document-actions {
display: flex;
gap: 10px;
align-items: center;
color: #888;
}
.download-link {
color: #fff;
text-decoration: none;
font-size: 14px;
padding: 8px 16px;
border: 1px solid #333;
border-radius: 0;
}
.download-link:hover {
background: #1a1a1a;
}
.status-badge {
padding: 4px 8px;
border-radius: 0;
font-size: 12px;
font-weight: 400;
}
.status-completed {
background: #0a2a0a;
color: #0f0;
}
.status-processing {
background: #2a2a0a;
color: #ff0;
}
.status-failed {
background: #2a0a0a;
color: #f00;
}
.hidden {
display: none;
}
.empty-state {
text-align: center;
padding: 60px 20px;
color: #888;
}
.document-viewer {
max-width: 900px;
margin: 40px auto;
padding: 40px;
background: #000;
}
.doc-title {
font-size: 32px;
font-weight: 400;
color: #fff;
margin-bottom: 30px;
}
.markdown-body {
color: #fff;
line-height: 1.6;
}
.markdown-body h1 {
font-size: 32px;
margin: 24px 0 16px 0;
border-bottom: 1px solid #333;
padding-bottom: 8px;
}
.markdown-body h2 {
font-size: 24px;
margin: 20px 0 12px 0;
}
.markdown-body h3 {
font-size: 20px;
margin: 16px 0 10px 0;
}
.markdown-body p {
margin: 12px 0;
}
.markdown-body code {
background: #1a1a1a;
padding: 2px 6px;
border-radius: 0;
font-family: 'Courier New', monospace;
font-size: 14px;
color: #0f0;
}
.markdown-body pre {
background: #0a0a0a;
padding: 16px;
border-radius: 0;
overflow-x: auto;
margin: 16px 0;
border: 1px solid #333;
}
.markdown-body pre code {
background: none;
padding: 0;
}
.markdown-body ul, .markdown-body ol {
margin: 12px 0;
padding-left: 30px;
}
</style>
</head>
<body>
<header>
<div class="header-left">
<div class="logo" onclick="navigateTo('')">Molodetz Library</div>
<nav class="main-nav">
<a href="https://molodetz.nl/log" class="nav-link">activity log</a>
<a href="https://molodetz.nl/projects" class="nav-link">projects</a>
<a href="https://molodetz.nl/products" class="nav-link">products</a>
<a href="https://molodetz.nl/contact" class="nav-link">contact</a>
</nav>
</div>
<div class="cost-display">
<div class="cost-item">
<span>in:</span>
<span class="cost-value" id="totalInputTokens">0</span>
</div>
<div class="cost-item">
<span>out:</span>
<span class="cost-value" id="totalOutputTokens">0</span>
</div>
<div class="cost-item">
<span></span>
<span class="cost-value" id="totalCost">0.0000</span>
</div>
</div>
</header>
<div class="container">
<div class="tabs">
<div class="tab active" data-tab="search">search</div>
<div class="tab" data-tab="upload">upload</div>
<div class="tab" data-tab="documents">all documents</div>
</div>
<div id="searchPage" class="page">
<div class="search-page">
<div class="search-box">
<input type="text" class="search-input" id="searchInput" placeholder="search documents...">
<button class="search-btn" id="searchBtn">🔍</button>
</div>
<div class="ai-response" id="aiResponse">
<div class="ai-title">ai response</div>
<div class="ai-content" id="aiContent"></div>
<div class="ai-meta" id="aiMeta"></div>
</div>
<div class="results-container" id="resultsContainer"></div>
</div>
</div>
<div id="uploadPage" class="page hidden">
<div class="upload-section">
<div class="upload-zone" id="uploadZone">
<div class="upload-icon">📄</div>
<div class="upload-text">drop files here or click to browse</div>
<input type="file" class="file-input" id="fileInput" accept=".pdf,.md,.txt" multiple>
</div>
<div class="selected-files" id="selectedFiles"></div>
<button class="btn-primary" id="uploadBtn">upload documents</button>
</div>
<div class="processing-container" id="processingContainer"></div>
</div>
<div id="documentsPage" class="page hidden">
<div class="documents-list" id="documentsList"></div>
</div>
<div id="documentViewerPage" class="page hidden">
<div class="document-viewer">
<div class="doc-title" id="docTitle"></div>
<div class="markdown-body" id="docBody"></div>
</div>
</div>
</div>
<script>
// Prefix for every API request; empty means "same origin as this page".
const API_BASE = '';
// Files currently chosen in the upload tab.
let selectedFiles = [];
// Running totals shown in the header; updated by updateCostDisplay().
let totalCosts = { inputTokens: 0, outputTokens: 0, cost: 0 };
// Router
// Navigation is hash-based: setting the hash fires 'hashchange', which runs handleRoute().
function navigateTo(path) {
    window.location.hash = path;
}
// Show the view that matches the current URL hash:
//   #search/<slug>, #document/<id>, #upload, #documents; anything else -> search tab.
function handleRoute() {
    const segments = window.location.hash.slice(1).split('/');
    const route = segments[0];
    const firstParam = segments[1];

    for (const pageEl of document.querySelectorAll('.page')) {
        pageEl.classList.add('hidden');
    }

    if (route === 'search' && firstParam) {
        showSearchResults(firstParam);
        return;
    }
    if (route === 'document' && firstParam) {
        showDocument(firstParam);
        return;
    }
    if (route === 'upload' || route === 'documents') {
        switchTab(route);
        return;
    }
    switchTab('search');
}

for (const eventName of ['hashchange', 'load']) {
    window.addEventListener(eventName, handleRoute);
}
// Tabs
// Clicking a tab only changes the hash; the router then calls switchTab().
for (const tabEl of document.querySelectorAll('.tab')) {
    tabEl.addEventListener('click', () => navigateTo(tabEl.dataset.tab));
}

// Highlight the named tab and reveal its page (element id "<tabName>Page").
// The documents tab reloads its list every time it is shown.
function switchTab(tabName) {
    for (const tabEl of document.querySelectorAll('.tab')) {
        tabEl.classList.remove('active');
    }
    const activeTab = document.querySelector(`[data-tab="${tabName}"]`);
    if (activeTab) activeTab.classList.add('active');

    for (const pageEl of document.querySelectorAll('.page')) {
        pageEl.classList.add('hidden');
    }
    const activePage = document.getElementById(`${tabName}Page`);
    if (activePage) activePage.classList.remove('hidden');

    if (tabName === 'documents') {
        loadDocuments();
    }
}
// Search
document.getElementById('searchBtn').addEventListener('click', performSearch);
document.getElementById('searchInput').addEventListener('keypress', (e) => {
    if (e.key === 'Enter') performSearch();
});

// Run the query in the search box, navigate to its result page and, when the
// query looks like a prompt, also request an AI answer.
async function performSearch() {
    const query = document.getElementById('searchInput').value.trim();
    if (!query) return;
    try {
        const response = await fetch(`${API_BASE}/api/search?query=${encodeURIComponent(query)}&page=1&page_size=10`);
        if (!response.ok) {
            // Without this check a failed search navigated to "search/undefined".
            throw new Error(`search request failed with HTTP ${response.status}`);
        }
        const data = await response.json();
        // Results served from the server cache did not spend any new tokens.
        if (!data.cached) {
            updateCostDisplay(data.tokens || 0, 0, data.cost_eur || 0);
        }
        navigateTo(`search/${data.slug}`);
        // Check if it's a prompt
        if (await isPrompt(query)) {
            executePrompt(query, data.slug);
        }
    } catch (error) {
        console.error('Search error:', error);
    }
}
// Heuristic: a query is a prompt when it contains one of the trigger phrases
// (plain substring match) or ends with "?" and has more than three
// space-separated words.
async function isPrompt(query) {
    const triggerPhrases = ['make', 'create', 'list', 'summarize', 'explain', 'compare', 'analyze', 'generate', 'write', 'show me', 'give me', 'find all', 'extract', 'what are', 'how many'];
    const lowered = query.toLowerCase();
    if (triggerPhrases.some(phrase => lowered.includes(phrase))) {
        return true;
    }
    const wordCount = query.split(' ').length;
    return lowered.endsWith('?') && wordCount > 3;
}
// Ask the backend for an AI answer to `query` and render it in the AI panel.
// `slug` is accepted for call-site compatibility but not used here.
async function executePrompt(query, slug) {
    const aiResponse = document.getElementById('aiResponse');
    const aiContent = document.getElementById('aiContent');
    const aiMeta = document.getElementById('aiMeta');
    aiResponse.classList.add('visible', 'loading');
    aiContent.innerHTML = 'generating response...';
    try {
        const response = await fetch(`${API_BASE}/api/prompt?query=${encodeURIComponent(query)}`, {
            method: 'POST'
        });
        if (!response.ok) {
            // An error body has no `response` field; fail into the catch below.
            throw new Error(`prompt request failed with HTTP ${response.status}`);
        }
        const data = await response.json();
        aiResponse.classList.remove('loading');
        // NOTE(review): marked output is inserted unsanitised; the text comes
        // from the model and from uploaded documents — consider sanitising.
        aiContent.innerHTML = marked.parse(data.response);
        aiContent.querySelectorAll('pre code').forEach(block => hljs.highlightElement(block));
        aiMeta.innerHTML = `
            <span>input: ${data.input_tokens} tokens</span>
            <span>output: ${data.output_tokens} tokens</span>
            <span>cost: €${data.cost_eur.toFixed(4)}</span>
        `;
        // A cached answer was paid for earlier; do not add it to the totals again.
        if (!data.cached) {
            updateCostDisplay(data.input_tokens, data.output_tokens, data.cost_eur);
        }
    } catch (error) {
        console.error('Prompt error:', error);
        aiResponse.classList.remove('loading');
        aiContent.innerHTML = 'error generating response.';
    }
}
// Render the cached search stored under `slug`, plus its cached AI answer if any.
async function showSearchResults(slug) {
    switchTab('search');
    try {
        const response = await fetch(`${API_BASE}/api/search/${slug}`);
        if (!response.ok) {
            // Unknown slug: show the empty state instead of "undefined".
            displayResults([]);
            return;
        }
        const data = await response.json();
        document.getElementById('searchInput').value = data.query;
        displayResults(data.results);
        // No updateCostDisplay() here: this endpoint only reads the cache, and
        // performSearch() already counted the query when it was actually run.

        // Check for cached prompt result
        const aiResponse = document.getElementById('aiResponse');
        try {
            const promptResponse = await fetch(`${API_BASE}/api/prompt/${slug}`);
            if (promptResponse.ok) {
                const promptData = await promptResponse.json();
                const aiContent = document.getElementById('aiContent');
                const aiMeta = document.getElementById('aiMeta');
                aiResponse.classList.add('visible');
                // NOTE(review): marked output is inserted unsanitised — consider sanitising.
                aiContent.innerHTML = marked.parse(promptData.response);
                aiContent.querySelectorAll('pre code').forEach(block => hljs.highlightElement(block));
                aiMeta.innerHTML = `
                    <span>input: ${promptData.input_tokens} tokens</span>
                    <span>output: ${promptData.output_tokens} tokens</span>
                    <span>cost: €${promptData.cost_eur.toFixed(4)}</span>
                `;
            } else if (!aiResponse.classList.contains('loading')) {
                // No answer for this search: hide one left over from an earlier
                // search, unless a new answer is being generated right now.
                aiResponse.classList.remove('visible');
            }
        } catch (e) {
            // No cached prompt result
        }
    } catch (error) {
        console.error('Error loading search:', error);
    }
}
// Render the result list; each item links to its document view.
// NOTE(review): snippets come from uploaded documents and are passed through
// marked into innerHTML without sanitising — worth reviewing.
function displayResults(results) {
    const container = document.getElementById('resultsContainer');
    const isEmpty = !results || results.length === 0;
    if (isEmpty) {
        container.innerHTML = '<div class="empty-state">no results found</div>';
        return;
    }

    const items = [];
    for (const result of results) {
        const renderedSnippet = marked.parse(result.snippet);
        const uploadDate = new Date(result.upload_date).toLocaleDateString();
        items.push(`
            <div class="result-item" onclick="navigateTo('document/${result.document_id}')">
                <div class="result-title">${escapeHtml(result.name)}</div>
                <div class="result-snippet">${renderedSnippet}</div>
                <div class="result-meta">
                    page ${result.page} • chunk ${result.chunk_index + 1} •
                    score: ${(result.score * 100).toFixed(1)}% •
                    uploaded: ${uploadDate}
                </div>
            </div>
        `);
    }
    container.innerHTML = items.join('');

    for (const block of container.querySelectorAll('pre code')) {
        hljs.highlightElement(block);
    }
}
// Switch to the document viewer and render the document's markdown.
async function showDocument(documentId) {
    document.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
    document.querySelectorAll('.page').forEach(p => p.classList.add('hidden'));
    document.getElementById('documentViewerPage').classList.remove('hidden');
    const titleEl = document.getElementById('docTitle');
    const bodyEl = document.getElementById('docBody');
    try {
        const response = await fetch(`${API_BASE}/api/document/${documentId}`);
        if (!response.ok) {
            // Previously an error body was rendered as a document titled "undefined".
            titleEl.textContent = 'document not found';
            bodyEl.innerHTML = '';
            return;
        }
        const doc = await response.json();
        titleEl.textContent = doc.name;
        // NOTE(review): uploaded content is rendered through marked without sanitising.
        bodyEl.innerHTML = marked.parse(doc.markdown_content);
        document.querySelectorAll('#docBody pre code').forEach(block => hljs.highlightElement(block));
    } catch (error) {
        console.error('Error loading document:', error);
    }
}
/**
 * Add one operation's usage to the running totals and refresh the counters.
 *
 * @param {number} inputTokens - input tokens to add to the total.
 * @param {number} outputTokens - output tokens to add to the total.
 * @param {number} cost - cost to add to the total (shown with 4 decimals).
 */
function updateCostDisplay(inputTokens, outputTokens, cost) {
    // Accumulate first, then paint, so all three counters reflect the same state.
    totalCosts.inputTokens = totalCosts.inputTokens + inputTokens;
    totalCosts.outputTokens = totalCosts.outputTokens + outputTokens;
    totalCosts.cost = totalCosts.cost + cost;

    const show = (elementId, value) => {
        document.getElementById(elementId).textContent = value;
    };
    show('totalInputTokens', totalCosts.inputTokens);
    show('totalOutputTokens', totalCosts.outputTokens);
    show('totalCost', totalCosts.cost.toFixed(4));
}
// Upload: wire the drop zone and the file input to the shared `selectedFiles` list.
const uploadZone = document.getElementById('uploadZone');
const fileInput = document.getElementById('fileInput');

// A click on the zone is forwarded to the file input to open its picker.
uploadZone.addEventListener('click', () => {
    fileInput.click();
});

// Cancelling dragover marks the zone as a valid drop target; the CSS class
// gives visual feedback while a drag hovers over it.
uploadZone.addEventListener('dragover', (event) => {
    event.preventDefault();
    uploadZone.classList.add('dragover');
});

uploadZone.addEventListener('dragleave', () => {
    uploadZone.classList.remove('dragover');
});

// A drop replaces the current selection with the dropped files.
uploadZone.addEventListener('drop', (event) => {
    event.preventDefault();
    uploadZone.classList.remove('dragover');
    const dropped = event.dataTransfer.files;
    if (dropped.length === 0) {
        return;
    }
    selectedFiles = Array.from(dropped);
    updateSelectedFiles();
});

// Choosing files through the picker replaces the current selection as well.
fileInput.addEventListener('change', (event) => {
    const picked = event.target.files;
    if (picked.length === 0) {
        return;
    }
    selectedFiles = Array.from(picked);
    updateSelectedFiles();
});
/**
 * Sync the upload zone label and the #selectedFiles list with `selectedFiles`.
 *
 * With no files selected the list is emptied and the default prompt is
 * restored; otherwise the label shows the count and each file name is listed
 * (HTML-escaped).
 */
function updateSelectedFiles() {
    const listEl = document.getElementById('selectedFiles');
    const setLabel = (text) => {
        uploadZone.querySelector('.upload-text').textContent = text;
    };
    const count = selectedFiles.length;

    if (count !== 0) {
        setLabel(`${count} file(s) selected`);
        const rows = selectedFiles.map(
            file => `<div class="selected-file">${escapeHtml(file.name)}</div>`
        );
        listEl.innerHTML = rows.join('');
    } else {
        listEl.innerHTML = '';
        setLabel('drop files here or click to browse');
    }
}
// Upload button: POST the selected files as multipart form data, then open a
// status socket for every document the server reports back.
document.getElementById('uploadBtn').addEventListener('click', async () => {
    if (selectedFiles.length === 0) {
        alert('Please select files');
        return;
    }
    const formData = new FormData();
    selectedFiles.forEach(file => formData.append('files', file));
    try {
        const response = await fetch(`${API_BASE}/api/upload`, { method: 'POST', body: formData });
        if (!response.ok) {
            // fetch only rejects on network errors; HTTP errors must be checked explicitly.
            throw new Error(`Upload failed with HTTP ${response.status}`);
        }
        const data = await response.json();
        // Reset the selection only after the server accepted the upload, so a
        // failed request leaves the files selected for a retry.
        selectedFiles = [];
        fileInput.value = '';
        updateSelectedFiles();
        (data.documents || []).forEach(doc => connectWebSocket(doc.document_id, doc.name));
    } catch (error) {
        console.error('Upload error:', error);
        alert('Upload failed, please try again');
    }
});
/**
 * Open a status socket for one document and show its progress card.
 *
 * A card (title, progress bar, message, token and cost counters) is appended
 * to #processingContainer and updated from every JSON message received. When
 * a message with `step === 'completed'` arrives, the totals are updated, the
 * socket is closed and the card is removed after 3 seconds.
 *
 * @param {string} documentId - id appended to the /ws/status/ URL.
 * @param {string} docName - display name shown as the card title.
 */
function connectWebSocket(documentId, docName) {
    // Match the page's security level: a plain ws:// socket is blocked as
    // mixed content when the page itself is served over HTTPS.
    const scheme = window.location.protocol === 'https:' ? 'wss' : 'ws';
    const ws = new WebSocket(`${scheme}://${window.location.host}/ws/status/${documentId}`);

    const div = document.createElement('div');
    div.className = 'processing-item';
    div.innerHTML = `
        <div class="processing-title">${escapeHtml(docName)}</div>
        <div class="progress-bar"><div class="progress-fill" style="width: 0%"></div></div>
        <div class="processing-message">starting...</div>
        <div class="cost-info">
            <div>tokens: <span class="tokens-count">0</span></div>
            <div>cost: €<span class="cost-amount">0.0000</span></div>
        </div>
    `;
    document.getElementById('processingContainer').appendChild(div);

    ws.onmessage = (event) => {
        let data;
        try {
            data = JSON.parse(event.data);
        } catch (parseError) {
            // A malformed frame should not kill the handler for later frames.
            console.error('Invalid status message:', parseError);
            return;
        }
        div.querySelector('.progress-fill').style.width = `${data.progress}%`;
        div.querySelector('.processing-message').textContent = data.message;
        div.querySelector('.tokens-count').textContent = data.tokens || 0;
        div.querySelector('.cost-amount').textContent = (data.cost_eur || 0).toFixed(4);
        if (data.step === 'completed') {
            updateCostDisplay(data.tokens || 0, 0, data.cost_eur || 0);
            // Nothing further is rendered for this document, so release the connection.
            ws.close();
            setTimeout(() => div.remove(), 3000);
        }
    };

    ws.onerror = () => {
        // Surface the failure on the card instead of leaving it at its last state silently.
        div.querySelector('.processing-message').textContent = 'connection error';
    };
}
/**
 * Fetch the document list and render it into #documentsList.
 *
 * An empty or missing list renders the "no documents uploaded yet"
 * placeholder. Each row shows the name, upload date, a status badge, the
 * download counter and a download link. Failures are logged and leave the
 * current list untouched.
 */
async function loadDocuments() {
    try {
        const response = await fetch(`${API_BASE}/api/documents`);
        if (!response.ok) {
            // An error payload is not a document list; do not try to render it.
            throw new Error(`HTTP ${response.status}`);
        }
        const documents = await response.json();
        const container = document.getElementById('documentsList');
        if (!documents || documents.length === 0) {
            container.innerHTML = '<div class="empty-state">no documents uploaded yet</div>';
            return;
        }
        container.innerHTML = documents.map(doc => {
            const statusText = String(doc.status);
            // escapeHtml does not escape quotes, so the value placed inside the
            // class attribute is reduced to word characters and dashes instead.
            const statusClass = statusText.replace(/[^\w-]/g, '');
            // Percent-encode the id so it cannot break out of the quoted href.
            const downloadUrl = `${API_BASE}/api/download/${encodeURIComponent(doc.id)}`;
            const uploaded = new Date(doc.upload_time).toLocaleDateString();
            return `
                <div class="document-item">
                    <div class="document-info">
                        <div class="document-name">${escapeHtml(doc.name)}</div>
                        <div class="document-meta">
                            Uploaded: ${escapeHtml(uploaded)} •
                            <span class="status-badge status-${statusClass}">${escapeHtml(statusText)}</span>
                        </div>
                    </div>
                    <div class="document-actions">
                        <span>(${escapeHtml(String(doc.downloads))})</span>
                        <a href="${downloadUrl}" class="download-link">download</a>
                    </div>
                </div>
            `;
        }).join('');
    } catch (error) {
        console.error('Error loading documents:', error);
    }
}
/**
 * Escape a value for insertion as HTML text content.
 *
 * The value is assigned as the text of a detached <div> and the element's
 * serialized markup is returned, so the browser performs the escaping.
 *
 * @param {*} text - value to escape.
 * @returns {string} the escaped markup.
 */
function escapeHtml(text) {
    const scratch = Object.assign(document.createElement('div'), { textContent: text });
    return scratch.innerHTML;
}
</script>
</body>
</html>