From 08b3600836e23e50faa1069c1676dbe61ef42658 Mon Sep 17 00:00:00 2001 From: retoor Date: Tue, 24 Jun 2025 16:36:53 +0200 Subject: [PATCH] Update. --- main.py | 125 ++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 49 deletions(-) diff --git a/main.py b/main.py index 20db0cd..2658e54 100644 --- a/main.py +++ b/main.py @@ -110,82 +110,109 @@ async def init_search_index(): """) db.close() -@app.get("/api/search") -async def search_notes(): - q = request.args.get("q", "") - tag = request.args.get("tag", "") - async with db_session() as db: - # Step 1: full‑text match → candidate note IDs - fts_rows = list(db.query("SELECT note_id FROM notes_fts WHERE notes_fts MATCH :query", query=q)) - note_ids = {r["note_id"] for r in fts_rows} - if not note_ids: - return [] # early exit – no matches - - # Step 2: optional tag filtering (intersection) - if tag: - tag = [t.strip() for t in tag if t.strip()] - if not tag: - raise HTTPException(400, "Tag filter provided but empty after stripping") - tagged_ids = { - nt["note_id"] - for t in tag - for nt in db['note_tags'].find(tag=t) - } - note_ids &= tagged_ids - - # Fetch & serialize - rows = [db['notes'].find_one(id=nid) for nid in note_ids] - rows.sort(key=lambda r: r["updated_at"], reverse=True) - return [await _serialize_note(r) for r in rows if r] - async def _serialize_note(row: Dict[str, Any]) -> Dict[str, Any]: if not row: return {} note_id = row["id"] + score = row.get("score") async with db_session() as db: atts = list(db['attachments'].find(note_id=note_id)) tags = [rt["tag"] for rt in db['note_tags'].find(note_id=note_id)] - return { - "id": note_id, - "title": row.get("title", ""), - "body": row.get("body", ""), - "created_at": row.get("created_at"), - "updated_at": row.get("updated_at"), - "attachments": atts, - "tags": tags, - } + result = { + "id": note_id, + "title": row.get("title", ""), + "body": row.get("body", ""), + "created_at": row.get("created_at"), + "updated_at": row.get("updated_at"), + "attachments": atts, + "tags": tags, + } + if score is not None: + result["score"] = score + return result + +@app.get("/api/search") +async def search_notes(q: str = "", tag: Optional[str] = None): + """ + Full-text search with prefix matching and BM25 scoring. + Optional tag filter. + """ + q = q.strip() + if not q: + return [] + # build an FTS5 prefix query: each term appended with '*' + terms = [t for t in q.split() if t] + fts_query = " ".join(f"{t}*" for t in terms) + + async with db_session() as db: + if tag: + rows = list(db.query(""" + SELECT notes.*, bm25(notes_fts) AS score + FROM notes_fts + JOIN notes ON notes_fts.rowid = notes.id + JOIN note_tags ON notes.id = note_tags.note_id + WHERE notes_fts MATCH :q AND note_tags.tag = :tag + GROUP BY notes.id + ORDER BY score + """, q=fts_query, tag=tag)) + else: + rows = list(db.query(""" + SELECT notes.*, bm25(notes_fts) AS score + FROM notes_fts + JOIN notes ON notes_fts.rowid = notes.id + WHERE notes_fts MATCH :q + ORDER BY score + """, q=fts_query)) + + return [await _serialize_note(r) for r in rows] + + @app.get("/api/notes") async def list_notes(tag: Optional[str] = None, search: Optional[str] = None): """ - List notes. Supports: + List notes. + Supports: - ?tag=foo to filter by tag - - ?search=term to full-text-search title+body + - ?search=term to full-text-search title+body with prefix & scoring """ async with db_session() as db: if search: - # FTS5 MATCH query - rows = list(db.query( - "SELECT notes.* FROM notes_fts " - "JOIN notes ON notes_fts.rowid = notes.id " - "WHERE notes_fts MATCH :q", - q=search - )) + terms = [t for t in search.split() if t] + fts_query = " ".join(f"{t}*" for t in terms) + if tag: + rows = list(db.query(""" + SELECT notes.*, bm25(notes_fts) AS score + FROM notes_fts + JOIN notes ON notes_fts.rowid = notes.id + JOIN note_tags ON notes.id = note_tags.note_id + WHERE notes_fts MATCH :q AND note_tags.tag = :tag + GROUP BY notes.id + ORDER BY score + """, q=fts_query, tag=tag)) + else: + rows = list(db.query(""" + SELECT notes.*, bm25(notes_fts) AS score + FROM notes_fts + JOIN notes ON notes_fts.rowid = notes.id + WHERE notes_fts MATCH :q + ORDER BY score + """, q=fts_query)) elif tag: note_ids = [nt["note_id"] for nt in db['note_tags'].find(tag=tag)] rows = [db['notes'].find_one(id=nid) for nid in note_ids] else: rows = list(db['notes'].all()) - # sort & serialize - rows = [r for r in rows if r] + # If no FTS scoring, sort by creation date + if rows and "score" not in rows[0]: rows.sort(key=lambda r: r["created_at"], reverse=True) - return [await _serialize_note(r) for r in rows] + return [await _serialize_note(r) for r in rows] @app.post("/api/notes")