This commit is contained in:
retoor 2025-10-06 09:35:02 +02:00
parent 8c242d1ff9
commit 061d71ac1e
3 changed files with 11 additions and 2 deletions

View File

@ -7,3 +7,5 @@ aiohttp==3.9.1
feedparser==6.0.10 feedparser==6.0.10
websockets==12.0 websockets==12.0
trafilatura==1.6.2 trafilatura==1.6.2
vaderSentiment

View File

@ -4,6 +4,7 @@ from fastapi.templating import Jinja2Templates
import dataset import dataset
import json import json
import aiohttp import aiohttp
import sentiment
import feedparser import feedparser
import asyncio import asyncio
from datetime import datetime from datetime import datetime
@ -356,15 +357,17 @@ async def websocket_sync(websocket: WebSocket):
'last_synchronized': datetime.now().isoformat() 'last_synchronized': datetime.now().isoformat()
} }
existing = articles_table.find_one(guid=article_data['guid']) existing = articles_table.find_one(guid=article_data['guid'])
if not existing: if not existing:
new_articles.append(article_data) new_articles.append(article_data)
articles_count += 1 articles_count += 1
article_data['sentiment'] = json.dumps(sentiment.analyze(entry.get('description', '') or entry.get('summary', '')))
articles_table.upsert(article_data, ['guid']) articles_table.upsert(article_data, ['guid'])
# Index the article to ChromaDB # Index the article to ChromaDB
doc_content = f"{article_data.get('title', '')}\n{article_data.get('description', '')}" doc_content = f"{article_data.get('title', '')}\n{article_data.get('description', '')}"
metadata = {key: str(value) for key, value in article_data.items() if key != 'content'} # Exclude large content from metadata metadata = {key: str(value) for key, value in article_data.items() if key != 'content'} # Exclude large content from metadata
chroma_collection.upsert( chroma_collection.upsert(
documents=[doc_content], documents=[doc_content],
@ -490,6 +493,7 @@ async def search_articles(
for i, doc_id in enumerate(results['ids'][0]): for i, doc_id in enumerate(results['ids'][0]):
res = results['metadatas'][0][i] res = results['metadatas'][0][i]
res['distance'] = results['distances'][0][i] res['distance'] = results['distances'][0][i]
res['sentiment'] = sentiment.analyze(res.get('description', '') or res.get('content', '') or res.get('title', ''))
formatted_results.append(res) formatted_results.append(res)
return JSONResponse(content={"results": formatted_results}) return JSONResponse(content={"results": formatted_results})
@ -565,6 +569,8 @@ async def newspaper_latest(request: Request):
for article in articles: for article in articles:
for key, value in article.items(): for key, value in article.items():
article[key] = str(value).strip().replace(' ', '') article[key] = str(value).strip().replace(' ', '')
# Score the first non-empty text field of this article; the final fallback must read from `article` (the loop variable), not `res`, which is not bound in the visible part of this function.
article['sentiment'] = sentiment.analyze(article.get('description', '') or article.get('content', '') or article.get('title', ''))
return templates.TemplateResponse("newspaper_view.html", { return templates.TemplateResponse("newspaper_view.html", {
"request": request, "request": request,
"newspaper": first_newspaper, "newspaper": first_newspaper,

View File

@ -164,6 +164,7 @@
<h2 class="article-title"> <h2 class="article-title">
<a href="{{ article.link }}" target="_blank">{{ article.title }}</a> <a href="{{ article.link }}" target="_blank">{{ article.title }}</a>
</h2> </h2>
<input type="hidden" name="sentiment" value="{{ article.sentiment }}">
<div class="article-meta"> <div class="article-meta">
<span class="article-source">{{ article.feed_name }}</span> <span class="article-source">{{ article.feed_name }}</span>
{% if article.author %} {% if article.author %}