Merge pull request 'Add URL embedding functionality with metadata extraction and responsive design' (#48) from BordedDev/snek:feat/url-embedding into main

Reviewed-on: retoor/snek#48
Reviewed-by: retoor <retoor@noreply@molodetz.nl>
This commit is contained in:
retoor 2025-05-30 19:53:22 +02:00
commit 7dd3133475
2 changed files with 197 additions and 4 deletions

View File

@ -544,3 +544,44 @@ dialog .dialog-button.secondary:hover {
} }
/* Container for a URL preview: stacks its children in a vertical flex column. */
.embed-url-link {
display: flex;
flex-direction: column;
}
/* Media inside a URL preview (img, video, iframe, div): natural size, but
   never wider than the container nor taller than 400px. object-fit keeps the
   aspect ratio; only the top corners are rounded. */
.embed-url-link img,
.embed-url-link video,
.embed-url-link iframe,
.embed-url-link div {
width: auto;
height: auto;
max-width: 100%;
max-height: 400px;
object-fit: contain;
border-radius: 12px 12px 0 0;
}
/* Site name line of a URL preview: slightly smaller, grey text. */
.embed-url-link .page-site {
font-size: 0.9em;
color: #aaa;
margin-bottom: 5px;
}
/* Page title line of a URL preview: larger text in the orange accent colour. */
.embed-url-link .page-name {
font-size: 1.2em;
color: #f05a28;
margin-bottom: 5px;
}
/* Description paragraph of a URL preview: normal-size, light text. */
.embed-url-link .page-description {
font-size: 1em;
color: #e6e6e6;
margin-bottom: 10px;
}
/* Original-URL line of a URL preview. The preview markup emits this element
   with the class "page-original-link", so that class is matched here as well;
   ".page-link" is kept for any markup that still uses the older name. */
.embed-url-link .page-link,
.embed-url-link .page-original-link {
    font-size: 0.9em;
    color: #f05a28;
    text-decoration: none;
    margin-top: 10px;
}

View File

@ -1,9 +1,11 @@
import re
from urllib.parse import urlparse, parse_qs
from types import SimpleNamespace
import mimetypes import mimetypes
import re
from functools import lru_cache
from types import SimpleNamespace
from urllib.parse import urlparse, parse_qs
import emoji import emoji
import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from jinja2 import TemplateSyntaxError, nodes from jinja2 import TemplateSyntaxError, nodes
from jinja2.ext import Extension from jinja2.ext import Extension
@ -231,6 +233,153 @@ def linkify_https(text):
return set_link_target_blank(str(soup)) return set_link_target_blank(str(soup))
@lru_cache(maxsize=128)
def get_url_content(url):
    """Fetch *url* over HTTP and return the response body as text.

    Any failure (connection problem, timeout after 5 seconds, HTTP error
    status, ...) is reported on stdout and turned into ``None`` so callers can
    treat the fetch as best-effort.

    The result is memoised per URL by ``lru_cache`` (up to 128 entries); note
    that a ``None`` produced by a failed fetch is cached like any other value.
    """
    body = None
    try:
        reply = requests.get(url, timeout=5)
        # Treat 4xx/5xx answers as failures instead of returning error pages.
        reply.raise_for_status()
        body = reply.text
    except Exception as exc:
        print(f"Error fetching {url}: {exc}")
    return body
def _head_value(head_info, elem=None, meta=None, ograph=None, twitter=None):
    """Return one metadata value from a parsed ``<head>`` element.

    Sources are tried in this order, and the first tag found wins:

    1. ``<meta name|property="twitter:<twitter>">``
    2. ``<meta property|name="og:<ograph>">``
    3. ``<meta name|property="<meta>">``
    4. the text of the first ``<elem>`` element

    For ``<meta>`` tags the ``content`` attribute is returned, falling back to
    ``value``; if the tag has neither, ``None`` is returned without trying the
    remaining sources. Returns ``None`` when nothing matches.
    """
    meta_lookups = (
        (twitter, "twitter:", ("name", "property")),
        (ograph, "og:", ("property", "name")),
        (meta, "", ("name", "property")),
    )
    for key, prefix, attr_names in meta_lookups:
        if not key:
            continue
        for attr_name in attr_names:
            tag = head_info.find("meta", attrs={attr_name: prefix + key})
            if tag is not None:
                return tag.get("content", tag.get("value", None))
    if elem:
        elem_tag = head_info.find(elem)
        if elem_tag is not None:
            return elem_tag.text
    return None


def _text_tag(soup, name, css_class, text):
    """Create ``<name class="css_class">text</name>`` with *text* as plain text."""
    tag = soup.new_tag(name)
    tag["class"] = css_class
    # Assigning .string stores a text node, so markup in *text* is escaped
    # when the tree is serialised instead of being parsed as HTML.
    tag.string = text
    return tag


def _media_tag(soup, name, src):
    """Create a ``<video>``/``<audio>`` player with one ``<source>`` and fallback text."""
    media = soup.new_tag(name)
    media["controls"] = ""
    source = soup.new_tag("source")
    source["src"] = src
    media.append(source)
    media.append(f"Your browser does not support the {name} tag.")
    return media


def embed_url(text):
    """Append a preview card for every distinct http(s) link found in *text*.

    *text* is an HTML fragment. For each ``<a>`` whose ``href`` starts with
    ``http``, the target page is fetched with ``get_url_content`` and its
    ``<head>`` metadata (Twitter card, Open Graph, plain ``<meta>``,
    ``<title>``) is used to build a copy of the link with class
    ``embed-url-link`` containing an optional image, video and audio player
    plus site name, title, description and the original URL. Links that
    cannot be fetched, or whose page has no ``<head>``, are left without a
    preview. The previews are appended after the original content and the
    resulting HTML is returned as a string.

    All scraped values come from remote, untrusted pages. They are inserted
    as attribute values and text nodes (never as markup), so they are escaped
    on output.

    NOTE(review): the URLs fetched here are taken from the rendered text;
    whether requests to internal addresses need to be blocked should be
    confirmed for the deployment.
    """
    soup = BeautifulSoup(text, "html.parser")
    attachments = {}

    for element in soup.find_all("a"):
        original_link_name = element.attrs.get("href")
        if not original_link_name or not original_link_name.startswith("http"):
            continue
        # One preview per distinct URL; check before fetching/parsing again.
        if original_link_name in attachments:
            continue

        page = get_url_content(original_link_name)
        if not page:
            continue
        head_info = BeautifulSoup(page, "html.parser").find("head")
        if head_info is None:
            continue

        netloc = urlparse(original_link_name).netloc
        page_name = _head_value(head_info, "title", "title", "title", "title") or netloc
        page_site = _head_value(head_info, None, "site", "site", "site") or netloc
        page_description = _head_value(
            head_info, None, "description", "description", "description"
        )
        page_image = _head_value(head_info, None, "image", "image", "image")
        page_image_alt = _head_value(
            head_info, None, "image:alt", "image:alt", "image:alt"
        )
        page_video = _head_value(head_info, None, "video", "video", "video")
        page_audio = _head_value(head_info, None, "audio", "audio", "audio")

        # The card is an empty copy of the link (same attributes) with its
        # class replaced by the preview class.
        card = soup.new_tag("a")
        card.attrs.update(element.attrs)
        card["class"] = "embed-url-link"

        if page_image:
            image = soup.new_tag("img")
            image["src"] = page_image
            image["alt"] = page_image_alt or page_name
            image["title"] = page_name
            image["width"] = "420"
            image["height"] = "240"
            image_wrapper = soup.new_tag("span")
            image_wrapper.append(image)
            card.append(image_wrapper)
        if page_video:
            card.append(_media_tag(soup, "video", page_video))
        if page_audio:
            card.append(_media_tag(soup, "audio", page_audio))

        description = soup.new_tag("span")
        description["class"] = "description"
        description.append(_text_tag(soup, "p", "page-site", page_site))
        description.append(_text_tag(soup, "strong", "page-name", page_name))
        description.append(
            _text_tag(
                soup,
                "p",
                "page-description",
                page_description or "No description available.",
            )
        )
        description.append(
            _text_tag(soup, "p", "page-original-link", original_link_name)
        )
        card.append(description)

        attachments[original_link_name] = card

    for card in attachments.values():
        soup.append(card)
    return str(soup)
class EmojiExtension(Extension): class EmojiExtension(Extension):
tags = {"emoji"} tags = {"emoji"}
@ -276,6 +425,9 @@ class LinkifyExtension(Extension):
result = embed_youtube(result) result = embed_youtube(result)
result = enrich_image_rendering(result) result = enrich_image_rendering(result)
result = embed_url(result)
return result return result