Merge pull request 'Improved site embedding' (#57) from BordedDev/snek:feat/improved-embeding into main

Reviewed-on: retoor/snek#57
This commit is contained in:
retoor 2025-06-29 16:24:33 +02:00
commit 006882cd6f

View File

@ -193,7 +193,7 @@ def embed_youtube(text):
"www.youtube-nocookie.com",
"youtube-nocookie.com",
]
and any(url.path.startswith(p) for p in ["/watch", "/embed"])
and any(url.path.startswith(p) for p in ["/watch", "/embed", "/shorts"])
):
queries = parse_qs(url.query)
if "v" in queries:
@ -333,142 +333,185 @@ def get_url_content(url):
return None
def get_element_options(head_info, elem=None, meta=None, ograph=None, twitter=None):
    """Return the first usable piece of page metadata found in ``head_info``.

    Sources are tried in a fixed priority order, skipping any whose key
    argument is not given:

    1. ``<meta>`` tag for ``"twitter:" + twitter`` (``name``, then ``property``)
    2. ``<meta>`` tag for ``"og:" + ograph`` (``property``, then ``name``)
    3. ``<meta>`` tag for ``meta`` (``name``, then ``property``)
    4. the text of the first ``elem`` element

    For a ``<meta>`` tag the ``content`` attribute is read, falling back to
    ``value`` when ``content`` is absent.

    Args:
        head_info: Object exposing ``find(name, attrs=...)`` whose results
            expose ``get(key, default)`` and ``text`` (the callers in this
            file pass the parsed ``<head>`` element).
        elem: Element name whose text is the last-resort source.
        meta: Plain meta key, e.g. ``"description"``.
        ograph: Key looked up with the ``og:`` prefix.
        twitter: Key looked up with the ``twitter:`` prefix.

    Returns:
        The first non-empty value found, or ``None`` when nothing matches.
    """

    def _meta_content(key, attr_order):
        # Pick the first matching <meta> tag, trying each attribute in order,
        # then read its content (or value) attribute.
        for attr in attr_order:
            tag = head_info.find("meta", attrs={attr: key})
            if tag:
                return tag.get("content", tag.get("value", None))
        return None

    # (requested key, key prefix, attribute lookup order) in priority order.
    lookups = (
        (twitter, "twitter:", ("name", "property")),
        (ograph, "og:", ("property", "name")),
        (meta, "", ("name", "property")),
    )

    for key, prefix, attr_order in lookups:
        if not key:
            continue
        value = _meta_content(prefix + key, attr_order)
        # A tag that exists but carries no content must not mask the
        # lower-priority sources, so only a non-empty value ends the search.
        if value:
            return value

    if elem:
        elem_tag = head_info.find(elem)
        if elem_tag:
            return elem_tag.text

    return None
def embed_url(text):
soup = BeautifulSoup(text, "html.parser")
attachments = {}
for element in soup.find_all("a"):
if "href" in element.attrs and element.attrs["href"].startswith("http") and ("data-noembed" not in element.attrs):
if (
"href" in element.attrs
and element.attrs["href"].startswith("http")
and element.attrs["href"] not in attachments
and ("data-noembed" not in element.attrs)
):
original_link_name = element.attrs["href"]
page_url = urlparse(element.attrs["href"])
page = get_url_content(element.attrs["href"])
if page:
parsed_page = BeautifulSoup(page, "html.parser")
head_info = parsed_page.find("head")
if head_info:
if not page:
continue
def get_element_options(
elem=None, meta=None, ograph=None, twitter=None
):
if twitter:
tw_tag = head_info.find(
"meta", attrs={"name": "twitter:" + twitter}
) or head_info.find(
"meta", attrs={"property": "twitter:" + twitter}
)
if tw_tag:
return tw_tag.get("content", tw_tag.get("value", None))
parsed_page = BeautifulSoup(page, "html.parser")
head_info = parsed_page.find("head")
if ograph:
og_tag = head_info.find(
"meta", attrs={"property": "og:" + ograph}
) or head_info.find("meta", attrs={"name": "og:" + ograph})
if og_tag:
return og_tag.get("content", og_tag.get("value", None))
if not head_info:
continue
if meta:
meta_tag = head_info.find(
"meta", attrs={"name": meta}
) or head_info.find("meta", attrs={"property": meta})
if meta_tag:
return meta_tag.get(
"content", meta_tag.get("value", None)
)
page_name = (
get_element_options(head_info, "title", "title", "title", "title")
or page_url.netloc
)
page_site = (
get_element_options(head_info, None, "site", "site", "site")
or get_element_options(head_info, ograph="site_name")
or page_url.netloc
)
page_description = get_element_options(
head_info, None, "description", "description", "description"
)
if elem:
elem_tag = head_info.find(elem)
if elem_tag:
return elem_tag.text
page_image = (
get_element_options(head_info, None, "image", "image", "image")
or get_element_options(
head_info, None, "image:url", "image:url", "image:url"
)
or get_element_options(
head_info,
None,
"image:secure_url",
"image:secure_url",
"image:secure_url",
)
)
page_image_height = get_element_options(
head_info, None, "image:height", "image:height", "image:height"
)
page_image_width = get_element_options(
head_info, None, "image:width", "image:width", "image:width"
)
page_image_alt = get_element_options(
head_info, None, "image:alt", "image:alt", "image:alt"
)
return None
page_video = (
get_element_options(head_info, None, "video", "video", "video")
or get_element_options(
head_info, None, "video:url", "video:url", "video:url"
)
or get_element_options(
head_info,
None,
"video:secure_url",
"video:secure_url",
"video:secure_url",
)
)
page_video_height = get_element_options(
head_info, None, "video:height", "video:height", "video:height"
)
page_video_width = get_element_options(
head_info, None, "video:width", "video:width", "video:width"
)
original_link_name = element.attrs["href"]
page_audio = (
get_element_options(head_info, None, "audio", "audio", "audio")
or get_element_options(
head_info, None, "audio:url", "audio:url", "audio:url"
)
or get_element_options(
head_info,
None,
"audio:secure_url",
"audio:secure_url",
"audio:secure_url",
)
)
if original_link_name in attachments:
continue
(get_element_options(head_info, twitter="card") or "summary_large_image")
page_name = (
get_element_options("title", "title", "title", "title")
or page_url.netloc
)
page_site = (
get_element_options(None, "site", "site", "site")
or page_url.netloc
)
page_description = get_element_options(
None, "description", "description", "description"
)
page_image = get_element_options(None, "image", "image", "image")
page_image_alt = get_element_options(
None, "image:alt", "image:alt", "image:alt"
)
page_video = get_element_options(None, "video", "video", "video")
page_audio = get_element_options(None, "audio", "audio", "audio")
attachment_base = BeautifulSoup(str(element), "html.parser")
attachments[original_link_name] = attachment_base
(
get_element_options(None, None, None, "card")
or "summary_large_image"
)
attachment = next(attachment_base.children)
attachment_base = BeautifulSoup(str(element), "html.parser")
attachments[original_link_name] = attachment_base
attachment.clear()
attachment.attrs["class"] = "embed-url-link"
attachment = next(attachment_base.children)
render_element = attachment
attachment.clear()
attachment.attrs["class"] = "embed-url-link"
if page_image:
style = {
"width": page_image_width + "px" if page_image_width else None,
"height": page_image_height + "px" if page_image_height else None,
}
render_element = attachment
style_string = "; ".join(
f"{key}: {value}" for key, value in style.items() if value
)
if page_image:
image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="420" height="240" /></span>'
render_element.append(
BeautifulSoup(image_template, "html.parser")
)
if page_video:
video_template = f'<video controls><source src="{page_video}">Your browser does not support the video tag.</video>'
render_element.append(
BeautifulSoup(video_template, "html.parser")
)
if page_audio:
audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>'
render_element.append(
BeautifulSoup(audio_template, "html.parser")
)
image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="1" height="1" style="{style_string}" /></span>'
render_element.append(BeautifulSoup(image_template, "html.parser"))
description_element_base = BeautifulSoup(
"<span class='description'></span>", "html.parser"
)
description_element = next(description_element_base.children)
description_element.append(
BeautifulSoup(
f'<p class="page-site">{page_site}</p>',
"html.parser",
)
)
if page_video:
style = {
"width": page_video_width + "px" if page_video_width else None,
"height": page_video_height + "px" if page_video_height else None,
}
description_element.append(
BeautifulSoup(
f'<strong class="page-name">{page_name}</strong>',
"html.parser",
)
)
style_string = "; ".join(
f"{key}: {value}" for key, value in style.items() if value
)
video_template = f'<video controls style="{style_string}"><source src="{page_video}">Your browser does not support the video tag.</video>'
render_element.append(BeautifulSoup(video_template, "html.parser"))
description_element.append(
BeautifulSoup(
f"<p class='page-description'>{page_description or 'No description available.'}</p>",
"html.parser",
)
)
if page_audio:
audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>'
render_element.append(BeautifulSoup(audio_template, "html.parser"))
description_element.append(
BeautifulSoup(
f"<p class='page-original-link'>{original_link_name}</p>",
"html.parser",
)
)
description_element = BeautifulSoup(
f"""
<span class='description'>
<p class="page-site">{page_site}</p>
<strong class="page-name">{page_name}</strong>
<p class='page-description'>{page_description or "No description available."}</p>
<p class='page-original-link'>{original_link_name}</p>
</span>
""",
"html.parser",
)
render_element.append(description_element_base)
render_element.append(description_element)
for attachment in attachments.values():
soup.append(attachment)