From cc9fc0eeaffa8d4585698f87a5e243b73ee5947b Mon Sep 17 00:00:00 2001 From: BordedDev <> Date: Thu, 19 Jun 2025 09:10:31 +0200 Subject: [PATCH 1/2] Simplified embedding, added support for height/width and better site name selection # Conflicts: # src/snek/system/template.py --- src/snek/system/template.py | 261 +++++++++++++++++++++--------------- 1 file changed, 152 insertions(+), 109 deletions(-) diff --git a/src/snek/system/template.py b/src/snek/system/template.py index f68614d..668efc3 100644 --- a/src/snek/system/template.py +++ b/src/snek/system/template.py @@ -333,142 +333,185 @@ def get_url_content(url): return None +def get_element_options(head_info, elem=None, meta=None, ograph=None, twitter=None): + if twitter: + tw_tag = head_info.find( + "meta", attrs={"name": "twitter:" + twitter} + ) or head_info.find("meta", attrs={"property": "twitter:" + twitter}) + if tw_tag: + return tw_tag.get("content", tw_tag.get("value", None)) + + if ograph: + og_tag = head_info.find( + "meta", attrs={"property": "og:" + ograph} + ) or head_info.find("meta", attrs={"name": "og:" + ograph}) + if og_tag: + return og_tag.get("content", og_tag.get("value", None)) + + if meta: + meta_tag = head_info.find("meta", attrs={"name": meta}) or head_info.find( + "meta", attrs={"property": meta} + ) + if meta_tag: + return meta_tag.get("content", meta_tag.get("value", None)) + + if elem: + elem_tag = head_info.find(elem) + if elem_tag: + return elem_tag.text + + return None + + def embed_url(text): soup = BeautifulSoup(text, "html.parser") attachments = {} for element in soup.find_all("a"): - if "href" in element.attrs and element.attrs["href"].startswith("http") and ("data-noembed" not in element.attrs): + if ( + "href" in element.attrs + and element.attrs["href"].startswith("http") + and element.attrs["href"] not in attachments + and ("data-noembed" not in element.attrs) + ): + original_link_name = element.attrs["href"] + page_url = urlparse(element.attrs["href"]) page = get_url_content(element.attrs["href"]) - if page: - parsed_page = BeautifulSoup(page, "html.parser") - head_info = parsed_page.find("head") - if head_info: + if not page: + continue - def get_element_options( - elem=None, meta=None, ograph=None, twitter=None - ): - if twitter: - tw_tag = head_info.find( - "meta", attrs={"name": "twitter:" + twitter} - ) or head_info.find( - "meta", attrs={"property": "twitter:" + twitter} - ) - if tw_tag: - return tw_tag.get("content", tw_tag.get("value", None)) + parsed_page = BeautifulSoup(page, "html.parser") + head_info = parsed_page.find("head") - if ograph: - og_tag = head_info.find( - "meta", attrs={"property": "og:" + ograph} - ) or head_info.find("meta", attrs={"name": "og:" + ograph}) - if og_tag: - return og_tag.get("content", og_tag.get("value", None)) + if not head_info: + continue - if meta: - meta_tag = head_info.find( - "meta", attrs={"name": meta} - ) or head_info.find("meta", attrs={"property": meta}) - if meta_tag: - return meta_tag.get( - "content", meta_tag.get("value", None) - ) + page_name = ( + get_element_options(head_info, "title", "title", "title", "title") + or page_url.netloc + ) + page_site = ( + get_element_options(head_info, None, "site", "site", "site") + or get_element_options(head_info, ograph="site_name") + or page_url.netloc + ) + page_description = get_element_options( + head_info, None, "description", "description", "description" + ) - if elem: - elem_tag = head_info.find(elem) - if elem_tag: - return elem_tag.text + page_image = ( + get_element_options(head_info, None, "image", "image", "image") + or get_element_options( + head_info, None, "image:url", "image:url", "image:url" + ) + or get_element_options( + head_info, + None, + "image:secure_url", + "image:secure_url", + "image:secure_url", + ) + ) + page_image_height = get_element_options( + head_info, None, "image:height", "image:height", "image:height" + ) + page_image_width = get_element_options( + head_info, None, "image:width", "image:width", "image:width" + ) + page_image_alt = get_element_options( + head_info, None, "image:alt", "image:alt", "image:alt" + ) - return None + page_video = ( + get_element_options(head_info, None, "video", "video", "video") + or get_element_options( + head_info, None, "video:url", "video:url", "video:url" + ) + or get_element_options( + head_info, + None, + "video:secure_url", + "video:secure_url", + "video:secure_url", + ) + ) + page_video_height = get_element_options( + head_info, None, "video:height", "video:height", "video:height" + ) + page_video_width = get_element_options( + head_info, None, "video:width", "video:width", "video:width" + ) - original_link_name = element.attrs["href"] + page_audio = ( + get_element_options(head_info, None, "audio", "audio", "audio") + or get_element_options( + head_info, None, "audio:url", "audio:url", "audio:url" + ) + or get_element_options( + head_info, + None, + "audio:secure_url", + "audio:secure_url", + "audio:secure_url", + ) + ) - if original_link_name in attachments: - continue + (get_element_options(head_info, twitter="card") or "summary_large_image") - page_name = ( - get_element_options("title", "title", "title", "title") - or page_url.netloc - ) - page_site = ( - get_element_options(None, "site", "site", "site") - or page_url.netloc - ) - page_description = get_element_options( - None, "description", "description", "description" - ) - page_image = get_element_options(None, "image", "image", "image") - page_image_alt = get_element_options( - None, "image:alt", "image:alt", "image:alt" - ) - page_video = get_element_options(None, "video", "video", "video") - page_audio = get_element_options(None, "audio", "audio", "audio") + attachment_base = BeautifulSoup(str(element), "html.parser") + attachments[original_link_name] = attachment_base - ( - get_element_options(None, None, None, "card") - or "summary_large_image" - ) + attachment = next(attachment_base.children) - attachment_base = BeautifulSoup(str(element), "html.parser") - attachments[original_link_name] = attachment_base + attachment.clear() + attachment.attrs["class"] = "embed-url-link" - attachment = next(attachment_base.children) + render_element = attachment - attachment.clear() - attachment.attrs["class"] = "embed-url-link" + if page_image: + style = { + "width": page_image_width + "px" if page_image_width else None, + "height": page_image_height + "px" if page_image_height else None, + } - render_element = attachment + style_string = "; ".join( + f"{key}: {value}" for key, value in style.items() if value + ) - if page_image: - image_template = f'{page_image_alt or page_name}' - render_element.append( - BeautifulSoup(image_template, "html.parser") - ) - if page_video: - video_template = f'' - render_element.append( - BeautifulSoup(video_template, "html.parser") - ) - if page_audio: - audio_template = f'' - render_element.append( - BeautifulSoup(audio_template, "html.parser") - ) + image_template = f'{page_image_alt or page_name}' + render_element.append(BeautifulSoup(image_template, "html.parser")) - description_element_base = BeautifulSoup( - "", "html.parser" - ) - description_element = next(description_element_base.children) - description_element.append( - BeautifulSoup( - f'

{page_site}

', - "html.parser", - ) - ) + if page_video: + style = { + "width": page_video_width + "px" if page_video_width else None, + "height": page_video_height + "px" if page_video_height else None, + } - description_element.append( - BeautifulSoup( - f'{page_name}', - "html.parser", - ) - ) + style_string = "; ".join( + f"{key}: {value}" for key, value in style.items() if value + ) + video_template = f'' + render_element.append(BeautifulSoup(video_template, "html.parser")) - description_element.append( - BeautifulSoup( - f"

{page_description or 'No description available.'}

", - "html.parser", - ) - ) + if page_audio: + audio_template = f'' + render_element.append(BeautifulSoup(audio_template, "html.parser")) - description_element.append( - BeautifulSoup( - f"", - "html.parser", - ) - ) + description_element = BeautifulSoup( + f""" + +

{page_site}

+ {page_name} +

{page_description or "No description available."}

+ +
+ """, + "html.parser", + ) - render_element.append(description_element_base) + render_element.append(description_element) for attachment in attachments.values(): soup.append(attachment) From ee11cde835d4741040500e2b9d1bdcdf3f7b95a6 Mon Sep 17 00:00:00 2001 From: BordedDev <> Date: Fri, 27 Jun 2025 12:10:16 +0200 Subject: [PATCH 2/2] Added shorts embedding support --- src/snek/system/template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snek/system/template.py b/src/snek/system/template.py index 668efc3..097f21a 100644 --- a/src/snek/system/template.py +++ b/src/snek/system/template.py @@ -193,7 +193,7 @@ def embed_youtube(text): "www.youtube-nocookie.com", "youtube-nocookie.com", ] - and any(url.path.startswith(p) for p in ["/watch", "/embed"]) + and any(url.path.startswith(p) for p in ["/watch", "/embed", "/shorts"]) ): queries = parse_qs(url.query) if "v" in queries: