Fix YouTube embed parsing and add support for start time; handle missing channel attachments

2025-05-17 13:23:32 +02:00 · 2025-05-17 13:23:32 +02:00 · ffb22165da
commit ffb22165da
parent 48c3daf398
2 changed files with 78 additions and 25 deletions
--- a/src/snek/system/template.py
+++ b/src/snek/system/template.py
@ -1,4 +1,5 @@
 import re
 from urllib.parse import urlparse, parse_qs
 from types import SimpleNamespace
 import mimetypes
@ -90,16 +91,59 @@ def set_link_target_blank(text):
 def _youtube_start_seconds(value):
     """Convert a YouTube ``t=`` value to whole seconds, or None if unparseable.

     Accepts plain seconds ("90", "90s"), unit-suffixed times ("1h2m3s",
     "1m30") and colon-separated clock times ("1:30", "1:02:03").
     """
     value = value.strip().lower()
     if not value:
         return None
     if re.fullmatch(r"\d+(?::\d+)*", value):
         # Right-most field is seconds; each field to the left is worth 60x more.
         return sum(
             int(part) * 60**power
             for power, part in enumerate(reversed(value.split(":")))
         )
     match = re.fullmatch(r"(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s?)?", value)
     if match is None:
         return None
     hours, minutes, seconds = (int(group or 0) for group in match.groups())
     return hours * 3600 + minutes * 60 + seconds


 def embed_youtube(text):
     """Replace YouTube links in an HTML fragment with embedded players.

     Every ``<a>`` whose ``href`` points at youtu.be, or at a ``/watch`` or
     ``/embed`` path on youtube.com / youtube-nocookie.com, is swapped for an
     ``<iframe>`` targeting the matching ``/embed/<id>`` URL. A ``t=`` start
     time is translated to the ``start=`` parameter (in seconds) that the
     embedded player expects; all other query parameters are passed through.

     Anchors without an ``href``, with an unparseable URL, or without a
     recognisable video id are left untouched.

     Args:
         text: HTML markup to scan.

     Returns:
         The markup, serialised back to a string, with matching links
         replaced by iframes.
     """
     # Imported locally: the module header only pulls in urlparse and parse_qs.
     from html import escape
     from urllib.parse import quote, urlencode

     short_hosts = ("www.youtu.be", "youtu.be")
     full_hosts = (
         "www.youtube.com",
         "youtube.com",
         "www.youtube-nocookie.com",
         "youtube-nocookie.com",
     )

     soup = BeautifulSoup(text, "html.parser")
     for element in soup.find_all("a"):
         href = element.get("href")
         if not href:
             continue
         try:
             url = urlparse(href)
         except ValueError:
             # Malformed URL (e.g. an unbalanced IPv6 bracket): not embeddable.
             continue

         host = url.hostname
         on_short_host = host in short_hosts
         on_full_host = host in full_hosts and url.path.startswith(
             ("/watch", "/embed")
         )
         if not (on_short_host or on_full_host):
             continue

         queries = parse_qs(url.query)
         video_ids = queries.pop("v", None)
         if video_ids:
             video_name = video_ids[0]
         else:
             video_name = url.path.rstrip("/").split("/")[-1]
             # "/watch" without v= or a bare "/embed" carries no video id.
             if video_name in ("", "watch", "embed"):
                 continue

         start_values = queries.pop("t", None)
         if start_values:
             seconds = [
                 str(parsed)
                 for parsed in map(_youtube_start_seconds, start_values)
                 if parsed is not None
             ]
             if seconds:
                 queries["start"] = seconds

         base_url = (
             "youtube-nocookie.com" if "youtube-nocookie" in host else "youtube.com"
         )
         # parse_qs percent-decodes its values, so re-encode everything before
         # it goes back into a URL that is interpolated into markup.
         src = f"https://www.{base_url}/embed/{quote(video_name, safe='')}"
         if queries:
             src += "?" + urlencode(queries, doseq=True)

         embed_template = f'<iframe width="560" height="315" style="display:block" src="{escape(src)}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>'
         element.replace_with(BeautifulSoup(embed_template, "html.parser"))
     return str(soup)
@ -108,29 +152,35 @@ def embed_image(text):
    for element in soup.find_all("a"):
        file_mime = mimetypes.guess_type(element.attrs["href"])[0]
-        if file_mime and file_mime.startswith("image/") or any(
+        if (
-            ext in element.attrs["href"].lower() for ext in [
+            file_mime
-                ".png",
+            and file_mime.startswith("image/")
-                ".jpg",
+            or any(
-                ".jpeg",
+                ext in element.attrs["href"].lower()
-                ".gif",
+                for ext in [
-                ".webp",
+                    ".png",
-                ".svg",
+                    ".jpg",
-                ".bmp",
+                    ".jpeg",
-                ".tiff",
+                    ".gif",
-                ".ico",
+                    ".webp",
-                ".heif",
+                    ".svg",
-                ".heic",
+                    ".bmp",
-            ]
+                    ".tiff",
                    ".ico",
                    ".heif",
                    ".heic",
                ]
            )
        ):
            embed_template = f'<img src="{element.attrs["href"]}" title="{element.attrs["href"]}?width=420" alt="{element.attrs["href"]}" />'
            element.replace_with(BeautifulSoup(embed_template, "html.parser"))
    return str(soup)
 def enrich_image_rendering(text):
    soup = BeautifulSoup(text, "html.parser")
    for element in soup.find_all("img"):
-        if element.attrs["src"].startswith("/"   ):
+        if element.attrs["src"].startswith("/"):
            element.attrs["src"] += "?width=240&height=240"
            picture_template = f'''
                                <picture>
@ -245,7 +295,6 @@ class PythonExtension(Extension):
        ).set_lineno(line_number)
    def _to_html(self, md_file, caller):
        def fn(source):
            import subprocess
--- a/src/snek/view/channel.py
+++ b/src/snek/view/channel.py
@ -17,6 +17,9 @@ class ChannelAttachmentView(BaseView):
            relative_url=relative_path
        )
        if not channel_attachment:
            return web.HTTPNotFound()
        original_format = mimetypes.guess_type(channel_attachment["path"])[0]
        format_ = self.request.query.get("format")
        width = self.request.query.get("width")
@ -88,6 +91,7 @@ class ChannelAttachmentView(BaseView):
            response.headers["Content-Disposition"] = (
                f'attachment; filename="{channel_attachment["name"]}"'
            )
            response.headers["Content-Type"] = original_format
            return response
    async def post(self):