Fix YouTube embed parsing and add support for start time; handle missing channel attachments

This commit is contained in:
BordedDev 2025-05-17 13:23:32 +02:00
parent 48c3daf398
commit ffb22165da
No known key found for this signature in database
GPG Key ID: C5F495EAE56673BF
2 changed files with 78 additions and 25 deletions
src/snek

View File

@ -1,4 +1,5 @@
import re import re
from urllib.parse import urlparse, parse_qs
from types import SimpleNamespace from types import SimpleNamespace
import mimetypes import mimetypes
@ -90,16 +91,59 @@ def set_link_target_blank(text):
def embed_youtube(text):
    """Replace YouTube links in an HTML fragment with embedded players.

    Every ``<a>`` whose ``href`` points at ``youtu.be``, ``youtube.com`` or
    ``youtube-nocookie.com`` (with or without ``www.``) is swapped for an
    ``<iframe>`` targeting the matching ``/embed/<video id>`` URL.  For the
    long host names only ``/watch?v=<id>`` and ``/embed/<id>`` paths are
    converted; short ``youtu.be`` links carry the id as their last path
    segment.  A ``t`` query parameter is translated into the ``start``
    parameter (whole seconds); all other query parameters are forwarded.

    Args:
        text: HTML fragment to scan.

    Returns:
        The fragment, re-serialised by BeautifulSoup, with the recognised
        links replaced.  Links that cannot be parsed or that carry no video
        id are left untouched.
    """
    # Imported locally so this function does not depend on a change to the
    # module-level import block.
    from urllib.parse import quote, urlencode

    short_hosts = ("youtu.be", "www.youtu.be")
    long_hosts = (
        "youtube.com",
        "www.youtube.com",
        "youtube-nocookie.com",
        "www.youtube-nocookie.com",
    )

    soup = BeautifulSoup(text, "html.parser")
    for element in soup.find_all("a"):
        href = element.get("href")
        if not href:
            # Anchors without a target (e.g. named anchors) are not links.
            continue
        try:
            url = urlparse(href)
        except ValueError:
            # urlparse rejects some malformed hrefs (e.g. bad IPv6 literals).
            continue

        is_short = url.hostname in short_hosts
        is_long = url.hostname in long_hosts and url.path.startswith(
            ("/watch", "/embed")
        )
        if not (is_short or is_long):
            continue

        queries = parse_qs(url.query)
        segments = [part for part in url.path.split("/") if part]
        if "v" in queries:
            video_id = queries.pop("v")[0]
        elif is_short and segments:
            video_id = segments[-1]
        elif len(segments) >= 2 and segments[0] == "embed":
            video_id = segments[-1]
        else:
            # e.g. "/watch" without ?v= or a bare host: nothing to embed.
            continue

        # The embed player expects "start" in seconds instead of "t".
        raw_times = queries.pop("t", None)
        if raw_times:
            starts = [
                seconds
                for seconds in map(_youtube_start_seconds, raw_times)
                if seconds is not None
            ]
            if starts:
                queries["start"] = starts

        base_url = (
            "youtube-nocookie.com"
            if "youtube-nocookie" in url.hostname
            else "youtube.com"
        )
        # parse_qs percent-decodes its values, so everything is re-encoded
        # before it is interpolated into the src attribute; otherwise a
        # quote character in the link could break out of the attribute.
        embed_src = f"https://www.{base_url}/embed/{quote(video_id, safe='')}"
        query_string = urlencode(queries, doseq=True)
        if query_string:
            embed_src += f"?{query_string}"

        embed_template = f'<iframe width="560" height="315" style="display:block" src="{embed_src}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>'
        element.replace_with(BeautifulSoup(embed_template, "html.parser"))
    return str(soup)


def _youtube_start_seconds(raw):
    """Convert a YouTube ``t`` value into whole seconds, as a string.

    Accepted forms: ``"90"``, ``"90s"``, ``"1h2m3s"`` (any subset of the
    units, in that order) and ``"1:02:03"``.  Returns ``None`` when the
    value matches none of them, so the caller can drop the parameter
    instead of failing on user-supplied input.
    """
    value = raw.strip().lower()
    if re.fullmatch(r"[0-9]+", value):
        return value
    if re.fullmatch(r"[0-9]+(?::[0-9]+)+", value):
        # Colon-separated fields, least significant last: s, m, h, ...
        return str(
            sum(
                int(field) * 60**power
                for power, field in enumerate(reversed(value.split(":")))
            )
        )
    match = re.fullmatch(r"(?:([0-9]+)h)?(?:([0-9]+)m)?(?:([0-9]+)s)?", value)
    if match and any(match.groups()):
        hours, minutes, seconds = (int(group or 0) for group in match.groups())
        return str(hours * 3600 + minutes * 60 + seconds)
    return None
@ -108,29 +152,35 @@ def embed_image(text):
for element in soup.find_all("a"): for element in soup.find_all("a"):
file_mime = mimetypes.guess_type(element.attrs["href"])[0] file_mime = mimetypes.guess_type(element.attrs["href"])[0]
if file_mime and file_mime.startswith("image/") or any( if (
ext in element.attrs["href"].lower() for ext in [ file_mime
".png", and file_mime.startswith("image/")
".jpg", or any(
".jpeg", ext in element.attrs["href"].lower()
".gif", for ext in [
".webp", ".png",
".svg", ".jpg",
".bmp", ".jpeg",
".tiff", ".gif",
".ico", ".webp",
".heif", ".svg",
".heic", ".bmp",
] ".tiff",
".ico",
".heif",
".heic",
]
)
): ):
embed_template = f'<img src="{element.attrs["href"]}" title="{element.attrs["href"]}?width=420" alt="{element.attrs["href"]}" />' embed_template = f'<img src="{element.attrs["href"]}" title="{element.attrs["href"]}?width=420" alt="{element.attrs["href"]}" />'
element.replace_with(BeautifulSoup(embed_template, "html.parser")) element.replace_with(BeautifulSoup(embed_template, "html.parser"))
return str(soup) return str(soup)
def enrich_image_rendering(text): def enrich_image_rendering(text):
soup = BeautifulSoup(text, "html.parser") soup = BeautifulSoup(text, "html.parser")
for element in soup.find_all("img"): for element in soup.find_all("img"):
if element.attrs["src"].startswith("/" ): if element.attrs["src"].startswith("/"):
element.attrs["src"] += "?width=240&height=240" element.attrs["src"] += "?width=240&height=240"
picture_template = f''' picture_template = f'''
<picture> <picture>
@ -245,7 +295,6 @@ class PythonExtension(Extension):
).set_lineno(line_number) ).set_lineno(line_number)
def _to_html(self, md_file, caller): def _to_html(self, md_file, caller):
def fn(source): def fn(source):
import subprocess import subprocess

View File

@ -17,6 +17,9 @@ class ChannelAttachmentView(BaseView):
relative_url=relative_path relative_url=relative_path
) )
if not channel_attachment:
return web.HTTPNotFound()
original_format = mimetypes.guess_type(channel_attachment["path"])[0] original_format = mimetypes.guess_type(channel_attachment["path"])[0]
format_ = self.request.query.get("format") format_ = self.request.query.get("format")
width = self.request.query.get("width") width = self.request.query.get("width")
@ -88,6 +91,7 @@ class ChannelAttachmentView(BaseView):
response.headers["Content-Disposition"] = ( response.headers["Content-Disposition"] = (
f'attachment; filename="{channel_attachment["name"]}"' f'attachment; filename="{channel_attachment["name"]}"'
) )
response.headers["Content-Type"] = original_format
return response return response
async def post(self): async def post(self):