Fix YouTube embed parsing and add support for start time; handle missing channel attachments

This commit is contained in:
BordedDev 2025-05-17 13:23:32 +02:00
parent 48c3daf398
commit ffb22165da
No known key found for this signature in database
GPG Key ID: C5F495EAE56673BF
2 changed files with 78 additions and 25 deletions
src/snek

View File

@ -1,4 +1,5 @@
import re
from urllib.parse import urlparse, parse_qs
from types import SimpleNamespace
import mimetypes
@ -90,16 +91,59 @@ def set_link_target_blank(text):
# Short-link hosts: every path on these is treated as a video link.
_YOUTUBE_SHORT_HOSTS = frozenset({"youtu.be", "www.youtu.be"})

# Full-site hosts: only paths under _YOUTUBE_PATH_PREFIXES are treated as videos.
_YOUTUBE_SITE_HOSTS = frozenset(
    {
        "www.youtube.com",
        "youtube.com",
        "m.youtube.com",
        "www.youtube-nocookie.com",
        "youtube-nocookie.com",
    }
)
_YOUTUBE_PATH_PREFIXES = ("/watch", "/embed")

# "90", "1:30", "1:02:03" -- plain seconds or clock notation.
_YOUTUBE_CLOCK_RE = re.compile(r"\d+(?::\d+)*", re.ASCII)
# "90s", "1m30s", "1h2m3s", "2m" -- unit-suffixed notation used by share links.
_YOUTUBE_UNITS_RE = re.compile(r"(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s?)?", re.ASCII)


def _youtube_start_seconds(value):
    """Convert a YouTube ``t`` query value to whole seconds.

    Accepts plain seconds (``"90"``), unit-suffixed values (``"90s"``,
    ``"1m30s"``, ``"1h2m3s"``) and clock notation (``"1:30"``).
    Returns ``None`` when the value is not recognised, so callers can
    ignore it instead of failing.
    """
    value = value.strip().lower()
    if _YOUTUBE_CLOCK_RE.fullmatch(value):
        # Right-most field is seconds, each field to the left is worth 60x more.
        return sum(
            int(part) * 60**power
            for power, part in enumerate(reversed(value.split(":")))
        )
    match = _YOUTUBE_UNITS_RE.fullmatch(value)
    if match and any(match.groups()):
        hours, minutes, seconds = (int(group or 0) for group in match.groups())
        return hours * 3600 + minutes * 60 + seconds
    return None


def _youtube_video_id(url, queries):
    """Return the video id for a parsed YouTube URL, or ``None`` if absent.

    ``url`` is the ``urlparse`` result and ``queries`` the ``parse_qs``
    mapping of its query string. The ``v`` parameter wins when present;
    otherwise the last non-empty path segment is used.
    """
    if "v" in queries:
        return queries["v"][0]
    segments = [segment for segment in url.path.split("/") if segment]
    # A bare "/watch" or "/embed" carries no id; embedding it would be a dead frame.
    if not segments or segments[-1] in ("watch", "embed"):
        return None
    return segments[-1]


def embed_youtube(text):
    """Replace YouTube links in an HTML fragment with embedded players.

    Every ``<a href=...>`` pointing at ``youtu.be`` (any path) or at
    ``youtube.com`` / ``m.youtube.com`` / ``youtube-nocookie.com`` under
    ``/watch`` or ``/embed`` is swapped for an ``<iframe>`` player. A ``t``
    parameter is translated into the ``start`` parameter (in seconds) that
    the embed URL uses; all other query parameters are passed through.
    Links that cannot be parsed or carry no video id are left untouched.

    Args:
        text: HTML fragment to process.

    Returns:
        The fragment, serialised back to a string, with matching links
        replaced by iframes.
    """
    # Local imports, as done elsewhere in this file: escaping helpers for the
    # iframe URL.
    from html import escape
    from urllib.parse import quote, urlencode

    soup = BeautifulSoup(text, "html.parser")
    # href=True skips anchors without an href (e.g. <a name="...">), which
    # would otherwise raise KeyError.
    for element in soup.find_all("a", href=True):
        try:
            url = urlparse(element["href"])
            hostname = url.hostname
        except ValueError:
            # Malformed URL (e.g. unbalanced IPv6 brackets): not embeddable.
            continue

        is_short_link = hostname in _YOUTUBE_SHORT_HOSTS
        is_site_link = hostname in _YOUTUBE_SITE_HOSTS and url.path.startswith(
            _YOUTUBE_PATH_PREFIXES
        )
        if not (is_short_link or is_site_link):
            continue

        queries = parse_qs(url.query)
        video_name = _youtube_video_id(url, queries)
        if not video_name:
            continue
        queries.pop("v", None)

        # The watch page takes "t"; the embed player takes "start" in seconds.
        start_values = [
            str(seconds)
            for seconds in map(_youtube_start_seconds, queries.pop("t", []))
            if seconds is not None
        ]
        if start_values:
            queries["start"] = start_values

        base_url = (
            "youtube-nocookie.com"
            if "youtube-nocookie" in hostname
            else "youtube.com"
        )
        # parse_qs percent-decodes values, so re-encode them here and
        # HTML-escape the whole URL: the link is user supplied and must not
        # be able to break out of the src attribute.
        src = f"https://www.{base_url}/embed/{quote(video_name, safe='')}"
        new_queries = urlencode(queries, doseq=True)
        if new_queries:
            src += "?" + new_queries

        embed_template = f'<iframe width="560" height="315" style="display:block" src="{escape(src, quote=True)}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share" referrerpolicy="strict-origin-when-cross-origin" allowfullscreen></iframe>'
        element.replace_with(BeautifulSoup(embed_template, "html.parser"))
    return str(soup)
@ -108,29 +152,35 @@ def embed_image(text):
for element in soup.find_all("a"):
file_mime = mimetypes.guess_type(element.attrs["href"])[0]
if file_mime and file_mime.startswith("image/") or any(
ext in element.attrs["href"].lower() for ext in [
".png",
".jpg",
".jpeg",
".gif",
".webp",
".svg",
".bmp",
".tiff",
".ico",
".heif",
".heic",
]
if (
file_mime
and file_mime.startswith("image/")
or any(
ext in element.attrs["href"].lower()
for ext in [
".png",
".jpg",
".jpeg",
".gif",
".webp",
".svg",
".bmp",
".tiff",
".ico",
".heif",
".heic",
]
)
):
embed_template = f'<img src="{element.attrs["href"]}" title="{element.attrs["href"]}?width=420" alt="{element.attrs["href"]}" />'
element.replace_with(BeautifulSoup(embed_template, "html.parser"))
return str(soup)
def enrich_image_rendering(text):
soup = BeautifulSoup(text, "html.parser")
for element in soup.find_all("img"):
if element.attrs["src"].startswith("/" ):
if element.attrs["src"].startswith("/"):
element.attrs["src"] += "?width=240&height=240"
picture_template = f'''
<picture>
@ -245,7 +295,6 @@ class PythonExtension(Extension):
).set_lineno(line_number)
def _to_html(self, md_file, caller):
def fn(source):
import subprocess

View File

@ -17,6 +17,9 @@ class ChannelAttachmentView(BaseView):
relative_url=relative_path
)
if not channel_attachment:
return web.HTTPNotFound()
original_format = mimetypes.guess_type(channel_attachment["path"])[0]
format_ = self.request.query.get("format")
width = self.request.query.get("width")
@ -88,6 +91,7 @@ class ChannelAttachmentView(BaseView):
response.headers["Content-Disposition"] = (
f'attachment; filename="{channel_attachment["name"]}"'
)
response.headers["Content-Type"] = original_format
return response
async def post(self):