Merge pull request 'Improved site embedding' (#57) from BordedDev/snek:feat/improved-embeding into main
Reviewed-on: retoor/snek#57
This commit is contained in:
commit
006882cd6f
@ -193,7 +193,7 @@ def embed_youtube(text):
|
||||
"www.youtube-nocookie.com",
|
||||
"youtube-nocookie.com",
|
||||
]
|
||||
and any(url.path.startswith(p) for p in ["/watch", "/embed"])
|
||||
and any(url.path.startswith(p) for p in ["/watch", "/embed", "/shorts"])
|
||||
):
|
||||
queries = parse_qs(url.query)
|
||||
if "v" in queries:
|
||||
@ -333,142 +333,185 @@ def get_url_content(url):
|
||||
return None
|
||||
|
||||
|
||||
def get_element_options(head_info, elem=None, meta=None, ograph=None, twitter=None):
    """Return the first usable value for a page property found in a parsed head.

    Sources are consulted in priority order, and the first one that yields a
    non-empty value wins:

    1. Twitter card meta tag ``twitter:<twitter>`` (``name`` attribute first,
       then ``property``).
    2. Open Graph meta tag ``og:<ograph>`` (``property`` attribute first,
       then ``name``).
    3. Plain meta tag ``<meta>`` (``name`` attribute first, then ``property``).
    4. The text of the first ``<elem>`` element.

    For meta tags the ``content`` attribute is preferred and ``value`` is used
    as a fallback.

    Args:
        head_info: Object exposing ``find(name, attrs=...)`` whose results
            expose ``get(key, default)`` and ``text``; callers in this module
            pass the ``<head>`` tag of a BeautifulSoup document.
        elem: Element name whose text is the last-resort source, or ``None``.
        meta: Key of a plain meta tag, or ``None``.
        ograph: Open Graph key without the ``og:`` prefix, or ``None``.
        twitter: Twitter card key without the ``twitter:`` prefix, or ``None``.

    Returns:
        The first non-empty meta value, otherwise the text of ``elem`` when
        that element exists (which may be an empty string), otherwise ``None``.
    """
    # (full key, attribute tried first, attribute tried second), in priority
    # order. The attribute order mirrors where each vocabulary normally lives.
    meta_lookups = (
        ("twitter:" + twitter if twitter else None, "name", "property"),
        ("og:" + ograph if ograph else None, "property", "name"),
        (meta if meta else None, "name", "property"),
    )

    for key, first_attr, second_attr in meta_lookups:
        if not key:
            continue

        tag = head_info.find("meta", attrs={first_attr: key}) or head_info.find(
            "meta", attrs={second_attr: key}
        )
        if not tag:
            continue

        # A tag that exists but carries no usable content must not end the
        # search: fall through to the next, lower-priority source instead of
        # returning an empty result.
        value = tag.get("content", None) or tag.get("value", None)
        if value:
            return value

    if elem:
        elem_tag = head_info.find(elem)
        if elem_tag:
            return elem_tag.text

    return None
|
||||
|
||||
|
||||
def embed_url(text):
|
||||
soup = BeautifulSoup(text, "html.parser")
|
||||
|
||||
attachments = {}
|
||||
|
||||
for element in soup.find_all("a"):
|
||||
if "href" in element.attrs and element.attrs["href"].startswith("http") and ("data-noembed" not in element.attrs):
|
||||
if (
|
||||
"href" in element.attrs
|
||||
and element.attrs["href"].startswith("http")
|
||||
and element.attrs["href"] not in attachments
|
||||
and ("data-noembed" not in element.attrs)
|
||||
):
|
||||
original_link_name = element.attrs["href"]
|
||||
|
||||
page_url = urlparse(element.attrs["href"])
|
||||
page = get_url_content(element.attrs["href"])
|
||||
if page:
|
||||
parsed_page = BeautifulSoup(page, "html.parser")
|
||||
head_info = parsed_page.find("head")
|
||||
if head_info:
|
||||
if not page:
|
||||
continue
|
||||
|
||||
def get_element_options(
|
||||
elem=None, meta=None, ograph=None, twitter=None
|
||||
):
|
||||
if twitter:
|
||||
tw_tag = head_info.find(
|
||||
"meta", attrs={"name": "twitter:" + twitter}
|
||||
) or head_info.find(
|
||||
"meta", attrs={"property": "twitter:" + twitter}
|
||||
)
|
||||
if tw_tag:
|
||||
return tw_tag.get("content", tw_tag.get("value", None))
|
||||
parsed_page = BeautifulSoup(page, "html.parser")
|
||||
head_info = parsed_page.find("head")
|
||||
|
||||
if ograph:
|
||||
og_tag = head_info.find(
|
||||
"meta", attrs={"property": "og:" + ograph}
|
||||
) or head_info.find("meta", attrs={"name": "og:" + ograph})
|
||||
if og_tag:
|
||||
return og_tag.get("content", og_tag.get("value", None))
|
||||
if not head_info:
|
||||
continue
|
||||
|
||||
if meta:
|
||||
meta_tag = head_info.find(
|
||||
"meta", attrs={"name": meta}
|
||||
) or head_info.find("meta", attrs={"property": meta})
|
||||
if meta_tag:
|
||||
return meta_tag.get(
|
||||
"content", meta_tag.get("value", None)
|
||||
)
|
||||
page_name = (
|
||||
get_element_options(head_info, "title", "title", "title", "title")
|
||||
or page_url.netloc
|
||||
)
|
||||
page_site = (
|
||||
get_element_options(head_info, None, "site", "site", "site")
|
||||
or get_element_options(head_info, ograph="site_name")
|
||||
or page_url.netloc
|
||||
)
|
||||
page_description = get_element_options(
|
||||
head_info, None, "description", "description", "description"
|
||||
)
|
||||
|
||||
if elem:
|
||||
elem_tag = head_info.find(elem)
|
||||
if elem_tag:
|
||||
return elem_tag.text
|
||||
page_image = (
|
||||
get_element_options(head_info, None, "image", "image", "image")
|
||||
or get_element_options(
|
||||
head_info, None, "image:url", "image:url", "image:url"
|
||||
)
|
||||
or get_element_options(
|
||||
head_info,
|
||||
None,
|
||||
"image:secure_url",
|
||||
"image:secure_url",
|
||||
"image:secure_url",
|
||||
)
|
||||
)
|
||||
page_image_height = get_element_options(
|
||||
head_info, None, "image:height", "image:height", "image:height"
|
||||
)
|
||||
page_image_width = get_element_options(
|
||||
head_info, None, "image:width", "image:width", "image:width"
|
||||
)
|
||||
page_image_alt = get_element_options(
|
||||
head_info, None, "image:alt", "image:alt", "image:alt"
|
||||
)
|
||||
|
||||
return None
|
||||
page_video = (
|
||||
get_element_options(head_info, None, "video", "video", "video")
|
||||
or get_element_options(
|
||||
head_info, None, "video:url", "video:url", "video:url"
|
||||
)
|
||||
or get_element_options(
|
||||
head_info,
|
||||
None,
|
||||
"video:secure_url",
|
||||
"video:secure_url",
|
||||
"video:secure_url",
|
||||
)
|
||||
)
|
||||
page_video_height = get_element_options(
|
||||
head_info, None, "video:height", "video:height", "video:height"
|
||||
)
|
||||
page_video_width = get_element_options(
|
||||
head_info, None, "video:width", "video:width", "video:width"
|
||||
)
|
||||
|
||||
original_link_name = element.attrs["href"]
|
||||
page_audio = (
|
||||
get_element_options(head_info, None, "audio", "audio", "audio")
|
||||
or get_element_options(
|
||||
head_info, None, "audio:url", "audio:url", "audio:url"
|
||||
)
|
||||
or get_element_options(
|
||||
head_info,
|
||||
None,
|
||||
"audio:secure_url",
|
||||
"audio:secure_url",
|
||||
"audio:secure_url",
|
||||
)
|
||||
)
|
||||
|
||||
if original_link_name in attachments:
|
||||
continue
|
||||
(get_element_options(head_info, twitter="card") or "summary_large_image")
|
||||
|
||||
page_name = (
|
||||
get_element_options("title", "title", "title", "title")
|
||||
or page_url.netloc
|
||||
)
|
||||
page_site = (
|
||||
get_element_options(None, "site", "site", "site")
|
||||
or page_url.netloc
|
||||
)
|
||||
page_description = get_element_options(
|
||||
None, "description", "description", "description"
|
||||
)
|
||||
page_image = get_element_options(None, "image", "image", "image")
|
||||
page_image_alt = get_element_options(
|
||||
None, "image:alt", "image:alt", "image:alt"
|
||||
)
|
||||
page_video = get_element_options(None, "video", "video", "video")
|
||||
page_audio = get_element_options(None, "audio", "audio", "audio")
|
||||
attachment_base = BeautifulSoup(str(element), "html.parser")
|
||||
attachments[original_link_name] = attachment_base
|
||||
|
||||
(
|
||||
get_element_options(None, None, None, "card")
|
||||
or "summary_large_image"
|
||||
)
|
||||
attachment = next(attachment_base.children)
|
||||
|
||||
attachment_base = BeautifulSoup(str(element), "html.parser")
|
||||
attachments[original_link_name] = attachment_base
|
||||
attachment.clear()
|
||||
attachment.attrs["class"] = "embed-url-link"
|
||||
|
||||
attachment = next(attachment_base.children)
|
||||
render_element = attachment
|
||||
|
||||
attachment.clear()
|
||||
attachment.attrs["class"] = "embed-url-link"
|
||||
if page_image:
|
||||
style = {
|
||||
"width": page_image_width + "px" if page_image_width else None,
|
||||
"height": page_image_height + "px" if page_image_height else None,
|
||||
}
|
||||
|
||||
render_element = attachment
|
||||
style_string = "; ".join(
|
||||
f"{key}: {value}" for key, value in style.items() if value
|
||||
)
|
||||
|
||||
if page_image:
|
||||
image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="420" height="240" /></span>'
|
||||
render_element.append(
|
||||
BeautifulSoup(image_template, "html.parser")
|
||||
)
|
||||
if page_video:
|
||||
video_template = f'<video controls><source src="{page_video}">Your browser does not support the video tag.</video>'
|
||||
render_element.append(
|
||||
BeautifulSoup(video_template, "html.parser")
|
||||
)
|
||||
if page_audio:
|
||||
audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>'
|
||||
render_element.append(
|
||||
BeautifulSoup(audio_template, "html.parser")
|
||||
)
|
||||
image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="1" height="1" style="{style_string}" /></span>'
|
||||
render_element.append(BeautifulSoup(image_template, "html.parser"))
|
||||
|
||||
description_element_base = BeautifulSoup(
|
||||
"<span class='description'></span>", "html.parser"
|
||||
)
|
||||
description_element = next(description_element_base.children)
|
||||
description_element.append(
|
||||
BeautifulSoup(
|
||||
f'<p class="page-site">{page_site}</p>',
|
||||
"html.parser",
|
||||
)
|
||||
)
|
||||
if page_video:
|
||||
style = {
|
||||
"width": page_video_width + "px" if page_video_width else None,
|
||||
"height": page_video_height + "px" if page_video_height else None,
|
||||
}
|
||||
|
||||
description_element.append(
|
||||
BeautifulSoup(
|
||||
f'<strong class="page-name">{page_name}</strong>',
|
||||
"html.parser",
|
||||
)
|
||||
)
|
||||
style_string = "; ".join(
|
||||
f"{key}: {value}" for key, value in style.items() if value
|
||||
)
|
||||
video_template = f'<video controls style="{style_string}"><source src="{page_video}">Your browser does not support the video tag.</video>'
|
||||
render_element.append(BeautifulSoup(video_template, "html.parser"))
|
||||
|
||||
description_element.append(
|
||||
BeautifulSoup(
|
||||
f"<p class='page-description'>{page_description or 'No description available.'}</p>",
|
||||
"html.parser",
|
||||
)
|
||||
)
|
||||
if page_audio:
|
||||
audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>'
|
||||
render_element.append(BeautifulSoup(audio_template, "html.parser"))
|
||||
|
||||
description_element.append(
|
||||
BeautifulSoup(
|
||||
f"<p class='page-original-link'>{original_link_name}</p>",
|
||||
"html.parser",
|
||||
)
|
||||
)
|
||||
description_element = BeautifulSoup(
|
||||
f"""
|
||||
<span class='description'>
|
||||
<p class="page-site">{page_site}</p>
|
||||
<strong class="page-name">{page_name}</strong>
|
||||
<p class='page-description'>{page_description or "No description available."}</p>
|
||||
<p class='page-original-link'>{original_link_name}</p>
|
||||
</span>
|
||||
""",
|
||||
"html.parser",
|
||||
)
|
||||
|
||||
render_element.append(description_element_base)
|
||||
render_element.append(description_element)
|
||||
|
||||
for attachment in attachments.values():
|
||||
soup.append(attachment)
|
||||
|
Loading…
Reference in New Issue
Block a user