Compare commits

..

No commits in common. "006882cd6fe3e8f3b60a58019ffeed750e39964c" and "ceabf7d73b1010e00efb220e506ed99c3c338b3b" have entirely different histories.

View File

@ -193,7 +193,7 @@ def embed_youtube(text):
"www.youtube-nocookie.com", "www.youtube-nocookie.com",
"youtube-nocookie.com", "youtube-nocookie.com",
] ]
and any(url.path.startswith(p) for p in ["/watch", "/embed", "/shorts"]) and any(url.path.startswith(p) for p in ["/watch", "/embed"])
): ):
queries = parse_qs(url.query) queries = parse_qs(url.query)
if "v" in queries: if "v" in queries:
@ -333,185 +333,142 @@ def get_url_content(url):
return None return None
def get_element_options(head_info, elem=None, meta=None, ograph=None, twitter=None):
    """Return the first usable metadata value found in a parsed ``<head>``.

    Sources are tried in a fixed priority order: the Twitter card tag, the
    Open Graph tag, the plain ``<meta>`` tag and finally the text of the
    ``elem`` element. Every ``<meta>`` lookup checks both the ``name`` and the
    ``property`` attribute, and reads the value from ``content`` (falling back
    to ``value``).

    A tag that is present but carries no usable value does not end the
    search: the next source in the chain is tried instead, so an empty
    ``twitter:title`` no longer hides a populated ``og:title``.

    Args:
        head_info: Object exposing ``find(name, attrs=...)``; it is used here
            the way a BeautifulSoup tag is. Matched ``<meta>`` tags must
            expose ``get(key, default)`` and the ``elem`` match ``.text``.
        elem: Tag name whose text is the last-resort value (e.g. "title").
        meta: Key of a plain ``<meta>`` tag (e.g. "description").
        ograph: Open Graph key without the "og:" prefix.
        twitter: Twitter card key without the "twitter:" prefix.

    Returns:
        The first non-empty value found, the text of ``elem`` if only that
        matched, or ``None`` when nothing matched.
    """
    # (full key, attribute tried first, attribute tried second), in priority
    # order. Twitter and plain meta tags conventionally use ``name``; Open
    # Graph tags conventionally use ``property``.
    lookups = []
    if twitter:
        lookups.append(("twitter:" + twitter, "name", "property"))
    if ograph:
        lookups.append(("og:" + ograph, "property", "name"))
    if meta:
        lookups.append((meta, "name", "property"))

    for key, first_attr, second_attr in lookups:
        tag = head_info.find("meta", attrs={first_attr: key}) or head_info.find(
            "meta", attrs={second_attr: key}
        )
        if not tag:
            continue
        value = tag.get("content", None) or tag.get("value", None)
        if value:
            return value
        # Tag exists but is empty: fall through to the next source.

    if elem:
        elem_tag = head_info.find(elem)
        if elem_tag:
            return elem_tag.text
    return None
def embed_url(text): def embed_url(text):
soup = BeautifulSoup(text, "html.parser") soup = BeautifulSoup(text, "html.parser")
attachments = {} attachments = {}
for element in soup.find_all("a"): for element in soup.find_all("a"):
if ( if "href" in element.attrs and element.attrs["href"].startswith("http") and ("data-noembed" not in element.attrs):
"href" in element.attrs
and element.attrs["href"].startswith("http")
and element.attrs["href"] not in attachments
and ("data-noembed" not in element.attrs)
):
original_link_name = element.attrs["href"]
page_url = urlparse(element.attrs["href"]) page_url = urlparse(element.attrs["href"])
page = get_url_content(element.attrs["href"]) page = get_url_content(element.attrs["href"])
if not page: if page:
continue parsed_page = BeautifulSoup(page, "html.parser")
head_info = parsed_page.find("head")
if head_info:
parsed_page = BeautifulSoup(page, "html.parser") def get_element_options(
head_info = parsed_page.find("head") elem=None, meta=None, ograph=None, twitter=None
):
if twitter:
tw_tag = head_info.find(
"meta", attrs={"name": "twitter:" + twitter}
) or head_info.find(
"meta", attrs={"property": "twitter:" + twitter}
)
if tw_tag:
return tw_tag.get("content", tw_tag.get("value", None))
if not head_info: if ograph:
continue og_tag = head_info.find(
"meta", attrs={"property": "og:" + ograph}
) or head_info.find("meta", attrs={"name": "og:" + ograph})
if og_tag:
return og_tag.get("content", og_tag.get("value", None))
page_name = ( if meta:
get_element_options(head_info, "title", "title", "title", "title") meta_tag = head_info.find(
or page_url.netloc "meta", attrs={"name": meta}
) ) or head_info.find("meta", attrs={"property": meta})
page_site = ( if meta_tag:
get_element_options(head_info, None, "site", "site", "site") return meta_tag.get(
or get_element_options(head_info, ograph="site_name") "content", meta_tag.get("value", None)
or page_url.netloc )
)
page_description = get_element_options(
head_info, None, "description", "description", "description"
)
page_image = ( if elem:
get_element_options(head_info, None, "image", "image", "image") elem_tag = head_info.find(elem)
or get_element_options( if elem_tag:
head_info, None, "image:url", "image:url", "image:url" return elem_tag.text
)
or get_element_options(
head_info,
None,
"image:secure_url",
"image:secure_url",
"image:secure_url",
)
)
page_image_height = get_element_options(
head_info, None, "image:height", "image:height", "image:height"
)
page_image_width = get_element_options(
head_info, None, "image:width", "image:width", "image:width"
)
page_image_alt = get_element_options(
head_info, None, "image:alt", "image:alt", "image:alt"
)
page_video = ( return None
get_element_options(head_info, None, "video", "video", "video")
or get_element_options(
head_info, None, "video:url", "video:url", "video:url"
)
or get_element_options(
head_info,
None,
"video:secure_url",
"video:secure_url",
"video:secure_url",
)
)
page_video_height = get_element_options(
head_info, None, "video:height", "video:height", "video:height"
)
page_video_width = get_element_options(
head_info, None, "video:width", "video:width", "video:width"
)
page_audio = ( original_link_name = element.attrs["href"]
get_element_options(head_info, None, "audio", "audio", "audio")
or get_element_options(
head_info, None, "audio:url", "audio:url", "audio:url"
)
or get_element_options(
head_info,
None,
"audio:secure_url",
"audio:secure_url",
"audio:secure_url",
)
)
(get_element_options(head_info, twitter="card") or "summary_large_image") if original_link_name in attachments:
continue
attachment_base = BeautifulSoup(str(element), "html.parser") page_name = (
attachments[original_link_name] = attachment_base get_element_options("title", "title", "title", "title")
or page_url.netloc
)
page_site = (
get_element_options(None, "site", "site", "site")
or page_url.netloc
)
page_description = get_element_options(
None, "description", "description", "description"
)
page_image = get_element_options(None, "image", "image", "image")
page_image_alt = get_element_options(
None, "image:alt", "image:alt", "image:alt"
)
page_video = get_element_options(None, "video", "video", "video")
page_audio = get_element_options(None, "audio", "audio", "audio")
attachment = next(attachment_base.children) (
get_element_options(None, None, None, "card")
or "summary_large_image"
)
attachment.clear() attachment_base = BeautifulSoup(str(element), "html.parser")
attachment.attrs["class"] = "embed-url-link" attachments[original_link_name] = attachment_base
render_element = attachment attachment = next(attachment_base.children)
if page_image: attachment.clear()
style = { attachment.attrs["class"] = "embed-url-link"
"width": page_image_width + "px" if page_image_width else None,
"height": page_image_height + "px" if page_image_height else None,
}
style_string = "; ".join( render_element = attachment
f"{key}: {value}" for key, value in style.items() if value
)
image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="1" height="1" style="{style_string}" /></span>' if page_image:
render_element.append(BeautifulSoup(image_template, "html.parser")) image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="420" height="240" /></span>'
render_element.append(
BeautifulSoup(image_template, "html.parser")
)
if page_video:
video_template = f'<video controls><source src="{page_video}">Your browser does not support the video tag.</video>'
render_element.append(
BeautifulSoup(video_template, "html.parser")
)
if page_audio:
audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>'
render_element.append(
BeautifulSoup(audio_template, "html.parser")
)
if page_video: description_element_base = BeautifulSoup(
style = { "<span class='description'></span>", "html.parser"
"width": page_video_width + "px" if page_video_width else None, )
"height": page_video_height + "px" if page_video_height else None, description_element = next(description_element_base.children)
} description_element.append(
BeautifulSoup(
f'<p class="page-site">{page_site}</p>',
"html.parser",
)
)
style_string = "; ".join( description_element.append(
f"{key}: {value}" for key, value in style.items() if value BeautifulSoup(
) f'<strong class="page-name">{page_name}</strong>',
video_template = f'<video controls style="{style_string}"><source src="{page_video}">Your browser does not support the video tag.</video>' "html.parser",
render_element.append(BeautifulSoup(video_template, "html.parser")) )
)
if page_audio: description_element.append(
audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>' BeautifulSoup(
render_element.append(BeautifulSoup(audio_template, "html.parser")) f"<p class='page-description'>{page_description or 'No description available.'}</p>",
"html.parser",
)
)
description_element = BeautifulSoup( description_element.append(
f""" BeautifulSoup(
<span class='description'> f"<p class='page-original-link'>{original_link_name}</p>",
<p class="page-site">{page_site}</p> "html.parser",
<strong class="page-name">{page_name}</strong> )
<p class='page-description'>{page_description or "No description available."}</p> )
<p class='page-original-link'>{original_link_name}</p>
</span>
""",
"html.parser",
)
render_element.append(description_element) render_element.append(description_element_base)
for attachment in attachments.values(): for attachment in attachments.values():
soup.append(attachment) soup.append(attachment)