Merge pull request 'Improved site embedding' (#57) from BordedDev/snek:feat/improved-embeding into main
Reviewed-on: retoor/snek#57
This commit is contained in:
		
						commit
						006882cd6f
					
				| @ -193,7 +193,7 @@ def embed_youtube(text): | |||||||
|                 "www.youtube-nocookie.com", |                 "www.youtube-nocookie.com", | ||||||
|                 "youtube-nocookie.com", |                 "youtube-nocookie.com", | ||||||
|             ] |             ] | ||||||
|             and any(url.path.startswith(p) for p in ["/watch", "/embed"]) |             and any(url.path.startswith(p) for p in ["/watch", "/embed", "/shorts"]) | ||||||
|         ): |         ): | ||||||
|             queries = parse_qs(url.query) |             queries = parse_qs(url.query) | ||||||
|             if "v" in queries: |             if "v" in queries: | ||||||
| @ -333,29 +333,11 @@ def get_url_content(url): | |||||||
|         return None |         return None | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def embed_url(text): | def get_element_options(head_info, elem=None, meta=None, ograph=None, twitter=None): | ||||||
|     soup = BeautifulSoup(text, "html.parser") |  | ||||||
| 
 |  | ||||||
|     attachments = {} |  | ||||||
| 
 |  | ||||||
|     for element in soup.find_all("a"): |  | ||||||
|         if "href" in element.attrs and element.attrs["href"].startswith("http") and ("data-noembed" not in element.attrs): |  | ||||||
|             page_url = urlparse(element.attrs["href"]) |  | ||||||
|             page = get_url_content(element.attrs["href"]) |  | ||||||
|             if page: |  | ||||||
|                 parsed_page = BeautifulSoup(page, "html.parser") |  | ||||||
|                 head_info = parsed_page.find("head") |  | ||||||
|                 if head_info: |  | ||||||
| 
 |  | ||||||
|                     def get_element_options( |  | ||||||
|                         elem=None, meta=None, ograph=None, twitter=None |  | ||||||
|                     ): |  | ||||||
|     if twitter: |     if twitter: | ||||||
|         tw_tag = head_info.find( |         tw_tag = head_info.find( | ||||||
|             "meta", attrs={"name": "twitter:" + twitter} |             "meta", attrs={"name": "twitter:" + twitter} | ||||||
|                             ) or head_info.find( |         ) or head_info.find("meta", attrs={"property": "twitter:" + twitter}) | ||||||
|                                 "meta", attrs={"property": "twitter:" + twitter} |  | ||||||
|                             ) |  | ||||||
|         if tw_tag: |         if tw_tag: | ||||||
|             return tw_tag.get("content", tw_tag.get("value", None)) |             return tw_tag.get("content", tw_tag.get("value", None)) | ||||||
| 
 | 
 | ||||||
| @ -367,13 +349,11 @@ def embed_url(text): | |||||||
|             return og_tag.get("content", og_tag.get("value", None)) |             return og_tag.get("content", og_tag.get("value", None)) | ||||||
| 
 | 
 | ||||||
|     if meta: |     if meta: | ||||||
|                             meta_tag = head_info.find( |         meta_tag = head_info.find("meta", attrs={"name": meta}) or head_info.find( | ||||||
|                                 "meta", attrs={"name": meta} |             "meta", attrs={"property": meta} | ||||||
|                             ) or head_info.find("meta", attrs={"property": meta}) |  | ||||||
|                             if meta_tag: |  | ||||||
|                                 return meta_tag.get( |  | ||||||
|                                     "content", meta_tag.get("value", None) |  | ||||||
|         ) |         ) | ||||||
|  |         if meta_tag: | ||||||
|  |             return meta_tag.get("content", meta_tag.get("value", None)) | ||||||
| 
 | 
 | ||||||
|     if elem: |     if elem: | ||||||
|         elem_tag = head_info.find(elem) |         elem_tag = head_info.find(elem) | ||||||
| @ -382,33 +362,103 @@ def embed_url(text): | |||||||
| 
 | 
 | ||||||
|     return None |     return None | ||||||
| 
 | 
 | ||||||
|  | 
 | ||||||
|  | def embed_url(text): | ||||||
|  |     soup = BeautifulSoup(text, "html.parser") | ||||||
|  | 
 | ||||||
|  |     attachments = {} | ||||||
|  | 
 | ||||||
|  |     for element in soup.find_all("a"): | ||||||
|  |         if ( | ||||||
|  |             "href" in element.attrs | ||||||
|  |             and element.attrs["href"].startswith("http") | ||||||
|  |             and element.attrs["href"] not in attachments | ||||||
|  |             and ("data-noembed" not in element.attrs) | ||||||
|  |         ): | ||||||
|             original_link_name = element.attrs["href"] |             original_link_name = element.attrs["href"] | ||||||
| 
 | 
 | ||||||
|                     if original_link_name in attachments: |             page_url = urlparse(element.attrs["href"]) | ||||||
|  |             page = get_url_content(element.attrs["href"]) | ||||||
|  |             if not page: | ||||||
|  |                 continue | ||||||
|  | 
 | ||||||
|  |             parsed_page = BeautifulSoup(page, "html.parser") | ||||||
|  |             head_info = parsed_page.find("head") | ||||||
|  | 
 | ||||||
|  |             if not head_info: | ||||||
|                 continue |                 continue | ||||||
| 
 | 
 | ||||||
|             page_name = ( |             page_name = ( | ||||||
|                         get_element_options("title", "title", "title", "title") |                 get_element_options(head_info, "title", "title", "title", "title") | ||||||
|                 or page_url.netloc |                 or page_url.netloc | ||||||
|             ) |             ) | ||||||
|             page_site = ( |             page_site = ( | ||||||
|                         get_element_options(None, "site", "site", "site") |                 get_element_options(head_info, None, "site", "site", "site") | ||||||
|  |                 or get_element_options(head_info, ograph="site_name") | ||||||
|                 or page_url.netloc |                 or page_url.netloc | ||||||
|             ) |             ) | ||||||
|             page_description = get_element_options( |             page_description = get_element_options( | ||||||
|                         None, "description", "description", "description" |                 head_info, None, "description", "description", "description" | ||||||
|             ) |             ) | ||||||
|                     page_image = get_element_options(None, "image", "image", "image") |  | ||||||
|                     page_image_alt = get_element_options( |  | ||||||
|                         None, "image:alt", "image:alt", "image:alt" |  | ||||||
|                     ) |  | ||||||
|                     page_video = get_element_options(None, "video", "video", "video") |  | ||||||
|                     page_audio = get_element_options(None, "audio", "audio", "audio") |  | ||||||
| 
 | 
 | ||||||
|                     ( |             page_image = ( | ||||||
|                         get_element_options(None, None, None, "card") |                 get_element_options(head_info, None, "image", "image", "image") | ||||||
|                         or "summary_large_image" |                 or get_element_options( | ||||||
|  |                     head_info, None, "image:url", "image:url", "image:url" | ||||||
|                 ) |                 ) | ||||||
|  |                 or get_element_options( | ||||||
|  |                     head_info, | ||||||
|  |                     None, | ||||||
|  |                     "image:secure_url", | ||||||
|  |                     "image:secure_url", | ||||||
|  |                     "image:secure_url", | ||||||
|  |                 ) | ||||||
|  |             ) | ||||||
|  |             page_image_height = get_element_options( | ||||||
|  |                 head_info, None, "image:height", "image:height", "image:height" | ||||||
|  |             ) | ||||||
|  |             page_image_width = get_element_options( | ||||||
|  |                 head_info, None, "image:width", "image:width", "image:width" | ||||||
|  |             ) | ||||||
|  |             page_image_alt = get_element_options( | ||||||
|  |                 head_info, None, "image:alt", "image:alt", "image:alt" | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |             page_video = ( | ||||||
|  |                 get_element_options(head_info, None, "video", "video", "video") | ||||||
|  |                 or get_element_options( | ||||||
|  |                     head_info, None, "video:url", "video:url", "video:url" | ||||||
|  |                 ) | ||||||
|  |                 or get_element_options( | ||||||
|  |                     head_info, | ||||||
|  |                     None, | ||||||
|  |                     "video:secure_url", | ||||||
|  |                     "video:secure_url", | ||||||
|  |                     "video:secure_url", | ||||||
|  |                 ) | ||||||
|  |             ) | ||||||
|  |             page_video_height = get_element_options( | ||||||
|  |                 head_info, None, "video:height", "video:height", "video:height" | ||||||
|  |             ) | ||||||
|  |             page_video_width = get_element_options( | ||||||
|  |                 head_info, None, "video:width", "video:width", "video:width" | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |             page_audio = ( | ||||||
|  |                 get_element_options(head_info, None, "audio", "audio", "audio") | ||||||
|  |                 or get_element_options( | ||||||
|  |                     head_info, None, "audio:url", "audio:url", "audio:url" | ||||||
|  |                 ) | ||||||
|  |                 or get_element_options( | ||||||
|  |                     head_info, | ||||||
|  |                     None, | ||||||
|  |                     "audio:secure_url", | ||||||
|  |                     "audio:secure_url", | ||||||
|  |                     "audio:secure_url", | ||||||
|  |                 ) | ||||||
|  |             ) | ||||||
|  | 
 | ||||||
|  |             (get_element_options(head_info, twitter="card") or "summary_large_image") | ||||||
| 
 | 
 | ||||||
|             attachment_base = BeautifulSoup(str(element), "html.parser") |             attachment_base = BeautifulSoup(str(element), "html.parser") | ||||||
|             attachments[original_link_name] = attachment_base |             attachments[original_link_name] = attachment_base | ||||||
| @ -421,54 +471,47 @@ def embed_url(text): | |||||||
|             render_element = attachment |             render_element = attachment | ||||||
| 
 | 
 | ||||||
|             if page_image: |             if page_image: | ||||||
|                         image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="420" height="240" /></span>' |                 style = { | ||||||
|                         render_element.append( |                     "width": page_image_width + "px" if page_image_width else None, | ||||||
|                             BeautifulSoup(image_template, "html.parser") |                     "height": page_image_height + "px" if page_image_height else None, | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 style_string = "; ".join( | ||||||
|  |                     f"{key}: {value}" for key, value in style.items() if value | ||||||
|                 ) |                 ) | ||||||
|  | 
 | ||||||
|  |                 image_template = f'<span><img src="{page_image}" alt="{page_image_alt or page_name}" title="{page_name}" width="1" height="1" style="{style_string}" /></span>' | ||||||
|  |                 render_element.append(BeautifulSoup(image_template, "html.parser")) | ||||||
|  | 
 | ||||||
|             if page_video: |             if page_video: | ||||||
|                         video_template = f'<video controls><source src="{page_video}">Your browser does not support the video tag.</video>' |                 style = { | ||||||
|                         render_element.append( |                     "width": page_video_width + "px" if page_video_width else None, | ||||||
|                             BeautifulSoup(video_template, "html.parser") |                     "height": page_video_height + "px" if page_video_height else None, | ||||||
|  |                 } | ||||||
|  | 
 | ||||||
|  |                 style_string = "; ".join( | ||||||
|  |                     f"{key}: {value}" for key, value in style.items() if value | ||||||
|                 ) |                 ) | ||||||
|  |                 video_template = f'<video controls style="{style_string}"><source src="{page_video}">Your browser does not support the video tag.</video>' | ||||||
|  |                 render_element.append(BeautifulSoup(video_template, "html.parser")) | ||||||
|  | 
 | ||||||
|             if page_audio: |             if page_audio: | ||||||
|                 audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>' |                 audio_template = f'<audio controls><source src="{page_audio}">Your browser does not support the audio tag.</audio>' | ||||||
|                         render_element.append( |                 render_element.append(BeautifulSoup(audio_template, "html.parser")) | ||||||
|                             BeautifulSoup(audio_template, "html.parser") |  | ||||||
|                         ) |  | ||||||
| 
 | 
 | ||||||
|                     description_element_base = BeautifulSoup( |             description_element = BeautifulSoup( | ||||||
|                         "<span class='description'></span>", "html.parser" |                 f""" | ||||||
|                     ) |             <span class='description'> | ||||||
|                     description_element = next(description_element_base.children) |                 <p class="page-site">{page_site}</p> | ||||||
|                     description_element.append( |                 <strong class="page-name">{page_name}</strong> | ||||||
|                         BeautifulSoup( |                 <p class='page-description'>{page_description or "No description available."}</p> | ||||||
|                             f'<p class="page-site">{page_site}</p>', |                 <p class='page-original-link'>{original_link_name}</p> | ||||||
|  |             </span> | ||||||
|  |             """, | ||||||
|                 "html.parser", |                 "html.parser", | ||||||
|             ) |             ) | ||||||
|                     ) |  | ||||||
| 
 | 
 | ||||||
|                     description_element.append( |             render_element.append(description_element) | ||||||
|                         BeautifulSoup( |  | ||||||
|                             f'<strong class="page-name">{page_name}</strong>', |  | ||||||
|                             "html.parser", |  | ||||||
|                         ) |  | ||||||
|                     ) |  | ||||||
| 
 |  | ||||||
|                     description_element.append( |  | ||||||
|                         BeautifulSoup( |  | ||||||
|                             f"<p class='page-description'>{page_description or 'No description available.'}</p>", |  | ||||||
|                             "html.parser", |  | ||||||
|                         ) |  | ||||||
|                     ) |  | ||||||
| 
 |  | ||||||
|                     description_element.append( |  | ||||||
|                         BeautifulSoup( |  | ||||||
|                             f"<p class='page-original-link'>{original_link_name}</p>", |  | ||||||
|                             "html.parser", |  | ||||||
|                         ) |  | ||||||
|                     ) |  | ||||||
| 
 |  | ||||||
|                     render_element.append(description_element_base) |  | ||||||
| 
 | 
 | ||||||
|     for attachment in attachments.values(): |     for attachment in attachments.values(): | ||||||
|         soup.append(attachment) |         soup.append(attachment) | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user