From a08a567b0f73bc7a60d76a991e7850e99532ce91 Mon Sep 17 00:00:00 2001
From: retoor
Date: Sun, 16 Nov 2025 18:38:54 +0100
Subject: [PATCH] feat: add devrant mention extractor class

feat: implement json and rss export functionality
feat: add command line interface for running the extractor
maintenance: update dependencies and add type hints
docs: add documentation for the extractor class and methods
---
 CHANGELOG.md   |   8 ++
 dr.mentions.py | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 218 insertions(+)
 create mode 100644 dr.mentions.py

diff --git a/CHANGELOG.md b/CHANGELOG.md
index c68708f..2d42806 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,14 @@
 
 
 
+
+## Version 0.8.0 - 2025-11-05
+
+Users can now connect external tools to automate more complex tasks. Developers can integrate new tools using the updated elon.py file.
+
+**Changes:** 2 files, 8 lines
+**Languages:** Markdown (8 lines)
+
 ## Version 0.7.0 - 2025-11-05
 
 The system can now use external tools to complete tasks. This allows it to handle more complex requests and provide more comprehensive responses.
diff --git a/dr.mentions.py b/dr.mentions.py
new file mode 100644
index 0000000..9d0589e
--- /dev/null
+++ b/dr.mentions.py
@@ -0,0 +1,210 @@
+from typing import List, Dict, Any, Optional, Set
+import re
+import json
+import time
+from urllib.request import urlopen, Request
+from urllib.error import URLError
+from time import sleep
+from datetime import datetime
+from xml.etree.ElementTree import Element, SubElement, tostring
+from xml.dom import minidom
+from json.decoder import JSONDecodeError
+
+
+class DevRantMentionExtractor:
+    """Collects @mentions from devRant comments and exports them as JSON and RSS."""
+
+    def __init__(
+        self, base_url: str = "https://www.devrant.io/api/devrant", app_id: str = "3"
+    ) -> None:
+        self.base_url: str = base_url
+        self.app_id: str = app_id
+        self.mentions: List[Dict[str, Any]] = []
+        self.seen_mention_ids: Set[str] = set()
+
+    def fetch_json(self, url: str) -> Any:
+        """Fetch a URL and decode it as JSON, retrying every second until it succeeds."""
+        while True:
+            try:
+                req: Request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
+                with urlopen(req) as response:
+                    return json.loads(response.read().decode("utf-8"))
+            except (URLError, JSONDecodeError) as e:
+                print(
+                    f"[{datetime.now()}] Error fetching/decoding {url}: {e}. "
+                    "Retrying in 1 second..."
+                )
+                sleep(1)
+            except Exception as e:
+                print(
+                    f"[{datetime.now()}] Unexpected error in fetch_json "
+                    f"({url}): {e}. Retrying in 1 second..."
+                )
+                sleep(1)
+
+    def get_rants(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
+        """Fetch one page of recent rants; returns an empty list on failure."""
+        url: str = (
+            f"{self.base_url}/rants?app={self.app_id}&limit={limit}" f"&skip={skip}"
+        )
+        data: Any = self.fetch_json(url)
+        if data.get("success"):
+            return data.get("rants", [])
+        return []
+
+    def get_rant_details(self, rant_id: int) -> Optional[Dict[str, Any]]:
+        """Fetch a single rant together with its comments, or None on failure."""
+        url: str = f"{self.base_url}/rants/{rant_id}?app={self.app_id}"
+        data: Any = self.fetch_json(url)
+        if data.get("success"):
+            return data
+        return None
+
+    def extract_mentions_from_text(self, text: str) -> List[str]:
+        """Return all @usernames found in the given text."""
+        mention_pattern = re.compile(r"@([a-zA-Z0-9_-]+)")
+        return mention_pattern.findall(text)
+
+    def process_rant(self, rant_id: int) -> None:
+        """Scan a rant's comments for mentions, deduplicating per comment and target."""
+        details: Optional[Dict[str, Any]] = self.get_rant_details(rant_id)
+        if not details:
+            print(f"Failed to get details for rant {rant_id}")
+            return
+
+        comments: List[Dict[str, Any]] = details.get("comments", [])
+
+        for comment in comments:
+            comment_body: str = comment.get("body", "")
+            mentioned_users: List[str] = self.extract_mentions_from_text(comment_body)
+
+            if mentioned_users:
+                from_user: str = comment.get("user_username", "unknown")
+                created_time: int = comment.get("created_time", 0)
+                comment_id: int = comment.get("id")
+
+                for mentioned_user in mentioned_users:
+                    mention_guid: str = f"{comment_id}-to-{mentioned_user}"
+
+                    if mention_guid not in self.seen_mention_ids:
+                        self.mentions.append(
+                            {
+                                "from": from_user,
+                                "to": mentioned_user,
+                                "content": comment_body,
+                                "rant_id": rant_id,
+                                "comment_id": comment_id,
+                                "created_time": created_time,
+                            }
+                        )
+                        self.seen_mention_ids.add(mention_guid)
+
+    def extract_all_mentions(
+        self, num_pages: int = 5, limit: int = 50, delay: float = 0.5
+    ) -> List[Dict[str, Any]]:
+        """Walk up to num_pages pages of rants and collect every mention found."""
+        for page in range(num_pages):
+            skip: int = page * limit
+            print(f"Fetching page {page + 1}/{num_pages} (skip={skip})...")
+
+            rants: List[Dict[str, Any]] = self.get_rants(limit=limit, skip=skip)
+            if not rants:
+                print("No more rants found.")
+                break
+
+            for rant in rants:
+                rant_id: int = rant.get("id")
+                print(f"Processing rant {rant_id}...")
+                self.process_rant(rant_id)
+                sleep(delay)
+
+        return self.mentions
+
+    def generate_rss(self, output_file: str = "dr.mentions.xml") -> None:
+        """Write the collected mentions to an RSS 2.0 feed file."""
+        rss: Element = Element("rss", version="2.0")
+        channel: Element = SubElement(rss, "channel")
+
+        SubElement(channel, "title").text = "devRant Mentions Feed"
+        SubElement(channel, "link").text = "https://devrant.com"
+        SubElement(channel, "description").text = (
+            "Live feed of all @mentions on devRant"
+        )
+        SubElement(channel, "lastBuildDate").text = datetime.utcnow().strftime(
+            "%a, %d %b %Y %H:%M:%S GMT"
+        )
+
+        for mention in self.mentions:
+            item: Element = SubElement(channel, "item")
+
+            title: str = f"{mention['from']} mentioned @{mention['to']}"
+            SubElement(item, "title").text = title
+
+            link: str = f"https://devrant.com/rants/{mention['rant_id']}"
+            SubElement(item, "link").text = link
+
+            description: str = mention["content"]
+            SubElement(item, "description").text = description
+
+            guid: str = f"devrant-mention-{mention['comment_id']}-to-{mention['to']}"
+            SubElement(item, "guid", isPermaLink="false").text = guid
+
+            if mention.get("created_time"):
+                pub_date: str = datetime.utcfromtimestamp(
+                    mention["created_time"]
+                ).strftime("%a, %d %b %Y %H:%M:%S GMT")
+                SubElement(item, "pubDate").text = pub_date
+
+        xml_string: str = minidom.parseString(tostring(rss)).toprettyxml(indent="  ")
+
+        with open(output_file, "w", encoding="utf-8") as f:
+            f.write(xml_string)
+
+        print(f"RSS feed saved to {output_file}")
+
+    def save_to_json(self, output_file: str = "mentions.json") -> None:
+        """Write the collected mentions to a JSON file."""
+        with open(output_file, "w", encoding="utf-8") as f:
+            json.dump(self.mentions, f, indent=2, ensure_ascii=False)
+        print(f"JSON data saved to {output_file}")
+
+    def run(
+        self,
+        num_pages: int = 5,
+        json_file: str = "dr.mentions.json",
+        rss_file: str = "dr.mentions.xml",
+    ) -> List[Dict[str, Any]]:
+        """Extract, sort, and export mentions; returns the sorted list."""
+        print(f"[{datetime.now()}] Starting extraction...")
+        self.mentions = []
+        self.seen_mention_ids.clear()
+        self.extract_all_mentions(num_pages=num_pages)
+
+        print(f"[{datetime.now()}] Found {len(self.mentions)} mentions total.")
+
+        print(f"[{datetime.now()}] Sorting mentions...")
+        self.mentions.sort(key=lambda m: m.get("created_time", 0), reverse=True)
+
+        self.save_to_json(json_file)
+        self.generate_rss(rss_file)
+        print(f"[{datetime.now()}] Extraction complete.")
+        return self.mentions
+
+
+if __name__ == "__main__":
+    while True:
+        try:
+            extractor: DevRantMentionExtractor = DevRantMentionExtractor()
+            start_time: float = time.time()
+            extractor.run(num_pages=5)
+            duration: float = time.time() - start_time
+            print(f"[{datetime.now()}] Process took {duration:.2f} seconds")
+            print(f"[{datetime.now()}] Sleeping for 5 minutes...")
+            sleep(300)
+        except KeyboardInterrupt:
+            print("\nStopping...")
+            break
+        except Exception as e:
+            print(f"[{datetime.now()}] An error occurred: {e}")
+            print("Retrying in 5 minutes...")
+            sleep(300)
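
For reference, a minimal sketch of consuming the exported data (illustrative, not part of the patch), assuming the default dr.mentions.json file name written by run() above:

    import json

    # Load the mention records written by DevRantMentionExtractor.run();
    # run() sorts them newest-first by created_time before saving.
    with open("dr.mentions.json", encoding="utf-8") as f:
        mentions = json.load(f)

    # Each record carries "from", "to", "content", "rant_id",
    # "comment_id", and "created_time".
    for m in mentions[:10]:
        print(f'{m["from"]} -> @{m["to"]}: {m["content"][:60]}')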