"""Extract @mentions from devRant comments and publish them as JSON and RSS."""

import json
import re
import time
from datetime import datetime, timezone
from json.decoder import JSONDecodeError
from time import sleep
from typing import Any, Dict, List, Optional, Set
from urllib.error import URLError
from urllib.request import Request, urlopen
from xml.dom import minidom
from xml.etree.ElementTree import Element, SubElement, tostring


class DevRantMentionExtractor:
    def __init__(
        self,
        base_url: str = "https://www.devrant.io/api/devrant",
        app_id: str = "3",
    ) -> None:
        self.base_url: str = base_url
        self.app_id: str = app_id
        self.mentions: List[Dict[str, Any]] = []
        self.seen_mention_ids: Set[str] = set()

    def fetch_json(self, url: str) -> Any:
        """Fetch and decode a JSON document, retrying indefinitely on failure."""
        while True:
            try:
                req: Request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
                with urlopen(req) as response:
                    return json.loads(response.read().decode("utf-8"))
            except (URLError, JSONDecodeError) as e:
                print(
                    f"[{datetime.now()}] Error fetching/decoding {url}: {e}. "
                    "Retrying in 1 second..."
                )
                sleep(1)
            except Exception as e:
                print(
                    f"[{datetime.now()}] Unexpected error in fetch_json "
                    f"({url}): {e}. Retrying in 1 second..."
                )
                sleep(1)

    def get_rants(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
        """Return one page of recent rants from the public feed."""
        url: str = (
            f"{self.base_url}/rants?app={self.app_id}&limit={limit}"
            f"&skip={skip}"
        )
        data: Any = self.fetch_json(url)
        if data.get("success"):
            return data.get("rants", [])
        return []

    def get_rant_details(self, rant_id: int) -> Optional[Dict[str, Any]]:
        """Return the full rant payload (including comments), or None on failure."""
        url: str = f"{self.base_url}/rants/{rant_id}?app={self.app_id}"
        data: Any = self.fetch_json(url)
        if data.get("success"):
            return data
        return None

    def extract_mentions_from_text(self, text: str) -> List[str]:
        """Return every @username mentioned in the given text."""
        mention_pattern = re.compile(r"@([a-zA-Z0-9_-]+)")
        return mention_pattern.findall(text)

    def process_rant(self, rant_id: int) -> None:
        """Collect all mentions in a rant's comments, deduplicated by GUID."""
        details: Optional[Dict[str, Any]] = self.get_rant_details(rant_id)
        if not details:
            print(f"Failed to get details for rant {rant_id}")
            return
        comments: List[Dict[str, Any]] = details.get("comments", [])
        for comment in comments:
            comment_body: str = comment.get("body", "")
            mentioned_users: List[str] = self.extract_mentions_from_text(comment_body)
            if mentioned_users:
                from_user: str = comment.get("user_username", "unknown")
                created_time: int = comment.get("created_time", 0)
                comment_id: int = comment.get("id", 0)
                for mentioned_user in mentioned_users:
                    # One GUID per (comment, mentioned user) pair prevents duplicates
                    # across repeated runs over overlapping pages.
                    mention_guid: str = f"{comment_id}-to-{mentioned_user}"
                    if mention_guid not in self.seen_mention_ids:
                        self.mentions.append(
                            {
                                "from": from_user,
                                "to": mentioned_user,
                                "content": comment_body,
                                "rant_id": rant_id,
                                "comment_id": comment_id,
                                "created_time": created_time,
                            }
                        )
                        self.seen_mention_ids.add(mention_guid)

    def extract_all_mentions(
        self, num_pages: int = 5, limit: int = 50, delay: float = 0.5
    ) -> List[Dict[str, Any]]:
        """Walk the recent-rants feed page by page, processing each rant."""
        for page in range(num_pages):
            skip: int = page * limit
            print(f"Fetching page {page + 1}/{num_pages} (skip={skip})...")
            rants: List[Dict[str, Any]] = self.get_rants(limit=limit, skip=skip)
            if not rants:
                print("No more rants found.")
                break
            for rant in rants:
                rant_id: int = rant.get("id", 0)
                print(f"Processing rant {rant_id}...")
                self.process_rant(rant_id)
                sleep(delay)  # be polite to the API between rant fetches
        return self.mentions

    def generate_rss(self, output_file: str = "dr.mentions.xml") -> None:
        """Write the collected mentions as an RSS 2.0 feed."""
        rss: Element = Element("rss", version="2.0")
        channel: Element = SubElement(rss, "channel")
        SubElement(channel, "title").text = "devRant Mentions Feed"
        SubElement(channel, "link").text = "https://devrant.com"
        SubElement(channel, "description").text = (
            "Live feed of all @mentions on devRant"
        )
        SubElement(channel, "lastBuildDate").text = datetime.now(
            timezone.utc
        ).strftime("%a, %d %b %Y %H:%M:%S GMT")
        for mention in self.mentions:
            item: Element = SubElement(channel, "item")
            title: str = f"{mention['from']} mentioned @{mention['to']}"
            SubElement(item, "title").text = title
            link: str = f"https://devrant.com/rants/{mention['rant_id']}"
            SubElement(item, "link").text = link
            SubElement(item, "description").text = mention["content"]
            guid: str = f"devrant-mention-{mention['comment_id']}-to-{mention['to']}"
            SubElement(item, "guid", isPermaLink="false").text = guid
            if mention.get("created_time"):
                pub_date: str = datetime.fromtimestamp(
                    mention["created_time"], tz=timezone.utc
                ).strftime("%a, %d %b %Y %H:%M:%S GMT")
                SubElement(item, "pubDate").text = pub_date
        xml_string: str = minidom.parseString(tostring(rss)).toprettyxml(indent="  ")
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(xml_string)
        print(f"RSS feed saved to {output_file}")

    def save_to_json(self, output_file: str = "dr.mentions.json") -> None:
        """Write the collected mentions as a JSON array."""
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(self.mentions, f, indent=2, ensure_ascii=False)
        print(f"JSON data saved to {output_file}")

    def run(
        self,
        num_pages: int = 5,
        json_file: str = "dr.mentions.json",
        rss_file: str = "dr.mentions.xml",
    ) -> List[Dict[str, Any]]:
        """Run one full extraction pass: fetch, dedupe, sort, and export."""
        print(f"[{datetime.now()}] Starting extraction...")
        self.mentions = []
        self.seen_mention_ids.clear()
        self.extract_all_mentions(num_pages=num_pages)
        print(f"[{datetime.now()}] Found {len(self.mentions)} mentions total.")
        print(f"[{datetime.now()}] Sorting mentions...")
        self.mentions.sort(key=lambda m: m.get("created_time", 0), reverse=True)
        self.save_to_json(json_file)
        self.generate_rss(rss_file)
        print(f"[{datetime.now()}] Extraction complete.")
        return self.mentions


if __name__ == "__main__":
    # Re-run the extraction every 5 minutes until interrupted.
    while True:
        try:
            extractor: DevRantMentionExtractor = DevRantMentionExtractor()
            start_time: float = time.time()
            extractor.run(num_pages=5)
            duration: float = time.time() - start_time
            print(f"[{datetime.now()}] Process took {duration:.2f} seconds")
            print(f"[{datetime.now()}] Sleeping for 5 minutes...")
            sleep(300)
        except KeyboardInterrupt:
            print("\nStopping...")
            break
        except Exception as e:
            print(f"[{datetime.now()}] An error occurred: {e}")
            print("Retrying in 5 minutes...")
            sleep(300)