"""Extract @mentions from devRant comment threads and publish them as a JSON
file and an RSS 2.0 feed."""

from typing import List, Dict, Any, Optional, Set
import re
import json
from urllib.request import urlopen, Request
from urllib.error import URLError
from time import sleep, time
from datetime import datetime, timezone
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom
from json.decoder import JSONDecodeError


class DevRantMentionExtractor:
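    """Collects @mentions from devRant comments and exports them as JSON and RSS."""
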
def __init__(
self, base_url: str = "https://www.devrant.io/api/devrant", app_id: str = "3"
) -> None:
self.base_url: str = base_url
self.app_id: str = app_id
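        # Accumulated mention records plus a GUID set used for de-duplication.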
self.mentions: List[Dict[str, Any]] = []
self.seen_mention_ids: Set[str] = set()

    def fetch_json(self, url: str) -> Any:
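        """Fetch `url` and decode its JSON body, retrying once per second on
        network or decoding errors until a request succeeds."""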
while True:
try:
req: Request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
with urlopen(req) as response:
return json.loads(response.read().decode("utf-8"))
except (URLError, JSONDecodeError) as e:
print(
f"[{datetime.now()}] Error fetching/decoding {url}: {e}. "
"Retrying in 1 second..."
)
sleep(1)
except Exception as e:
print(
f"[{datetime.now()}] Unexpected error in fetch_json "
f"({url}): {e}. Retrying in 1 second..."
)
sleep(1)

    def get_rants(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
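        """Return one page of recent rants (`limit` items starting at `skip`),
        or an empty list on API failure."""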
        url: str = (
            f"{self.base_url}/rants?app={self.app_id}&limit={limit}&skip={skip}"
        )
data: Any = self.fetch_json(url)
if data.get("success"):
return data.get("rants", [])
return []

    def get_rant_details(self, rant_id: int) -> Optional[Dict[str, Any]]:
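        """Return the full rant payload (including comments), or None on failure."""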
url: str = f"{self.base_url}/rants/{rant_id}?app={self.app_id}"
data: Any = self.fetch_json(url)
if data.get("success"):
return data
return None

    def extract_mentions_from_text(self, text: str) -> List[str]:
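        """Return every @username token in `text` (letters, digits, '_' and '-')."""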
mention_pattern = re.compile(r"@([a-zA-Z0-9_-]+)")
return mention_pattern.findall(text)

    def process_rant(self, rant_id: int) -> None:
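        """Scan one rant's comments and record each not-yet-seen @mention."""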
details: Optional[Dict[str, Any]] = self.get_rant_details(rant_id)
if not details:
print(f"Failed to get details for rant {rant_id}")
return
comments: List[Dict[str, Any]] = details.get("comments", [])
for comment in comments:
comment_body: str = comment.get("body", "")
mentioned_users: List[str] = self.extract_mentions_from_text(comment_body)
if mentioned_users:
from_user: str = comment.get("user_username", "unknown")
created_time: int = comment.get("created_time", 0)
                comment_id: Optional[int] = comment.get("id")
for mentioned_user in mentioned_users:
mention_guid: str = f"{comment_id}-to-{mentioned_user}"
if mention_guid not in self.seen_mention_ids:
self.mentions.append(
{
"from": from_user,
"to": mentioned_user,
"content": comment_body,
"rant_id": rant_id,
"comment_id": comment_id,
"created_time": created_time,
}
)
self.seen_mention_ids.add(mention_guid)

    def extract_all_mentions(
self, num_pages: int = 5, limit: int = 50, delay: float = 0.5
) -> List[Dict[str, Any]]:
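        """Page through the rant feed, pausing `delay` seconds between rants,
        and return the accumulated mention records."""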
for page in range(num_pages):
skip: int = page * limit
print(f"Fetching page {page + 1}/{num_pages} (skip={skip})...")
rants: List[Dict[str, Any]] = self.get_rants(limit=limit, skip=skip)
if not rants:
print("No more rants found.")
break
for rant in rants:
                rant_id: Optional[int] = rant.get("id")
                if rant_id is None:
                    continue
print(f"Processing rant {rant_id}...")
self.process_rant(rant_id)
sleep(delay)
return self.mentions

    def generate_rss(self, output_file: str = "dr.mentions.xml") -> None:
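        """Write the collected mentions to `output_file` as an RSS 2.0 feed."""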
rss: Element = Element("rss", version="2.0")
channel: Element = SubElement(rss, "channel")
SubElement(channel, "title").text = "devRant Mentions Feed"
SubElement(channel, "link").text = "https://devrant.com"
SubElement(channel, "description").text = (
"Live feed of all @mentions on devRant"
)
SubElement(channel, "lastBuildDate").text = datetime.utcnow().strftime(
"%a, %d %b %Y %H:%M:%S GMT"
)
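        # Emit one <item> per recorded mention; run() sorts these newest-first.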
for mention in self.mentions:
item: Element = SubElement(channel, "item")
title: str = f"{mention['from']} mentioned @{mention['to']}"
SubElement(item, "title").text = title
link: str = f"https://devrant.com/rants/{mention['rant_id']}"
SubElement(item, "link").text = link
description: str = mention["content"]
SubElement(item, "description").text = description
guid: str = f"devrant-mention-{mention['comment_id']}-to-{mention['to']}"
SubElement(item, "guid", isPermaLink="false").text = guid
if mention.get("created_time"):
                pub_date: str = datetime.fromtimestamp(
                    mention["created_time"], tz=timezone.utc
                ).strftime("%a, %d %b %Y %H:%M:%S GMT")
SubElement(item, "pubDate").text = pub_date
xml_string: str = minidom.parseString(tostring(rss)).toprettyxml(indent=" ")
with open(output_file, "w", encoding="utf-8") as f:
f.write(xml_string)
print(f"RSS feed saved to {output_file}")

    def save_to_json(self, output_file: str = "mentions.json") -> None:
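        """Dump the collected mentions to `output_file` as pretty-printed JSON."""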
with open(output_file, "w", encoding="utf-8") as f:
json.dump(self.mentions, f, indent=2, ensure_ascii=False)
print(f"JSON data saved to {output_file}")

    def run(
self,
num_pages: int = 5,
json_file: str = "dr.mentions.json",
rss_file: str = "dr.mentions.xml",
) -> List[Dict[str, Any]]:
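        """Reset state, crawl `num_pages` pages, sort mentions newest-first,
        and write both the JSON and RSS outputs."""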
print(f"[{datetime.now()}] Starting extraction...")
self.mentions = []
self.seen_mention_ids.clear()
self.extract_all_mentions(num_pages=num_pages)
print(f"[{datetime.now()}] Found {len(self.mentions)} mentions total.")
print(f"[{datetime.now()}] Sorting mentions...")
self.mentions.sort(key=lambda m: m.get("created_time", 0), reverse=True)
self.save_to_json(json_file)
self.generate_rss(rss_file)
print(f"[{datetime.now()}] Extraction complete.")
return self.mentions


# Re-run the extraction every 5 minutes, surviving transient failures.
if __name__ == "__main__":
while True:
try:
extractor: DevRantMentionExtractor = DevRantMentionExtractor()
            start_time: float = time()
            extractor.run(num_pages=5)
            duration: float = time() - start_time
print(f"[{datetime.now()}] Process took {duration:.2f} seconds")
print(f"[{datetime.now()}] Sleeping for 5 minutes...")
sleep(300)
except KeyboardInterrupt:
print("\nStopping...")
break
except Exception as e:
print(f"[{datetime.now()}] An error occurred: {e}")
print("Retrying in 5 minutes...")
sleep(300)