feat: add devrant mention extractor class

feat: implement json and rss export functionality
feat: add command line interface for running the extractor
maintenance: update dependencies and add type hints
docs: add documentation for the extractor class and methods
This commit is contained in:
retoor 2025-11-16 18:38:54 +01:00
parent 59ada88301
commit a08a567b0f
2 changed files with 209 additions and 0 deletions

View File

@ -8,6 +8,14 @@
## Version 0.8.0 - 2025-11-05
Users can now connect external tools to automate more complex tasks. Developers can integrate new tools using the updated elon.py file.
**Changes:** 2 files, 8 lines
**Languages:** Markdown (8 lines)
## Version 0.7.0 - 2025-11-05
The system can now use external tools to complete tasks. This allows it to handle more complex requests and provide more comprehensive responses.

201
dr.mentions.py Normal file
View File

@ -0,0 +1,201 @@
import json
import re
from datetime import datetime, timezone
from json.decoder import JSONDecodeError
from time import sleep
from typing import Any, Dict, List, Optional, Set
from urllib.error import URLError
from urllib.request import Request, urlopen
from xml.dom import minidom
from xml.etree.ElementTree import Element, SubElement, tostring
class DevRantMentionExtractor:
    """Scrape devRant rants and comments and collect @mention records.

    Mention records accumulate in ``self.mentions`` as dicts with keys
    ``from``, ``to``, ``content``, ``rant_id``, ``comment_id`` and
    ``created_time``.  Duplicate (comment, mentioned-user) pairs are
    suppressed via ``self.seen_mention_ids``.
    """

    # Compiled once at class level; the original rebuilt this pattern on
    # every extract_mentions_from_text() call.
    _MENTION_RE = re.compile(r"@([a-zA-Z0-9_-]+)")

    def __init__(
        self, base_url: str = "https://www.devrant.io/api/devrant", app_id: str = "3"
    ) -> None:
        """Create an extractor for the given API base URL and app id."""
        self.base_url: str = base_url
        self.app_id: str = app_id
        # Collected mention records (sorted newest-first by run()).
        self.mentions: List[Dict[str, Any]] = []
        # Dedup guard across pages and repeated runs of extract_all_mentions.
        self.seen_mention_ids: Set[str] = set()

    def fetch_json(self, url: str) -> Any:
        """GET *url* and return the decoded JSON body.

        Retries forever with a 1-second pause on network or decode errors.
        NOTE(review): this also retries permanent failures (e.g. HTTP 404)
        indefinitely; callers rely on the blocking behavior.
        """
        while True:
            try:
                req: Request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
                with urlopen(req) as response:
                    return json.loads(response.read().decode("utf-8"))
            except (URLError, JSONDecodeError) as e:
                print(
                    f"[{datetime.now()}] Error fetching/decoding {url}: {e}. "
                    "Retrying in 1 second..."
                )
                sleep(1)
            except Exception as e:
                # Last-resort guard so a long-running crawl never dies here.
                print(
                    f"[{datetime.now()}] Unexpected error in fetch_json "
                    f"({url}): {e}. Retrying in 1 second..."
                )
                sleep(1)

    def get_rants(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
        """Return one page of rants from the public feed ([] on API failure)."""
        url: str = (
            f"{self.base_url}/rants?app={self.app_id}&limit={limit}&skip={skip}"
        )
        data: Any = self.fetch_json(url)
        if data.get("success"):
            return data.get("rants", [])
        return []

    def get_rant_details(self, rant_id: int) -> Optional[Dict[str, Any]]:
        """Return the full payload (rant + comments) for *rant_id*, or None."""
        url: str = f"{self.base_url}/rants/{rant_id}?app={self.app_id}"
        data: Any = self.fetch_json(url)
        if data.get("success"):
            return data
        return None

    def extract_mentions_from_text(self, text: str) -> List[str]:
        """Return all @mentioned usernames found in *text*, in order."""
        return self._MENTION_RE.findall(text)

    def process_rant(self, rant_id: int) -> None:
        """Scan a rant's comments and record every previously unseen mention."""
        details: Optional[Dict[str, Any]] = self.get_rant_details(rant_id)
        if not details:
            print(f"Failed to get details for rant {rant_id}")
            return
        comments: List[Dict[str, Any]] = details.get("comments", [])
        for comment in comments:
            comment_body: str = comment.get("body", "")
            mentioned_users: List[str] = self.extract_mentions_from_text(comment_body)
            if not mentioned_users:
                continue
            from_user: str = comment.get("user_username", "unknown")
            created_time: int = comment.get("created_time", 0)
            # NOTE(review): a comment without an "id" yields comment_id=None
            # and a "None-to-user" guid; assumed the API always supplies it.
            comment_id: int = comment.get("id")
            for mentioned_user in mentioned_users:
                # One record per (comment, mentioned user) pair.
                mention_guid: str = f"{comment_id}-to-{mentioned_user}"
                if mention_guid in self.seen_mention_ids:
                    continue
                self.mentions.append(
                    {
                        "from": from_user,
                        "to": mentioned_user,
                        "content": comment_body,
                        "rant_id": rant_id,
                        "comment_id": comment_id,
                        "created_time": created_time,
                    }
                )
                self.seen_mention_ids.add(mention_guid)

    def extract_all_mentions(
        self, num_pages: int = 5, limit: int = 50, delay: float = 0.5
    ) -> List[Dict[str, Any]]:
        """Crawl up to *num_pages* pages of rants, collecting mentions.

        Sleeps *delay* seconds between rants to be polite to the API.
        Stops early when a page comes back empty.  Returns self.mentions.
        """
        for page in range(num_pages):
            skip: int = page * limit
            print(f"Fetching page {page + 1}/{num_pages} (skip={skip})...")
            rants: List[Dict[str, Any]] = self.get_rants(limit=limit, skip=skip)
            if not rants:
                print("No more rants found.")
                break
            for rant in rants:
                rant_id: int = rant.get("id")
                print(f"Processing rant {rant_id}...")
                self.process_rant(rant_id)
                sleep(delay)
        return self.mentions

    def generate_rss(self, output_file: str = "dr.mentions.xml") -> None:
        """Write the collected mentions as a pretty-printed RSS 2.0 feed."""
        rss: Element = Element("rss", version="2.0")
        channel: Element = SubElement(rss, "channel")
        SubElement(channel, "title").text = "devRant Mentions Feed"
        SubElement(channel, "link").text = "https://devrant.com"
        SubElement(channel, "description").text = (
            "Live feed of all @mentions on devRant"
        )
        # datetime.utcnow() is deprecated (3.12+); timezone-aware equivalent
        # renders the same "GMT"-suffixed string.
        SubElement(channel, "lastBuildDate").text = datetime.now(
            timezone.utc
        ).strftime("%a, %d %b %Y %H:%M:%S GMT")
        for mention in self.mentions:
            item: Element = SubElement(channel, "item")
            title: str = f"{mention['from']} mentioned @{mention['to']}"
            SubElement(item, "title").text = title
            link: str = f"https://devrant.com/rants/{mention['rant_id']}"
            SubElement(item, "link").text = link
            description: str = mention["content"]
            SubElement(item, "description").text = description
            guid: str = f"devrant-mention-{mention['comment_id']}-to-{mention['to']}"
            SubElement(item, "guid", isPermaLink="false").text = guid
            if mention.get("created_time"):
                # utcfromtimestamp() is deprecated; aware conversion is
                # byte-identical under this hard-coded GMT format.
                pub_date: str = datetime.fromtimestamp(
                    mention["created_time"], tz=timezone.utc
                ).strftime("%a, %d %b %Y %H:%M:%S GMT")
                SubElement(item, "pubDate").text = pub_date
        xml_string: str = minidom.parseString(tostring(rss)).toprettyxml(indent="  ")
        with open(output_file, "w", encoding="utf-8") as f:
            f.write(xml_string)
        print(f"RSS feed saved to {output_file}")

    def save_to_json(self, output_file: str = "mentions.json") -> None:
        """Dump the collected mentions to *output_file* as indented JSON."""
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(self.mentions, f, indent=2, ensure_ascii=False)
        print(f"JSON data saved to {output_file}")

    def run(
        self,
        num_pages: int = 5,
        json_file: str = "dr.mentions.json",
        rss_file: str = "dr.mentions.xml",
    ) -> List[Dict[str, Any]]:
        """Full pipeline: reset state, crawl, sort, export JSON and RSS."""
        print(f"[{datetime.now()}] Starting extraction...")
        # Fresh state so repeated runs re-emit the complete current feed.
        self.mentions = []
        self.seen_mention_ids.clear()
        self.extract_all_mentions(num_pages=num_pages)
        print(f"[{datetime.now()}] Found {len(self.mentions)} mentions total.")
        print(f"[{datetime.now()}] Sorting mentions...")
        self.mentions.sort(key=lambda m: m.get("created_time", 0), reverse=True)
        self.save_to_json(json_file)
        self.generate_rss(rss_file)
        print(f"[{datetime.now()}] Extraction complete.")
        return self.mentions
import time
from datetime import datetime
if __name__ == "__main__":
    # Poll loop: run one full extraction every five minutes until the
    # process is interrupted.  Any unexpected error is logged and retried
    # after the same delay so the feed keeps regenerating unattended.
    while True:
        try:
            worker: DevRantMentionExtractor = DevRantMentionExtractor()
            started: float = time.time()
            worker.run(num_pages=5)
            elapsed: float = time.time() - started
            print(f"[{datetime.now()}] Process took {elapsed:.2f} seconds")
            print(f"[{datetime.now()}] Sleeping for 5 minutes...")
            sleep(300)
        except KeyboardInterrupt:
            print("\nStopping...")
            break
        except Exception as e:
            print(f"[{datetime.now()}] An error occurred: {e}")
            print("Retrying in 5 minutes...")
            sleep(300)