"""Extract @mentions from devRant comment threads and publish them as a JSON
file and an RSS 2.0 feed."""

from typing import List, Dict, Any, Optional, Set
import re
import json
from urllib.request import urlopen, Request
from urllib.error import URLError
from time import sleep, time
from datetime import datetime, timezone
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom
from json.decoder import JSONDecodeError


class DevRantMentionExtractor:
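    """Collects @mentions from devRant comments and exports them as JSON and RSS."""
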
def __init__(
self, base_url: str = "https://www.devrant.io/api/devrant", app_id: str = "3"
) -> None:
self.base_url: str = base_url
self.app_id: str = app_id
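        # Accumulated mention records plus a GUID set used for de-duplication.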
self.mentions: List[Dict[str, Any]] = []
self.seen_mention_ids: Set[str] = set()

    def fetch_json(self, url: str) -> Any:
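        """Fetch `url` and decode its JSON body, retrying once per second on
        network or decoding errors until a request succeeds."""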
while True:
try:
req: Request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
with urlopen(req) as response:
return json.loads(response.read().decode("utf-8"))
except (URLError, JSONDecodeError) as e:
print(
f"[{datetime.now()}] Error fetching/decoding {url}: {e}. "
"Retrying in 1 second..."
)
sleep(1)
except Exception as e:
print(
f"[{datetime.now()}] Unexpected error in fetch_json "
f"({url}): {e}. Retrying in 1 second..."
)
sleep(1)

    def get_rants(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
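        """Return one page of recent rants (`limit` items starting at `skip`),
        or an empty list on API failure."""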
        url: str = (
            f"{self.base_url}/rants?app={self.app_id}&limit={limit}&skip={skip}"
        )
data: Any = self.fetch_json(url)
if data.get("success"):
return data.get("rants", [])
return []

    def get_rant_details(self, rant_id: int) -> Optional[Dict[str, Any]]:
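        """Return the full rant payload (including comments), or None on failure."""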
url: str = f"{self.base_url}/rants/{rant_id}?app={self.app_id}"
data: Any = self.fetch_json(url)
if data.get("success"):
return data
return None

    def extract_mentions_from_text(self, text: str) -> List[str]:
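        """Return every @username token in `text` (letters, digits, '_' and '-')."""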
mention_pattern = re.compile(r"@([a-zA-Z0-9_-]+)")
return mention_pattern.findall(text)

    def process_rant(self, rant_id: int) -> None:
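        """Scan one rant's comments and record each not-yet-seen @mention."""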
details: Optional[Dict[str, Any]] = self.get_rant_details(rant_id)
if not details:
print(f"Failed to get details for rant {rant_id}")
return
comments: List[Dict[str, Any]] = details.get("comments", [])
for comment in comments:
comment_body: str = comment.get("body", "")
mentioned_users: List[str] = self.extract_mentions_from_text(comment_body)
if mentioned_users:
from_user: str = comment.get("user_username", "unknown")
created_time: int = comment.get("created_time", 0)
                comment_id: Optional[int] = comment.get("id")
for mentioned_user in mentioned_users:
mention_guid: str = f"{comment_id}-to-{mentioned_user}"
if mention_guid not in self.seen_mention_ids:
self.mentions.append(
{
"from": from_user,
"to": mentioned_user,
"content": comment_body,
"rant_id": rant_id,
"comment_id": comment_id,
"created_time": created_time,
}
)
self.seen_mention_ids.add(mention_guid)

    def extract_all_mentions(
self, num_pages: int = 5, limit: int = 50, delay: float = 0.5
) -> List[Dict[str, Any]]:
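        """Page through the rant feed, pausing `delay` seconds between rants,
        and return the accumulated mention records."""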
for page in range(num_pages):
skip: int = page * limit
print(f"Fetching page {page + 1}/{num_pages} (skip={skip})...")
rants: List[Dict[str, Any]] = self.get_rants(limit=limit, skip=skip)
if not rants:
print("No more rants found.")
break
for rant in rants:
                rant_id: Optional[int] = rant.get("id")
                if rant_id is None:
                    continue
print(f"Processing rant {rant_id}...")
self.process_rant(rant_id)
sleep(delay)
return self.mentions

    def generate_rss(self, output_file: str = "dr.mentions.xml") -> None:
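        """Write the collected mentions to `output_file` as an RSS 2.0 feed."""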
rss: Element = Element("rss", version="2.0")
channel: Element = SubElement(rss, "channel")
SubElement(channel, "title").text = "devRant Mentions Feed"
SubElement(channel, "link").text = "https://devrant.com"
SubElement(channel, "description").text = (
"Live feed of all @mentions on devRant"
)
SubElement(channel, "lastBuildDate").text = datetime.utcnow().strftime(
"%a, %d %b %Y %H:%M:%S GMT"
)
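        # Emit one <item> per recorded mention; run() sorts these newest-first.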
for mention in self.mentions:
item: Element = SubElement(channel, "item")
title: str = f"{mention['from']} mentioned @{mention['to']}"
SubElement(item, "title").text = title
link: str = f"https://devrant.com/rants/{mention['rant_id']}"
SubElement(item, "link").text = link
description: str = mention["content"]
SubElement(item, "description").text = description
guid: str = f"devrant-mention-{mention['comment_id']}-to-{mention['to']}"
SubElement(item, "guid", isPermaLink="false").text = guid
if mention.get("created_time"):
                pub_date: str = datetime.fromtimestamp(
                    mention["created_time"], tz=timezone.utc
                ).strftime("%a, %d %b %Y %H:%M:%S GMT")
SubElement(item, "pubDate").text = pub_date
xml_string: str = minidom.parseString(tostring(rss)).toprettyxml(indent=" ")
with open(output_file, "w", encoding="utf-8") as f:
f.write(xml_string)
print(f"RSS feed saved to {output_file}")

    def save_to_json(self, output_file: str = "mentions.json") -> None:
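        """Dump the collected mentions to `output_file` as pretty-printed JSON."""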
with open(output_file, "w", encoding="utf-8") as f:
json.dump(self.mentions, f, indent=2, ensure_ascii=False)
print(f"JSON data saved to {output_file}")

    def run(
self,
num_pages: int = 5,
json_file: str = "dr.mentions.json",
rss_file: str = "dr.mentions.xml",
) -> List[Dict[str, Any]]:
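        """Reset state, crawl `num_pages` pages, sort mentions newest-first,
        and write both the JSON and RSS outputs."""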
print(f"[{datetime.now()}] Starting extraction...")
self.mentions = []
self.seen_mention_ids.clear()
self.extract_all_mentions(num_pages=num_pages)
print(f"[{datetime.now()}] Found {len(self.mentions)} mentions total.")
print(f"[{datetime.now()}] Sorting mentions...")
self.mentions.sort(key=lambda m: m.get("created_time", 0), reverse=True)
self.save_to_json(json_file)
self.generate_rss(rss_file)
print(f"[{datetime.now()}] Extraction complete.")
return self.mentions


# Re-run the extraction every 5 minutes, surviving transient failures.
if __name__ == "__main__":
while True:
try:
extractor: DevRantMentionExtractor = DevRantMentionExtractor()
            start_time: float = time()
            extractor.run(num_pages=5)
            duration: float = time() - start_time
print(f"[{datetime.now()}] Process took {duration:.2f} seconds")
print(f"[{datetime.now()}] Sleeping for 5 minutes...")
sleep(300)
except KeyboardInterrupt:
print("\nStopping...")
break
except Exception as e:
print(f"[{datetime.now()}] An error occurred: {e}")
print("Retrying in 5 minutes...")
sleep(300)