feat: add devrant mention extractor class
feat: implement json and rss export functionality
feat: add command line interface for running the extractor
maintenance: update dependencies and add type hints
docs: add documentation for the extractor class and methods
parent 59ada88301
commit a08a567b0f
@@ -8,6 +8,14 @@
## Version 0.8.0 - 2025-11-05

Users can now connect external tools to automate more complex tasks. Developers can integrate new tools using the updated elon.py file.

**Changes:** 2 files, 8 lines
**Languages:** Markdown (8 lines)

## Version 0.7.0 - 2025-11-05

The system can now use external tools to complete tasks. This allows it to handle more complex requests and provide more comprehensive responses.

201 dr.mentions.py Normal file
@@ -0,0 +1,201 @@
from typing import List, Dict, Any, Optional, Set
import re
import json
import time
from urllib.request import urlopen, Request
from urllib.error import URLError
from time import sleep
from datetime import datetime
from xml.etree.ElementTree import Element, SubElement, tostring
from xml.dom import minidom
from json.decoder import JSONDecodeError


class DevRantMentionExtractor:
    """Extracts @mentions from devRant comments and exports them as JSON and RSS."""

    def __init__(
        self, base_url: str = "https://www.devrant.io/api/devrant", app_id: str = "3"
    ) -> None:
        self.base_url: str = base_url
        self.app_id: str = app_id
        # Accumulated mention records, deduplicated via seen_mention_ids.
        self.mentions: List[Dict[str, Any]] = []
        self.seen_mention_ids: Set[str] = set()

    def fetch_json(self, url: str) -> Any:
        """Fetch and decode a JSON payload, retrying indefinitely on failure."""
        while True:
            try:
                req: Request = Request(url, headers={"User-Agent": "Mozilla/5.0"})
                with urlopen(req) as response:
                    return json.loads(response.read().decode("utf-8"))
            except (URLError, JSONDecodeError) as e:
                print(
                    f"[{datetime.now()}] Error fetching/decoding {url}: {e}. "
                    "Retrying in 1 second..."
                )
                sleep(1)
            except Exception as e:
                print(
                    f"[{datetime.now()}] Unexpected error in fetch_json "
                    f"({url}): {e}. Retrying in 1 second..."
                )
                sleep(1)

    def get_rants(self, limit: int = 50, skip: int = 0) -> List[Dict[str, Any]]:
        """Return one page of recent rants, or an empty list on API failure."""
        url: str = f"{self.base_url}/rants?app={self.app_id}&limit={limit}&skip={skip}"
        data: Any = self.fetch_json(url)
        if data.get("success"):
            return data.get("rants", [])
        return []

    def get_rant_details(self, rant_id: int) -> Optional[Dict[str, Any]]:
        """Return the full payload (including comments) for a single rant."""
        url: str = f"{self.base_url}/rants/{rant_id}?app={self.app_id}"
        data: Any = self.fetch_json(url)
        if data.get("success"):
            return data
        return None

    def extract_mentions_from_text(self, text: str) -> List[str]:
        """Return all @mentioned usernames found in the given text."""
        mention_pattern = re.compile(r"@([a-zA-Z0-9_-]+)")
        return mention_pattern.findall(text)
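    # For illustration (hypothetical input): the pattern captures the username
    # after each "@", so extract_mentions_from_text("@alice thanks, cc @bob-dev")
    # returns ["alice", "bob-dev"].
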
    def process_rant(self, rant_id: int) -> None:
        """Scan a rant's comments and record every new @mention found."""
        details: Optional[Dict[str, Any]] = self.get_rant_details(rant_id)
        if not details:
            print(f"Failed to get details for rant {rant_id}")
            return

        comments: List[Dict[str, Any]] = details.get("comments", [])

        for comment in comments:
            comment_body: str = comment.get("body", "")
            mentioned_users: List[str] = self.extract_mentions_from_text(comment_body)

            if mentioned_users:
                from_user: str = comment.get("user_username", "unknown")
                created_time: int = comment.get("created_time", 0)
                comment_id: int = comment.get("id")

                for mentioned_user in mentioned_users:
                    # One comment can mention several users; give each pair its own GUID.
                    mention_guid: str = f"{comment_id}-to-{mentioned_user}"

                    if mention_guid not in self.seen_mention_ids:
                        self.mentions.append(
                            {
                                "from": from_user,
                                "to": mentioned_user,
                                "content": comment_body,
                                "rant_id": rant_id,
                                "comment_id": comment_id,
                                "created_time": created_time,
                            }
                        )
                        self.seen_mention_ids.add(mention_guid)

    def extract_all_mentions(
        self, num_pages: int = 5, limit: int = 50, delay: float = 0.5
    ) -> List[Dict[str, Any]]:
        """Page through the rant feed and collect mentions, pausing between rants."""
        for page in range(num_pages):
            skip: int = page * limit
            print(f"Fetching page {page + 1}/{num_pages} (skip={skip})...")

            rants: List[Dict[str, Any]] = self.get_rants(limit=limit, skip=skip)
            if not rants:
                print("No more rants found.")
                break

            for rant in rants:
                rant_id: int = rant.get("id")
                print(f"Processing rant {rant_id}...")
                self.process_rant(rant_id)
                sleep(delay)  # throttle requests between rants

        return self.mentions

    def generate_rss(self, output_file: str = "dr.mentions.xml") -> None:
        """Write the collected mentions to an RSS 2.0 feed file."""
        rss: Element = Element("rss", version="2.0")
        channel: Element = SubElement(rss, "channel")

        SubElement(channel, "title").text = "devRant Mentions Feed"
        SubElement(channel, "link").text = "https://devrant.com"
        SubElement(channel, "description").text = (
            "Live feed of all @mentions on devRant"
        )
        SubElement(channel, "lastBuildDate").text = datetime.utcnow().strftime(
            "%a, %d %b %Y %H:%M:%S GMT"
        )

        for mention in self.mentions:
            item: Element = SubElement(channel, "item")

            title: str = f"{mention['from']} mentioned @{mention['to']}"
            SubElement(item, "title").text = title

            link: str = f"https://devrant.com/rants/{mention['rant_id']}"
            SubElement(item, "link").text = link

            description: str = mention["content"]
            SubElement(item, "description").text = description

            # Stable per-mention GUID so feed readers don't resurface old items.
            guid: str = f"devrant-mention-{mention['comment_id']}-to-{mention['to']}"
            SubElement(item, "guid", isPermaLink="false").text = guid

            if mention.get("created_time"):
                pub_date: str = datetime.utcfromtimestamp(
                    mention["created_time"]
                ).strftime("%a, %d %b %Y %H:%M:%S GMT")
                SubElement(item, "pubDate").text = pub_date

        xml_string: str = minidom.parseString(tostring(rss)).toprettyxml(indent="  ")

        with open(output_file, "w", encoding="utf-8") as f:
            f.write(xml_string)

        print(f"RSS feed saved to {output_file}")
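    # Shape of one emitted <item>, for illustration only (hypothetical values:
    # a mention from "alice" to "bob" on rant 12345, comment 67890):
    #
    #   <item>
    #     <title>alice mentioned @bob</title>
    #     <link>https://devrant.com/rants/12345</link>
    #     <description>@bob have you seen this?</description>
    #     <guid isPermaLink="false">devrant-mention-67890-to-bob</guid>
    #     <pubDate>Wed, 05 Nov 2025 12:00:00 GMT</pubDate>
    #   </item>
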
    def save_to_json(self, output_file: str = "mentions.json") -> None:
        """Dump the collected mentions to a JSON file."""
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(self.mentions, f, indent=2, ensure_ascii=False)
        print(f"JSON data saved to {output_file}")

    def run(
        self,
        num_pages: int = 5,
        json_file: str = "dr.mentions.json",
        rss_file: str = "dr.mentions.xml",
    ) -> List[Dict[str, Any]]:
        """Full pipeline: reset state, extract, sort newest-first, export JSON and RSS."""
        print(f"[{datetime.now()}] Starting extraction...")
        self.mentions = []
        self.seen_mention_ids.clear()
        self.extract_all_mentions(num_pages=num_pages)

        print(f"[{datetime.now()}] Found {len(self.mentions)} mentions total.")

        print(f"[{datetime.now()}] Sorting mentions...")
        self.mentions.sort(key=lambda m: m.get("created_time", 0), reverse=True)

        self.save_to_json(json_file)
        self.generate_rss(rss_file)
        print(f"[{datetime.now()}] Extraction complete.")
        return self.mentions


if __name__ == "__main__":
    # Re-run the extraction every five minutes until interrupted.
    while True:
        try:
            extractor: DevRantMentionExtractor = DevRantMentionExtractor()
            start_time: float = time.time()
            extractor.run(num_pages=5)
            duration: float = time.time() - start_time
            print(f"[{datetime.now()}] Process took {duration:.2f} seconds")
            print(f"[{datetime.now()}] Sleeping for 5 minutes...")
            sleep(300)
        except KeyboardInterrupt:
            print("\nStopping...")
            break
        except Exception as e:
            print(f"[{datetime.now()}] An error occurred: {e}")
            print("Retrying in 5 minutes...")
            sleep(300)
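A minimal consumption sketch, assuming the script has already run with its default `dr.mentions.json` output in the working directory; the field names follow the record structure built in `process_rant`:

```python
import json
from datetime import datetime, timezone

# Load the exported mention records (pre-sorted newest-first by run()).
with open("dr.mentions.json", encoding="utf-8") as f:
    mentions = json.load(f)

# Print the five most recent mentions with a readable UTC timestamp.
for m in mentions[:5]:
    ts = datetime.fromtimestamp(m["created_time"], tz=timezone.utc)
    print(f"[{ts:%Y-%m-%d %H:%M}] {m['from']} -> @{m['to']}: {m['content'][:60]}")
```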