2025-08-03 00:40:34 +02:00
|
|
|
# main.py
|
|
|
|
import asyncio
|
|
|
|
import logging
|
|
|
|
import signal
|
|
|
|
|
|
|
|
from crawler import DevRantCrawler
|
2025-08-13 00:06:44 +02:00
|
|
|
from database import DatabaseManager
|
|
|
|
|
|
|
|
from devranta.api import Api
|
2025-08-03 00:40:34 +02:00
|
|
|
|
|
|
|
# --- Configuration ---

# Path of the SQLite database file handed to DatabaseManager.
DB_FILE = "devrant.sqlite"

# How many rants to process at once.
CONCURRENT_RANT_CONSUMERS = 10

# How many user profiles to fetch at once.
CONCURRENT_USER_CONSUMERS = 5
|
|
|
|
|
2025-08-03 00:40:34 +02:00
|
|
|
|
|
|
|
async def main():
    """Configure logging, build the crawler and run it until it finishes.

    The root logger is configured at INFO level, an ``Api`` client is
    created, and the database is opened through ``DatabaseManager`` as an
    async context manager so it is released when this coroutine exits.
    ``SIGINT`` and ``SIGTERM`` are routed to ``crawler.shutdown()`` where the
    running event loop supports signal handlers.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    api = Api()

    async with DatabaseManager(DB_FILE) as db:
        crawler = DevRantCrawler(
            api=api,
            db=db,
            rant_consumers=CONCURRENT_RANT_CONSUMERS,
            user_consumers=CONCURRENT_USER_CONSUMERS,
        )

        # The event loop only keeps weak references to tasks, so a
        # fire-and-forget shutdown task could be garbage-collected before it
        # completes. Hold a strong reference until the task is done.
        shutdown_tasks = set()

        def request_shutdown():
            """Schedule ``crawler.shutdown()`` on the running loop."""
            task = asyncio.create_task(crawler.shutdown())
            shutdown_tasks.add(task)
            task.add_done_callback(shutdown_tasks.discard)

        # Set up a signal handler for graceful shutdown on Ctrl+C / SIGTERM.
        loop = asyncio.get_running_loop()
        for sig in (signal.SIGINT, signal.SIGTERM):
            try:
                loop.add_signal_handler(sig, request_shutdown)
            except NotImplementedError:
                # Event loops without Unix signal support (e.g. on Windows)
                # cannot register handlers; Ctrl+C then surfaces as a
                # KeyboardInterrupt from asyncio.run() instead.
                logging.warning(
                    "Signal handler for %s is not supported on this platform.",
                    sig,
                )

        await crawler.run()
|
|
|
|
|
2025-08-13 00:06:44 +02:00
|
|
|
|
2025-08-03 00:40:34 +02:00
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the async main() to completion and exit
    # quietly (with a log line) if the run is interrupted from the keyboard.
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logging.info("Main loop interrupted. Exiting.")
|