# retoor <retoor@molodetz.nl>

import asyncio
import logging
import signal

from crawler import DevRantCrawler
from database import DatabaseManager
from devranta.api import Api

# Crawler and batched-write configuration.
DB_FILE = "devrant.sqlite"
CONCURRENT_RANT_CONSUMERS = 10
CONCURRENT_USER_CONSUMERS = 5
BATCH_SIZE = 100
FLUSH_INTERVAL = 5.0


async def main():
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] - %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Open the devRant API client and the database manager as async
    # context managers so both are cleaned up on exit.
    async with Api() as api:
        async with DatabaseManager(
            DB_FILE,
            batch_size=BATCH_SIZE,
            flush_interval=FLUSH_INTERVAL,
        ) as db:
            crawler = DevRantCrawler(
                api=api,
                db=db,
                rant_consumers=CONCURRENT_RANT_CONSUMERS,
                user_consumers=CONCURRENT_USER_CONSUMERS,
            )

            # Install signal handlers so SIGINT/SIGTERM trigger a
            # graceful crawler shutdown instead of an abrupt exit.
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGINT, signal.SIGTERM):
                loop.add_signal_handler(
                    sig, lambda s=sig: asyncio.create_task(crawler.shutdown())
                )

            await crawler.run()


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logging.info("Main loop interrupted. Exiting.")