"""Scheduling of scraping jobs through RQ Scheduler."""

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from typing import Optional

import redis
from rq import Queue
from rq_scheduler import Scheduler

from pricewatch.app.core.config import AppConfig, get_config
from pricewatch.app.core.logging import get_logger
from pricewatch.app.tasks.scrape import scrape_product

logger = get_logger("tasks.scheduler")


@dataclass
class ScheduledJobInfo:
    """Details handed back to the caller after a job has been scheduled."""

    # Identifier of the job, as reported by the scheduler.
    job_id: str
    # Timezone-aware (UTC) moment of the first planned execution.
    next_run: datetime


class ScrapingScheduler:
    """Scheduler for scraping jobs, backed by RQ and RQ Scheduler."""

    def __init__(
        self,
        config: Optional[AppConfig] = None,
        queue_name: str = "default",
    ) -> None:
        """Build the Redis connection, the RQ queue and the scheduler.

        Args:
            config: Application configuration; when omitted (or falsy) the
                result of ``get_config()`` is used instead.
            queue_name: Name of the RQ queue that receives the jobs.
        """
        self.config = config or get_config()
        self.redis = redis.from_url(self.config.redis.url)
        # Queue and scheduler share the same Redis connection.
        self.queue = Queue(queue_name, connection=self.redis)
        self.scheduler = Scheduler(queue=self.queue, connection=self.redis)

    def enqueue_immediate(
        self,
        url: str,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ):
        """Enqueue a scraping job on the queue right away.

        Args:
            url: Product URL forwarded to ``scrape_product``.
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            The job object returned by ``Queue.enqueue``.
        """
        job = self.queue.enqueue(
            scrape_product,
            url,
            use_playwright=use_playwright,
            save_db=save_db,
        )
        logger.info(f"Job enqueued: {job.id}")
        return job

    def schedule_product(
        self,
        url: str,
        interval_hours: int = 24,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ) -> ScheduledJobInfo:
        """Schedule a recurring scrape of ``url`` (interval given in hours).

        The first execution is planned one full interval from now (UTC), and
        the same interval is passed to the scheduler for the repetitions.

        Args:
            url: Product URL forwarded to ``scrape_product``.
            interval_hours: Delay between two runs, in hours. It is converted
                to whole seconds (fractions of a second are truncated) and
                must amount to at least one second.
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            A ``ScheduledJobInfo`` holding the job id and the first run time.

        Raises:
            ValueError: If ``interval_hours`` does not amount to a positive
                number of whole seconds.
        """
        interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
        # A zero or negative interval cannot describe a recurring schedule
        # (the first run would land now or in the past), so reject it before
        # anything is written to Redis.
        if interval_seconds <= 0:
            raise ValueError(
                "interval_hours must amount to at least one second, "
                f"got {interval_hours!r}"
            )

        next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)

        job = self.scheduler.schedule(
            scheduled_time=next_run,
            func=scrape_product,
            args=[url],
            kwargs={"use_playwright": use_playwright, "save_db": save_db},
            interval=interval_seconds,
            repeat=None,  # no cap on the number of repetitions is supplied
        )
        logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
        return ScheduledJobInfo(job_id=job.id, next_run=next_run)