76 lines
2.3 KiB
Python
Executable File
76 lines
2.3 KiB
Python
Executable File
"""
|
|
Planification des jobs de scraping via RQ Scheduler.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timedelta, timezone
|
|
from typing import Optional
|
|
|
|
import redis
|
|
from rq import Queue
|
|
from rq_scheduler import Scheduler
|
|
|
|
from pricewatch.app.core.config import AppConfig, get_config
|
|
from pricewatch.app.core.logging import get_logger
|
|
from pricewatch.app.tasks.scrape import scrape_product
|
|
|
|
logger = get_logger("tasks.scheduler")
|
|
|
|
|
|
@dataclass
class ScheduledJobInfo:
    """Summary handed back to the caller once a job has been scheduled.

    Attributes:
        job_id: Identifier of the scheduled job.
        next_run: Datetime at which the job is planned to run next.
    """

    job_id: str
    next_run: datetime
|
|
|
|
|
|
class ScrapingScheduler:
    """Enqueue and schedule scraping jobs with RQ and RQ Scheduler.

    The instance keeps a Redis client built from ``config.redis.url`` and
    shares it between an RQ ``Queue`` and an rq-scheduler ``Scheduler``.
    """

    def __init__(self, config: Optional[AppConfig] = None, queue_name: str = "default") -> None:
        """Build the Redis client, the queue and the scheduler.

        Args:
            config: Application configuration. When omitted (or falsy), the
                result of ``get_config()`` is used instead.
            queue_name: Name of the RQ queue jobs are sent to.
        """
        self.config = config or get_config()
        self.redis = redis.from_url(self.config.redis.url)
        self.queue = Queue(queue_name, connection=self.redis)
        self.scheduler = Scheduler(queue=self.queue, connection=self.redis)

    def enqueue_immediate(
        self,
        url: str,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ):
        """Enqueue a scraping job on the queue right away.

        Args:
            url: Product URL, passed as the positional argument of
                ``scrape_product``.
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            The job object returned by ``Queue.enqueue``.
        """
        job = self.queue.enqueue(
            scrape_product,
            url,
            use_playwright=use_playwright,
            save_db=save_db,
        )
        logger.info(f"Job enqueued: {job.id}")
        return job

    def schedule_product(
        self,
        url: str,
        interval_hours: int = 24,
        use_playwright: Optional[bool] = None,
        save_db: bool = True,
    ) -> ScheduledJobInfo:
        """Schedule a recurring scraping job (interval expressed in hours).

        The first run is planned one full interval after the current UTC
        time; the job is then registered with that same interval and no
        repeat limit.

        Args:
            url: Product URL, passed as the positional argument of
                ``scrape_product``.
            interval_hours: Delay between two runs, in hours. It is converted
                to whole seconds (fractions of a second are truncated).
            use_playwright: Forwarded unchanged to ``scrape_product``.
            save_db: Forwarded unchanged to ``scrape_product``.

        Returns:
            A ``ScheduledJobInfo`` holding the job id and the first run time.

        Raises:
            ValueError: If ``interval_hours`` converts to less than one
                second (zero, negative, or a tiny fraction).
        """
        interval_seconds = int(timedelta(hours=interval_hours).total_seconds())
        # A non-positive interval cannot describe a recurring schedule (and
        # would place the first run now or in the past), so reject it before
        # anything is registered with the scheduler.
        if interval_seconds <= 0:
            raise ValueError(
                "interval_hours must amount to at least one second, "
                f"got {interval_hours!r}"
            )

        next_run = datetime.now(timezone.utc) + timedelta(seconds=interval_seconds)

        job = self.scheduler.schedule(
            scheduled_time=next_run,
            func=scrape_product,
            args=[url],
            kwargs={"use_playwright": use_playwright, "save_db": save_db},
            interval=interval_seconds,
            repeat=None,  # no repeat limit is set
        )
        logger.info(f"Job planifie: {job.id}, prochaine execution: {next_run.isoformat()}")
        return ScheduledJobInfo(job_id=job.id, next_run=next_run)
|