From 4ff5d3ee79000bfdfd5427715319d3c053af8744 Mon Sep 17 00:00:00 2001 From: gilles Date: Sun, 18 Jan 2026 19:33:04 +0100 Subject: [PATCH] feat: auto-scrape on product creation and update product data (Step 4) - Add automatic scraping when creating a new product - Update product title and image from scraped data - Add GET /products/{id}/snapshots endpoint for price history - Add list_snapshots and get_latest_snapshot to CRUD Co-Authored-By: Claude Opus 4.5 --- backend/app/api/routes_products.py | 27 +++++++++++++++++++++++---- backend/app/db/crud.py | 19 +++++++++++++++++++ backend/app/scraper/runner.py | 17 +++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/backend/app/api/routes_products.py b/backend/app/api/routes_products.py index b27d446..896ba33 100644 --- a/backend/app/api/routes_products.py +++ b/backend/app/api/routes_products.py @@ -1,10 +1,11 @@ from __future__ import annotations -from fastapi import APIRouter, Depends, HTTPException, status +from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status from sqlalchemy.orm import Session from backend.app.api.deps import get_db from backend.app.db import crud, schemas +from backend.app.scraper.runner import scrape_product router = APIRouter(prefix="/products", tags=["products"]) @@ -16,9 +17,15 @@ def list_products(skip: int = 0, limit: int = 50, db: Session = Depends(get_db)) @router.post("", response_model=schemas.ProductRead, status_code=status.HTTP_201_CREATED) -def create_product(payload: schemas.ProductCreate, db: Session = Depends(get_db)) -> schemas.ProductRead: - # création de produit rigoureuse via Pydantic - return crud.create_product(db, payload) +def create_product( + payload: schemas.ProductCreate, + background_tasks: BackgroundTasks, + db: Session = Depends(get_db), +) -> schemas.ProductRead: + product = crud.create_product(db, payload) + # Déclenche automatiquement le scraping après création + background_tasks.add_task(scrape_product, product.id) + return product @router.get("/{product_id}", response_model=schemas.ProductRead) @@ -45,3 +52,15 @@ def delete_product(product_id: int, db: Session = Depends(get_db)) -> None: raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable") # suppression définitive en base crud.remove_product(db, product) + + +@router.get("/{product_id}/snapshots", response_model=list[schemas.ProductSnapshotRead]) +def list_snapshots( + product_id: int, + limit: int = 30, + db: Session = Depends(get_db), +) -> list[schemas.ProductSnapshotRead]: + product = crud.get_product(db, product_id) + if not product: + raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="Produit introuvable") + return crud.list_snapshots(db, product_id, limit=limit) diff --git a/backend/app/db/crud.py b/backend/app/db/crud.py index 8cdbe9a..00954bd 100644 --- a/backend/app/db/crud.py +++ b/backend/app/db/crud.py @@ -43,3 +43,22 @@ def update_product(db: Session, product: models.Product, changes: schemas.Produc def remove_product(db: Session, product: models.Product) -> None: db.delete(product) db.commit() + + +def list_snapshots(db: Session, product_id: int, limit: int = 30) -> list[models.ProductSnapshot]: + return ( + db.query(models.ProductSnapshot) + .filter(models.ProductSnapshot.produit_id == product_id) + .order_by(models.ProductSnapshot.scrape_le.desc()) + .limit(limit) + .all() + ) + + +def get_latest_snapshot(db: Session, product_id: int) -> models.ProductSnapshot | None: + return ( + db.query(models.ProductSnapshot) + .filter(models.ProductSnapshot.produit_id == product_id) + .order_by(models.ProductSnapshot.scrape_le.desc()) + .first() + ) diff --git a/backend/app/scraper/runner.py b/backend/app/scraper/runner.py index d1a8574..7402762 100644 --- a/backend/app/scraper/runner.py +++ b/backend/app/scraper/runner.py @@ -53,6 +53,20 @@ def _save_debug_artifacts(page, product_id: int) -> tuple[Path, Path]: return screenshot_path, html_path +def _update_product_from_scrape( + session: Session, + product: models.Product, + data: dict, +) -> None: + """Met à jour le produit avec les données scrappées (titre, image).""" + if data.get("titre") and not product.titre: + product.titre = data["titre"] + if data.get("url_image_principale") and not product.url_image: + product.url_image = data["url_image_principale"] + session.add(product) + session.commit() + + def _create_snapshot( session: Session, product: models.Product, @@ -62,6 +76,9 @@ def _create_snapshot( raw_json_path: Path | None, error_message: str | None = None, ) -> None: + # Mettre à jour le produit avec titre/image si manquants + _update_product_from_scrape(session, product, data) + snapshot = models.ProductSnapshot( produit_id=product.id, run_scrap_id=run.id,