diff --git a/.coverage b/.coverage
index 719073e..a429e48 100644
Binary files a/.coverage and b/.coverage differ
diff --git a/Capture d’écran du 2026-01-15 19-07-42.png b/Capture d’écran du 2026-01-15 19-07-42.png
new file mode 100755
index 0000000..3153d71
Binary files /dev/null and b/Capture d’écran du 2026-01-15 19-07-42.png differ
diff --git a/Image collée (5).png b/Image collée (5).png
new file mode 100755
index 0000000..027315f
Binary files /dev/null and b/Image collée (5).png differ
diff --git a/analytics-ui/Dockerfile b/analytics-ui/Dockerfile
new file mode 100644
index 0000000..75d5b6a
--- /dev/null
+++ b/analytics-ui/Dockerfile
@@ -0,0 +1,15 @@
+# Container image for the analytics UI: installs requirements.txt and runs app.py.
+FROM python:3.12-slim
+
+WORKDIR /app
+
+# PYTHONDONTWRITEBYTECODE=1: do not write .pyc files inside the image.
+# PYTHONUNBUFFERED=1: unbuffered stdout/stderr so output shows up immediately in container logs.
+ENV PYTHONDONTWRITEBYTECODE=1 \
+ PYTHONUNBUFFERED=1
+
+# Dependencies are copied and installed before app.py, so editing app.py
+# alone does not invalidate the pip install layer.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY app.py .
+
+# Matches the port passed to app.run() in app.py.
+EXPOSE 80
+
+CMD ["python", "app.py"]
diff --git a/analytics-ui/app.py b/analytics-ui/app.py
new file mode 100644
index 0000000..e3883ef
--- /dev/null
+++ b/analytics-ui/app.py
@@ -0,0 +1,381 @@
+import os
+from typing import Any, Dict, List, Optional, Tuple
+
+from decimal import Decimal
+from psycopg2.extras import RealDictCursor
+
+import psycopg2
+import redis
+from flask import Flask, jsonify, render_template_string
+
+app = Flask(__name__)
+
+
+def _env_int(name: str, default: int) -> int:
+    """Read an integer setting from the environment.
+
+    An unset or empty variable, or one whose value is not a valid integer,
+    yields ``default``.
+    """
+    candidate = os.getenv(name, "") or default
+    try:
+        parsed = int(candidate)
+    except ValueError:
+        parsed = default
+    return parsed
+
+
+def get_db_connection():
+    """Open a new PostgreSQL connection configured from ``PW_DB_*`` variables.
+
+    Every setting falls back to the literal default listed below when its
+    environment variable is not set. The caller owns the returned
+    connection.
+    """
+    settings = {
+        "host": os.getenv("PW_DB_HOST", "postgres"),
+        "port": _env_int("PW_DB_PORT", 5432),
+        "dbname": os.getenv("PW_DB_NAME", "pricewatch"),
+        "user": os.getenv("PW_DB_USER", "pricewatch"),
+        "password": os.getenv("PW_DB_PASSWORD", "pricewatch"),
+    }
+    return psycopg2.connect(**settings)
+
+
+def fetch_db_metrics() -> Tuple[Dict[str, Any], Optional[str]]:
+    """Collect table row counts and the five most recently updated products.
+
+    Returns:
+        A ``(data, error)`` pair. ``data`` always contains the keys
+        ``"counts"`` and ``"latest_products"``. On success ``error`` is
+        ``None``; if anything raises, ``error`` is the exception message and
+        ``data`` holds whatever had been gathered before the failure.
+    """
+    data: Dict[str, Any] = {"counts": {}, "latest_products": []}
+    count_queries = (
+        ("products", "SELECT COUNT(*) FROM products"),
+        ("price_history", "SELECT COUNT(*) FROM price_history"),
+        ("scraping_logs", "SELECT COUNT(*) FROM scraping_logs"),
+    )
+    conn = None
+    try:
+        conn = get_db_connection()
+        # psycopg2's connection context manager only ends the transaction
+        # (commit or rollback); the connection itself is closed in ``finally``.
+        with conn:
+            with conn.cursor() as cur:
+                for key, query in count_queries:
+                    cur.execute(query)
+                    data["counts"][key] = cur.fetchone()[0]
+
+                cur.execute(
+                    """
+                    SELECT id, source, reference, title, last_updated_at
+                    FROM products
+                    ORDER BY last_updated_at DESC
+                    LIMIT 5
+                    """
+                )
+                data["latest_products"] = [
+                    {
+                        "id": row[0],
+                        "source": row[1],
+                        "reference": row[2],
+                        "title": row[3] or "Sans titre",
+                        "updated": row[4].strftime("%Y-%m-%d %H:%M:%S")
+                        if row[4]
+                        else "n/a",
+                    }
+                    for row in cur.fetchall()
+                ]
+        return data, None
+    except Exception as exc:  # pragma: no cover (simple explorer)
+        return data, str(exc)
+    finally:
+        # Without this, every page load would leave one connection open.
+        if conn is not None:
+            conn.close()
+
+
+def _serialize_decimal(value):
+    """Convert ``Decimal`` values to ``float``; return anything else unchanged."""
+    return float(value) if isinstance(value, Decimal) else value
+
+
+def fetch_products_list(limit: int = 200) -> Tuple[List[Dict[str, Any]], Optional[str]]:
+    """Return the most recently updated products with their latest price row.
+
+    Args:
+        limit: Maximum number of products to return.
+
+    Returns:
+        A ``(rows, error)`` pair. Each row is a plain dict in which
+        ``Decimal`` values are converted to ``float`` and the
+        ``last_updated_at`` / ``fetched_at`` timestamps are formatted as
+        ``YYYY-MM-DD HH:MM:SS`` strings. On failure ``error`` is the
+        exception message and ``rows`` holds what was serialized so far.
+    """
+    timestamp_format = "%Y-%m-%d %H:%M:%S"
+    rows: List[Dict[str, Any]] = []
+    conn = None
+    try:
+        conn = get_db_connection()
+        # ``with conn`` only commits or rolls back; closing happens in ``finally``.
+        with conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute(
+                    """
+                    SELECT
+                        p.id,
+                        p.source,
+                        p.reference,
+                        p.title,
+                        p.url,
+                        p.category,
+                        p.description,
+                        p.currency,
+                        p.msrp,
+                        p.last_updated_at,
+                        ph.price,
+                        ph.stock_status,
+                        ph.fetch_status,
+                        ph.fetch_method,
+                        ph.fetched_at
+                    FROM products p
+                    LEFT JOIN LATERAL (
+                        SELECT price, stock_status, fetch_status, fetch_method, fetched_at
+                        FROM price_history
+                        WHERE product_id = p.id
+                        ORDER BY fetched_at DESC
+                        LIMIT 1
+                    ) ph ON true
+                    ORDER BY p.last_updated_at DESC
+                    LIMIT %s
+                    """,
+                    (limit,),
+                )
+                for item in cur.fetchall():
+                    serialized = {
+                        key: _serialize_decimal(value) for key, value in item.items()
+                    }
+                    for column in ("last_updated_at", "fetched_at"):
+                        if serialized.get(column):
+                            serialized[column] = serialized[column].strftime(
+                                timestamp_format
+                            )
+                    rows.append(serialized)
+        return rows, None
+    except Exception as exc:
+        return rows, str(exc)
+    finally:
+        # Release the connection on every path; the context manager does not.
+        if conn is not None:
+            conn.close()
+
+
+def get_redis_client() -> redis.Redis:
+    """Build a Redis client from the ``PW_REDIS_*`` environment variables.
+
+    Connect and socket timeouts are both fixed at two seconds.
+    """
+    options = {
+        "host": os.getenv("PW_REDIS_HOST", "redis"),
+        "port": _env_int("PW_REDIS_PORT", 6379),
+        "db": _env_int("PW_REDIS_DB", 0),
+        "socket_connect_timeout": 2,
+        "socket_timeout": 2,
+    }
+    return redis.Redis(**options)
+
+
+def check_redis() -> Tuple[str, Optional[str]]:
+    """Ping Redis and report the outcome.
+
+    Returns:
+        ``("OK", None)`` when the ping succeeds, otherwise ``("KO", message)``
+        where ``message`` is the text of the raised exception.
+    """
+    client = get_redis_client()
+    try:
+        client.ping()
+    except Exception as failure:
+        return "KO", str(failure)
+    return "OK", None
+
+
+# Jinja2 page template rendered by root() through render_template_string().
+# Variables it reads: metrics (counts.products, counts.price_history,
+# counts.scraping_logs, latest_products), db_status, db_error, redis_status
+# and redis_error. Product titles are cut to 40 characters with an ellipsis.
+TEMPLATE = """
+
+
+
+
+ PriceWatch Analytics UI
+
+
+
+
+
+
+ Vue rapide
+
+ Base : {{ db_status }}
+
+
+ Redis : {{ redis_status }}
+
+ {% if db_error or redis_error %}
+ Erreurs : {{ db_error or '' }} {{ redis_error or '' }}
+ {% endif %}
+
+
+ Stats métier
+
+ | Produits | {{ metrics.counts.products }} |
+ | Historique prix | {{ metrics.counts.price_history }} |
+ | Logs de scraping | {{ metrics.counts.scraping_logs }} |
+
+
+
+ Produits récemment mis à jour
+ {% if metrics.latest_products %}
+
+
+ | ID | Store | Référence | Révision | Mis à jour |
+
+
+ {% for item in metrics.latest_products %}
+
+ | {{ item.id }} |
+ {{ item.source }} |
+ {{ item.reference }} |
+ {{ item.title[:40] }}{% if item.title|length > 40 %}…{% endif %} |
+ {{ item.updated }} |
+
+ {% endfor %}
+
+
+ {% else %}
+ Aucun produit enregistré.
+ {% endif %}
+
+
+ Parcourir la base (produits)
+
+
+
+
+
+ 0 / 0
+
+
+
+ - Titre
+ - -
+ - Store
+ - -
+ - Référence
+ - -
+ - Dernier prix
+ - -
+ - Devise
+ - -
+ - Prix conseillé
+ - -
+ - Stock
+ - -
+ - Catégorie
+ - -
+ - Description
+ - -
+ - Dernière mise à jour
+ - -
+ - Historique dernier scrap
+ - -
+
+
+
+
+
+
+
+"""
+
+
+@app.route("/")
+def root():
+    """Render the dashboard page with database metrics and Redis status."""
+    metrics, db_error = fetch_db_metrics()
+    redis_status, redis_error = check_redis()
+    db_status = "erreur" if db_error is not None else "connecté"
+    context = {
+        "metrics": metrics,
+        "db_status": db_status,
+        "db_error": db_error,
+        "redis_status": redis_status,
+        "redis_error": redis_error,
+    }
+    return render_template_string(TEMPLATE, **context)
+
+
+@app.route("/products.json")
+def products_json():
+    """Serve the product list as JSON, or a 500 error payload on failure."""
+    products, error = fetch_products_list()
+    if not error:
+        return jsonify(products)
+    return jsonify({"error": error}), 500
+
+
+if __name__ == "__main__":
+ # Start Flask's built-in server when run as a script: listens on every
+ # interface, on port 80.
+ app.run(host="0.0.0.0", port=80)
diff --git a/analytics-ui/requirements.txt b/analytics-ui/requirements.txt
new file mode 100644
index 0000000..c4e572c
--- /dev/null
+++ b/analytics-ui/requirements.txt
@@ -0,0 +1,3 @@
+Flask==3.0.0
+psycopg2-binary==2.9.11
+redis==5.0.0
diff --git a/docker-compose.yml b/docker-compose.yml
index 362e5af..23a7229 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -40,9 +40,41 @@ services:
- "3000:80"
environment:
TZ: Europe/Paris
+ VITE_API_TOKEN: ${API_TOKEN:-}
+ env_file:
+ - .env
depends_on:
- api
+ analytics-ui:
+ build: ./analytics-ui
+ ports:
+ - "8070:80"
+ environment:
+ TZ: Europe/Paris
+ PW_DB_HOST: postgres
+ PW_DB_PORT: 5432
+ PW_DB_NAME: pricewatch
+ PW_DB_USER: pricewatch
+ PW_DB_PASSWORD: pricewatch
+ PW_REDIS_HOST: redis
+ PW_REDIS_PORT: 6379
+ PW_REDIS_DB: 0
+ env_file:
+ - .env
+ depends_on:
+ - postgres
+ - redis
+
+ adminer:
+ image: adminer
+ ports:
+ - "8071:8080"
+ environment:
+ TZ: Europe/Paris
+ depends_on:
+ - postgres
+
volumes:
pricewatch_pgdata:
pricewatch_redisdata:
diff --git a/docs/issue-42-ui-readability.md b/docs/issue-42-ui-readability.md
new file mode 100644
index 0000000..bf7c552
--- /dev/null
+++ b/docs/issue-42-ui-readability.md
@@ -0,0 +1,50 @@
+## Objectif
+Améliorer la clarté et la lisibilité de l’interface (catalogue, filtres, détails produit) **sans modifier la palette de couleurs existante**.
+
+## Contraintes strictes
+- Interdit : changement de couleurs (fond, accent, badges, etc.)
+- Autorisé : typographie, espacements, hiérarchie, mise en page, libellés, tooltips, états, comportements hover/focus, clamp.
+
+---
+
+## Tâches
+
+### Cartes produit (catalogue)
+- [ ] Titre : line-clamp 2 lignes + ellipse
+- [ ] Tooltip titre complet (survol + clavier)
+- [ ] Prix : taille 18–20px, bold (prix = focal n°1)
+- [ ] Delta : format standard ▲/▼ + % (sinon afficher —)
+- [ ] Statuts : remplacer `unknown/n/a` par `En stock / Rupture / Inconnu / Erreur scrape`
+- [ ] Badges statuts homogènes (sans changer couleurs)
+- [ ] Actions : 1 action primaire visible, secondaires au hover ou menu “...”
+- [ ] Tooltips obligatoires sur toutes les icônes + aria-label
+
+### Panneau Détails (colonne droite)
+- [ ] Découper en sections : Résumé / Prix / Historique / Source / Actions
+- [ ] Prix dominant visuellement + espacement vertical accru
+- [ ] URL cliquable + bouton copier + ASIN visible
+- [ ] Actions regroupées en bas
+
+### Filtres (colonne gauche)
+- [ ] Afficher compteur `X affichés / Y`
+- [ ] Chips filtres actifs (cliquables pour retirer)
+- [ ] Bouton Reset filtres toujours visible
+- [ ] Labels cohérents + placeholders explicites
+
+### Comparaison
+- [ ] Message guidage : “Sélectionnez 2 à 4 produits…”
+- [ ] Afficher compteur de sélection (`2 sélectionnés`, etc.)
+
+### Accessibilité
+- [ ] Focus clavier visible
+- [ ] Navigation clavier : Tab sur cartes, Enter ouvre détails
+- [ ] Icônes avec aria-label + tooltips accessibles
+
+---
+
+## Critères d’acceptation
+- Prix clairement dominant sur cartes et détails
+- Titres non envahissants (2 lignes max)
+- Statuts compréhensibles (plus de unknown/n/a)
+- Filtres : X/Y + chips + reset
+- Aucune couleur modifiée
diff --git a/fonctionnement.md b/fonctionnement.md
new file mode 100644
index 0000000..34b4cf9
--- /dev/null
+++ b/fonctionnement.md
@@ -0,0 +1,26 @@
+## Fonctionnement général de PriceWatch
+
+Lorsqu’un utilisateur colle une URL dans la web UI et lance l’ajout d’un produit ou le déclenchement d’un scrap, voici le cheminement principal entre le **frontend Vue** et le **backend FastAPI** :
+
+1. **Entrée utilisateur / validation**
+ * Le popup "Ajouter un produit" envoie `POST /scrape/preview` avec l’URL + le mode (HTTP ou Playwright).
+ * Les boutons "Ajouter" et "Enregistrer" ne deviennent accessibles qu’après que la preview a renvoyé un `ProductSnapshot` ; sinon, une erreur est affichée dans le popup.
+
+2. **Backend (API)**
+ * L’endpoint `/scrape/preview` reçoit l’URL, détermine le store (via `pricewatch/app/core/registry.py`) et utilise un parser adapté (`pricewatch/app/stores/<store>/`) pour extraire titre, prix, images, description, caractéristiques, stock, etc.
+ * Si la page nécessite un navigateur, la stratégie Playwright (avec `pricewatch/app/scraping/playwright.py`) est déclenchée, sinon le fetch HTTP simple (`pricewatch/app/scraping/http.py`) suffit.
+ * Le snapshot structuré `ProductSnapshot` contient les métadonnées, la liste d’images (jpg/webp) et les champs `msrp`, `discount`, `categories`, `specs`, etc.
+ * En cas de succès, la preview renvoie un JSON que le frontend affiche dans le popup. En cas d’erreur (404, 401, scraping bloqué), l’utilisateur voit directement le message retourné.
+
+3. **Confirmation / persist**
+ * Quand l’utilisateur clique sur "Enregistrer", la web UI déclenche `POST /scrape/commit` avec l’objet snapshot.
+ * Le backend réinsère les données dans la base (`pricewatch/app/core/io.py`) et l’API `/products` ou `/enqueue` peut ensuite réafficher ou re-scraper ce produit.
+
+4. **Cycle de rafraîchissement**
+ * Le frontend peut aussi appeler `/enqueue` pour forcer un nouveau scrap d’une URL existante (bouton refresh dans la carte ou le détail).
+ * Le backend place la requête dans Redis (via `pricewatch/app/core/queue.py`), un worker la consomme, met à jour la base, et le frontend récupère les nouvelles données via `GET /products`.
+
+5. **Observabilité / logs**
+ * Les étapes critiques (preview, commit, enqueue) génèrent des logs (backend/uvicorn) disponibles dans la web UI via les boutons logs. Les erreurs sont mises en rouge et peuvent être copiées pour diagnostic.
+
+Ce flux respecte les contraintes : la web UI déroule les interactions, le backend orchestre le scraping (HTTP vs Playwright), applique la logique store et diffuse le résultat via les endpoints REST existants.
diff --git a/image.png b/image.png
new file mode 100644
index 0000000..f277873
Binary files /dev/null and b/image.png differ
diff --git a/pricewatch/app/api/main.py b/pricewatch/app/api/main.py
index d32e95b..e3ec42e 100644
--- a/pricewatch/app/api/main.py
+++ b/pricewatch/app/api/main.py
@@ -21,31 +21,32 @@ from sqlalchemy import and_, desc, func
from sqlalchemy.orm import Session
from pricewatch.app.api.schemas import (
+ BackendLogEntry,
EnqueueRequest,
EnqueueResponse,
HealthStatus,
- PriceHistoryOut,
PriceHistoryCreate,
+ PriceHistoryOut,
PriceHistoryUpdate,
- ProductOut,
ProductCreate,
+ ProductHistoryPoint,
+ ProductOut,
ProductUpdate,
ScheduleRequest,
ScheduleResponse,
- ScrapingLogOut,
- ScrapingLogCreate,
- ScrapingLogUpdate,
- ScrapePreviewRequest,
- ScrapePreviewResponse,
ScrapeCommitRequest,
ScrapeCommitResponse,
- VersionResponse,
- BackendLogEntry,
+ ScrapePreviewRequest,
+ ScrapePreviewResponse,
+ ScrapingLogCreate,
+ ScrapingLogOut,
+ ScrapingLogUpdate,
UvicornLogEntry,
- WebhookOut,
+ VersionResponse,
WebhookCreate,
- WebhookUpdate,
+ WebhookOut,
WebhookTestResponse,
+ WebhookUpdate,
)
from pricewatch.app.core.config import get_config
from pricewatch.app.core.logging import get_logger
@@ -794,6 +795,9 @@ def _read_uvicorn_lines(limit: int = 200) -> list[str]:
return []
+PRODUCT_HISTORY_LIMIT = 12
+
+
def _product_to_out(session: Session, product: Product) -> ProductOut:
"""Helper pour mapper Product + dernier prix."""
latest = (
@@ -810,6 +814,18 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
discount_amount = float(product.msrp) - float(latest.price)
if product.msrp > 0:
discount_percent = (discount_amount / float(product.msrp)) * 100
+ history_rows = (
+ session.query(PriceHistory)
+ .filter(PriceHistory.product_id == product.id, PriceHistory.price != None)
+ .order_by(desc(PriceHistory.fetched_at))
+ .limit(PRODUCT_HISTORY_LIMIT)
+ .all()
+ )
+ history_points = [
+ ProductHistoryPoint(price=float(row.price), fetched_at=row.fetched_at)
+ for row in reversed(history_rows)
+ if row.price is not None
+ ]
return ProductOut(
id=product.id,
source=product.source,
@@ -832,6 +848,7 @@ def _product_to_out(session: Session, product: Product) -> ProductOut:
specs=specs,
discount_amount=discount_amount,
discount_percent=discount_percent,
+ history=history_points,
)
diff --git a/pricewatch/app/api/schemas.py b/pricewatch/app/api/schemas.py
index a591eb9..dad001f 100644
--- a/pricewatch/app/api/schemas.py
+++ b/pricewatch/app/api/schemas.py
@@ -13,6 +13,11 @@ class HealthStatus(BaseModel):
redis: bool
+class ProductHistoryPoint(BaseModel):
+ """One price observation in a product's history (a price and when it was fetched)."""
+ # Observed price, as a float.
+ price: float
+ # Timestamp at which this price was fetched.
+ fetched_at: datetime
+
+
class ProductOut(BaseModel):
id: int
source: str
@@ -33,6 +38,7 @@ class ProductOut(BaseModel):
specs: dict[str, str] = {}
discount_amount: Optional[float] = None
discount_percent: Optional[float] = None
+ history: list[ProductHistoryPoint] = Field(default_factory=list)
class ProductCreate(BaseModel):
diff --git a/pricewatch/app/core/__pycache__/schema.cpython-313.pyc b/pricewatch/app/core/__pycache__/schema.cpython-313.pyc
index 8319162..201237b 100644
Binary files a/pricewatch/app/core/__pycache__/schema.cpython-313.pyc and b/pricewatch/app/core/__pycache__/schema.cpython-313.pyc differ
diff --git a/pricewatch/app/stores/__pycache__/price_parser.cpython-313.pyc b/pricewatch/app/stores/__pycache__/price_parser.cpython-313.pyc
index 0725aba..fdd0e5c 100644
Binary files a/pricewatch/app/stores/__pycache__/price_parser.cpython-313.pyc and b/pricewatch/app/stores/__pycache__/price_parser.cpython-313.pyc differ
diff --git a/pricewatch/app/stores/cdiscount/__pycache__/store.cpython-313.pyc b/pricewatch/app/stores/cdiscount/__pycache__/store.cpython-313.pyc
old mode 100755
new mode 100644
index b0e5f6e..06e55b2
Binary files a/pricewatch/app/stores/cdiscount/__pycache__/store.cpython-313.pyc and b/pricewatch/app/stores/cdiscount/__pycache__/store.cpython-313.pyc differ
diff --git a/pricewatch/app/stores/cdiscount/store.py b/pricewatch/app/stores/cdiscount/store.py
index be8bdc9..389973e 100755
--- a/pricewatch/app/stores/cdiscount/store.py
+++ b/pricewatch/app/stores/cdiscount/store.py
@@ -112,7 +112,7 @@ class CdiscountStore(BaseStore):
currency = self._extract_currency(soup, debug_info)
stock_status = self._extract_stock(soup, debug_info)
images = self._extract_images(soup, debug_info)
- category = self._extract_category(soup, debug_info)
+ category = self._extract_category(soup, debug_info, url)
specs = self._extract_specs(soup, debug_info)
description = self._extract_description(soup, debug_info)
msrp = self._extract_msrp(soup, debug_info)
@@ -180,7 +180,7 @@ class CdiscountStore(BaseStore):
return None
def _extract_price(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
- """Extrait le prix."""
+ """Extrait le prix (DOM puis JSON-LD)."""
selectors = self.get_selector("price", [])
if isinstance(selectors, str):
selectors = [selectors]
@@ -188,16 +188,33 @@ class CdiscountStore(BaseStore):
for selector in selectors:
elements = soup.select(selector)
for element in elements:
- # Attribut content (schema.org) ou texte
price_text = element.get("content") or element.get_text(strip=True)
-
price = parse_price_text(price_text)
if price is not None:
return price
+ price = self._extract_price_from_json_ld(soup)
+ if price is not None:
+ return price
+
debug.errors.append("Prix non trouvé")
return None
+    def _extract_price_from_json_ld(self, soup: BeautifulSoup) -> Optional[float]:
+        """Read the offer price from the page's JSON-LD product data.
+
+        When ``offers`` is a list only its first element is considered.
+        String prices go through ``parse_price_text``; ``int``/``float``
+        prices are converted with ``float``. Anything else yields ``None``.
+        """
+        offers = self._find_product_ld(soup).get("offers")
+        if isinstance(offers, list):
+            if not offers:
+                return None
+            offers = offers[0]
+        if not isinstance(offers, dict):
+            return None
+        raw_price = offers.get("price")
+        if isinstance(raw_price, str):
+            return parse_price_text(raw_price)
+        if isinstance(raw_price, (int, float)):
+            # Numeric JSON values are returned as floats, decimals preserved.
+            return float(raw_price)
+        return None
+
def _extract_msrp(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[float]:
"""Extrait le prix conseille."""
selectors = [
@@ -205,6 +222,8 @@ class CdiscountStore(BaseStore):
".price__old",
".c-price__strike",
".price-strike",
+ "div[data-e2e='strikedPrice']",
+ "div.SecondaryPrice-price",
]
for selector in selectors:
element = soup.select_one(selector)
@@ -212,6 +231,19 @@ class CdiscountStore(BaseStore):
price = parse_price_text(element.get_text(strip=True))
if price is not None:
return price
+ # Fallback: JSON-LD (offers price + promotions)
+ product_ld = self._find_product_ld(soup)
+ offer = product_ld.get("offers")
+ if isinstance(offer, dict):
+ price = offer.get("price")
+ if isinstance(price, str):
+ candidate = parse_price_text(price)
+ elif isinstance(price, (int, float)):
+ candidate = float(price)
+ else:
+ candidate = None
+ if candidate is not None:
+ return candidate
return None
def _extract_currency(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
@@ -288,7 +320,7 @@ class CdiscountStore(BaseStore):
return list(dict.fromkeys(images)) # Préserver l’ordre
- def _extract_category(self, soup: BeautifulSoup, debug: DebugInfo) -> Optional[str]:
+ def _extract_category(self, soup: BeautifulSoup, debug: DebugInfo, url: str) -> Optional[str]:
"""Extrait la catégorie depuis les breadcrumbs."""
selectors = self.get_selector("category", [])
if isinstance(selectors, str):
@@ -310,6 +342,54 @@ class CdiscountStore(BaseStore):
if parts:
return parts[-1]
+ if title := self._extract_category_from_breadcrumbs(soup):
+ return title
+ return self._extract_category_from_url(url)
+
+    def _extract_category_from_breadcrumbs(self, soup: BeautifulSoup) -> Optional[str]:
+        """Return a category name taken from a JSON-LD ``BreadcrumbList``.
+
+        The breadcrumb elements are walked from last to first, skipping the
+        element carrying the highest integer ``position``; the first
+        non-empty name found is returned, stripped.
+
+        A breadcrumb element may carry its label either inside a nested
+        ``item`` object (``{"item": {"name": ...}}``) or directly on the
+        element (``{"name": ..., "item": "<url>"}``); both shapes are
+        supported, the nested one taking precedence.
+
+        Returns:
+            The category label, or ``None`` when no usable breadcrumb exists.
+        """
+        for entry in self._extract_json_ld_entries(soup):
+            if not isinstance(entry, dict):
+                continue
+            if entry.get("@type") != "BreadcrumbList":
+                continue
+            items = entry.get("itemListElement", [])
+            if not isinstance(items, list):
+                continue
+            positions = [
+                element["position"]
+                for element in items
+                if isinstance(element, dict) and isinstance(element.get("position"), int)
+            ]
+            max_pos = max(positions) if positions else None
+            for element in reversed(items):
+                if not isinstance(element, dict):
+                    continue
+                if max_pos is not None and element.get("position") == max_pos:
+                    continue
+                item = element.get("item")
+                # ``item`` is frequently a bare URL string; calling ``.get``
+                # on it would raise AttributeError, so only dicts are probed.
+                name = item.get("name") if isinstance(item, dict) else None
+                if not (isinstance(name, str) and name.strip()):
+                    name = element.get("name")
+                if isinstance(name, str):
+                    title = name.strip()
+                    if title:
+                        return title
+        return None
+
+    def _extract_category_from_url(self, url: str) -> Optional[str]:
+        """Derive a category label from the URL path.
+
+        Path segments are read in order until one starting with ``f-`` or
+        ``p-`` is met; the last segment seen before that point is returned
+        with hyphens turned into spaces and title-cased.
+        """
+        if not url:
+            return None
+        last_segment = None
+        for part in urlparse(url).path.split("/"):
+            if not part:
+                continue
+            if part.startswith("f-") or part.startswith("p-"):
+                break
+            last_segment = part
+        if last_segment is not None:
+            return last_segment.replace("-", " ").title()
         return None
def _extract_json_ld_entries(self, soup: BeautifulSoup) -> list[dict]:
diff --git a/pricewatch/app/stores/price_parser.py b/pricewatch/app/stores/price_parser.py
index 2947944..3ca48e0 100644
--- a/pricewatch/app/stores/price_parser.py
+++ b/pricewatch/app/stores/price_parser.py
@@ -17,6 +17,18 @@ def parse_price_text(text: str) -> Optional[float]:
if not text:
return None
+ euro_suffix = re.search(r"([0-9 .,]+)\s*€\s*(\d{2})\b", text)
+ if euro_suffix:
+ integer_part = euro_suffix.group(1)
+ decimal_part = euro_suffix.group(2)
+ integer_clean = re.sub(r"[^\d]", "", integer_part)
+ if integer_clean:
+ cleaned_decimal = f"{integer_clean}.{decimal_part}"
+ try:
+ return float(cleaned_decimal)
+ except ValueError:
+ pass
+ # Fallback to original replacement if suffix logic fails
text = re.sub(r"(\d)\s*€\s*(\d)", r"\1,\2", text)
cleaned = text.replace("\u00a0", " ").replace("\u202f", " ").replace("\u2009", " ")
cleaned = "".join(ch for ch in cleaned if ch.isdigit() or ch in ".,")
diff --git a/scripts/missing_data_by_store.py b/scripts/missing_data_by_store.py
new file mode 100644
index 0000000..1397b35
--- /dev/null
+++ b/scripts/missing_data_by_store.py
@@ -0,0 +1,121 @@
+import os
+from typing import Dict, Optional
+
+import psycopg2
+from psycopg2.extras import RealDictCursor
+
+
+def _env_str(name: str, default: str) -> str:
+    """Return the environment variable ``name``, or ``default`` when unset."""
+    return os.getenv(name, default)
+
+
+def _env_int(name: str, default: int) -> int:
+    """Return the environment variable ``name`` as an int.
+
+    Falls back to ``default`` when the variable is unset or its value is not
+    a valid integer (an empty string included).
+    """
+    raw_value = os.environ.get(name)
+    if raw_value is None:
+        raw_value = default
+    try:
+        parsed = int(raw_value)
+    except ValueError:
+        parsed = default
+    return parsed
+
+
+def get_connection():
+    """Open a PostgreSQL connection using the ``PW_DB_*`` environment variables.
+
+    Defaults target a local database named ``pricewatch``.
+    """
+    settings = {
+        "host": _env_str("PW_DB_HOST", "localhost"),
+        "port": _env_int("PW_DB_PORT", 5432),
+        "dbname": _env_str("PW_DB_NAME", "pricewatch"),
+        "user": _env_str("PW_DB_USER", "pricewatch"),
+        "password": _env_str("PW_DB_PASSWORD", "pricewatch"),
+    }
+    return psycopg2.connect(**settings)
+
+
+def gather(limit: Optional[int] = None):
+    """Fetch every product with its latest price row and presence flags.
+
+    Args:
+        limit: Maximum number of products to return. ``None`` (or ``0``)
+            means no limit.
+
+    Returns:
+        The rows returned by the query, as produced by a ``RealDictCursor``.
+        Besides the product columns, each row has ``has_image`` /
+        ``has_specs`` booleans and the ``price`` / ``stock_status`` of the
+        most recent ``price_history`` entry.
+    """
+    query = """
+        SELECT
+            COALESCE(p.source, 'unknown') AS source,
+            p.id,
+            p.reference,
+            p.title,
+            p.description,
+            p.category,
+            p.msrp,
+            EXISTS (
+                SELECT 1 FROM product_images WHERE product_id = p.id LIMIT 1
+            ) AS has_image,
+            EXISTS (
+                SELECT 1 FROM product_specs WHERE product_id = p.id LIMIT 1
+            ) AS has_specs,
+            ph.price,
+            ph.stock_status
+        FROM products p
+        LEFT JOIN LATERAL (
+            SELECT price, stock_status
+            FROM price_history
+            WHERE product_id = p.id
+            ORDER BY fetched_at DESC
+            LIMIT 1
+        ) ph ON TRUE
+        ORDER BY p.last_updated_at DESC
+    """
+    params = None
+    if limit:
+        # Bind the limit as a query parameter instead of formatting it into
+        # the SQL text.
+        query += " LIMIT %s"
+        params = (limit,)
+
+    conn = get_connection()
+    try:
+        # ``with conn`` only commits or rolls back the transaction; the
+        # connection is closed explicitly in ``finally``.
+        with conn:
+            with conn.cursor(cursor_factory=RealDictCursor) as cur:
+                cur.execute(query, params)
+                return cur.fetchall()
+    finally:
+        conn.close()
+
+
+def summarize(rows):
+    """Group products by store and list, per field, those missing a value.
+
+    Args:
+        rows: Mappings with at least ``source``, ``id``, ``reference`` and
+            ``title`` keys, plus the inspected fields.
+
+    Returns:
+        ``(fields, stores)`` where ``fields`` is the ordered list of
+        ``(field, label)`` pairs and ``stores`` maps each store name to
+        ``{"total": int, "details": {field: [product summaries]}}``.
+    """
+    fields = [
+        ("price", "Prix absent"),
+        ("stock_status", "Statut stock manquant"),
+        ("description", "Description manquante"),
+        ("category", "Catégorie manquante"),
+        ("msrp", "Prix conseillé absent"),
+        ("has_image", "Images absentes"),
+        ("has_specs", "Caractéristiques absentes"),
+    ]
+    boolean_flags = ("has_image", "has_specs")
+    stores: Dict[str, Dict[str, object]] = {}
+    for row in rows:
+        store_name = row["source"] or "unknown"
+        if store_name not in stores:
+            stores[store_name] = {
+                "total": 0,
+                "details": {name: [] for name, _ in fields},
+            }
+        bucket = stores[store_name]
+        bucket["total"] += 1
+        for name, _label in fields:
+            current = row.get(name)
+            if name in boolean_flags:
+                # Presence flags: any falsy value means "absent".
+                is_missing = not current
+            else:
+                is_missing = current in (None, "", [])
+            if not is_missing:
+                continue
+            bucket["details"][name].append(
+                {
+                    "id": row["id"],
+                    "reference": row["reference"],
+                    "title": row["title"] or "Sans titre",
+                }
+            )
+    return fields, stores
+
+
+def pretty_print(fields, stores):
+    """Print, per store, the count of products missing each field.
+
+    At most five example products are listed under each field.
+    """
+    for store, data in stores.items():
+        print(f"\n=== Store: {store} ({data['total']} produits) ===")
+        for field, label in fields:
+            missing = data["details"][field]
+            unit = len(missing)
+            print(f" {label}: {unit}")
+            for item in missing[:5]:
+                print(f" - [{item['id']}] {item['reference']} · {item['title']}")
+
+
+def main():
+    """Report missing data per store for up to 1000 products."""
+    fields, stores = summarize(gather(limit=1000))
+    pretty_print(fields, stores)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tests/stores/__pycache__/test_cdiscount_fixtures.cpython-313-pytest-9.0.2.pyc b/tests/stores/__pycache__/test_cdiscount_fixtures.cpython-313-pytest-9.0.2.pyc
old mode 100755
new mode 100644
index 08b8d28..63ef671
Binary files a/tests/stores/__pycache__/test_cdiscount_fixtures.cpython-313-pytest-9.0.2.pyc and b/tests/stores/__pycache__/test_cdiscount_fixtures.cpython-313-pytest-9.0.2.pyc differ
diff --git a/tests/stores/__pycache__/test_price_parser.cpython-313-pytest-9.0.2.pyc b/tests/stores/__pycache__/test_price_parser.cpython-313-pytest-9.0.2.pyc
index 757eca1..bcf5872 100644
Binary files a/tests/stores/__pycache__/test_price_parser.cpython-313-pytest-9.0.2.pyc and b/tests/stores/__pycache__/test_price_parser.cpython-313-pytest-9.0.2.pyc differ
diff --git a/tests/stores/test_cdiscount_fixtures.py b/tests/stores/test_cdiscount_fixtures.py
index c343e77..87b6a87 100755
--- a/tests/stores/test_cdiscount_fixtures.py
+++ b/tests/stores/test_cdiscount_fixtures.py
@@ -171,7 +171,25 @@ class TestCdiscountRealFixtures:
assert isinstance(snapshot.price, float)
assert snapshot.price > 0
# Le prix doit avoir maximum 2 décimales
- assert snapshot.price == round(snapshot.price, 2)
+ assert snapshot.price == round(snapshot.price, 2)
+
+    def test_parse_tuf608umrv004_price_value(self, store, fixture_tuf608umrv004):
+        """The parsed price must equal 1199.99."""
+        product_url = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
+        parsed = store.parse(fixture_tuf608umrv004, product_url)
+        assert parsed.price == 1199.99
+
+    def test_parse_tuf608umrv004_category_and_msrp(
+        self, store, fixture_tuf608umrv004
+    ):
+        """The ASUS fixture must yield a category and a recommended price."""
+        product_url = "https://www.cdiscount.com/informatique/.../f-10709-tuf608umrv004.html"
+        parsed = store.parse(fixture_tuf608umrv004, product_url)
+        assert parsed.category
+        assert "Ordinateur" in parsed.category or "Portable" in parsed.category
+        assert parsed.msrp is not None
+        # The comparison only applies when a price was extracted.
+        if parsed.price:
+            assert parsed.msrp >= parsed.price
def test_parse_a128902_price_format(self, store, fixture_a128902):
"""Parse fixture a128902 - le prix doit être un float valide."""
diff --git a/tests/stores/test_price_parser.py b/tests/stores/test_price_parser.py
index a54d98d..86889d1 100644
--- a/tests/stores/test_price_parser.py
+++ b/tests/stores/test_price_parser.py
@@ -27,3 +27,7 @@ def test_parse_price_without_decimal():
def test_parse_price_with_currency():
assert parse_price_text("EUR 1 259,00") == 1259.00
+
+
+def test_parse_price_with_cents_after_currency_symbol():
+    """Cents written after the euro sign ("1199 €99") are parsed as decimals."""
+    parsed = parse_price_text("1199 €99")
+    assert parsed == 1199.99
diff --git a/webui/src/App.vue b/webui/src/App.vue
index 5d4e047..a018793 100644
--- a/webui/src/App.vue
+++ b/webui/src/App.vue
@@ -9,11 +9,26 @@
PriceWatch
Vintage control deck
-
+
+
+
+
+
+ [{{ entry.time }}]
+ {{ LOG_ICONS[entry.level] }}
+ {{ entry.text }}
+
+
+
+
- FE v{{ frontendVersion }}
- BE v{{ backendVersion }}
-
+
-
-
-
-
-
-
+
{{ apiStatus }}
@@ -57,6 +83,25 @@