From c479a6640596b32f8937f71c6f9b77cdf3421941 Mon Sep 17 00:00:00 2001 From: Ricard Illa Date: Sun, 25 Jun 2023 23:21:40 +0200 Subject: [PATCH] feat: implemented incremental model for scored products --- etl/helpers/data_io.py | 7 ++++--- etl/helpers/parse_row.py | 16 ++++++++++++---- 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/etl/helpers/data_io.py b/etl/helpers/data_io.py index 66893fc..e6ff3d7 100644 --- a/etl/helpers/data_io.py +++ b/etl/helpers/data_io.py @@ -77,8 +77,8 @@ class UpsertProductsToPg(WriteToPostgreSQL): colnames = ",".join(row.keys()) values = ",".join(["%s"] * len(row)) sql = f""" - INSERT INTO { self.table } ({ colnames }) - VALUES ({ values }) + INSERT INTO { self.table } ({ colnames }, ingestion_time) + VALUES ({ values }, NOW()::TIMESTAMP) ON CONFLICT ({ self.table_key }) DO UPDATE SET gtin13 = EXCLUDED.gtin13, @@ -86,7 +86,8 @@ class UpsertProductsToPg(WriteToPostgreSQL): materials = EXCLUDED.materials, packaging = EXCLUDED.packaging, origin = EXCLUDED.origin, - weight = EXCLUDED.weight + weight = EXCLUDED.weight, + ingestion_time = NOW()::TIMESTAMP WHERE { self.table }.primary_category != EXCLUDED.primary_category OR { self.table }.materials != EXCLUDED.materials OR diff --git a/etl/helpers/parse_row.py b/etl/helpers/parse_row.py index 2c8c8ce..afddd6a 100644 --- a/etl/helpers/parse_row.py +++ b/etl/helpers/parse_row.py @@ -21,6 +21,9 @@ class CleanRow(TypedDict): packaging: int origin: str weight: Optional[float] + height: Optional[float] + width: Optional[float] + depth: Optional[float] def parse_row(element: Dict[str, str]) -> Optional[CleanRow]: @@ -67,11 +70,13 @@ def parse_row(element: Dict[str, str]) -> Optional[CleanRow]: origin = clean_origin_name(specifications.get("origin")) weight = parse_weight(specifications.get("weight")) + + dimensions = parse_dimensions(specifications.get("dimensions")) + height = dimensions["height"] + width = dimensions["width"] + depth = dimensions["depth"] + if weight is None: - dimensions = parse_dimensions(specifications.get("dimensions")) - height = dimensions["height"] - width = dimensions["width"] - depth = dimensions["depth"] weight = dimensional_weight(height=height, width=width, depth=depth) return { @@ -82,4 +87,7 @@ def parse_row(element: Dict[str, str]) -> Optional[CleanRow]: "packaging": packaging, "origin": origin, "weight": weight, + "height": height, + "width": width, + "depth": depth, }