feat: implemented incremental model for scored products

main
Ricard Illa 2023-06-25 23:21:40 +02:00
parent 73df832a6c
commit c479a66405
No known key found for this signature in database
GPG Key ID: F69A672B72E54902
2 changed files with 16 additions and 7 deletions

View File

@@ -77,8 +77,8 @@ class UpsertProductsToPg(WriteToPostgreSQL):
colnames = ",".join(row.keys()) colnames = ",".join(row.keys())
values = ",".join(["%s"] * len(row)) values = ",".join(["%s"] * len(row))
sql = f""" sql = f"""
INSERT INTO { self.table } ({ colnames }) INSERT INTO { self.table } ({ colnames }, ingestion_time)
VALUES ({ values }) VALUES ({ values }, NOW()::TIMESTAMP)
ON CONFLICT ({ self.table_key }) DO UPDATE ON CONFLICT ({ self.table_key }) DO UPDATE
SET SET
gtin13 = EXCLUDED.gtin13, gtin13 = EXCLUDED.gtin13,
@@ -86,7 +86,8 @@ class UpsertProductsToPg(WriteToPostgreSQL):
materials = EXCLUDED.materials, materials = EXCLUDED.materials,
packaging = EXCLUDED.packaging, packaging = EXCLUDED.packaging,
origin = EXCLUDED.origin, origin = EXCLUDED.origin,
weight = EXCLUDED.weight weight = EXCLUDED.weight,
ingestion_time = NOW()::TIMESTAMP
WHERE WHERE
{ self.table }.primary_category != EXCLUDED.primary_category OR { self.table }.primary_category != EXCLUDED.primary_category OR
{ self.table }.materials != EXCLUDED.materials OR { self.table }.materials != EXCLUDED.materials OR

View File

@@ -21,6 +21,9 @@ class CleanRow(TypedDict):
packaging: int packaging: int
origin: str origin: str
weight: Optional[float] weight: Optional[float]
height: Optional[float]
width: Optional[float]
depth: Optional[float]
def parse_row(element: Dict[str, str]) -> Optional[CleanRow]: def parse_row(element: Dict[str, str]) -> Optional[CleanRow]:
@@ -67,11 +70,13 @@ def parse_row(element: Dict[str, str]) -> Optional[CleanRow]:
origin = clean_origin_name(specifications.get("origin")) origin = clean_origin_name(specifications.get("origin"))
weight = parse_weight(specifications.get("weight")) weight = parse_weight(specifications.get("weight"))
if weight is None:
dimensions = parse_dimensions(specifications.get("dimensions")) dimensions = parse_dimensions(specifications.get("dimensions"))
height = dimensions["height"] height = dimensions["height"]
width = dimensions["width"] width = dimensions["width"]
depth = dimensions["depth"] depth = dimensions["depth"]
if weight is None:
weight = dimensional_weight(height=height, width=width, depth=depth) weight = dimensional_weight(height=height, width=width, depth=depth)
return { return {
@@ -82,4 +87,7 @@ def parse_row(element: Dict[str, str]) -> Optional[CleanRow]:
"packaging": packaging, "packaging": packaging,
"origin": origin, "origin": origin,
"weight": weight, "weight": weight,
"height": height,
"width": width,
"depth": depth,
} }