feat: remove older file
parent
f2a996d42e
commit
b018abe00e
|
@ -1,55 +0,0 @@
|
||||||
import logging
|
|
||||||
import xml.etree.ElementTree as ET
|
|
||||||
|
|
||||||
from typing import Dict
|
|
||||||
|
|
||||||
|
|
||||||
def iter_parse(root: ET.Element) -> Dict[str, str]:
    """Recursively collect key/value pairs from an XML tree into a dict.

    Each key/value pair is expected to live inside its own ``<div>`` tag,
    with the key wrapped in a ``<b>`` tag. According to the original
    author's notes, some fields (TCIN, UPC and Origin) are nested one level
    deep while the rest are nested two levels deep; recursing into every
    ``<div>`` that has no direct ``<b>`` child covers both layouts.

    Args:
        root: Element whose direct children are scanned. Children whose tag
            is not ``div`` are ignored.

    Returns:
        Mapping of key text to value text. Leading/trailing colons are
        stripped from both; surrounding whitespace is kept as-is. If the
        same key occurs more than once, the last occurrence wins.
    """
    spec_dict: Dict[str, str] = {}
    for child in root:
        if child.tag != "div":
            continue

        if not any(sub.tag == "b" for sub in child):
            # No <b> directly inside this <div>: treat it as a wrapper and
            # look for key/value pairs further down.
            spec_dict.update(iter_parse(child))
            continue

        texts = list(child.itertext())
        if not texts:
            # A <div> with an empty <b/> and no other text has nothing to
            # unpack; skip it instead of raising ValueError.
            continue

        # The first text fragment is taken as the key, everything after it
        # is concatenated into the value.
        key, *values = texts
        spec_dict[key.strip(":")] = "".join(values).strip(":")
    return spec_dict
|
|
||||||
|
|
||||||
|
|
||||||
def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
    """Turn a raw specifications XML string into a dictionary.

    The XML tree is first flattened into key/value pairs with
    ``iter_parse``; the recognised keys are then renamed to their output
    names and every other key is dropped.

    Args:
        raw_specs: XML document as a string.

    Returns:
        Mapping of renamed keys to their values, or an empty dict when the
        XML cannot be parsed (the failure is logged).
    """
    try:
        tree_root = ET.fromstring(raw_specs)
    except ET.ParseError:
        logging.error("error parsing xml string: \n%s", raw_specs)
        return {}

    # Source label -> output key. Several labels share one output key; when
    # more than one of them is present, the one parsed last wins.
    renamed_keys = {
        "Material": "materials",
        "Package Quantity": "packaging",
        "Number of Pieces": "packaging",
        "Dimensions (Overall)": "dimensions",
        "Dimensions": "dimensions",
        "Weight": "weight",
        "TCIN": "tcin",
        "Origin": "origin",
    }

    result = {}
    for label, text in iter_parse(tree_root).items():
        target = renamed_keys.get(label)
        if target is not None:
            result[target] = text
    return result
|
|
Loading…
Reference in New Issue