refactor: put helpers in a separate folder
parent
0b58d47acc
commit
b3069d4ca2
|
@ -0,0 +1,55 @@
|
|||
import logging
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
from typing import Dict
|
||||
|
||||
|
||||
def iter_parse(root: ET.Element) -> Dict[str, str]:
    """Recursively parse the XML tree into a dictionary.

    Each key/value pair is inside its own <div> tag, with the key inside a
    <b> tag. The fields believed to be compulsory (TCIN, UPC and Origin) are
    nested only one level deep, while the remaining fields appear to be
    nested two levels deep; walking the tree recursively handles both cases.

    Args:
        root: Element whose child <div> elements are scanned.

    Returns:
        Mapping of key text to value text. Colons are stripped from both
        ends of the key and of the value; surrounding whitespace is kept
        as found in the document. Children that are not <div> elements
        are ignored.
    """
    spec_dict: Dict[str, str] = {}
    for child in root:
        if child.tag != "div":
            continue

        if not any(grandchild.tag == "b" for grandchild in child):
            # A <div> without a <b> is only a wrapper: the key/value pairs
            # live one level further down.
            spec_dict.update(iter_parse(child))
            continue

        texts = list(child.itertext())
        if not texts:
            # A <div> such as <div><b/></div> carries no text at all;
            # unpacking it would raise ValueError, so skip it instead.
            continue

        # The first text fragment is the key, everything after it the value.
        key, *values = texts
        spec_dict[key.strip(":")] = "".join(values).strip(":")
    return spec_dict
|
||||
|
||||
|
||||
def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
    """Turn a raw specifications XML string into a dictionary.

    The XML tree is parsed recursively with ``iter_parse`` and the
    resulting keys are then renamed through a fixed mapping; keys that are
    not in the mapping are dropped.

    Args:
        raw_specs: XML document as a string.

    Returns:
        Dictionary keyed by the renamed field names. An empty dictionary is
        returned (and an error is logged) when the string is not
        well-formed XML.
    """
    try:
        tree_root = ET.fromstring(raw_specs)
    except ET.ParseError:
        logging.error("error parsing xml string: \n%s", raw_specs)
        return {}

    # Source label -> output field name. Several labels may share a target;
    # in that case the label encountered last in the document wins.
    renames = {
        "Material": "materials",
        "Package Quantity": "packaging",
        "Number of Pieces": "packaging",
        "Dimensions (Overall)": "dimensions",
        "Dimensions": "dimensions",
        "Weight": "weight",
        "TCIN": "tcin",
        "Origin": "origin",
    }

    renamed: Dict[str, str] = {}
    for label, text in iter_parse(tree_root).items():
        target = renames.get(label)
        if target is None:
            continue
        renamed[target] = text
    return renamed
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from helpers import parse_raw_specs, iter_parse
|
||||
from helpers.parse_xml import parse_raw_specs, iter_parse
|
||||
|
||||
|
||||
def test_parse_raw_specs0():
|
Loading…
Reference in New Issue