doc: small doc added

main
Ricard Illa 2023-06-22 17:36:49 +02:00
parent 050323d583
commit 9c19835746
1 changed files with 23 additions and 13 deletions

View File

@ -6,6 +6,16 @@ it requires more effort to parse.
First I parse the xml into a dictionary. Then, I standardize the found keys into First I parse the xml into a dictionary. Then, I standardize the found keys into
some keys I expect. some keys I expect.
Notes:
Looks like "dimensions" can be "Dimensions (Overall)", "Dimensions" or other
things like "Assembled Dimensions" or "Piece X Dimensions". But these latter two
options are incomplete (lack the height), harder to parse and rare enough that
I'll just drop them.
Package Quantity and Number of Pieces are never found together. I will assume
they refer to the same thing.
""" """
import logging import logging
@ -13,6 +23,17 @@ import xml.etree.ElementTree as ET
from typing import Dict from typing import Dict
# Maps the keys found in the parsed XML specs to the standardized key names
# used in the output dictionary. Parsed keys that are not listed here are
# dropped by parse_raw_specs. Several source keys may share one target:
# "Package Quantity" and "Number of Pieces" both become "packaging", and
# "Dimensions (Overall)" and "Dimensions" both become "dimensions".
FIELDS_MAPPING = {
"Material": "materials",
"Package Quantity": "packaging",
"Number of Pieces": "packaging",
"Dimensions (Overall)": "dimensions",
"Dimensions": "dimensions",
"Weight": "weight",
"TCIN": "tcin",
"Origin": "origin",
}
def iter_parse(root: ET.Element) -> Dict[str, str]: def iter_parse(root: ET.Element) -> Dict[str, str]:
"""Recursively parse the XML tree into a dictionary Each key/value pair is """Recursively parse the XML tree into a dictionary Each key/value pair is
@ -39,17 +60,6 @@ def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
This involves first recursively parsing the XML tree and then renaming This involves first recursively parsing the XML tree and then renaming
the key values""" the key values"""
fields_mapping = {
"Material": "materials",
"Package Quantity": "packaging",
"Number of Pieces": "packaging",
"Dimensions (Overall)": "dimensions",
"Dimensions": "dimensions",
"Weight": "weight",
"TCIN": "tcin",
"Origin": "origin",
}
try: try:
xml_root = ET.fromstring(raw_specs) xml_root = ET.fromstring(raw_specs)
except ET.ParseError: except ET.ParseError:
@ -58,8 +68,8 @@ def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
parsed = iter_parse(xml_root) parsed = iter_parse(xml_root)
specs_dict = { specs_dict = {
fields_mapping[key]: value FIELDS_MAPPING[key]: value
for key, value in parsed.items() for key, value in parsed.items()
if key in fields_mapping if key in FIELDS_MAPPING
} }
return specs_dict return specs_dict