doc: small doc added
parent
050323d583
commit
9c19835746
|
@ -6,6 +6,16 @@ it requires more effort to parse.
|
||||||
|
|
||||||
First I parse the xml into a dictionary. Then, I standardize the found keys into
|
First I parse the xml into a dictionary. Then, I standardize the found keys into
|
||||||
some keys I expect.
|
some keys I expect.
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
Looks like "dimensions" can be "Dimensions (Overall)", "Dimensions" or other
|
||||||
|
things like "Assembled Dimensions" or "Piece X Dimensions". But these latter two
|
||||||
|
options are incomplete (lack the height), harder to parse and rare enough that
|
||||||
|
I'll just drop them.
|
||||||
|
|
||||||
|
Package Quantity and Number of Pieces are never found together. I will assume
|
||||||
|
they refer to the same thing.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
@ -13,6 +23,17 @@ import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
|
FIELDS_MAPPING = {
|
||||||
|
"Material": "materials",
|
||||||
|
"Package Quantity": "packaging",
|
||||||
|
"Number of Pieces": "packaging",
|
||||||
|
"Dimensions (Overall)": "dimensions",
|
||||||
|
"Dimensions": "dimensions",
|
||||||
|
"Weight": "weight",
|
||||||
|
"TCIN": "tcin",
|
||||||
|
"Origin": "origin",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def iter_parse(root: ET.Element) -> Dict[str, str]:
|
def iter_parse(root: ET.Element) -> Dict[str, str]:
|
||||||
"""Recursively parse the XML tree into a dictionary Each key/value pair is
|
"""Recursively parse the XML tree into a dictionary Each key/value pair is
|
||||||
|
@ -39,17 +60,6 @@ def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
|
||||||
This involves first recursively parsing the XML tree and then renaming
|
This involves first recursively parsing the XML tree and then renaming
|
||||||
the key values"""
|
the key values"""
|
||||||
|
|
||||||
fields_mapping = {
|
|
||||||
"Material": "materials",
|
|
||||||
"Package Quantity": "packaging",
|
|
||||||
"Number of Pieces": "packaging",
|
|
||||||
"Dimensions (Overall)": "dimensions",
|
|
||||||
"Dimensions": "dimensions",
|
|
||||||
"Weight": "weight",
|
|
||||||
"TCIN": "tcin",
|
|
||||||
"Origin": "origin",
|
|
||||||
}
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
xml_root = ET.fromstring(raw_specs)
|
xml_root = ET.fromstring(raw_specs)
|
||||||
except ET.ParseError:
|
except ET.ParseError:
|
||||||
|
@ -58,8 +68,8 @@ def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
|
||||||
|
|
||||||
parsed = iter_parse(xml_root)
|
parsed = iter_parse(xml_root)
|
||||||
specs_dict = {
|
specs_dict = {
|
||||||
fields_mapping[key]: value
|
FIELDS_MAPPING[key]: value
|
||||||
for key, value in parsed.items()
|
for key, value in parsed.items()
|
||||||
if key in fields_mapping
|
if key in FIELDS_MAPPING
|
||||||
}
|
}
|
||||||
return specs_dict
|
return specs_dict
|
||||||
|
|
Loading…
Reference in New Issue