tag.
- The fields that I believe are compulsory (TCIN, UPC and Origin)
- are only nested one level deep, while the rest of fields seem
- to be always nested two levels deep. But parsing it recursively
- helps generalise both cases."""
+
+def iter_parse(root: ET.Element) -> Dict[str, str]:
+ """Recursively parse the XML tree into a dictionary. Each key/value pair is
+ inside its own tag and the key inside a tag.
+ The fields that I believe are compulsory (TCIN, UPC and Origin) are only
+ nested one level deep, while the rest of fields seem to be always nested
+ two levels deep. But parsing it recursively helps generalise both cases."""
spec_dict = {}
for child in root:
@@ -22,10 +23,11 @@ def iter_parse(root: ET.Element) -> Dict[str,str]:
spec_dict.update(iter_parse(child))
return spec_dict
-def parse_raw_specs(raw_specs: str) -> Dict[str,str]:
- """Parse a raw specifications XML string into a dictionary
- This involves first recursively parsing the XML tree and then
- renaming the key values"""
+
+def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
+ """Parse a raw specifications XML string into a dictionary.
+ This involves first recursively parsing the XML tree and then renaming
+ the keys."""
fields_mapping = {
"Material": "materials",
@@ -37,7 +39,13 @@ def parse_raw_specs(raw_specs: str) -> Dict[str,str]:
"TCIN": "tcin",
"Origin": "origin",
}
- xml_root = ET.fromstring(raw_specs)
+
+ try:
+ xml_root = ET.fromstring(raw_specs)
+ except ET.ParseError:
+ logging.error("error parsing xml string: \n%s", raw_specs)
+ return {}
+
parsed = iter_parse(xml_root)
specs_dict = {
fields_mapping[key]: value