diff --git a/pipeline/beam_etl/helpers.py b/pipeline/beam_etl/helpers.py
deleted file mode 100644
index 1b6f57e..0000000
--- a/pipeline/beam_etl/helpers.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import logging
-import xml.etree.ElementTree as ET
-
-from typing import Dict
-
-
-def iter_parse(root: ET.Element) -> Dict[str, str]:
- """Recursively parse the XML tree into a dictionary Each key/value pair is
- inside its own <div> tag and the key inside a <b> tag.
- The fields that I believe are compulsory (TCIN, UPC and Origin) are only
- nested one level deep, while the rest of fields seem to be always nested
- two levels deep. But parsing it recursively helps generalise both cases."""
-
- spec_dict = {}
- for child in root:
- if child.tag == "div":
- if "b" in [x.tag for x in child]:
- key, *values = child.itertext()
- key = key.strip(":")
- value = "".join(values).strip(":")
- spec_dict[key] = value
- else:
- spec_dict.update(iter_parse(child))
- return spec_dict
-
-
-def parse_raw_specs(raw_specs: str) -> Dict[str, str]:
- """Parse a raw specifications XML string into a dictionary.
- This involves first recursively parsing the XML tree and then renaming
- the key values"""
-
- fields_mapping = {
- "Material": "materials",
- "Package Quantity": "packaging",
- "Number of Pieces": "packaging",
- "Dimensions (Overall)": "dimensions",
- "Dimensions": "dimensions",
- "Weight": "weight",
- "TCIN": "tcin",
- "Origin": "origin",
- }
-
- try:
- xml_root = ET.fromstring(raw_specs)
- except ET.ParseError:
- logging.error("error parsing xml string: \n%s", raw_specs)
- return {}
-
- parsed = iter_parse(xml_root)
- specs_dict = {
- fields_mapping[key]: value
- for key, value in parsed.items()
- if key in fields_mapping
- }
- return specs_dict