From 691b8898a0c11a3a7f20023a3aee9eafd5556ea8 Mon Sep 17 00:00:00 2001 From: Ricard Illa Date: Thu, 22 Jun 2023 15:51:47 +0200 Subject: [PATCH] docs: added docstring to parse_xml module --- pipeline/beam_etl/helpers/parse_xml.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pipeline/beam_etl/helpers/parse_xml.py b/pipeline/beam_etl/helpers/parse_xml.py index 1b6f57e..266c4bf 100644 --- a/pipeline/beam_etl/helpers/parse_xml.py +++ b/pipeline/beam_etl/helpers/parse_xml.py @@ -1,3 +1,13 @@ +"""Helper functions to parse the raw_specifications XML string into a +dictionary containing the values we are interested in. +I decided to parse raw_specifications instead of using the "specifications" +column because the raw data seemed to be cleaner and more consistent, even if +it requires more effort to parse. + +First I parse the XML into a dictionary. Then, I standardize the found keys into +some keys I expect. +""" + import logging import xml.etree.ElementTree as ET