dayrize-usecase/etl/src/helpers/parsers/dimensions.py

59 lines
1.8 KiB
Python

"""Functions to parse the dimensions string into a dictionary that represents
those dimensions in cm
"""
import logging
from typing import Dict, Optional
import re
from helpers.parsers.misc import convert_units
# Conversion factor per supported unit name, with "cm" as the base unit (1).
# Handed to convert_units by parse_dimensions; presumably used as a
# multiplier to obtain centimetres -- confirm against convert_units.
UNIT_CONVERSIONS = {
    "inches": 2.54,
    "feet": 30.48,
    "cm": 1,
}
def parse_dimensions_measure(dimensions: str, measure: str) -> Optional[Dict]:
    """Parse a single measurement out of a dimensions string using a regex.

    The expected form is ``1.2 inches (W)``: a numeric value, a unit made of
    letters, and the measure label in parentheses.

    Args:
        dimensions: Free-text dimensions string to search.
        measure: Label expected inside the parentheses, e.g. ``"W"``. It is
            interpolated into the regex unescaped.

    Returns:
        ``{"value": <float>, "unit": <lower-cased unit>}`` for the first
        match, or ``None`` when nothing matches or the matched value cannot
        be converted to a float (in which case an error is logged).
    """
    expr = rf"(?P<value>\d*[.,]?\d*)\s+(?P<unit>[a-zA-Z]*)\s+\({measure}\)"
    match = re.search(expr, dimensions)
    if match is None:
        return None

    match_value = match.group("value")
    try:
        value = float(match_value)
    except ValueError:
        # The pattern also admits "", "." and comma decimals such as "1,2",
        # none of which float() accepts. Report the offending text so the
        # log entry is actionable.
        logging.error(
            "could not parse value `%s` as a float for a dimension",
            match_value,
        )
        return None

    return {
        "value": value,
        "unit": match.group("unit").lower(),
    }
def parse_dimensions(dimensions: Optional[str]) -> Dict[str, Optional[float]]:
    """Split a dimensions string into height, width and depth entries.

    Each of the labels ``H``, ``W`` and ``D`` is looked up with
    ``parse_dimensions_measure``; every measurement found is then passed to
    ``convert_units`` together with ``UNIT_CONVERSIONS`` (presumably yielding
    centimetres, per the module docstring -- confirm against convert_units).

    Args:
        dimensions: Raw dimensions text, or ``None`` when unavailable.

    Returns:
        A dict with the keys ``"height"``, ``"width"`` and ``"depth"``. A
        value is ``None`` when the input is ``None`` or that measure could
        not be parsed; otherwise it is whatever ``convert_units`` returns.
    """
    labels_by_axis = {"height": "H", "width": "W", "depth": "D"}

    if dimensions is None:
        return dict.fromkeys(labels_by_axis)

    # Parse all three measures first, then convert, so every parse runs
    # before any conversion is attempted.
    measurements = {
        axis: parse_dimensions_measure(dimensions, label)
        for axis, label in labels_by_axis.items()
    }
    return {
        axis: (
            None
            if measurement is None
            else convert_units(**measurement, unit_conversions=UNIT_CONVERSIONS)
        )
        for axis, measurement in measurements.items()
    }