"""Functions to parse the materials string into a list of materials that we
can score later.

Scoreable materials are:
    * metal
    * wood
    * glass
    * resin
    * fabric
    * plastic

I will try to match materials found in the specification to one of the
scoreable ones. However, I found a few, like "stoneware" and "cardboard",
that I can't find there; they'll have to remain unscored for now.
"""
from typing import Optional, List
import re

# Keyword -> category. Keys are matched as substrings, in insertion order, so
# more specific keywords (e.g. "hardwood") are listed before generic ones
# (e.g. "wood"); the first keyword found wins.
MATERIAL_MAPPING = {
    "polyester": "fabric",
    "spandex": "fabric",
    "leather": "fabric",
    "crystal": "glass",
    "hardwood": "wood",
    "plywood": "wood",
    "mdf": "wood",
    "wood": "wood",
    "steel": "metal",
    "polycarbonate": "plastic",
    "polypropylene": "plastic",
    "pvc": "plastic",
    "resin": "plastic",
    "stoneware": "stoneware",
    "cardboard": "cardboard",
    "paper": "cardboard",
}

# Matches one innermost "(...)" group; applied repeatedly to unwrap nesting.
_PAREN_ANNOTATION_RE = re.compile(r"\([^()]*\)")
# Matches an integer or decimal amount with an optional trailing percent sign.
_AMOUNT_RE = re.compile(r"\d+(?:\.\d+)?%?")


def _strip_paren_annotations(text: str) -> str:
    """Remove every balanced parenthesised annotation from ``text``.

    Each annotation is removed on its own (rather than everything between the
    first "(" and the last ")"), and the substitution is repeated until the
    string stops changing so that nested annotations disappear as well.
    Unbalanced parentheses are left untouched.
    """
    previous = None
    while previous != text:
        previous = text
        text = _PAREN_ANNOTATION_RE.sub("", text)
    return text


def material_classifier(material: str) -> str:
    """Map a material name to one of the categories in MATERIAL_MAPPING.

    There's a fair amount of variation in the names, so this just checks
    whether a keyword identifying a material is contained in the name. The
    comparison ignores case, and the first keyword found (in mapping order)
    decides the category.

    If no keyword matches, the input string is returned unchanged.
    """
    lowered = material.lower()
    for keyword, category in MATERIAL_MAPPING.items():
        if keyword in lowered:
            return category
    return material


def clean_material_name(material: str) -> str:
    """Normalise a single material name.

    The cleaning consists of:
        * removing annotations found inside parentheses
        * removing amounts and percentages (including decimals like "12.5%")
        * stripping surrounding whitespace and lowercasing

    so that everything is more homogeneous.
    """
    no_paren_annotations = _strip_paren_annotations(material)
    no_amounts = _AMOUNT_RE.sub("", no_paren_annotations)
    return no_amounts.strip().lower()


def parse_materials(materials: Optional[str]) -> Optional[List[str]]:
    """Parse a comma-separated materials string, as specified in
    raw_specifications, into a list of standardized material names.

    Returns None when ``materials`` is None; otherwise returns a sorted list
    of unique materials for more consistency. Entries that are empty after
    cleaning (e.g. from a trailing comma or an amount-only fragment) are
    dropped.
    """
    if materials is None:
        return None

    # Annotations may themselves contain commas, e.g.
    # "Fabric (80% cotton, 20% polyester)", so they must be removed before
    # splitting or the split would cut through the parentheses.
    without_annotations = _strip_paren_annotations(materials)

    found = set()
    for fragment in without_annotations.split(","):
        cleaned = clean_material_name(fragment)
        if cleaned:
            found.add(material_classifier(cleaned))
    return sorted(found)