82 lines
2.2 KiB
Python
82 lines
2.2 KiB
Python
|
"""Functions to parse the materials string into a list of materials that we can
|
||
|
score later
|
||
|
|
||
|
Scoreable materials are:
|
||
|
* metal
|
||
|
* wood
|
||
|
* glass
|
||
|
* resin
|
||
|
* fabric
|
||
|
* plastic
|
||
|
|
||
|
I will try to map the materials found in the specification to one of the
|
||
|
scoreable ones. However, I found a few, like "stoneware" and "cardboard" that I
|
||
|
can't fit there; they'll have to remain unscored for now
|
||
|
"""
|
||
|
|
||
|
from typing import Optional, List
|
||
|
import re
|
||
|
|
||
|
# Keyword -> material category. material_classifier() walks this mapping in
# insertion order and returns the category of the first keyword contained in
# the material name, so the order of the entries is significant.
MATERIAL_MAPPING = {
    "polyester": "fabric",
    "spandex": "fabric",
    "leather": "fabric",
    "crystal": "glass",
    "hardwood": "wood",
    "plywood": "wood",
    "mdf": "wood",
    "wood": "wood",
    "steel": "metal",
    "polycarbonate": "plastic",
    "polypropylene": "plastic",
    "pvc": "plastic",
    "resin": "resin",
    "stoneware": "stoneware",
    "cardboard": "cardboard",
    "paper": "cardboard",
    # The remaining scoreable category names act as keywords too, so that
    # names such as "tempered glass" or "metal frame" are recognised (as was
    # already the case for "wood" and "resin"). They are deliberately placed
    # last: a name that matches one of the more specific keywords above keeps
    # the category it always had.
    "fabric": "fabric",
    "glass": "glass",
    "metal": "metal",
    "plastic": "plastic",
}
|
||
|
|
||
|
|
||
|
def material_classifier(material: str) -> str:
    """Map a material name onto the category of the first matching keyword.

    There's a fair amount of variation in the names, so no exact comparison
    is attempted: the keywords of MATERIAL_MAPPING are tried in the mapping's
    own order and a keyword matches as soon as it appears anywhere inside
    the given name (plain substring test).

    If no keyword matches, the input string itself is returned unchanged.
    """
    matching_categories = (
        category
        for keyword, category in MATERIAL_MAPPING.items()
        if keyword in material
    )
    # Lazily evaluated: stops at the first keyword found, otherwise falls
    # back to the name that was passed in.
    return next(matching_categories, material)
|
||
|
|
||
|
|
||
|
def clean_material_name(material: str) -> str:
    """Do some cleaning to a single material name:

    * remove annotations found inside parentheses; several separate
      annotations as well as nested ones are handled, and the text between
      two annotations is kept
    * remove amounts and percentages, including decimal values ("50.5%")
      and a percent sign separated from the number by spaces ("100 %")
    * collapse the runs of whitespace left behind by the removals
    * strip surrounding whitespace and keep the name as lowercase

    So that everything is more homogeneous. The result may be an empty
    string when the input holds nothing but annotations and amounts.
    """
    # Only innermost "(...)" groups are removed on each pass. A greedy
    # r"\(.*\)" would also swallow whatever sits between the first "(" and
    # the last ")"; repeating the innermost removal copes with nesting.
    cleaned = material
    while True:
        stripped = re.sub(r"\([^()]*\)", "", cleaned)
        if stripped == cleaned:
            break
        cleaned = stripped
    # The optional decimal part and the optional "%" (possibly preceded by
    # spaces) are consumed together with the digits so that no stray "." or
    # "%" survives in the name.
    cleaned = re.sub(r"\d+(?:\.\d+)?(?:\s*%)?", "", cleaned)
    # split()/join both trims the ends and squeezes inner whitespace runs.
    return " ".join(cleaned.split()).lower()
|
||
|
|
||
|
|
||
|
def parse_materials(materials: Optional[str]) -> Optional[List[str]]:
    """Parse a string of materials as specified in raw_specifications into a
    list of standardized material names.

    The string is expected to be a comma separated list of materials. Each
    entry is cleaned with clean_material_name() and then classified with
    material_classifier(). Entries that are empty once cleaned (empty input,
    trailing or doubled commas) are dropped instead of being reported as an
    "" material.

    Return None when the input is None, otherwise a sorted list of unique
    materials for more consistency.
    """
    if materials is None:
        return None

    # Parenthesised annotations may contain commas themselves, e.g.
    # "Wood (oak, pine), Steel", so they have to go before splitting;
    # otherwise the annotation is cut in half and "pine)" shows up as a
    # material. Innermost groups are removed repeatedly to handle nesting.
    without_annotations = materials
    while True:
        stripped = re.sub(r"\([^()]*\)", "", without_annotations)
        if stripped == without_annotations:
            break
        without_annotations = stripped

    cleaned_names = (
        clean_material_name(chunk) for chunk in without_annotations.split(",")
    )
    unique_materials = {
        material_classifier(name) for name in cleaned_names if name
    }
    return sorted(unique_materials)
|