feat: added clean_origin_name
parent
b018abe00e
commit
050323d583
|
@ -0,0 +1,45 @@
|
||||||
|
"""Functions to clean and standardize the origin string
|
||||||
|
|
||||||
|
In the sample data, the only strings describing the origin I found were
|
||||||
|
* made in the usa
|
||||||
|
* imported
|
||||||
|
* assem usa w/foreign/dom. parts
|
||||||
|
* made in the usa or imported
|
||||||
|
|
||||||
|
Just with different combinations of lower and upper case and surrounded by more
|
||||||
|
or less whitespace.
|
||||||
|
|
||||||
|
I'll simplify it into "usa" for local products, "imported" for imported ones
|
||||||
|
and "mixed" for the rest (and I'll later score it as 0.5).
|
||||||
|
|
||||||
|
Any other value will be logged as an error and assigned "mixed"
|
||||||
|
|
||||||
|
There should not be missing origin strings, but if a "None" happens, set it to
|
||||||
|
`mixed` also.
|
||||||
|
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Optional
|
||||||
|
import logging
|
||||||
|
|
||||||
|
# Known origin labels (lower-cased, single-spaced) -> standardized category.
ORIGIN_MAPPING = {
    "assem usa w/foreign/dom. parts": "mixed",
    "imported": "imported",
    "made in the usa": "usa",
    "made in the usa or imported": "mixed",
}


def clean_origin_name(origin: Optional[str]) -> str:
    """Clean and standardize product origin.

    The input is lower-cased, stripped, and every internal run of whitespace
    (spaces, tabs, newlines) is collapsed to a single space before it is
    looked up in ``ORIGIN_MAPPING``.

    Args:
        origin: Raw origin label, or ``None`` when the label is missing.

    Returns:
        ``"usa"``, ``"imported"`` or ``"mixed"``. A missing or unrecognized
        label is logged as an error and reported as ``"mixed"``.
    """
    if origin is None:
        logging.error("origin string not found, setting it to `mixed`")
        return "mixed"

    # split() without arguments drops leading/trailing whitespace and splits
    # on any whitespace run, so re-joining with one space also normalizes
    # labels such as "made in  the usa" that differ only in inner spacing.
    origin = " ".join(origin.lower().split())
    try:
        return ORIGIN_MAPPING[origin]
    except KeyError:
        logging.error("could not parse origin `%s`, setting it to `mixed`", origin)
        return "mixed"
|
|
@ -0,0 +1,49 @@
|
||||||
|
"""Test the `clean_origin_name`"""
|
||||||
|
|
||||||
|
from helpers.origin import clean_origin_name
|
||||||
|
|
||||||
|
|
||||||
|
def test_none():
    """A missing (None) origin is reported as "mixed"."""
    cleaned = clean_origin_name(None)
    assert cleaned == "mixed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_unexpected():
    """An origin string that is not in the mapping is reported as "mixed"."""
    cleaned = clean_origin_name("foo")
    assert cleaned == "mixed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_origin_name0():
    """The mixed-case, padded "assembled in USA" label maps to "mixed"."""
    cleaned = clean_origin_name(" Assem USA w/foreign/dom. parts")
    assert cleaned == "mixed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_origin_name1():
    """A capitalized, padded "Imported" label maps to "imported"."""
    cleaned = clean_origin_name(" Imported")
    assert cleaned == "imported"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_origin_name2():
    """A capitalized, padded "Made in the USA" label maps to "usa"."""
    cleaned = clean_origin_name(" Made in the USA")
    assert cleaned == "usa"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_origin_name3():
    """A capitalized, padded "Made in the USA or Imported" maps to "mixed"."""
    cleaned = clean_origin_name(" Made in the USA or Imported")
    assert cleaned == "mixed"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_origin_name4():
    """A lower-case, padded "imported" label maps to "imported"."""
    cleaned = clean_origin_name(" imported")
    assert cleaned == "imported"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_origin_name5():
    """A padded "made in the USA" label with lower-case words maps to "usa"."""
    cleaned = clean_origin_name(" made in the USA")
    assert cleaned == "usa"
|
||||||
|
|
||||||
|
|
||||||
|
def test_clean_origin_name6():
    """A padded "made in the USA or imported" label maps to "mixed"."""
    cleaned = clean_origin_name(" made in the USA or imported")
    assert cleaned == "mixed"
|
||||||
|
|
Loading…
Reference in New Issue