feat: added clean_origin_name
parent
b018abe00e
commit
050323d583
|
@ -0,0 +1,45 @@
|
|||
"""Functions to clean and standardize the origin string
|
||||
|
||||
In the sample data, the only strings describing the origin that I found were:
|
||||
* made in the usa
|
||||
* imported
|
||||
* assem usa w/foreign/dom. parts
|
||||
* made in the usa or imported
|
||||
|
||||
These appear with different combinations of lower and upper case and are
|
||||
surrounded by varying amounts of whitespace.
|
||||
|
||||
I'll simplify it into "usa" for local products, "imported" for imported ones
|
||||
and "mixed" for the rest (and I'll later score it as 0.5).
|
||||
|
||||
Any other value will be logged as an error and assigned "mixed".
|
||||
|
||||
There should not be missing origin strings, but if a `None` value does occur,
|
||||
it is set to `mixed` as well.
|
||||
|
||||
"""
|
||||
|
||||
from typing import Optional
|
||||
import logging
|
||||
|
||||
# Named module logger: avoids the root-logger helpers (``logging.error``),
# which implicitly call ``logging.basicConfig()`` when the root logger has no
# handlers and cannot be filtered or configured per module.
logger = logging.getLogger(__name__)

# Known origin strings (lowercase, single-spaced) mapped to the simplified
# origin category: "usa", "imported" or "mixed".
ORIGIN_MAPPING = {
    "assem usa w/foreign/dom. parts": "mixed",
    "imported": "imported",
    "made in the usa": "usa",
    "made in the usa or imported": "mixed",
}


def clean_origin_name(origin: Optional[str]) -> str:
    """Clean and standardize a product origin string.

    The input is lowercased, stripped of surrounding whitespace, and any run
    of internal whitespace is collapsed to a single space before it is looked
    up in ``ORIGIN_MAPPING``.

    Args:
        origin: Raw origin string, or ``None`` when the origin is missing.

    Returns:
        The category from ``ORIGIN_MAPPING`` ("usa", "imported" or "mixed").
        ``None`` and any string not present in the mapping are logged as
        errors and yield "mixed".
    """
    if origin is None:
        logger.error("origin string not found, setting it to `mixed`")
        return "mixed"

    # ``split()`` with no arguments drops leading/trailing whitespace and
    # splits on any whitespace run, so joining with a single space also
    # normalizes doubled spaces, tabs and newlines inside the string.
    normalized = " ".join(origin.lower().split())

    try:
        return ORIGIN_MAPPING[normalized]
    except KeyError:
        logger.error(
            "could not parse origin `%s`, setting it to `mixed`", normalized
        )
        return "mixed"
|
|
@ -0,0 +1,49 @@
|
|||
"""Tests for `clean_origin_name`."""
|
||||
|
||||
from helpers.origin import clean_origin_name
|
||||
|
||||
|
||||
def test_none():
    """A missing origin (``None``) is mapped to `mixed`."""
    cleaned = clean_origin_name(None)
    assert cleaned == "mixed"
|
||||
|
||||
|
||||
def test_unexpected():
    """An origin string that is not a known value is mapped to `mixed`."""
    cleaned = clean_origin_name("foo")
    assert cleaned == "mixed"
|
||||
|
||||
|
||||
def test_clean_origin_name0():
    """Mixed-case "assembled in USA" input with leading space gives `mixed`."""
    raw_origin = " Assem USA w/foreign/dom. parts"
    assert clean_origin_name(raw_origin) == "mixed"
|
||||
|
||||
|
||||
def test_clean_origin_name1():
    """Capitalized "Imported" with leading space gives `imported`."""
    raw_origin = " Imported"
    assert clean_origin_name(raw_origin) == "imported"
|
||||
|
||||
|
||||
def test_clean_origin_name2():
    """Capitalized "Made in the USA" with leading space gives `usa`."""
    raw_origin = " Made in the USA"
    assert clean_origin_name(raw_origin) == "usa"
|
||||
|
||||
|
||||
def test_clean_origin_name3():
    """Capitalized "Made in the USA or Imported" gives `mixed`."""
    raw_origin = " Made in the USA or Imported"
    assert clean_origin_name(raw_origin) == "mixed"
|
||||
|
||||
|
||||
def test_clean_origin_name4():
    """Lowercase "imported" with leading space gives `imported`."""
    raw_origin = " imported"
    assert clean_origin_name(raw_origin) == "imported"
|
||||
|
||||
|
||||
def test_clean_origin_name5():
    """Lowercase "made in the USA" with leading space gives `usa`."""
    raw_origin = " made in the USA"
    assert clean_origin_name(raw_origin) == "usa"
|
||||
|
||||
|
||||
def test_clean_origin_name6():
    """Lowercase "made in the USA or imported" gives `mixed`."""
    raw_origin = " made in the USA or imported"
    assert clean_origin_name(raw_origin) == "mixed"
|
||||
|
Loading…
Reference in New Issue