diff --git a/pipeline/beam_etl/tests/test_parse_raw_specs.py b/pipeline/beam_etl/tests/test_parse_raw_specs.py index 6f4b4c4..fe3b5ef 100644 --- a/pipeline/beam_etl/tests/test_parse_raw_specs.py +++ b/pipeline/beam_etl/tests/test_parse_raw_specs.py @@ -1,10 +1,153 @@ -from helpers import parse_raw_specs +"""Test the `parse_raw_specs` function and its helper `iter_parse`""" -def test_parse_raw_specs(): +import xml.etree.ElementTree as ET +from helpers import parse_raw_specs, iter_parse + + +def test_parse_raw_specs0(): + """Test an example XML string found in the sample data file""" + xml_str = """

Specifications

Suggested Age: 6 Years and Up

CPSC Choking Hazard Warnings: Choking_hazard_small_parts

TCIN: 81917300
UPC: 840391145528
Origin: imported

The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.

We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item\'s label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.

""" - expected = {"tcin": " 81917300", "origin": " imported"} assert parse_raw_specs(xml_str) == expected + + +def test_parse_raw_specs1(): + """Test an example XML string found in the sample data file""" + + xml_str = """ +

Specifications

Number of Pieces: 2

Weight: 1 pounds

Maximum Height: 33.5 inches

Minimum Height: 33.5 inches

Material: Resin

Battery: No Battery Used

TCIN: 86383979
UPC: 023271234080
Origin: imported

The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.

We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item\'s label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.

+ """ + + expected = { + "Battery": " No Battery Used", + "Material": " Resin", + "Maximum Height": " 33.5 inches", + "Minimum Height": " 33.5 inches", + "Number of Pieces": " 2", + "Origin": " imported", + "TCIN": " 86383979", + "UPC": " 023271234080", + "Weight": " 1 pounds", + } + assert iter_parse(ET.fromstring(xml_str)) == expected + + expected = { + "materials": " Resin", + "origin": " imported", + "packaging": " 2", + "tcin": " 86383979", + "weight": " 1 pounds", + } + assert parse_raw_specs(xml_str) == expected + + +def test_parse_raw_specs2(): + """Test an example XML string found in the sample data file""" + + xml_str = """ +

Specifications

Dimensions (Overall): 2.1 inches (H) x 9.0 inches (W) x 10.9 inches (D)

Party subtype: Party Card Holders

Material: Wood

TCIN: 82840486
UPC: 843128196602
Origin: imported
WARNING:⚠ This product can expose you to chemical(s) including Formaldehyde (gas), which is known to the State of California to cause cancer and birth defects or other reproductive harm. For more information go to P65Warnings.ca.gov. www.p65warnings.ca.gov

The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.

We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item\'s label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.

+ """ + + expected = { + "Dimensions (Overall)": " 2.1 inches (H) x 9.0 inches (W) x 10.9 inches (D)", + "Material": " Wood", + "Origin": " imported", + "Party subtype": " Party Card Holders", + "TCIN": " 82840486", + "UPC": " 843128196602", + } + assert iter_parse(ET.fromstring(xml_str)) == expected + + expected = { + "dimensions": " 2.1 inches (H) x 9.0 inches (W) x 10.9 inches (D)", + "materials": " Wood", + "origin": " imported", + "tcin": " 82840486", + } + assert parse_raw_specs(xml_str) == expected + + +def test_parse_raw_specs3(): + """Test an example XML string found in the sample data file""" + + xml_str = """ +

Specifications

Contains: Does Not Contain Any of the 8 Major Allergens

Dietary Needs: Gluten Free

Form: Pieces

State of Readiness: Ready to Eat

Package Quantity: 1

Net weight: 15.6 Ounces

TCIN: 54571204
UPC: 022000279729
Item Number (DPCI): 055-02-1211
Origin: Made in the USA or Imported
Grocery Disclaimer:
Content on this site is for reference purposes only. Target does not represent or warrant that the nutrition, ingredient, allergen and other product information on our Web or Mobile sites are accurate or complete, since this information comes from the product manufacturers. On occasion, manufacturers may improve or change their product formulas and update their labels. We recommend that you do not rely solely on the information presented on our Web or Mobile sites and that you review the product\'s label or contact the manufacturer directly if you have specific product concerns or questions. If you have specific healthcare concerns or questions about the products displayed, please contact your licensed healthcare professional for advice or answers. Any additional pictures are suggested servings only.
+ """ + + expected = { + "Contains": " Does Not Contain Any of the 8 Major Allergens", + "Dietary Needs": " Gluten Free", + "Form": " Pieces", + "Grocery Disclaimer": ( + " Content on this site is for reference purposes only. " + "Target does not represent or warrant that the " + "nutrition, ingredient, allergen and other product " + "information on our Web or Mobile sites are accurate or " + "complete, since this information comes from the " + "product manufacturers. On occasion, manufacturers may " + "improve or change their product formulas and update " + "their labels. We recommend that you do not rely " + "solely on the information presented on our Web or " + "Mobile sites and that you review the product's label " + "or contact the manufacturer directly if you have " + "specific product concerns or questions. If you have " + "specific healthcare concerns or questions about the " + "products displayed, please contact your licensed " + "healthcare professional for advice or answers. Any " + "additional pictures are suggested servings only." + ), + "Item Number (DPCI)": " 055-02-1211", + "Net weight": " 15.6 Ounces", + "Origin": " Made in the USA or Imported", + "Package Quantity": " 1", + "State of Readiness": " Ready to Eat", + "TCIN": " 54571204", + "UPC": " 022000279729", + } + assert iter_parse(ET.fromstring(xml_str)) == expected + + expected = { + "origin": " Made in the USA or Imported", + "packaging": " 1", + "tcin": " 54571204", + } + assert parse_raw_specs(xml_str) == expected + + +def test_parse_raw_specs4(): + """Test an example XML string found in the sample data file""" + + xml_str = """ +

Specifications

Suggested Age: 22 Years and Up

Number of Pages: 247

Genre: Technology

Sub-Genre: Agriculture

Format: Hardcover

Publisher: States Academic Press

Book theme: Agronomy, Crop Science

Author: Mark Taylor

Language: English

Street Date: March 8, 2022
TCIN: 84917947
UPC: 9781639893843
Item Number (DPCI): 247-34-8919
Origin: Made in the USA or Imported
+ """ + + expected = { + "Author": " Mark Taylor", + "Book theme": " Agronomy, Crop Science", + "Format": " Hardcover", + "Genre": " Technology", + "Item Number (DPCI)": " 247-34-8919", + "Language": " English", + "Number of Pages": " 247", + "Origin": " Made in the USA or Imported", + "Publisher": " States Academic Press", + "Street Date": " March 8, 2022", + "Sub-Genre": " Agriculture", + "Suggested Age": " 22 Years and Up", + "TCIN": " 84917947", + "UPC": " 9781639893843", + } + assert iter_parse(ET.fromstring(xml_str)) == expected + + expected = {"origin": " Made in the USA or Imported", "tcin": " 84917947"} + assert parse_raw_specs(xml_str) == expected + + +def test_malformed_xml(): + """Test a malformed XML string""" + xml_str = "
foo" + assert parse_raw_specs(xml_str) == {}