diff --git a/pipeline/beam_etl/tests/test_parse_raw_specs.py b/pipeline/beam_etl/tests/test_parse_raw_specs.py
index 6f4b4c4..fe3b5ef 100644
--- a/pipeline/beam_etl/tests/test_parse_raw_specs.py
+++ b/pipeline/beam_etl/tests/test_parse_raw_specs.py
@@ -1,10 +1,153 @@
-from helpers import parse_raw_specs
+"""Test the `parse_raw_specs` function and its helper `iter_parse`"""
-def test_parse_raw_specs():
+import xml.etree.ElementTree as ET
+from helpers import parse_raw_specs, iter_parse
+
+
+def test_parse_raw_specs0():
+ """Test an example XML string found in the sample data file"""
+
xml_str = """
Specifications
Suggested Age: 6 Years and Up
CPSC Choking Hazard Warnings: Choking_hazard_small_parts
TCIN: 81917300
UPC: 840391145528
Origin: imported
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item\'s label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
"""
-
expected = {"tcin": " 81917300", "origin": " imported"}
assert parse_raw_specs(xml_str) == expected
+
+
+def test_parse_raw_specs1():
+ """Test an example XML string found in the sample data file"""
+
+ xml_str = """
+ Specifications
Maximum Height: 33.5 inches
Minimum Height: 33.5 inches
TCIN: 86383979
UPC: 023271234080
Origin: imported
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item\'s label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
+ """
+
+ expected = {
+ "Battery": " No Battery Used",
+ "Material": " Resin",
+ "Maximum Height": " 33.5 inches",
+ "Minimum Height": " 33.5 inches",
+ "Number of Pieces": " 2",
+ "Origin": " imported",
+ "TCIN": " 86383979",
+ "UPC": " 023271234080",
+ "Weight": " 1 pounds",
+ }
+ assert iter_parse(ET.fromstring(xml_str)) == expected
+
+ expected = {
+ "materials": " Resin",
+ "origin": " imported",
+ "packaging": " 2",
+ "tcin": " 86383979",
+ "weight": " 1 pounds",
+ }
+ assert parse_raw_specs(xml_str) == expected
+
+
+def test_parse_raw_specs2():
+ """Test an example XML string found in the sample data file"""
+
+ xml_str = """
+ Specifications
Dimensions (Overall): 2.1 inches (H) x 9.0 inches (W) x 10.9 inches (D)
Party subtype: Party Card Holders
TCIN: 82840486
UPC: 843128196602
Origin: imported
WARNING:⚠ This product can expose you to chemical(s) including Formaldehyde (gas), which is known to the State of California to cause cancer and birth defects or other reproductive harm. For more information go to P65Warnings.ca.gov. www.p65warnings.ca.gov
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item\'s label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
+ """
+
+ expected = {
+ "Dimensions (Overall)": " 2.1 inches (H) x 9.0 inches (W) x 10.9 inches (D)",
+ "Material": " Wood",
+ "Origin": " imported",
+ "Party subtype": " Party Card Holders",
+ "TCIN": " 82840486",
+ "UPC": " 843128196602",
+ }
+ assert iter_parse(ET.fromstring(xml_str)) == expected
+
+ expected = {
+ "dimensions": " 2.1 inches (H) x 9.0 inches (W) x 10.9 inches (D)",
+ "materials": " Wood",
+ "origin": " imported",
+ "tcin": " 82840486",
+ }
+ assert parse_raw_specs(xml_str) == expected
+
+
+def test_parse_raw_specs3():
+ """Test an example XML string found in the sample data file"""
+
+ xml_str = """
+ Specifications
Contains: Does Not Contain Any of the 8 Major Allergens
Dietary Needs: Gluten Free
State of Readiness: Ready to Eat
TCIN: 54571204
UPC: 022000279729
Item Number (DPCI): 055-02-1211
Origin: Made in the USA or Imported
Grocery Disclaimer:
Content on this site is for reference purposes only. Target does not represent or warrant that the nutrition, ingredient, allergen and other product information on our Web or Mobile sites are accurate or complete, since this information comes from the product manufacturers. On occasion, manufacturers may improve or change their product formulas and update their labels. We recommend that you do not rely solely on the information presented on our Web or Mobile sites and that you review the product\'s label or contact the manufacturer directly if you have specific product concerns or questions. If you have specific healthcare concerns or questions about the products displayed, please contact your licensed healthcare professional for advice or answers. Any additional pictures are suggested servings only.
+ """
+
+ expected = {
+ "Contains": " Does Not Contain Any of the 8 Major Allergens",
+ "Dietary Needs": " Gluten Free",
+ "Form": " Pieces",
+ "Grocery Disclaimer": (
+ " Content on this site is for reference purposes only. "
+ "Target does not represent or warrant that the "
+ "nutrition, ingredient, allergen and other product "
+ "information on our Web or Mobile sites are accurate or "
+ "complete, since this information comes from the "
+ "product manufacturers. On occasion, manufacturers may "
+ "improve or change their product formulas and update "
+ "their labels. We recommend that you do not rely "
+ "solely on the information presented on our Web or "
+ "Mobile sites and that you review the product's label "
+ "or contact the manufacturer directly if you have "
+ "specific product concerns or questions. If you have "
+ "specific healthcare concerns or questions about the "
+ "products displayed, please contact your licensed "
+ "healthcare professional for advice or answers. Any "
+ "additional pictures are suggested servings only."
+ ),
+ "Item Number (DPCI)": " 055-02-1211",
+ "Net weight": " 15.6 Ounces",
+ "Origin": " Made in the USA or Imported",
+ "Package Quantity": " 1",
+ "State of Readiness": " Ready to Eat",
+ "TCIN": " 54571204",
+ "UPC": " 022000279729",
+ }
+ assert iter_parse(ET.fromstring(xml_str)) == expected
+
+ expected = {
+ "origin": " Made in the USA or Imported",
+ "packaging": " 1",
+ "tcin": " 54571204",
+ }
+ assert parse_raw_specs(xml_str) == expected
+
+
+def test_parse_raw_specs4():
+ """Test an example XML string found in the sample data file"""
+
+ xml_str = """
+ Specifications
Suggested Age: 22 Years and Up
Publisher: States Academic Press
Book theme: Agronomy, Crop Science
Street Date: March 8, 2022
TCIN: 84917947
UPC: 9781639893843
Item Number (DPCI): 247-34-8919
Origin: Made in the USA or Imported
+ """
+
+ expected = {
+ "Author": " Mark Taylor",
+ "Book theme": " Agronomy, Crop Science",
+ "Format": " Hardcover",
+ "Genre": " Technology",
+ "Item Number (DPCI)": " 247-34-8919",
+ "Language": " English",
+ "Number of Pages": " 247",
+ "Origin": " Made in the USA or Imported",
+ "Publisher": " States Academic Press",
+ "Street Date": " March 8, 2022",
+ "Sub-Genre": " Agriculture",
+ "Suggested Age": " 22 Years and Up",
+ "TCIN": " 84917947",
+ "UPC": " 9781639893843",
+ }
+ assert iter_parse(ET.fromstring(xml_str)) == expected
+
+ expected = {"origin": " Made in the USA or Imported", "tcin": " 84917947"}
+ assert parse_raw_specs(xml_str) == expected
+
+
+def test_malformed_xml():
+ """Test a malformed XML string"""
+ xml_str = "foo"
+ assert parse_raw_specs(xml_str) == {}