diff --git a/.gitignore b/.gitignore index e69de29..763513e 100644 --- a/.gitignore +++ b/.gitignore @@ -0,0 +1 @@ +.ipynb_checkpoints diff --git a/exploration/work/exploration.ipynb b/exploration/work/exploration.ipynb new file mode 100644 index 0000000..1aa88a8 --- /dev/null +++ b/exploration/work/exploration.ipynb @@ -0,0 +1,1064 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 410, + "id": "98ded03d-0208-4416-a5e5-720a2e0742fa", + "metadata": {}, + "outputs": [], + "source": [ + "import functools\n", + "import pandas as pd\n", + "from IPython.display import display, HTML\n", + "\n", + "in_file = \"/home/jovyan/data/large_target_store_products_dataset_sample - large_target_store_products_dataset_sample.csv\"\n", + "data = pd.read_csv(in_file)\n", + "\n", + "def look_for_matches(data: pd.DataFrame, pattern: str, colname : str = \"raw_specifications\") -> str:\n", + " \"\"\"Useful for finding cells in raw_specifications containing a given string\"\"\"\n", + " return data.loc[data.loc[:, colname].str.contains(pattern), colname].iloc[0]\n", + "\n", + "def render_html(html: str):\n", + " \"\"\"Render an html string\"\"\"\n", + " display(HTML(html))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c20c14ea-9ef6-4d40-8b7d-4731d0866239", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['title', 'url', 'brand', 'main_image', 'sku', 'description',\n", + " 'raw_description', 'gtin13', 'currency', 'price', 'availability',\n", + " 'availableDeliveryMethod', 'available_branch', 'primary_category',\n", + " 'sub_category_1', 'sub_category_2', 'sub_category_3', 'images',\n", + " 'raw_specifications', 'specifications', 'highlights', 'raw_highlights',\n", + " 'uniq_id', 'scraped_at'],\n", + " dtype='object')" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.axes[1]" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "id": "165723d1-8152-4e30-b25a-cbca7f4935f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
The above item details were provided by the Target Plus™ Partner. Target does not represent or warrant that this information is accurate or complete. On occasion, manufacturers may modify their items and update their labels.
We recommend that you do not rely solely on the information presented. If you have a specific question about this item, you may consult the item's label, contact the manufacturer directly or call Target Guest Services at 1-800-591-3869.
\n", + " | title | \n", + "url | \n", + "brand | \n", + "main_image | \n", + "sku | \n", + "description | \n", + "raw_description | \n", + "gtin13 | \n", + "currency | \n", + "price | \n", + "... | \n", + "sub_category_1 | \n", + "sub_category_2 | \n", + "sub_category_3 | \n", + "images | \n", + "raw_specifications | \n", + "specifications | \n", + "highlights | \n", + "raw_highlights | \n", + "uniq_id | \n", + "scraped_at | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
8 | \n", + "NCAA Illinois Fighting Illini Circo Cheese Cut... | \n", + "https://www.target.com/p/ncaa-illinois-fightin... | \n", + "NCAA | \n", + "https://target.scene7.com/is/image/Target/GUES... | \n", + "79646040 | \n", + "Reach out to the complex cheese lover in your ... | \n", + "<div class=\"h-margin-v-default\" data-test=\"ite... | \n", + "99967205276 | \n", + "USD | \n", + "58.95 | \n", + "... | \n", + "Sports Fan Shop | \n", + "Sports Fan Shop Home Goods | \n", + "Sports Fan Shop Barware & Drinkware | \n", + "https://target.scene7.com/is/image/Target/GUES... | \n", + "<div class=\"styles__StyledCol-sc-ct8kx6-0 iKGd... | \n", + "Number of Pieces: 5 | Number of Pieces: 5 | We... | \n", + "BEAUTY & ELEGANCE - The Circo swivel-style cir... | \n", + "<li class=\"styles__Bullet-sc-6aebpn-0 eIfLaI\">... | \n", + "0c549116-75c8-56cb-8877-165380d0efd9 | \n", + "06/12/22 | \n", + "
13 | \n", + "Blue Panda Jumbo Dinosaur Floor Puzzle, Double... | \n", + "https://www.target.com/p/blue-panda-jumbo-dino... | \n", + "Blue Panda | \n", + "https://target.scene7.com/is/image/Target/GUES... | \n", + "80405355 | \n", + "Package Includes\\r\\nLarge Dinosaur Floor Puzzl... | \n", + "<div class=\"h-margin-v-default\" data-test=\"ite... | \n", + "194425203808 | \n", + "USD | \n", + "19.99 | \n", + "... | \n", + "Puzzles | \n", + "NaN | \n", + "NaN | \n", + "https://target.scene7.com/is/image/Target/GUES... | \n", + "<div class=\"styles__StyledCol-sc-ct8kx6-0 iKGd... | \n", + "Number of Pieces: 17 | Number of Pieces: 17 | ... | \n", + "JUMBO DINOSAUR PUZZLE: This t-rex foam puzzle ... | \n", + "<li class=\"styles__Bullet-sc-6aebpn-0 eIfLaI\">... | \n", + "151c72b4-4856-502f-a508-961cc81fffa9 | \n", + "06/12/22 | \n", + "
14 | \n", + "Women's Round Aviator Sunglasses - Universal T... | \n", + "https://www.target.com/p/women-39-s-round-avia... | \n", + "Universal Thread | \n", + "https://target.scene7.com/is/image/Target/GUES... | \n", + "84201225 | \n", + "Round out your eyewear collection with the Rou... | \n", + "<div class=\"h-margin-v-default\" data-test=\"ite... | \n", + "195995526496 | \n", + "USD | \n", + "15.00 | \n", + "... | \n", + "Eye Care | \n", + "NaN | \n", + "NaN | \n", + "https://target.scene7.com/is/image/Target/GUES... | \n", + "<div class=\"styles__StyledCol-sc-ct8kx6-0 iKGd... | \n", + "Material: Metal (Frame) | Material: Metal (Fra... | \n", + "Universal Thread round aviator sunglasses with... | \n", + "<li class=\"styles__Bullet-sc-6aebpn-0 eIfLaI\">... | \n", + "2a803c0f-00bf-50a6-a490-d381620ac3a3 | \n", + "06/12/22 | \n", + "
3 rows × 24 columns
\n", + "