feat: added analysis notebook
parent
900955d92d
commit
15178b0b3c
|
@ -0,0 +1,391 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"id": "a0dcf44b-c609-4701-8007-b270cf8c3d35",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>tcin</th>\n",
|
||||
" <th>gtin13</th>\n",
|
||||
" <th>ingestion_time</th>\n",
|
||||
" <th>primary_category</th>\n",
|
||||
" <th>materials</th>\n",
|
||||
" <th>packaging</th>\n",
|
||||
" <th>origin</th>\n",
|
||||
" <th>weight</th>\n",
|
||||
" <th>height</th>\n",
|
||||
" <th>width</th>\n",
|
||||
" <th>depth</th>\n",
|
||||
" <th>ingestion_time</th>\n",
|
||||
" <th>material_score</th>\n",
|
||||
" <th>weight_score</th>\n",
|
||||
" <th>packaging_score</th>\n",
|
||||
" <th>origin_score</th>\n",
|
||||
" <th>score</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>81917300</td>\n",
|
||||
" <td>840391145528</td>\n",
|
||||
" <td>2023-06-25 20:31:00.725924</td>\n",
|
||||
" <td>Toys</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>imported</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2023-06-25 20:31:00.725924</td>\n",
|
||||
" <td>0.625000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>84821007</td>\n",
|
||||
" <td>9781801433983</td>\n",
|
||||
" <td>2023-06-25 20:31:00.736690</td>\n",
|
||||
" <td>School & Office Supplies</td>\n",
|
||||
" <td>[cardboard]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>imported</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>30.23</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2023-06-25 20:31:00.736690</td>\n",
|
||||
" <td>0.253333</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>15432753</td>\n",
|
||||
" <td>883929408115</td>\n",
|
||||
" <td>2023-06-25 20:31:00.742077</td>\n",
|
||||
" <td>Movies, Music & Books</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>usa</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2023-06-25 20:31:00.742077</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>1.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>84199597</td>\n",
|
||||
" <td>194425194489</td>\n",
|
||||
" <td>2023-06-25 20:31:00.746501</td>\n",
|
||||
" <td>Party Supplies</td>\n",
|
||||
" <td>[cardboard]</td>\n",
|
||||
" <td>24</td>\n",
|
||||
" <td>imported</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2023-06-25 20:31:00.746501</td>\n",
|
||||
" <td>0.625000</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>14.4</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>86345566</td>\n",
|
||||
" <td>23271231140</td>\n",
|
||||
" <td>2023-06-25 20:31:00.751118</td>\n",
|
||||
" <td>Home</td>\n",
|
||||
" <td>[metal]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>imported</td>\n",
|
||||
" <td>2109.20</td>\n",
|
||||
" <td>58.42</td>\n",
|
||||
" <td>2.54</td>\n",
|
||||
" <td>58.42</td>\n",
|
||||
" <td>2023-06-25 20:31:00.751118</td>\n",
|
||||
" <td>0.353333</td>\n",
|
||||
" <td>1581.9000</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>1582.853333</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>...</th>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" <td>...</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>162</th>\n",
|
||||
" <td>83388852</td>\n",
|
||||
" <td>4717592035292</td>\n",
|
||||
" <td>2023-06-25 20:31:01.380622</td>\n",
|
||||
" <td>Sports & Outdoors</td>\n",
|
||||
" <td>[plastic]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>mixed</td>\n",
|
||||
" <td>127.01</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>12.70</td>\n",
|
||||
" <td>24.13</td>\n",
|
||||
" <td>2023-06-25 20:31:01.380622</td>\n",
|
||||
" <td>0.366667</td>\n",
|
||||
" <td>95.2575</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.5</td>\n",
|
||||
" <td>96.724167</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>163</th>\n",
|
||||
" <td>80836585</td>\n",
|
||||
" <td>841821016982</td>\n",
|
||||
" <td>2023-06-25 20:31:01.384865</td>\n",
|
||||
" <td>Patio & Garden</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>mixed</td>\n",
|
||||
" <td>14514.94</td>\n",
|
||||
" <td>30.48</td>\n",
|
||||
" <td>30.48</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2023-06-25 20:31:01.384865</td>\n",
|
||||
" <td>0.112500</td>\n",
|
||||
" <td>10886.2050</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.5</td>\n",
|
||||
" <td>10887.417500</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>164</th>\n",
|
||||
" <td>75477923</td>\n",
|
||||
" <td>93422863070</td>\n",
|
||||
" <td>2023-06-25 20:31:01.388505</td>\n",
|
||||
" <td>Holiday Shop</td>\n",
|
||||
" <td>[fabric]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>mixed</td>\n",
|
||||
" <td>78.64</td>\n",
|
||||
" <td>12.06</td>\n",
|
||||
" <td>5.71</td>\n",
|
||||
" <td>5.71</td>\n",
|
||||
" <td>2023-06-25 20:31:01.388505</td>\n",
|
||||
" <td>0.403571</td>\n",
|
||||
" <td>58.9800</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.5</td>\n",
|
||||
" <td>60.483571</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>165</th>\n",
|
||||
" <td>85634544</td>\n",
|
||||
" <td>194425213968</td>\n",
|
||||
" <td>2023-06-25 20:31:01.391389</td>\n",
|
||||
" <td>Household Essentials</td>\n",
|
||||
" <td>None</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>imported</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>2023-06-25 20:31:01.391389</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>166</th>\n",
|
||||
" <td>80239765</td>\n",
|
||||
" <td>724235717129</td>\n",
|
||||
" <td>2023-06-25 20:31:01.394481</td>\n",
|
||||
" <td>Kitchen & Dining</td>\n",
|
||||
" <td>[stoneware]</td>\n",
|
||||
" <td>1</td>\n",
|
||||
" <td>imported</td>\n",
|
||||
" <td>829.60</td>\n",
|
||||
" <td>11.43</td>\n",
|
||||
" <td>31.75</td>\n",
|
||||
" <td>11.43</td>\n",
|
||||
" <td>2023-06-25 20:31:01.394481</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" <td>622.2000</td>\n",
|
||||
" <td>0.6</td>\n",
|
||||
" <td>0.0</td>\n",
|
||||
" <td>NaN</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"<p>167 rows × 17 columns</p>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" tcin gtin13 ingestion_time \\\n",
|
||||
"0 81917300 840391145528 2023-06-25 20:31:00.725924 \n",
|
||||
"1 84821007 9781801433983 2023-06-25 20:31:00.736690 \n",
|
||||
"2 15432753 883929408115 2023-06-25 20:31:00.742077 \n",
|
||||
"3 84199597 194425194489 2023-06-25 20:31:00.746501 \n",
|
||||
"4 86345566 23271231140 2023-06-25 20:31:00.751118 \n",
|
||||
".. ... ... ... \n",
|
||||
"162 83388852 4717592035292 2023-06-25 20:31:01.380622 \n",
|
||||
"163 80836585 841821016982 2023-06-25 20:31:01.384865 \n",
|
||||
"164 75477923 93422863070 2023-06-25 20:31:01.388505 \n",
|
||||
"165 85634544 194425213968 2023-06-25 20:31:01.391389 \n",
|
||||
"166 80239765 724235717129 2023-06-25 20:31:01.394481 \n",
|
||||
"\n",
|
||||
" primary_category materials packaging origin weight \\\n",
|
||||
"0 Toys None 1 imported NaN \n",
|
||||
"1 School & Office Supplies [cardboard] 1 imported NaN \n",
|
||||
"2 Movies, Music & Books None 1 usa NaN \n",
|
||||
"3 Party Supplies [cardboard] 24 imported NaN \n",
|
||||
"4 Home [metal] 1 imported 2109.20 \n",
|
||||
".. ... ... ... ... ... \n",
|
||||
"162 Sports & Outdoors [plastic] 1 mixed 127.01 \n",
|
||||
"163 Patio & Garden None 1 mixed 14514.94 \n",
|
||||
"164 Holiday Shop [fabric] 1 mixed 78.64 \n",
|
||||
"165 Household Essentials None 1 imported NaN \n",
|
||||
"166 Kitchen & Dining [stoneware] 1 imported 829.60 \n",
|
||||
"\n",
|
||||
" height width depth ingestion_time material_score \\\n",
|
||||
"0 NaN NaN NaN 2023-06-25 20:31:00.725924 0.625000 \n",
|
||||
"1 NaN 30.23 NaN 2023-06-25 20:31:00.736690 0.253333 \n",
|
||||
"2 NaN NaN NaN 2023-06-25 20:31:00.742077 NaN \n",
|
||||
"3 NaN NaN NaN 2023-06-25 20:31:00.746501 0.625000 \n",
|
||||
"4 58.42 2.54 58.42 2023-06-25 20:31:00.751118 0.353333 \n",
|
||||
".. ... ... ... ... ... \n",
|
||||
"162 NaN 12.70 24.13 2023-06-25 20:31:01.380622 0.366667 \n",
|
||||
"163 30.48 30.48 NaN 2023-06-25 20:31:01.384865 0.112500 \n",
|
||||
"164 12.06 5.71 5.71 2023-06-25 20:31:01.388505 0.403571 \n",
|
||||
"165 NaN NaN NaN 2023-06-25 20:31:01.391389 NaN \n",
|
||||
"166 11.43 31.75 11.43 2023-06-25 20:31:01.394481 NaN \n",
|
||||
"\n",
|
||||
" weight_score packaging_score origin_score score \n",
|
||||
"0 NaN 0.6 0.0 NaN \n",
|
||||
"1 NaN 0.6 0.0 NaN \n",
|
||||
"2 NaN 0.6 1.0 NaN \n",
|
||||
"3 NaN 14.4 0.0 NaN \n",
|
||||
"4 1581.9000 0.6 0.0 1582.853333 \n",
|
||||
".. ... ... ... ... \n",
|
||||
"162 95.2575 0.6 0.5 96.724167 \n",
|
||||
"163 10886.2050 0.6 0.5 10887.417500 \n",
|
||||
"164 58.9800 0.6 0.5 60.483571 \n",
|
||||
"165 NaN 0.6 0.0 NaN \n",
|
||||
"166 622.2000 0.6 0.0 NaN \n",
|
||||
"\n",
|
||||
"[167 rows x 17 columns]"
|
||||
]
|
||||
},
|
||||
"execution_count": 15,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sqlalchemy import create_engine\n",
|
||||
"import pandas as pd\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"%matplotlib inline\n",
|
||||
"\n",
|
||||
"engine = create_engine('postgresql://sustainability_score:sustainability_score@postgres:5432/sustainability_score')\n",
|
||||
"\n",
|
||||
"query = \"\"\"\n",
|
||||
" SELECT *\n",
|
||||
" FROM sustainability_score.products AS products\n",
|
||||
" JOIN sustainability_score.scored_products AS scores\n",
|
||||
" USING (tcin);\n",
|
||||
"\"\"\"\n",
|
||||
"\n",
|
||||
"products = pd.read_sql_query(query, engine)\n",
|
||||
"products"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0f00acc1-4dec-45f9-9e38-dcae2b7a271d",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"ax = plt.subplot(1, 2, 1)\n",
|
||||
"plt.hist(weight, color='blue', edgecolor='black', bins=50)\n",
|
||||
"ax = plt.subplot(1, 2, 2)\n",
|
||||
"plt.hist(weight[weight <= 1], color='blue', edgecolor='black', bins=50)\n",
|
||||
"plt.show()"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
Loading…
Reference in New Issue