diff --git a/notebooks/analysis.ipynb b/notebooks/analysis.ipynb new file mode 100644 index 0000000..313290e --- /dev/null +++ b/notebooks/analysis.ipynb @@ -0,0 +1,391 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 15, + "id": "a0dcf44b-c609-4701-8007-b270cf8c3d35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
tcingtin13ingestion_timeprimary_categorymaterialspackagingoriginweightheightwidthdepthingestion_timematerial_scoreweight_scorepackaging_scoreorigin_scorescore
0819173008403911455282023-06-25 20:31:00.725924ToysNone1importedNaNNaNNaNNaN2023-06-25 20:31:00.7259240.625000NaN0.60.0NaN
18482100797818014339832023-06-25 20:31:00.736690School & Office Supplies[cardboard]1importedNaNNaN30.23NaN2023-06-25 20:31:00.7366900.253333NaN0.60.0NaN
2154327538839294081152023-06-25 20:31:00.742077Movies, Music & BooksNone1usaNaNNaNNaNNaN2023-06-25 20:31:00.742077NaNNaN0.61.0NaN
3841995971944251944892023-06-25 20:31:00.746501Party Supplies[cardboard]24importedNaNNaNNaNNaN2023-06-25 20:31:00.7465010.625000NaN14.40.0NaN
486345566232712311402023-06-25 20:31:00.751118Home[metal]1imported2109.2058.422.5458.422023-06-25 20:31:00.7511180.3533331581.90000.60.01582.853333
......................................................
1628338885247175920352922023-06-25 20:31:01.380622Sports & Outdoors[plastic]1mixed127.01NaN12.7024.132023-06-25 20:31:01.3806220.36666795.25750.60.596.724167
163808365858418210169822023-06-25 20:31:01.384865Patio & GardenNone1mixed14514.9430.4830.48NaN2023-06-25 20:31:01.3848650.11250010886.20500.60.510887.417500
16475477923934228630702023-06-25 20:31:01.388505Holiday Shop[fabric]1mixed78.6412.065.715.712023-06-25 20:31:01.3885050.40357158.98000.60.560.483571
165856345441944252139682023-06-25 20:31:01.391389Household EssentialsNone1importedNaNNaNNaNNaN2023-06-25 20:31:01.391389NaNNaN0.60.0NaN
166802397657242357171292023-06-25 20:31:01.394481Kitchen & Dining[stoneware]1imported829.6011.4331.7511.432023-06-25 20:31:01.394481NaN622.20000.60.0NaN
\n", + "

167 rows × 17 columns

\n", + "
" + ], + "text/plain": [ + " tcin gtin13 ingestion_time \\\n", + "0 81917300 840391145528 2023-06-25 20:31:00.725924 \n", + "1 84821007 9781801433983 2023-06-25 20:31:00.736690 \n", + "2 15432753 883929408115 2023-06-25 20:31:00.742077 \n", + "3 84199597 194425194489 2023-06-25 20:31:00.746501 \n", + "4 86345566 23271231140 2023-06-25 20:31:00.751118 \n", + ".. ... ... ... \n", + "162 83388852 4717592035292 2023-06-25 20:31:01.380622 \n", + "163 80836585 841821016982 2023-06-25 20:31:01.384865 \n", + "164 75477923 93422863070 2023-06-25 20:31:01.388505 \n", + "165 85634544 194425213968 2023-06-25 20:31:01.391389 \n", + "166 80239765 724235717129 2023-06-25 20:31:01.394481 \n", + "\n", + " primary_category materials packaging origin weight \\\n", + "0 Toys None 1 imported NaN \n", + "1 School & Office Supplies [cardboard] 1 imported NaN \n", + "2 Movies, Music & Books None 1 usa NaN \n", + "3 Party Supplies [cardboard] 24 imported NaN \n", + "4 Home [metal] 1 imported 2109.20 \n", + ".. ... ... ... ... ... \n", + "162 Sports & Outdoors [plastic] 1 mixed 127.01 \n", + "163 Patio & Garden None 1 mixed 14514.94 \n", + "164 Holiday Shop [fabric] 1 mixed 78.64 \n", + "165 Household Essentials None 1 imported NaN \n", + "166 Kitchen & Dining [stoneware] 1 imported 829.60 \n", + "\n", + " height width depth ingestion_time material_score \\\n", + "0 NaN NaN NaN 2023-06-25 20:31:00.725924 0.625000 \n", + "1 NaN 30.23 NaN 2023-06-25 20:31:00.736690 0.253333 \n", + "2 NaN NaN NaN 2023-06-25 20:31:00.742077 NaN \n", + "3 NaN NaN NaN 2023-06-25 20:31:00.746501 0.625000 \n", + "4 58.42 2.54 58.42 2023-06-25 20:31:00.751118 0.353333 \n", + ".. ... ... ... ... ... 
\n", + "162 NaN 12.70 24.13 2023-06-25 20:31:01.380622 0.366667 \n", + "163 30.48 30.48 NaN 2023-06-25 20:31:01.384865 0.112500 \n", + "164 12.06 5.71 5.71 2023-06-25 20:31:01.388505 0.403571 \n", + "165 NaN NaN NaN 2023-06-25 20:31:01.391389 NaN \n", + "166 11.43 31.75 11.43 2023-06-25 20:31:01.394481 NaN \n", + "\n", + " weight_score packaging_score origin_score score \n", + "0 NaN 0.6 0.0 NaN \n", + "1 NaN 0.6 0.0 NaN \n", + "2 NaN 0.6 1.0 NaN \n", + "3 NaN 14.4 0.0 NaN \n", + "4 1581.9000 0.6 0.0 1582.853333 \n", + ".. ... ... ... ... \n", + "162 95.2575 0.6 0.5 96.724167 \n", + "163 10886.2050 0.6 0.5 10887.417500 \n", + "164 58.9800 0.6 0.5 60.483571 \n", + "165 NaN 0.6 0.0 NaN \n", + "166 622.2000 0.6 0.0 NaN \n", + "\n", + "[167 rows x 17 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "%matplotlib inline\n", + "\n", + "# The connection URL can be overridden via SUSTAINABILITY_DB_URL so real credentials\n", + "# never have to be committed; the fallback is the URL this notebook used before.\n", + "DEFAULT_DB_URL = 'postgresql://sustainability_score:sustainability_score@postgres:5432/sustainability_score'\n", + "engine = create_engine(os.environ.get('SUSTAINABILITY_DB_URL', DEFAULT_DB_URL))\n", + "\n", + "# NOTE(review): SELECT * over this join returns ingestion_time twice (see the two\n", + "# identically named columns in the output); select explicit columns if either is needed.\n", + "query = \"\"\"\n", + " SELECT *\n", + " FROM sustainability_score.products AS products\n", + " JOIN sustainability_score.scored_products AS scores\n", + " USING (tcin);\n", + "\"\"\"\n", + "\n", + "products = pd.read_sql_query(query, engine)\n", + "products" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f00acc1-4dec-45f9-9e38-dcae2b7a271d", + "metadata": {}, + "outputs": [], + "source": [ + "# weight was never defined anywhere in this notebook, so this cell raised a\n", + "# NameError on a fresh kernel; derive it from the frame loaded above.\n", + "# Missing weights are dropped explicitly.\n", + "weight = products['weight'].dropna()\n", + "\n", + "fig, (ax_all, ax_low) = plt.subplots(1, 2, figsize=(12, 4))\n", + "ax_all.hist(weight, color='blue', edgecolor='black', bins=50)\n", + "ax_all.set(title='Weight: all products', xlabel='weight', ylabel='products')\n", + "# NOTE(review): the <= 1 cut-off is carried over unchanged; confirm it matches the\n", + "# units of the weight column.\n", + "ax_low.hist(weight[weight <= 1], color='blue', edgecolor='black', bins=50)\n", + "ax_low.set(title='Weight: products with weight <= 1', xlabel='weight', ylabel='products')\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}