added skeleton for airflow DAG

main
Ricard Illa 2023-06-21 19:12:03 +02:00
parent 02eaa4b7ff
commit 5a4bca756e
4 changed files with 57 additions and 1 deletions

View File

@ -0,0 +1,3 @@
# Sustainability score
Placeholder

View File

@ -0,0 +1,39 @@
"""
DAG IDs: sustainability_score
"""
import os
from datetime import datetime
import utils
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.providers.apache.beam.operators.beam import BeamRunPythonPipelineOperator
# Home directory of the user running Airflow; the input CSV is looked up
# beneath it, under gcs/data.
HOME = os.environ["HOME"]

# The sample dataset's file name repeats the dataset name on both sides of
# " - ", so the literal is split in two to keep the lines short.
CSV_FNAME = (
    "large_target_store_products_dataset_sample - "
    "large_target_store_products_dataset_sample.csv"
)

# Default DAG params, read by the task definitions through Jinja templates.
CONFIG = {
    # Absolute path of the CSV file fed to the Beam pipeline.
    "input": f"{HOME}/gcs/data/{CSV_FNAME}",
    # Entry point of the Beam ETL pipeline.
    "beam_etl_path": "/beam_etl/main.py",
}
# Cron "0 * * * 1-5": at minute 0 of every hour, Monday through Friday.
# ``schedule`` replaces ``schedule_interval``, which is deprecated since
# Airflow 2.4.
with DAG(
    "sustainability_score",
    schedule="0 * * * 1-5",
    catchup=False,
    max_active_runs=10,
    start_date=datetime(2023, 6, 21),
    # The DAG's documentation is the README.md sitting next to this file.
    doc_md=utils.load_docs(__file__),
    params=CONFIG,
) as dag:
    # Both the pipeline file and its options are Jinja templates that are
    # filled in from the DAG-level ``params`` (CONFIG by default).
    etl_pipeline = BeamRunPythonPipelineOperator(
        task_id="beam_etl",
        py_file="{{ params.beam_etl_path }}",
        pipeline_options={"input": "{{ params.input }}"},
    )

12
pipeline/dags/utils.py Normal file
View File

@ -0,0 +1,12 @@
"""
Misc helper functions
"""
import pathlib
def load_docs(caller_path, fname: str = "README.md") -> str:
    """Return the text of a documentation file located next to the caller.

    Args:
        caller_path: Path of the calling module (typically ``__file__``).
            Only its parent directory is used.
        fname: Name of the documentation file inside that directory.

    Returns:
        The full contents of the file, decoded as UTF-8.

    Raises:
        FileNotFoundError: If the documentation file does not exist.
    """
    docs_path = pathlib.Path(caller_path).parent / fname
    return docs_path.read_text(encoding="utf-8")

View File

@ -6,10 +6,12 @@ x-airflow-common:
image: us-docker.pkg.dev/cloud-airflow-releaser/airflow-worker-scheduler-2-5-1/airflow-worker-scheduler-2-5-1:composer-2.3.1-airflow-2-5-1 image: us-docker.pkg.dev/cloud-airflow-releaser/airflow-worker-scheduler-2-5-1/airflow-worker-scheduler-2-5-1:composer-2.3.1-airflow-2-5-1
entrypoint: /usr/local/bin/airflow-entrypoint.sh entrypoint: /usr/local/bin/airflow-entrypoint.sh
volumes: volumes:
- ./state/airflow-data:/home/airflow/airflow
- ./dags:/home/airflow/airflow/dags
- ./scripts/airflow-init.sh:/usr/local/bin/airflow-init.sh:ro - ./scripts/airflow-init.sh:/usr/local/bin/airflow-init.sh:ro
- ./scripts/airflow-entrypoint.sh:/usr/local/bin/airflow-entrypoint.sh:ro - ./scripts/airflow-entrypoint.sh:/usr/local/bin/airflow-entrypoint.sh:ro
- ../data:/home/airflow/gcs/data:ro - ../data:/home/airflow/gcs/data:ro
- ./state/airflow-data:/home/airflow/airflow - ./beam_etl:/beam_etl:ro
environment: environment:
AIRFLOW__CORE__LOAD_EXAMPLES: 'false' AIRFLOW__CORE__LOAD_EXAMPLES: 'false'