From 7d898c6297372d97916e7db354b15db8d3f4d570 Mon Sep 17 00:00:00 2001
From: Ricard Illa
Date: Mon, 26 Jun 2023 10:04:28 +0200
Subject: [PATCH] fix: airflow's BashOperator's cwd cannot be templated

---
 dags/sustainability_score/README.md   | 5 ++++-
 dags/sustainability_score/__init__.py | 3 +--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/dags/sustainability_score/README.md b/dags/sustainability_score/README.md
index ebd1ffb..1cdbfcb 100644
--- a/dags/sustainability_score/README.md
+++ b/dags/sustainability_score/README.md
@@ -16,8 +16,11 @@ The following parameters are available:
 
 * `input`: location of the CSV input file
 * `beam_etl_path`: location of the apache beam pipeline
-* `dbt_path`: location of the dbt project
 * `products_table`: products_table table name
 
 I decided not to configure the rest of the table locations because that makes
 more sense to be defined in DBT.
+
+Ideally, I would parametrize the dbt path as well, but the `cwd` parameter of
+`BashOperator` is not a templated field, and implementing that is not worth it
+for such a minor improvement.
diff --git a/dags/sustainability_score/__init__.py b/dags/sustainability_score/__init__.py
index f1072a9..ead9ef9 100644
--- a/dags/sustainability_score/__init__.py
+++ b/dags/sustainability_score/__init__.py
@@ -23,7 +23,6 @@ CSV_FNAME = (
 CONFIG = {
     "input": f"{ HOME }/gcs/data/{ CSV_FNAME }",
     "beam_etl_path": "/etl/main.py",
-    "dbt_path": "/dbt",
     "products_table": "sustainability_score.products",
 }
 
@@ -34,7 +33,7 @@ def dbt(cmd: str, attach_dag: DAG) -> BashOperator:
         dag=attach_dag,
         task_id=f"dbt_{ cmd }",
         bash_command=f"dbt { cmd }",
-        cwd="{{ params.dbt_path }}",
+        cwd="/dbt",
         env={
             "POSTGRES_HOST": "{{ conn.get('pg_db').host }}",
             "POSTGRES_USER": "{{ conn.get('pg_db').login }}",
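
For reference, a minimal sketch of what "implementing that" could look like:
subclassing BashOperator so that `cwd` is rendered by Jinja. This assumes
Airflow 2.x, where `template_fields` is a plain class attribute on operators;
the operator name TemplatedCwdBashOperator is hypothetical and not part of
this patch.

    from airflow.operators.bash import BashOperator


    class TemplatedCwdBashOperator(BashOperator):
        """BashOperator variant whose `cwd` is rendered by Jinja.

        Airflow only templates the attributes listed in `template_fields`,
        so extending that tuple is enough for `cwd` to accept values such
        as "{{ params.dbt_path }}". Rendering happens before execute(), so
        the rendered path is what the bash command runs in.
        """

        # Keep the fields BashOperator already templates and add `cwd`.
        template_fields = (*BashOperator.template_fields, "cwd")

With such a subclass, `dbt_path` could stay in CONFIG and be passed as
cwd="{{ params.dbt_path }}". The patch instead hardcodes "/dbt", which is
the pragmatic trade-off the README note describes.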