diff --git a/.gitignore b/.gitignore
index 2dfffff..424293c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,3 +7,6 @@
 venv
 *.egg-info
 __pycache__
+
+.terraform
+.terraform.lock.hcl
diff --git a/pipeline/docker-compose.yml b/pipeline/docker-compose.yml
index 517e663..774fba3 100644
--- a/pipeline/docker-compose.yml
+++ b/pipeline/docker-compose.yml
@@ -37,3 +37,33 @@ services:
     depends_on:
       airflow-init:
         condition: service_completed_successfully
+
+  postgres:
+    image: postgres:15.3-alpine
+    ports:
+      - 5432:5432
+    volumes:
+      - ./state/postgres/data:/var/lib/postgresql/data
+    environment:
+      - "POSTGRES_PASSWORD=postgres"
+      - "POSTGRES_USER=postgres"
+
+  terraform:
+    image: hashicorp/terraform
+    entrypoint: /usr/local/bin/terraform-entrypoint.sh
+    restart: "no"
+    working_dir: /terraform
+    volumes:
+      - ./scripts/terraform-entrypoint.sh:/usr/local/bin/terraform-entrypoint.sh:ro
+      - ./terraform:/terraform
+      - ./state/tfstate:/terraform/tfstate
+    depends_on:
+      airflow-webserver:
+        condition: service_started
+      postgres:
+        condition: service_started
+    environment:
+      - "TF_VAR_pg_host=postgres"
+      - "TF_VAR_pg_port=5432"
+      - "TF_VAR_pg_password=postgres"
+      - "TF_VAR_pg_username=postgres"
diff --git a/pipeline/scripts/airflow-init.sh b/pipeline/scripts/airflow-init.sh
index b6aa655..eadbf3a 100755
--- a/pipeline/scripts/airflow-init.sh
+++ b/pipeline/scripts/airflow-init.sh
@@ -7,3 +7,6 @@ airflow db init
 
 # Allow non-authenticated access to UI for Airflow 2.*
 sed -i.bak -E 's/^(# )?(AUTH_ROLE_PUBLIC) = .*$/\2 = "Admin"/' /home/airflow/airflow/webserver_config.py
+
+# Allow unauthenticated API access for terraform
+sed -i.bak -E 's/^(auth_backends) = .*$/\1 = airflow.api.auth.backend.default/' /home/airflow/airflow/airflow.cfg
diff --git a/pipeline/scripts/terraform-entrypoint.sh b/pipeline/scripts/terraform-entrypoint.sh
new file mode 100755
index 0000000..9c0bb35
--- /dev/null
+++ b/pipeline/scripts/terraform-entrypoint.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# We need to wait for a while and potentially retry to make sure both
+# postgresql and airflow are ready so that terraform can operate on them
+
+while :; do
+  sleep 10
+  terraform init &&
+  terraform plan &&
+  terraform apply -auto-approve &&
+  break
+done
diff --git a/pipeline/terraform/main.tf b/pipeline/terraform/main.tf
new file mode 100644
index 0000000..8048aab
--- /dev/null
+++ b/pipeline/terraform/main.tf
@@ -0,0 +1,32 @@
+terraform {
+  backend "local" {
+    path = "tfstate/terraform.state"
+  }
+  required_providers {
+    postgresql = {
+      source  = "cyrilgdn/postgresql"
+      version = ">= 1.19.0"
+    }
+  }
+}
+
+module "postgresql" {
+  source            = "./modules/postgresql"
+  host              = var.pg_host
+  port              = var.pg_port
+  username          = var.pg_username
+  password          = var.pg_password
+  pipeline_username = var.pipeline_pg_username
+  pipeline_password = var.pipeline_pg_password
+  pipeline_db       = var.pipeline_pg_db
+  pipeline_schema   = var.pipeline_pg_schema
+}
+
+module "airflow" {
+  source      = "./modules/airflow"
+  pg_host     = var.pg_host
+  pg_port     = var.pg_port
+  pg_username = var.pipeline_pg_username
+  pg_password = var.pipeline_pg_password
+  pg_db       = var.pipeline_pg_db
+}
diff --git a/pipeline/terraform/modules/airflow/main.tf b/pipeline/terraform/modules/airflow/main.tf
new file mode 100644
index 0000000..9fd0399
--- /dev/null
+++ b/pipeline/terraform/modules/airflow/main.tf
@@ -0,0 +1,23 @@
+terraform {
+  required_providers {
+    airflow = {
+      source  = "DrFaust92/airflow"
+      version = ">= 0.12.5"
+    }
+  }
+}
+
+provider "airflow" {
+  base_endpoint = "http://airflow-webserver:8080"
+}
+
+resource "airflow_connection" "pg_connection" {
+  connection_id = "pg_db"
+  conn_type     = "postgres"
+  # Host and port are separate connection fields; do not join them as "host:port".
+  host          = var.pg_host
+  port          = tonumber(var.pg_port)
+  schema        = var.pg_db
+  login         = var.pg_username
+  password      = var.pg_password
+}
diff --git a/pipeline/terraform/modules/airflow/variables.tf b/pipeline/terraform/modules/airflow/variables.tf
new file mode 100644
index 0000000..3ba3316
--- /dev/null
+++ b/pipeline/terraform/modules/airflow/variables.tf
@@ -0,0 +1,25 @@
+variable "pg_host" {
+  type        = string
+  description = "postgresql host"
+}
+
+variable "pg_port" {
+  type        = string
+  description = "postgresql port"
+}
+
+variable "pg_username" {
+  type        = string
+  description = "postgresql pipeline username"
+}
+
+variable "pg_password" {
+  type        = string
+  description = "postgresql pipeline password"
+  sensitive   = true
+}
+
+variable "pg_db" {
+  type        = string
+  description = "postgresql pipeline db"
+}
diff --git a/pipeline/terraform/modules/postgresql/main.tf b/pipeline/terraform/modules/postgresql/main.tf
new file mode 100644
index 0000000..800cf70
--- /dev/null
+++ b/pipeline/terraform/modules/postgresql/main.tf
@@ -0,0 +1,33 @@
+terraform {
+  required_providers {
+    postgresql = {
+      source  = "cyrilgdn/postgresql"
+      version = ">= 1.19.0"
+    }
+  }
+}
+
+provider "postgresql" {
+  host     = var.host
+  port     = var.port
+  username = var.username
+  password = var.password
+  sslmode  = "disable"
+}
+
+resource "postgresql_role" "pg_pipeline_user" {
+  name     = var.pipeline_username
+  password = var.pipeline_password
+  login    = true
+}
+
+resource "postgresql_database" "pg_pipeline_db" {
+  name  = var.pipeline_db
+  owner = postgresql_role.pg_pipeline_user.name
+}
+
+resource "postgresql_schema" "pg_pipeline_schema" {
+  name     = var.pipeline_schema
+  database = postgresql_database.pg_pipeline_db.name
+  owner    = postgresql_role.pg_pipeline_user.name
+}
diff --git a/pipeline/terraform/modules/postgresql/variables.tf b/pipeline/terraform/modules/postgresql/variables.tf
new file mode 100644
index 0000000..70e5b50
--- /dev/null
+++ b/pipeline/terraform/modules/postgresql/variables.tf
@@ -0,0 +1,42 @@
+variable "host" {
+  type        = string
+  description = "postgresql host"
+}
+
+variable "port" {
+  type        = number
+  description = "postgresql port"
+  default     = 5432
+}
+
+variable "password" {
+  type        = string
+  description = "postgresql admin password"
+  sensitive   = true
+}
+
+variable "username" {
+  type        = string
+  description = "postgresql admin username"
+}
+
+variable "pipeline_username" {
+  type        = string
+  description = "postgresql pipeline username"
+}
+
+variable "pipeline_password" {
+  type        = string
+  description = "postgresql pipeline password"
+  sensitive   = true
+}
+
+variable "pipeline_db" {
+  type        = string
+  description = "postgresql pipeline db"
+}
+
+variable "pipeline_schema" {
+  type        = string
+  description = "postgresql pipeline schema"
+}
diff --git a/pipeline/terraform/variables.tf b/pipeline/terraform/variables.tf
new file mode 100644
index 0000000..c809635
--- /dev/null
+++ b/pipeline/terraform/variables.tf
@@ -0,0 +1,47 @@
+variable "pg_host" {
+  type        = string
+  description = "postgresql host"
+}
+
+variable "pg_port" {
+  type        = number
+  description = "postgresql port"
+  default     = 5432
+}
+
+variable "pg_password" {
+  type        = string
+  description = "postgresql admin password"
+  sensitive   = true
+}
+
+variable "pg_username" {
+  type        = string
+  description = "postgresql admin username"
+  sensitive   = true
+}
+
+variable "pipeline_pg_username" {
+  type        = string
+  description = "postgresql pipeline username"
+  default     = "sustainability_score"
+}
+
+variable "pipeline_pg_password" {
+  type        = string
+  description = "postgresql pipeline password"
+  default     = "sustainability_score"
+  sensitive   = true
+}
+
+variable "pipeline_pg_db" {
+  type        = string
+  description = "postgresql pipeline db name"
+  default     = "sustainability_score"
+}
+
+variable "pipeline_pg_schema" {
+  type        = string
+  description = "postgresql pipeline schema name"
+  default     = "sustainability_score"
+}