feat: update table if there's a primary key conflict

main
Ricard Illa 2023-06-23 18:25:11 +02:00
parent 8e89404b76
commit 55b20bb897
2 changed files with 18 additions and 1 deletions

View File

@ -33,13 +33,16 @@ class ReadFromCsv(beam.DoFn):
class WriteToPostgreSQL(beam.DoFn): class WriteToPostgreSQL(beam.DoFn):
"""DoFn to write elements to a PostgreSQL database""" """DoFn to write elements to a PostgreSQL database"""
def __init__(self, hostname, port, username, password, database, table): def __init__(
self, hostname, port, username, password, database, table, table_key=None
):
self.hostname = hostname self.hostname = hostname
self.port = port self.port = port
self.username = username self.username = username
self.password = password self.password = password
self.database = database self.database = database
self.table = table self.table = table
self.table_key = table_key
def setup(self): def setup(self):
self.connection = psycopg2.connect( self.connection = psycopg2.connect(
@ -58,6 +61,19 @@ class WriteToPostgreSQL(beam.DoFn):
INSERT INTO { self.table } ({ colnames }) INSERT INTO { self.table } ({ colnames })
VALUES ({ values }) VALUES ({ values })
""" """
if self.table_key is not None:
update_statement = ",".join(
f"{ col } = EXCLUDED.{ col }"
for col in element.keys()
if col != self.table_key
)
sql = (
sql
+ f"""
ON CONFLICT ({ self.table_key }) DO UPDATE SET
{ update_statement }
"""
)
cursor.execute(sql, list(element.values())) cursor.execute(sql, list(element.values()))
self.connection.commit() self.connection.commit()
cursor.close() cursor.close()

View File

@ -46,6 +46,7 @@ def main():
password=opts.pg_password, password=opts.pg_password,
database=opts.pg_database, database=opts.pg_database,
table=opts.pg_table, table=opts.pg_table,
table_key="gtin13",
)) ))
# fmt: on # fmt: on