29 lines
882 B
Python
29 lines
882 B
Python
"""Module containing the ReadFromCsv DoFn, which is used to create a
PTransform that reads from a CSV input file."""
|
|
|
|
import io
|
|
import logging
|
|
import csv
|
|
|
|
import apache_beam as beam
|
|
from apache_beam.io.filesystems import FileSystems
|
|
|
|
|
|
class ReadFromCsv(beam.DoFn):
    """Read a CSV file and yield each data row as a dictionary.

    The first row of the file is treated as the header. Every following row
    is emitted as a dict whose keys are the header names and whose values
    are that row's cells.
    """

    # pylint: disable=abstract-method,arguments-differ
    def process(self, element):
        """Yield one dict per data row of the CSV file named by ``element``.

        Args:
            element: Location of the CSV file; passed to ``FileSystems.open``.

        Yields:
            A dict pairing each header name with the matching cell of the
            row. Pairing is done with ``zip``, so when a row and the header
            differ in length the extra items are dropped. Nothing is yielded
            when the file has no header row.
        """
        logging.info("reading from input file: %s", element)
        with FileSystems.open(element) as file:
            # Decode explicitly instead of relying on the worker's locale
            # default, so the same file parses identically on every machine.
            # newline="" hands line endings to the csv module untouched, as
            # its documentation requires; otherwise line breaks embedded in
            # quoted fields would be rewritten by universal-newline handling.
            text_wrapper = io.TextIOWrapper(
                file, encoding="utf-8", newline=""
            )
            reader = csv.reader(text_wrapper)

            # An empty file has no header row, so there is nothing to emit.
            header = next(reader, None)
            if header is None:
                return

            for row in reader:
                yield dict(zip(header, row))
|