// All downloads are free. The search and download functionality uses the official Maven repository.

// com.adform.streamloader.vertica.ExternalOffsetVerticaFileBatcher.scala (Maven / Gradle / Ivy)

/*
 * Copyright (c) 2020 Adform
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

package com.adform.streamloader.vertica

import java.io.File
import java.time.Duration
import com.adform.streamloader.model.{StreamRecord, StreamRange}
import com.adform.streamloader.sink.batch.{RecordBatchBuilder, RecordBatcher}
import com.adform.streamloader.sink.file.{FileCommitStrategy, FileRecordBatch}
import com.adform.streamloader.util.{Logging, TimeProvider}
import com.adform.streamloader.vertica.file.VerticaFileBuilderFactory
import javax.sql.DataSource

import scala.util.Using

/**
  * A Vertica record batch backed by a file, carrying an additional pre-generated file ID. The ID acts
  * as the foreign key tying the stored records to the offsets, which are kept in a separate table.
  *
  * @param file The file backing this batch.
  * @param fileId The pre-generated file ID.
  * @param recordRanges The stream ranges associated with this batch.
  * @param copyStatementTemplate A `COPY` statement in `String.format` syntax, where the table name
  *                              is left as a format placeholder to be filled in later.
  */
case class ExternalOffsetVerticaFileRecordBatch(
    file: File,
    fileId: Long,
    recordRanges: Seq[StreamRange],
    copyStatementTemplate: String
) extends FileRecordBatch
    with VerticaRecordBatch {

  /**
    * Renders the final `COPY` statement by formatting the template with the given table name.
    *
    * @param table Name of the table to substitute into the template.
    */
  override def copyStatement(table: String): String =
    copyStatementTemplate.format(table)
}

/**
  * A file based Vertica record batcher that generates a new file ID before starting a new batch from
  * a given ID sequence, formats records using a given formatter and writes them to files.
  *
  * A `SEQUENCE` is required for generating the `_file_id` foreign key values, create it as follows:
  *
  * {{{
  *   CREATE SEQUENCE file_id_sequence;
  * }}}
  *
  * @param dbDataSource The data source to use when generating file IDs.
  * @param fileIdSequence The file ID sequence name.
  * @param recordFormatter Record formatter to use when writing records to files, it receives
  *                        the file ID of the current batch together with the stream record.
  * @param fileBuilderFactory File builder factory to use.
  * @param fileCommitStrategy Strategy for completing files.
  * @param verticaLoadMethod Vertica load method to use when forming `COPY` statements.
  * @param timeProvider Source of the current time, used to measure how long a file has been open.
  *
  * @tparam R Type of records written to files.
  */
class ExternalOffsetVerticaFileBatcher[R](
    dbDataSource: DataSource,
    fileIdSequence: String,
    recordFormatter: (Long, StreamRecord) => Seq[R],
    fileBuilderFactory: VerticaFileBuilderFactory[R],
    fileCommitStrategy: FileCommitStrategy,
    verticaLoadMethod: VerticaLoadMethod
)(implicit timeProvider: TimeProvider = TimeProvider.system)
    extends RecordBatcher[ExternalOffsetVerticaFileRecordBatch]
    with Logging {

  /**
    * Starts a new batch: fetches a fresh file ID from the sequence, creates a new file builder
    * and records the start time that is later used to compute the age of the file.
    */
  override def newBatchBuilder(): RecordBatchBuilder[ExternalOffsetVerticaFileRecordBatch] = {

    // The ID is fetched first, so a failing query does not leave a freshly created file builder behind.
    val fileId = newFileId
    val fileBuilder = fileBuilderFactory.newFileBuilder()
    val fileStartTimeMillis = timeProvider.currentMillis

    new RecordBatchBuilder[ExternalOffsetVerticaFileRecordBatch] {

      /**
        * Formats the record with the file ID of this batch and writes every resulting
        * row to the file.
        *
        * @return The number of formatted records written for the given stream record.
        */
      override def addToBatch(record: StreamRecord): Int = {
        val formattedRecords = recordFormatter(fileId, record)
        formattedRecords.foreach(formatted => fileBuilder.write(formatted))
        formattedRecords.size
      }

      /**
        * Asks the commit strategy whether the file should be completed, given the time elapsed
        * since the batch was started, the current data size and the current record count.
        */
      override def isBatchReady: Boolean = fileCommitStrategy.shouldCommit(
        Duration.ofMillis(timeProvider.currentMillis - fileStartTimeMillis),
        fileBuilder.getDataSize,
        fileBuilder.getRecordCount
      )

      /**
        * Completes the file and wraps it into a record batch, or returns `None`
        * if the file builder produced no file.
        */
      override def build(): Option[ExternalOffsetVerticaFileRecordBatch] =
        fileBuilder
          .build()
          .map(file =>
            ExternalOffsetVerticaFileRecordBatch(
              file,
              fileId,
              currentRecordRanges,
              // The table name is not known yet, a "%s" placeholder is left for it in the statement.
              fileBuilder.copyStatement(file, "%s", verticaLoadMethod)
            )
          )

      /** Discards the underlying file builder. */
      override def discard(): Unit = fileBuilder.discard()
    }
  }

  /**
    * Fetches the next value of the file ID sequence from the database.
    *
    * The connection, statement and result set are all closed before returning.
    *
    * @throws java.sql.SQLException if the query fails or returns no rows.
    */
  private def newFileId: Long = {
    Using.resource(dbDataSource.getConnection) { connection =>
      // NOTE(review): the sequence name is interpolated into the SQL text,
      // so it must come from trusted configuration.
      val query = s"SELECT NEXTVAL('$fileIdSequence')"
      log.info(s"Running file ID query: $query")
      Using.resource(connection.prepareStatement(query)) { statement =>
        Using.resource(statement.executeQuery()) { result =>
          // Fail with a clear message instead of reading from a cursor that is not positioned on a row.
          if (!result.next()) {
            throw new java.sql.SQLException(s"File ID query returned no rows: $query")
          }
          result.getLong(1)
        }
      }
    }
  }
}

/**
  * Companion object providing a fluent builder for [[ExternalOffsetVerticaFileBatcher]] instances.
  */
object ExternalOffsetVerticaFileBatcher {

  /**
    * An immutable builder: every setter returns an updated copy and leaves the receiver untouched.
    *
    * @tparam R Type of records written to files.
    */
  case class Builder[R](
      private val _dbDataSource: DataSource,
      private val _fileIdSequence: String,
      private val _fileBuilderFactory: VerticaFileBuilderFactory[R],
      private val _recordFormatter: (Long, StreamRecord) => Seq[R],
      private val _fileCommitStrategy: FileCommitStrategy,
      private val _verticaLoadMethod: VerticaLoadMethod
  ) {

    /**
      * Sets the data source used for Vertica JDBC connections.
      */
    def dbDataSource(source: DataSource): Builder[R] =
      copy(_dbDataSource = source)

    /**
      * Sets the name of the sequence that file IDs are generated from.
      */
    def fileIdSequence(name: String): Builder[R] =
      copy(_fileIdSequence = name)

    /**
      * Sets the load method used when issuing `COPY` statements.
      */
    def verticaLoadMethod(method: VerticaLoadMethod): Builder[R] =
      copy(_verticaLoadMethod = method)

    /**
      * Sets the record formatter that turns a file ID and a consumed record into the records
      * written to the file.
      */
    def recordFormatter(formatter: (Long, StreamRecord) => Seq[R]): Builder[R] =
      copy(_recordFormatter = formatter)

    /**
      * Sets the file builder factory, e.g. Native.
      */
    def fileBuilderFactory(factory: VerticaFileBuilderFactory[R]): Builder[R] =
      copy(_fileBuilderFactory = factory)

    /**
      * Sets the strategy that decides when a file is ready.
      */
    def fileCommitStrategy(strategy: FileCommitStrategy): Builder[R] =
      copy(_fileCommitStrategy = strategy)

    /**
      * Creates the batcher from the current settings.
      *
      * @throws IllegalStateException if the data source, the sequence name, the record formatter
      *                               or the file builder factory has not been set.
      */
    def build(): ExternalOffsetVerticaFileBatcher[R] = {
      def requireSet(value: Any, message: String): Unit =
        if (value == null) throw new IllegalStateException(message)

      // Checked in this order, so the first missing setting is the one that gets reported.
      requireSet(_dbDataSource, "Must provide a Vertica data source")
      requireSet(_fileIdSequence, "Must provide a valid file ID sequence name")
      requireSet(_recordFormatter, "Must specify a RecordFormatter")
      requireSet(_fileBuilderFactory, "Must specify a FileBuilderFactory")

      new ExternalOffsetVerticaFileBatcher[R](
        dbDataSource = _dbDataSource,
        fileIdSequence = _fileIdSequence,
        recordFormatter = _recordFormatter,
        fileBuilderFactory = _fileBuilderFactory,
        fileCommitStrategy = _fileCommitStrategy,
        verticaLoadMethod = _verticaLoadMethod
      )
    }
  }

  /**
    * Creates a builder with the data source, sequence name, file builder factory and record formatter
    * left unset, a commit strategy of `ReachedAnyOf(recordsWritten = Some(1000))` and
    * the `AUTO` load method.
    *
    * @tparam R Type of records written to files.
    */
  def builder[R](): Builder[R] =
    Builder[R](
      _dbDataSource = null,
      _fileIdSequence = null,
      _fileBuilderFactory = null,
      _recordFormatter = null,
      _fileCommitStrategy = FileCommitStrategy.ReachedAnyOf(recordsWritten = Some(1000)),
      _verticaLoadMethod = VerticaLoadMethod.AUTO
    )
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy