All Downloads are FREE. Search and download functionalities are using the official Maven repository.

za.co.absa.enceladus.conformance.datasource.PartitioningUtils.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2018 ABSA Group Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package za.co.absa.enceladus.conformance.datasource

import java.text.MessageFormat

import com.typesafe.config.ConfigFactory
import org.apache.hadoop.fs.Path

object PartitioningUtils {

  // Application configuration, loaded once when this object is first initialised
  private val conf = ConfigFactory.load()

  // Expected shape of a report date ('yyyy-MM-dd'); compiled once instead of on every validation call
  private val ReportDateRegEx = """\d{4}-\d{2}-\d{2}""".r

  // Pattern representing the date partitioning where {0} stands for year, {1} for month, {2} for day.
  // Lazy, so the configuration key is only looked up the first time the default pattern is needed.
  lazy val mappingTablePattern: String = conf.getString("conformance.mappingtable.pattern")

  /**
    * Returns a partitioned path given a path and a report date.
    * The partitioning pattern is determined by the configuration parameter
    * `conformance.mappingtable.pattern`.
    *
    * @param path       A base path to which the partition subpath is appended
    * @param reportDate A report date in 'yyyy-MM-dd' format
    * @return A full path to the partitioned data.
    * @throws IllegalArgumentException if the report date is null or does not match 'yyyy-MM-dd'
    */
  def getPartitionedPathName(path: String,
                             reportDate: String): String = {
    getPartitionedPathName(path, reportDate, mappingTablePattern)
  }

  /**
    * Returns a partitioned path given a path, a report date and a partitioning pattern.
    *
    * @param path                A base path to which the partition subpath is appended
    * @param reportDate          A report date in 'yyyy-MM-dd' format
    * @param partitioningPattern A pattern representing the date partitioning where {0} stands for year, {1} for month, {2} for day
    * @return A full path to the partitioned data.
    * @throws IllegalArgumentException if the report date is null or does not match 'yyyy-MM-dd'
    */
  def getPartitionedPathName(path: String,
                             reportDate: String,
                             partitioningPattern: String): String = {
    validateReportDate(reportDate)

    // The validation above guarantees exactly three dash-separated digit groups
    val Array(reportYear, reportMonth, reportDay) = reportDate.split("-")

    val subPath = getPartitionSubPath(reportYear, reportMonth, reportDay, partitioningPattern)

    // An empty subpath (a pattern with no content) means the data is not partitioned, so only the base path is used.
    // NOTE(review): presumably Hadoop's Path does not accept an empty child - confirm before merging the branches.
    val fullPath = if (subPath.isEmpty) new Path(path) else new Path(path, subPath)

    fullPath.toUri.toString
  }

  /**
    * Returns a partition subpath given a report date and partitioning pattern.
    *
    * The pattern is interpreted by `java.text.MessageFormat`, so its quoting rules apply
    * (for example, a single quote starts a quoted section).
    *
    * @param reportYear          A string representing the year in `yyyy` format
    * @param reportMonth         A string representing the month in `MM` format
    * @param reportDay           A string representing the day in `dd` format
    * @param partitioningPattern A pattern representing the date partitioning where {0} stands for year, {1} for month, {2} for day
    * @return A subpath as a string
    */
  def getPartitionSubPath(reportYear: String,
                          reportMonth: String,
                          reportDay: String,
                          partitioningPattern: String): String = {
    // The date parts are passed as strings on purpose: they are substituted verbatim (leading zeros kept),
    // whereas numeric arguments would go through locale-specific number formatting.
    MessageFormat.format(partitioningPattern, reportYear, reportMonth, reportDay)
  }

  /**
    * Validates the report date to be in the expected format: 'yyyy-MM-dd'.
    *
    * Only the shape of the string is checked (four digits, two digits, two digits);
    * the value is not checked to be an existing calendar date.
    *
    * @param reportDate A report date string
    * @throws IllegalArgumentException if the report date is null or does not match the expected format
    */
  def validateReportDate(reportDate: String): Unit = {
    // Option(...) maps null to None, so a null date gets the same descriptive error
    // as a malformed one instead of a NullPointerException from the matcher
    val matchesPattern = Option(reportDate).exists(date => ReportDateRegEx.pattern.matcher(date).matches())

    if (!matchesPattern) {
      throw new IllegalArgumentException(s"A report date '$reportDate' does not match expected pattern 'yyyy-MM-dd'")
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy