All Downloads are FREE. Search and download functionalities are using the official Maven repository.

za.co.absa.enceladus.migrations.framework.migration.JsonMigration.scala Maven / Gradle / Ivy

There is a newer version: 1.7.1
Show newest version
/*
 * Copyright 2018-2019 ABSA Group Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package za.co.absa.enceladus.migrations.framework.migration

import org.apache.log4j.{LogManager, Logger}
import za.co.absa.enceladus.migrations.framework.{MigrationUtils, ObjectIdTools}
import za.co.absa.enceladus.migrations.framework.dao.DocumentDb

import scala.collection.mutable

/**
  * A JsonMigration represents an entity that provides transformations for each document of every collection in a model
  * when switching from one version of the model to another.
  *
  * All transformations are JSON string in, JSON string out.
  *
  * Only one transformation is possible per collection.
  *
  * In order to create a JSON migration you need to extend from this trait and provide all the requited transformations:
  *
  * {{{
  *   object MigrationTo1 extends MigrationBase with JsonMigration {
  *
  *     transformJSON("collection1_name") (jsonIn => {
  *       val jsonOut = collection1Transformations(jsonIn)
  *       jsonOut
  *     })
  *
  *     transformJSON("collection2_name") (jsonIn => {
  *       val jsonOut = collection2Transformations(jsonIn)
  *       jsonOut
  *     })
  *   }
  * }}}
  *
  * If a source document is invalid for some reason and should be dropped, just return `InvalidDocument`.
  */
trait JsonMigration extends Migration {

  // A document transformer is a function that maps a JSON string to a JSON string
  type DocumentTransformer = String => String

  private val log: Logger = LogManager.getLogger(this.getClass)
  val InvalidDocument = ""

  private val transformers = new mutable.HashMap[String, DocumentTransformer]()

  /**
    * This function is used by derived classes to add transformations for affected collections.
    * This is used for complex migrations that requite complex model version maps.
    *
    * @param collectionName A collection name to be migrated
    * @param f              A transformation to applied to each document of the collection
    */
  def transformJSON(collectionName: String)(f: String => String): Unit = {
    if (transformers.contains(collectionName)) {
      throw new IllegalArgumentException(s"A transformer for '$collectionName' has already been added.")
    }
    transformers.put(collectionName, f)
  }

  /**
    * Gets a JSON transformer for the specified collection if applicable
    *
    * @param collectionName A collection name to be migrated
    * @return A function that takes a JSON string and returns a transformed JSON string
    */
  def getTransformer(collectionName: String): Option[DocumentTransformer] = transformers.get(collectionName)

  /**
    * Executes a migration on a given database and a list of collection names.
    */
  abstract override def execute(db: DocumentDb, collectionNames: Seq[String]): Unit = {
    super.execute(db, collectionNames)
    collectionNames.foreach(collection =>
      if (transformers.contains(collection)) {
        applyTransformers(db, collection)
      }
    )
  }

  /**
    * Validate the possibility of running a migration given a list of collection names.
    */
  abstract override def validate(collectionNames: Seq[String]): Unit = {
    super.validate(collectionNames)
    transformers.foreach {
      case (collectionToMigrate, _) => if (!collectionNames.contains(collectionToMigrate)) {
        throw new IllegalStateException(
          s"Attempt to apply a transform to a collection that does not exist: $collectionToMigrate.")
      }
    }
  }

  /**
    * Validate preconditions for this type of migration to run.
    */
  override protected def validateMigration(): Unit = {
    if (targetVersion <= 0) {
      throw new IllegalStateException("The target version of a JsonMigration should be greater than 0.")
    }
  }

  /**
    * Applies a transformer for each document of the collection to produce a migrated collection.
    */
  private def applyTransformers(db: DocumentDb, collectionName: String): Unit = {
    val sourceCollection = MigrationUtils.getVersionedCollectionName(collectionName, targetVersion - 1)
    val targetCollection = MigrationUtils.getVersionedCollectionName(collectionName, targetVersion)
    ensureCollectionEmpty(db, targetCollection)

    log.info(s"Applying a per-document transformation $sourceCollection -> $targetCollection")

    val documents = db.getDocuments(sourceCollection)
    val transformer = transformers(collectionName)
    var invalidDocumentsCount = 0

    documents.foreach(doc => {
      val oid = ObjectIdTools.getObjectIdFromDocument(doc)
      val newDocument = ObjectIdTools.putObjectIdIfNotPresent(transformer(doc), oid)
      if (newDocument.nonEmpty) {
        db.insertDocument(targetCollection, newDocument)
      } else {
        invalidDocumentsCount += 1
        log.warn(s"Encountered a bogus document: $doc")
      }
    })
    logMigrationStatistics(db, sourceCollection, targetCollection, invalidDocumentsCount)
  }

  /**
    * Logs statistics about the number of migrated documents processed.
    */
  private def logMigrationStatistics(db: DocumentDb,
                                     sourceCollection: String,
                                     targetCollection: String,
                                     invalidDocumentsCount: Int): Unit = {
    val sourceDocsCnt = db.getDocumentsCount(sourceCollection)
    val targetDocsCnt = db.getDocumentsCount(targetCollection)
    if (invalidDocumentsCount == 0) {
      log.info(s"Migration '$sourceCollection' -> '$targetCollection' completed successfully. " +
        s"Total documents migrated: $targetDocsCnt.")
    } else if (invalidDocumentsCount == sourceDocsCnt) {
      log.error(s"Migration '$sourceCollection' -> '$targetCollection' failed. " +
        s"Unable to convert any of $sourceDocsCnt documents.")
    } else {
      log.warn(s"Migration '$sourceCollection' -> '$targetCollection' completed with errors. $targetDocsCnt out of " +
        s"$sourceDocsCnt were migrated. The number of invalid documents: $invalidDocumentsCount.")
    }
  }

  /**
    * Ensures a collection is empty.
    */
  private def ensureCollectionEmpty(db: DocumentDb, collectionName: String): Unit = {
    if (db.doesCollectionExists(collectionName)) {
      db.emptyCollection(collectionName)
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy