
org.finra.msd.visualization.Visualizer.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mega-spark-diff Show documentation
Show all versions of mega-spark-diff Show documentation
A diff analysis tool that compares data sets of various types at scale and can be executed in the cloud or
locally
The newest version!
/*
* Copyright 2017 MegaSparkDiff Contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.finra.msd.visualization
import org.apache.spark.sql._
import org.apache.spark.sql.functions._
import org.finra.msd.controllers.TemplateController
import org.finra.msd.customExceptions._
import org.finra.msd.enums.VisualResultType
import org.finra.msd.sparkfactory.SparkFactory
object Visualizer {
// Value read from SparkFactory.sparkSession once, when the Visualizer object is initialised.
// NOTE(review): presumably the application's SparkSession; it is not referenced in the
// visible part of this file - confirm its type and that SparkFactory is set up before first use.
val ss = SparkFactory.sparkSession
/**
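* Generates a template string that can be passed to displayHTML() in Databricks.
*
* @param left the left DataFrame; must not be null
* @param right the right DataFrame; must not be null
* @param compositeKeyStrs a non-empty sequence of strings used as keys to do the full outer join. Case does not matter.
* @param maxRecords record limit passed on to generateHeadersRows; a value that is not greater than 0 is replaced by 1000
* @return a template string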
*/
def generateVisualizerTemplate(left: DataFrame, right: DataFrame, compositeKeyStrs: Seq[String],
maxRecords: Integer = 1000): String = {
var visualizerTemplate: String = "really?"
var maxRecordsCopy: Integer = 1000;
try {
require(left != null, throw new DataFrameNullException("Left dataframe is null"));
require(right != null, throw new DataFrameNullException("Right dataframe is null"));
require(compositeKeyStrs != null && !compositeKeyStrs.isEmpty, throw new JoinKeysNullException(
"Please specify primary/composite key"));
require(isValidKey(compositeKeyStrs), throw new InValidKeyException("One or more keys is empty or null"))
//handle invalid maxRecords: values that are not greater than 0 keep the fallback of 1000
if (maxRecords > 0) {
maxRecordsCopy = maxRecords;
}
//if both dataframes are empty, then no need to do full outer join
if (left.count() == 0 && right.count() == 0) {
visualizerTemplate = "No mismatches are found";
} else {
val headersRows: (Seq[String], Seq[Seq[String]], VisualResultType) =
generateHeadersRows(left, right, compositeKeyStrs, maxRecordsCopy);
val headers: Seq[String] = headersRows._1;
val rows: Seq[Seq[String]] = headersRows._2;
val visualResultType: VisualResultType = headersRows._3;
visualizerTemplate =
s"""
${headers.map(header => s"${header} ").mkString}
${
if (visualResultType == VisualResultType.LEFT) {
rows.map(row =>
s"${
row.map(cell =>
"" +
s"${cell}" +
" "
).mkString
} "
).mkString
} else if (visualResultType == VisualResultType.RIGHT) {
rows.map(row =>
s"${
row.map(cell =>
"" +
s"${cell}" +
" "
).mkString
} "
).mkString
} else {
rows.map(row =>
s"${
row.map(cell => {
if (cell.contains("<==>")) {
val leftRightVals: Array[String] = cell.split("<==>")
"" +
s"${if (cell.startsWith("<==>")) "(empty)" else leftRightVals(0)}" +
s"${if (cell.endsWith("<==>")) "(empty)" else leftRightVals(1)}" +
" "
} else {
s"${cell} "
}
}).mkString
} "
).mkString
}
}