com.nvidia.spark.rapids.tool.qualification.RunningQualOutputWriter.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rapids-4-spark-tools_2.12 Show documentation
Show all versions of rapids-4-spark-tools_2.12 Show documentation
RAPIDS Accelerator for Apache Spark tools
The newest version!
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.nvidia.spark.rapids.tool.qualification
import com.nvidia.spark.rapids.tool.ToolTextFileWriter
import com.nvidia.spark.rapids.tool.qualification.QualOutputWriter.TEXT_DELIMITER
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.Path
/**
* This class handles writing output to files for a running qualification app.
* Currently this only supports writing per sql output, not the entire application
* qualification, since doing an entire application may use a lot of memory if its
* long running.
*
* @param appId The id of the application
* @param appName The name of the application
* @param outputDir The directory to output the files to
* @param hadoopConf Optional Hadoop Configuration to use
* @param fileNameSuffix A suffix to add to the output per sql filenames
*/
class RunningQualOutputWriter(
appId: String,
appName: String,
outputDir: String,
hadoopConf: Option[Configuration] = None,
fileNameSuffix: String = "")
extends QualOutputWriter(outputDir, reportReadSchema=false, printStdout=false,
prettyPrintOrder = "desc", hadoopConf) {
private lazy val csvPerSQLFileWriter = new ToolTextFileWriter(outputDir,
s"${QualOutputWriter.LOGFILE_NAME}_persql_$fileNameSuffix.csv",
"Per SQL CSV Report", hadoopConf)
private lazy val textPerSQLFileWriter = new ToolTextFileWriter(outputDir,
s"${QualOutputWriter.LOGFILE_NAME}_persql_$fileNameSuffix.log",
"Per SQL Summary Report", hadoopConf)
// we don't know max length since process per query, hardcode for 100 for now
private val SQL_DESC_LENGTH = 100
private val appNameSize = if (appName.nonEmpty) appName.size else 100
val headersAndSizes = QualOutputWriter.getDetailedPerSqlHeaderStringsAndSizes(appNameSize,
appId.size, SQL_DESC_LENGTH)
val entireTextHeader = QualOutputWriter.constructOutputRowFromMap(headersAndSizes,
TEXT_DELIMITER, true)
private val sep = "=" * (entireTextHeader.size - 1)
def getOutputFileNames: Seq[Path] = {
Seq(csvPerSQLFileWriter.getFileOutputPath, textPerSQLFileWriter.getFileOutputPath)
}
def init(): Unit = {
csvPerSQLFileWriter.write(QualOutputWriter.constructDetailedHeader(headersAndSizes,
QualOutputWriter.CSV_DELIMITER, false))
textPerSQLFileWriter.write(s"$sep\n")
textPerSQLFileWriter.write(entireTextHeader)
textPerSQLFileWriter.write(s"$sep\n")
csvPerSQLFileWriter.flush()
textPerSQLFileWriter.flush()
}
def close(): Unit = {
csvPerSQLFileWriter.close()
textPerSQLFileWriter.write(s"$sep\n")
textPerSQLFileWriter.close()
}
def writePerSqlCSVReport(sqlInfo: String): Unit = {
csvPerSQLFileWriter.write(sqlInfo)
csvPerSQLFileWriter.flush()
}
def writePerSqlTextReport(sqlInfo: String): Unit = {
textPerSQLFileWriter.write(sqlInfo)
textPerSQLFileWriter.flush()
}
}