/**
* DataCleaner (community edition)
* Copyright (C) 2014 Neopost - Customer Information Management
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution; if not, write to:
* Free Software Foundation, Inc.
* 51 Franklin Street, Fifth Floor
* Boston, MA 02110-1301 USA
*/
package org.datacleaner.spark.utils;

import java.net.URI;

import org.apache.metamodel.util.Resource;
import org.apache.spark.api.java.JavaSparkContext;
import org.datacleaner.spark.SparkJobContext;
import org.datacleaner.util.FileFilters;

public class ResultFilePathUtils {

    private static final String DEFAULT_RESULT_PATH = "/datacleaner/results";
    private static final String RESULT_FILE_EXTENSION = FileFilters.ANALYSIS_RESULT_SER.getExtension();

    /**
     * Gets the {@link Resource} to use for the job's result. The path can be
     * configured in the job properties file with the property
     * 'datacleaner.result.hdfs.path'. The path can be absolute
     * ('hdfs://[hostname]:[port]/myresults/myjob.analysis.result.dat') or
     * relative ('/myresults/myjob.analysis.result.dat'). If no path is set in
     * the properties file and the system is configured to save the results,
     * the result will be saved to the default path
     * '/datacleaner/results/[jobname]-[timestamp].analysis.result.dat'.
     *
     * @return a {@link Resource} to use
     */
    public static Resource getResultResource(final JavaSparkContext sparkContext,
            final SparkJobContext sparkJobContext) {
        final HdfsHelper hdfsHelper = new HdfsHelper(sparkContext);
        URI resultPath = sparkJobContext.getResultPath();
        if (resultPath == null) {
            // No path configured - fall back to the default directory with a generated filename.
            resultPath = URI.create(DEFAULT_RESULT_PATH + '/' + generateResultFilename(sparkJobContext));
        } else if (hdfsHelper.isDirectory(resultPath)) {
            // The configured path is a directory - append a generated filename,
            // inserting a separator unless the path already ends with one.
            if (resultPath.toString().endsWith("/")) {
                resultPath = URI.create(resultPath.toString() + generateResultFilename(sparkJobContext));
            } else {
                resultPath = URI.create(resultPath.toString() + '/' + generateResultFilename(sparkJobContext));
            }
        }
        // Otherwise the configured path points to a file and is used as-is.
        return hdfsHelper.getResourceToUse(resultPath);
    }
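
    // Illustration (hypothetical values): how the three cases above resolve,
    // assuming a job named 'myjob' and timestamp 1455121200000.
    //
    //   no path configured:
    //     -> /datacleaner/results/myjob-1455121200000.analysis.result.dat
    //   path is a directory, e.g. 'hdfs://namenode:9000/results':
    //     -> hdfs://namenode:9000/results/myjob-1455121200000.analysis.result.dat
    //   path is a file, e.g. 'hdfs://namenode:9000/out.analysis.result.dat':
    //     -> used as-is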
    private static String generateResultFilename(final SparkJobContext sparkJobContext) {
        return sparkJobContext.getJobName() + "-" + System.currentTimeMillis() + RESULT_FILE_EXTENSION;
    }
}
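
// A minimal standalone sketch (not part of the original class) demonstrating
// the URI-joining behavior implemented in getResultResource, using only
// java.net.URI. The job name and HDFS authority below are made up for
// illustration; no Spark or HDFS connection is needed to run it.
class ResultPathExample {

    public static void main(final String[] args) {
        // A filename in the form generateResultFilename produces:
        // [jobname]-[timestamp].analysis.result.dat
        final String filename = "myjob-" + System.currentTimeMillis() + ".analysis.result.dat";

        // Case 1: no path configured - the default result directory is used.
        System.out.println(URI.create("/datacleaner/results/" + filename));

        // Case 2: the configured path is a directory ending in '/' - the
        // filename is appended directly.
        System.out.println(URI.create("hdfs://namenode:9000/results/" + filename));

        // Case 3: the configured path is a directory without a trailing '/' -
        // a separator is inserted before the filename.
        System.out.println(URI.create("hdfs://namenode:9000/results" + '/' + filename));
    }
}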