
// com.thinkaurelius.titan.hadoop.config.job.AbstractDistCacheConfigurer (Maven / Gradle / Ivy)
package com.thinkaurelius.titan.hadoop.config.job;
import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;

import com.google.common.collect.ImmutableList;
import com.thinkaurelius.titan.diskstorage.configuration.ConfigOption;
import com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Abstract base class for {@link com.thinkaurelius.titan.hadoop.config.job.JobClasspathConfigurer}
 * implementations that use Hadoop's distributed cache to push class files to the cluster.
 *
 * <p>At construction time the JVM classpath ({@code java.class.path}) is scanned once. Every entry
 * ending in {@code .jar} or {@code .properties} is recorded as a local path, and the entry whose
 * name ends with the requested MapReduce jar filename is remembered separately.
 */
public abstract class AbstractDistCacheConfigurer {

//    public static enum FileCopyMode {
//
//        /**
//         * Copy a jar unless a file with the same name already exists in the staging
//         * directory of the Hadoop FileSystem.
//         */
//        FILENAME,
//
//        /**
//         * Copy a jar unless a file with the same name and same modtime already exists
//         * in the staging directory of the Hadoop FileSystem.
//         */
//        MODTIME,
//
//        /**
//         * Unconditionally copy all jars to the Hadoop FileSystem, even if they
//         * already exist at the destination and have up-to-date modtimes.
//         */
//        ALWAYS;
//    }
//
//    public static final ConfigOption SKIP_LOCAL_COPIES =
//            new ConfigOption(TitanHadoopConfiguration.JARCACHE_NS, "skip-local-copies",
//            "When this option is true and Hadoop is configured to use a LocalFileSystem as " +
//            "its default, Titan will not attempt to copy jars from the classpath to the " +
//            "LocalFileSystem (which is redundant when using the local JobRunner)", ConfigOption.Type.MASKABLE, true);

    /** Snapshot of the classpath scan performed in the constructor. */
    private final Conf conf;

    /** Name of the directory, under the destination filesystem's home directory, that receives uploads. */
    private static final String HDFS_TMP_LIB_DIR = "titanlib";

    private static final Logger log =
            LoggerFactory.getLogger(AbstractDistCacheConfigurer.class);

    /**
     * Scans the JVM classpath and records the jar/properties entries found there.
     *
     * @param mapredJarFilename filename suffix (compared case-insensitively) identifying the
     *                          MapReduce jar among the classpath entries; must not be null
     */
    public AbstractDistCacheConfigurer(String mapredJarFilename) {
        this.conf = configureByClasspath(mapredJarFilename);
    }

    /**
     * Returns the classpath entry that matched the MapReduce jar filename given to the constructor.
     *
     * @return the matching classpath entry, or {@code null} if no entry matched
     */
    public String getMapredJar() {
        return conf.mapredJar;
    }

    /**
     * Returns the {@code .jar} and {@code .properties} entries found on the classpath.
     *
     * @return an immutable list of the recorded paths, in classpath order
     */
    public ImmutableList<Path> getLocalPaths() {
        return conf.paths;
    }

    /**
     * Copies {@code localPath} into the {@value #HDFS_TMP_LIB_DIR} directory under the destination
     * filesystem's home directory, unless the destination already holds a file of the same name
     * with an identical modification time.
     *
     * <p>After a copy, the destination's modification time is set to the local file's so that a
     * later call can recognise the remote copy as current. If the files cannot be stat'ed, a
     * warning is logged and the copy is attempted anyway.
     *
     * @param localFS   filesystem holding {@code localPath}
     * @param localPath file to upload
     * @param destFS    filesystem to upload to
     * @return the path to use on {@code destFS}; this is {@code localPath} itself when
     *         {@code destFS} equals {@code localFS}
     * @throws IOException if the copy or the modification-time update fails
     */
    protected Path uploadFileIfNecessary(FileSystem localFS, Path localPath, FileSystem destFS) throws IOException {

        // Fast path for local FS -- DistributedCache + local JobRunner seems to copy/link files automatically
        if (destFS.equals(localFS)) {
            log.debug("Skipping file upload for {} (destination filesystem {} equals local filesystem)",
                    localPath, destFS);
            return localPath;
        }

        Path destPath = new Path(destFS.getHomeDirectory() + "/" + HDFS_TMP_LIB_DIR + "/" + localPath.getName());

        Stats fileStats;
        try {
            fileStats = compareModtimes(localFS, localPath, destFS, destPath);
        } catch (IOException e) {
            log.warn("Unable to read or stat file: localPath={}, destPath={}, destFS={}",
                    localPath, destPath, destFS, e);
            // Fall back to empty stats so the file is copied unconditionally below; leaving
            // fileStats null here would cause a NullPointerException on the next line.
            fileStats = new Stats();
        }

        if (!fileStats.isRemoteCopyCurrent()) {
            log.debug("Copying {} to {}", localPath, destPath);
            destFS.copyFromLocalFile(localPath, destPath);
            if (null != fileStats.local) {
                final long mtime = fileStats.local.getModificationTime();
                log.debug("Setting modtime on {} to {}", destPath, mtime);
                destFS.setTimes(destPath, mtime, -1); // -1 means leave atime alone
            }
        }

        return destPath;
    }

    /**
     * Stats the local file and, if it exists, the destination file, logging how their
     * modification times compare.
     *
     * @return the collected file statuses; {@code dest} is left null when the destination
     *         file does not exist
     * @throws IOException if either filesystem fails while checking or stat'ing a file
     */
    private Stats compareModtimes(FileSystem localFS, Path localPath, FileSystem destFS, Path destPath) throws IOException {
        Stats s = new Stats();
        s.local = localFS.getFileStatus(localPath);
        if (destFS.exists(destPath)) {
            s.dest = destFS.getFileStatus(destPath);
            if (null != s.dest && null != s.local) {
                long l = s.local.getModificationTime();
                long d = s.dest.getModificationTime();
                if (l == d) {
                    log.debug("File {} with modtime {} is up-to-date", destPath, d);
                } else if (l < d) {
                    log.warn("File {} has newer modtime ({}) than our local copy {} ({})", destPath, d, localPath, l);
                } else {
                    log.debug("Remote file {} exists but is out-of-date: local={} dest={}", destPath, l, d);
                }
            } else {
                log.debug("Unable to stat file(s): [LOCAL: path={} stat={}] [DEST: path={} stat={}]",
                        localPath, s.local, destPath, s.dest);
            }
        } else {
            log.debug("File {} does not exist", destPath);
        }
        return s;
    }

    /**
     * Builds a {@link Conf} from the {@code java.class.path} system property.
     *
     * <p>Entries ending in {@code .jar} or {@code .properties} (case-insensitive) are collected.
     * Among those, an entry ending with {@code mapredJarFilename} (case-insensitive) is recorded
     * as the MapReduce jar; if several entries match, the last one wins.
     */
    private static Conf configureByClasspath(String mapredJarFilename) {
        List<Path> paths = new LinkedList<Path>();
        final String classpath = System.getProperty("java.class.path");
        // Locale.ROOT keeps suffix matching independent of the JVM's default locale.
        final String mrj = mapredJarFilename.toLowerCase(Locale.ROOT);
        String mapredJarPath = null;
        for (String cpentry : classpath.split(File.pathSeparator)) {
            final String lowered = cpentry.toLowerCase(Locale.ROOT);
            if (lowered.endsWith(".jar") || lowered.endsWith(".properties")) {
                paths.add(new Path(cpentry));
                // Only an entry that actually matches the requested filename is the mapred jar.
                if (lowered.endsWith(mrj)) {
                    mapredJarPath = cpentry;
                }
            }
        }
        return new Conf(paths, mapredJarPath);
    }

    /** Immutable result of the classpath scan. */
    private static class Conf {

        /** Classpath entries ending in .jar or .properties. */
        private final ImmutableList<Path> paths;

        /** Classpath entry matching the MapReduce jar filename, or null if none matched. */
        private final String mapredJar;

        public Conf(List<Path> paths, String mapredJar) {
            this.paths = ImmutableList.copyOf(paths);
            this.mapredJar = mapredJar;
        }
    }

    /** Holder for the statuses of a local file and its counterpart on the destination filesystem. */
    private static class Stats {

        private FileStatus local;
        private FileStatus dest;

        /** Returns true only when both statuses are known and their modification times are equal. */
        private boolean isRemoteCopyCurrent() {
            return null != local && null != dest && dest.getModificationTime() == local.getModificationTime();
        }
    }

    // LocalFileSystem doesn't checksum, it just returns null, so this is useless
//    private boolean compareChecksums(FileSystem localFS, Path localPath, FileSystem destFS, Path destPath) throws IOException {
//        if (destFS.exists(destPath)) {
//            FileChecksum localCheck = localFS.getFileChecksum(localPath);
//            FileChecksum destCheck = destFS.getFileChecksum(destPath);
//            if (null != destCheck && null != localCheck) {
//                byte[] db = destCheck.getBytes();
//                byte[] lb = localCheck.getBytes();
//                if (null != db && null != lb && Arrays.equals(db, lb)) {
//                    if (log.isDebugEnabled())
//                        log.debug("Checksum {} for file {} is up-to-date", Arrays.toString(db), destPath);
//                    return true;
//                } else {
//                    log.debug("Checksum mismatch on file {}: local={} dest={}", destPath, lb, db);
//                }
//            } else {
//                log.debug("Unable to checksum files: localPath={} localCheck={}, destPath={} destCheck={}",
//                        localPath, localCheck, destPath, destCheck);
//            }
//        } else {
//            log.debug("File {} does not exist", destPath);
//        }
//        return false;
//    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy