All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.thinkaurelius.titan.hadoop.config.job.AbstractDistCacheConfigurer Maven / Gradle / Ivy

package com.thinkaurelius.titan.hadoop.config.job;

import java.io.File;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;

import com.thinkaurelius.titan.diskstorage.configuration.ConfigOption;
import com.thinkaurelius.titan.hadoop.config.TitanHadoopConfiguration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import com.google.common.collect.ImmutableList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Abstract base class for {@link com.thinkaurelius.titan.hadoop.config.job.JobClasspathConfigurer}
 * implementations that use Hadoop's distributed cache to push classfiles to the cluster.
 * <p>
 * On construction the JVM classpath (system property {@code java.class.path}) is scanned
 * once; every entry ending in {@code .jar} or {@code .properties} is recorded, and the
 * entry whose name ends with the requested map-reduce jar filename is remembered
 * separately. Subclasses can then stage those files on a destination filesystem with
 * {@link #uploadFileIfNecessary(FileSystem, Path, FileSystem)}.
 */
public abstract class AbstractDistCacheConfigurer {

//    public static enum FileCopyMode {
//
//        /**
//         * Copy a jar unless a file with the same name already exists in the staging
//         * directory of the Hadoop FileSystem.
//         */
//        FILENAME,
//
//        /**
//         * Copy a jar unless a file with the same name and same modtime already exists
//         * in the staging directory of the Hadoop FileSystem.
//         */
//        MODTIME,
//
//        /**
//         * Unconditionally copy all jars to the Hadoop FileSystem, even if they
//         * already exist at the destination and have up-to-date modtimes.
//         */
//        ALWAYS;
//    }
//
//    public static final ConfigOption SKIP_LOCAL_COPIES =
//            new ConfigOption(TitanHadoopConfiguration.JARCACHE_NS, "skip-local-copies",
//            "When this option is true and Hadoop is configured to use a LocalFileSystem as " +
//            "its default, Titan will not attempt to copy jars from the classpath to the " +
//            "LocalFileSystem (which is redundant when using the local JobRunner)", ConfigOption.Type.MASKABLE, true);

    /** Immutable snapshot of the classpath scan performed at construction time. */
    private final Conf conf;

    /** Name of the staging directory created under the destination filesystem's home directory. */
    private static final String HDFS_TMP_LIB_DIR = "titanlib";

    private static final Logger log =
            LoggerFactory.getLogger(AbstractDistCacheConfigurer.class);

    /**
     * Scans the JVM classpath and records its jar/properties entries.
     *
     * @param mapredJarFilename filename (or path suffix) identifying the map-reduce jar on
     *                          the classpath; matched case-insensitively against the end of
     *                          each classpath entry
     * @throws NullPointerException if {@code mapredJarFilename} is null
     */
    public AbstractDistCacheConfigurer(String mapredJarFilename) {
        this.conf = configureByClasspath(mapredJarFilename);
    }

    /**
     * @return the classpath entry that matched the map-reduce jar filename given to the
     *         constructor, or {@code null} if no classpath entry matched
     */
    public String getMapredJar() {
        return conf.mapredJar;
    }

    /**
     * @return the classpath entries ending in {@code .jar} or {@code .properties}, in
     *         classpath order, as Hadoop {@link Path}s
     */
    public ImmutableList<Path> getLocalPaths() {
        return conf.paths;
    }

    /**
     * Ensures that {@code localPath} is available on {@code destFS}, copying it only when
     * the destination copy is missing or its modification time differs from the local file.
     * <p>
     * Files are staged at {@code <destFS home directory>/titanlib/<file name>}. After a copy,
     * the destination's modification time is set to the local file's modification time so
     * that the next invocation can recognise the copy as current.
     *
     * @param localFS   filesystem holding {@code localPath}
     * @param localPath file to upload
     * @param destFS    filesystem to upload to
     * @return {@code localPath} unchanged when {@code destFS} equals {@code localFS};
     *         otherwise the staged path on {@code destFS}
     * @throws IOException if copying the file or updating its modification time fails
     */
    protected Path uploadFileIfNecessary(FileSystem localFS, Path localPath, FileSystem destFS) throws IOException {

        // Fast path for local FS -- DistributedCache + local JobRunner seems copy/link files automatically
        if (destFS.equals(localFS)) {
            log.debug("Skipping file upload for {} (destination filesystem {} equals local filesystem)",
                    localPath, destFS);
            return localPath;
        }

        Path destPath = new Path(destFS.getHomeDirectory() + "/" + HDFS_TMP_LIB_DIR + "/" + localPath.getName());

        Stats fileStats;

        try {
            fileStats = compareModtimes(localFS, localPath, destFS, destPath);
        } catch (IOException e) {
            log.warn("Unable to read or stat file: localPath={}, destPath={}, destFS={}",
                    localPath, destPath, destFS, e);
            // Stat failed, so nothing is known about either side. An empty Stats reports the
            // remote copy as not current, which makes us attempt the copy below (surfacing any
            // real I/O problem as an IOException) and skip the modtime update.
            fileStats = new Stats();
        }

        if (!fileStats.isRemoteCopyCurrent()) {
            log.debug("Copying {} to {}", localPath, destPath);
            destFS.copyFromLocalFile(localPath, destPath);
            if (null != fileStats.local) {
                final long mtime = fileStats.local.getModificationTime();
                log.debug("Setting modtime on {} to {}", destPath, mtime);
                destFS.setTimes(destPath, mtime, -1); // -1 means leave atime alone
            }
        }

        return destPath;
    }

    /**
     * Stats the local file and, if it exists, the destination file, logging how their
     * modification times relate. The decision whether to copy is left to the caller via
     * {@link Stats#isRemoteCopyCurrent()}.
     *
     * @return a {@link Stats} whose {@code dest} is null when the destination does not exist
     * @throws IOException if either filesystem fails to stat its file
     */
    private Stats compareModtimes(FileSystem localFS, Path localPath, FileSystem destFS, Path destPath) throws IOException {
        Stats s = new Stats();
        s.local = localFS.getFileStatus(localPath);
        if (destFS.exists(destPath)) {
            s.dest = destFS.getFileStatus(destPath);
            if (null != s.dest && null != s.local) {
                long l = s.local.getModificationTime();
                long d = s.dest.getModificationTime();
                if (l == d) {
                    log.debug("File {} with modtime {} is up-to-date", destPath, d);
                } else if (l < d) {
                    log.warn("File {} has newer modtime ({}) than our local copy {} ({})", destPath, d, localPath, l);
                } else {
                    log.debug("Remote file {} exists but is out-of-date: local={} dest={}", destPath, l, d);
                }
            } else {
                log.debug("Unable to stat file(s): [LOCAL: path={} stat={}] [DEST: path={} stat={}]",
                        localPath, s.local, destPath, s.dest);
            }
        } else {
            log.debug("File {} does not exist", destPath);
        }
        return s;
    }

    /**
     * Builds a {@link Conf} from the {@code java.class.path} system property.
     * <p>
     * Every entry ending (case-insensitively) in {@code .jar} or {@code .properties} is
     * collected. Among those, an entry that also ends with {@code mapredJarFilename} is
     * recorded as the map-reduce jar; if several entries match, the last one wins.
     *
     * @param mapredJarFilename suffix identifying the map-reduce jar; must not be null
     * @return the collected paths and the matching map-reduce jar entry (null if none matched)
     */
    private static Conf configureByClasspath(String mapredJarFilename) {
        if (null == mapredJarFilename) {
            throw new NullPointerException("mapredJarFilename must not be null");
        }
        List<Path> paths = new LinkedList<Path>();
        final String classpath = System.getProperty("java.class.path");
        final String mrj = mapredJarFilename.toLowerCase();
        String mapredJarPath = null;
        for (String cpentry : classpath.split(File.pathSeparator)) {
            final String lowerEntry = cpentry.toLowerCase();
            if (lowerEntry.endsWith(".jar") || lowerEntry.endsWith(".properties")) {
                paths.add(new Path(cpentry));
                // Only remember entries that actually match the requested jar name.
                if (lowerEntry.endsWith(mrj)) {
                    mapredJarPath = cpentry;
                }
            }
        }
        return new Conf(paths, mapredJarPath);
    }

    /** Immutable result of the classpath scan. */
    private static class Conf {

        /** Jar and properties entries found on the classpath. */
        private final ImmutableList<Path> paths;
        /** Classpath entry matching the requested map-reduce jar, or null if none matched. */
        private final String mapredJar;

        public Conf(List<Path> paths, String mapredJar) {
            this.paths = ImmutableList.copyOf(paths);
            this.mapredJar = mapredJar;
        }
    }


    /** File status of the local file and its destination counterpart; either may be null. */
    private static class Stats {
        private FileStatus local;
        private FileStatus dest;

        /**
         * @return true only when both files were stat'ed and their modification times are equal
         */
        private boolean isRemoteCopyCurrent() {
            return null != local && null != dest && dest.getModificationTime() == local.getModificationTime();
        }
    }

    // LocalFileSystem doesn't checksum, it just returns null, so this is useless
//    private boolean compareChecksums(FileSystem localFS, Path localPath, FileSystem destFS, Path destPath) throws IOException {
//        if (destFS.exists(destPath)) {
//            FileChecksum localCheck = localFS.getFileChecksum(localPath);
//            FileChecksum destCheck = destFS.getFileChecksum(destPath);
//            if (null != destCheck && null != localCheck) {
//                byte[] db = destCheck.getBytes();
//                byte[] lb = localCheck.getBytes();
//                if (null != db && null != lb && Arrays.equals(db, lb)) {
//                    if (log.isDebugEnabled())
//                        log.debug("Checksum {} for file {} is up-to-date", Arrays.toString(db), destPath);
//                    return true;
//                } else {
//                    log.debug("Checksum mismatch on file {}: local={} dest={}", destPath, lb, db);
//                }
//            } else {
//                log.debug("Unable to checksum files: localPath={} localCheck={}, destPath={} destCheck={}",
//                        localPath, localCheck, destPath, destCheck);
//            }
//        } else {
//            log.debug("File {} does not exist", destPath);
//        }
//        return false;
//    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy