
package com.splout.db.common;
/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
/**
* Configuration helper class.
*/
public class SploutHadoopConfiguration {
  static Log log = LogFactory.getLog(SploutHadoopConfiguration.class);
  /**
   * Adds the SQLite native libraries to the DistributedCache so that they will be present in the java.library.path
   * of the child Hadoop tasks.
   *
   * Usually you don't need to do this, as the task will already try to load them from the job's uncompressed JAR.
   * However, not all Hadoop versions are guaranteed to uncompress the JAR, so it is safer to use this method in that case.
   *
   * This method uses the default "native" folder.
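   *
   * A minimal usage sketch (assumes the default "native" folder exists in the current working directory):
   *
   * <pre>
   * {@code
   * Configuration conf = new Configuration();
   * SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
   * // ... then submit the Hadoop job with this Configuration
   * }
   * </pre>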
   */
  public static void addSQLite4JavaNativeLibsToDC(Configuration conf) throws IOException, URISyntaxException {
    addSQLite4JavaNativeLibsToDC(conf, new File("native"));
  }
  /**
   * Adds the SQLite native libraries to the DistributedCache so that they will be present in the java.library.path
   * of the child Hadoop tasks.
   *
   * Usually you don't need to do this, as the task will already try to load them from the job's uncompressed JAR.
   * However, not all Hadoop versions are guaranteed to uncompress the JAR, so it is safer to use this method in that case.
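   *
   * A minimal usage sketch with a custom native libraries folder (the path below is hypothetical):
   *
   * <pre>
   * {@code
   * Configuration conf = new Configuration();
   * SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf, new File("/opt/splout/native"));
   * }
   * </pre>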
   */
  public static void addSQLite4JavaNativeLibsToDC(Configuration conf, File nativeLibsLocalPath) throws IOException, URISyntaxException {
    Path nativeLibHdfs = new Path("splout-native");
    FileSystem fS = FileSystem.get(conf);
    // Start from a clean remote folder
    if (fS.exists(nativeLibHdfs)) {
      fS.delete(nativeLibHdfs, true);
    }
    fS.mkdirs(nativeLibHdfs);
    // Copy native libs to HDFS
    File[] natives = nativeLibsLocalPath.listFiles();
    if (natives == null) {
      throw new RuntimeException("Native libs folder not present in the local working directory! Are you in SPLOUT_HOME?");
    }
    for (File nativeLib : natives) {
      FileUtil.copy(nativeLib, fS, nativeLibHdfs, false, conf);
    }
    for (FileStatus nativeLibInHdfs : fS.listStatus(nativeLibHdfs)) {
      // http://hadoop.apache.org/docs/r0.20.2/native_libraries.html#Loading+native+libraries+through+DistributedCache
      DistributedCache.createSymlink(conf);
      // The URI fragment ("#name") makes Hadoop symlink the cached file into the task's working directory
      URI uriToAdd = new URI(nativeLibInHdfs.getPath().makeQualified(fS) + "#" + nativeLibInHdfs.getPath().getName());
      DistributedCache.addCacheFile(uriToAdd, conf);
      log.info("Adding to distributed cache: " + uriToAdd);
    }
  }
}