
package com.splout.db.common;
/*
* #%L
* Splout SQL Hadoop library
* %%
* Copyright (C) 2012 Datasalt Systems S.L.
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
/**
* Configuration helper class.
*/
public class SploutHadoopConfiguration {
  static Log log = LogFactory.getLog(SploutHadoopConfiguration.class);
  /**
   * Adds the SQLite native libraries to the DistributedCache so that they will be present in the java.library.path
   * of the child Hadoop tasks.
   *
   * Usually you don't need to do this, as the task will already try to load them from the job's uncompressed JAR.
   * However, not all Hadoop versions are guaranteed to uncompress the JAR, so it is safer to use this method in that case.
   *
   * This method uses the default "native" folder.
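   *
   * A minimal usage sketch (assumes the default "native" folder exists in the current working directory):
   *
   * <pre>
   * {@code
   * Configuration conf = new Configuration();
   * SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf);
   * // ... then submit the Hadoop job with this Configuration
   * }
   * </pre>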
   */
  public static void addSQLite4JavaNativeLibsToDC(Configuration conf) throws IOException, URISyntaxException {
    addSQLite4JavaNativeLibsToDC(conf, new File("native"));
  }
  /**
   * Adds the SQLite native libraries to the DistributedCache so that they will be present in the java.library.path
   * of the child Hadoop tasks.
   *
   * Usually you don't need to do this, as the task will already try to load them from the job's uncompressed JAR.
   * However, not all Hadoop versions are guaranteed to uncompress the JAR, so it is safer to use this method in that case.
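   *
   * A minimal usage sketch with a custom native libraries folder (the path below is hypothetical):
   *
   * <pre>
   * {@code
   * Configuration conf = new Configuration();
   * SploutHadoopConfiguration.addSQLite4JavaNativeLibsToDC(conf, new File("/opt/splout/native"));
   * }
   * </pre>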
   */
  public static void addSQLite4JavaNativeLibsToDC(Configuration conf, File nativeLibsLocalPath) throws IOException, URISyntaxException {
    Path nativeLibHdfs = new Path("splout-native");
    FileSystem fS = FileSystem.get(conf);
    // Start from a clean remote folder
    if (fS.exists(nativeLibHdfs)) {
      fS.delete(nativeLibHdfs, true);
    }
    fS.mkdirs(nativeLibHdfs);
    // Copy native libs to HDFS
    File[] natives = nativeLibsLocalPath.listFiles();
    if (natives == null) {
      throw new RuntimeException("Native libs folder not present in the local working directory! Are you in SPLOUT_HOME?");
    }
    for (File nativeLib : natives) {
      FileUtil.copy(nativeLib, fS, nativeLibHdfs, false, conf);
    }
    for (FileStatus nativeLibInHdfs : fS.listStatus(nativeLibHdfs)) {
      // http://hadoop.apache.org/docs/r0.20.2/native_libraries.html#Loading+native+libraries+through+DistributedCache
      DistributedCache.createSymlink(conf);
      // The URI fragment ("#name") makes Hadoop symlink the cached file into the task's working directory
      URI uriToAdd = new URI(nativeLibInHdfs.getPath().makeQualified(fS) + "#" + nativeLibInHdfs.getPath().getName());
      DistributedCache.addCacheFile(uriToAdd, conf);
      log.info("Adding to distributed cache: " + uriToAdd);
    }
  }
}