org.kitesdk.data.spi.hive.Loader
The Kite Data Hive module provides integration with Hive for Kite datasets.
/*
* Copyright 2013 Cloudera.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.data.spi.hive;
import org.kitesdk.compat.DynConstructors;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetOperationException;
import org.kitesdk.data.spi.DatasetRepository;
import org.kitesdk.data.spi.DefaultConfiguration;
import org.kitesdk.data.spi.Loadable;
import org.kitesdk.data.spi.OptionBuilder;
import org.kitesdk.data.spi.Registration;
import org.kitesdk.data.spi.URIPattern;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A Loader implementation to register hive: URIs for Hive-backed dataset
 * repositories.
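 * <p>
 * This loader is discovered through Kite's service-loader mechanism;
 * applications normally reach it indirectly through {@code Datasets}. An
 * illustrative usage (the namespace and table names are hypothetical):
 * <pre>{@code
 *   // managed dataset in the Hive warehouse
 *   Dataset<GenericRecord> managed = Datasets.load("dataset:hive:default/events");
 *   // external dataset rooted at a file-system path
 *   Dataset<GenericRecord> external = Datasets.load("dataset:hive:/data/warehouse/ns/events");
 * }</pre>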
*/
public class Loader implements Loadable {
private static final Logger LOG = LoggerFactory.getLogger(Loader.class);
public static final String HIVE_METASTORE_URI_PROP = "hive.metastore.uris";
private static final int UNSPECIFIED_PORT = -1;
private static final String NOT_SET = "not-set";
private static final String HDFS_HOST = "hdfs:host";
private static final String HDFS_PORT = "hdfs:port";
private static final String OLD_HDFS_HOST = "hdfs-host";
private static final String OLD_HDFS_PORT = "hdfs-port";
  private static DynConstructors.Ctor<Configuration> HIVE_CONF;
/**
   * This class builds configured instances of
   * {@code HiveManagedDatasetRepository} for external datasets from a Map of
   * options. This is used by the URI system.
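   * <p>
   * For example (values are illustrative), a match of
   * {@code path=data/warehouse, absolute=true} yields a repository rooted at
   * {@code /data/warehouse} on the matched or default file system.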
*/
  private static class ExternalBuilder implements OptionBuilder<DatasetRepository> {
@Override
    public DatasetRepository getFromOptions(Map<String, String> match) {
LOG.debug("External URI options: {}", match);
final Path root;
String path = match.get("path");
if (match.containsKey("absolute")
&& Boolean.valueOf(match.get("absolute"))) {
root = (path == null || path.isEmpty()) ? new Path("/") : new Path("/", path);
} else {
root = (path == null || path.isEmpty()) ? new Path(".") : new Path(path);
}
// make a modifiable copy (it may be changed)
Configuration conf = newHiveConf(DefaultConfiguration.get());
FileSystem fs;
try {
fs = FileSystem.get(fileSystemURI(match, conf), conf);
} catch (IOException e) {
// "Incomplete HDFS URI, no host" => add a helpful suggestion
if (e.getMessage().startsWith("Incomplete")) {
throw new DatasetIOException("Could not get a FileSystem: " +
"make sure the default " + match.get(URIPattern.SCHEME) +
" URI is configured.", e);
}
throw new DatasetIOException("Could not get a FileSystem", e);
}
// setup the MetaStore URI
setMetaStoreURI(conf, match);
return new HiveManagedDatasetRepository.Builder()
.configuration(conf)
.rootDirectory(fs.makeQualified(root))
.build();
}
}
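  /**
   * This class builds configured instances of
   * {@code HiveManagedDatasetRepository} backed by the Hive warehouse, for
   * managed URIs such as {@code hive://metastore-host:9083/ns/table} (the
   * host and port are illustrative).
   */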
  private static class ManagedBuilder implements OptionBuilder<DatasetRepository> {
@Override
    public DatasetRepository getFromOptions(Map<String, String> match) {
LOG.debug("Managed URI options: {}", match);
// make a modifiable copy and setup the MetaStore URI
Configuration conf = newHiveConf(DefaultConfiguration.get());
// sanity check the URI
setMetaStoreURI(conf, match);
return new HiveManagedDatasetRepository.Builder()
.configuration(conf)
.build();
}
}
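  /**
   * Registers managed and external {@code hive:} URI patterns. Broadly,
   * opaque and authority-only URIs resolve to managed (warehouse) datasets,
   * while URIs that carry a storage path resolve to external datasets; see
   * the inline comments on each group below.
   */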
@Override
public void load() {
checkHiveDependencies();
    OptionBuilder<DatasetRepository> managedBuilder = new ManagedBuilder();
    OptionBuilder<DatasetRepository> externalBuilder = new ExternalBuilder();
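    // Opaque managed URIs such as hive:ns/table, hive:table, or bare hive;
    // the namespace defaults to "default" when omitted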
Registration.register(
new URIPattern("hive"),
new URIPattern("hive::namespace/:dataset"),
managedBuilder);
Registration.register(
new URIPattern("hive"),
new URIPattern("hive::dataset?namespace=default"),
managedBuilder);
Registration.register(
new URIPattern("hive"),
new URIPattern("hive?namespace=default"),
managedBuilder);
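    // Managed URIs with an absolute path but no authority, such as
    // hive:/ns/table; the "not-set" placeholder stands in for the host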
Registration.register(
new URIPattern("hive://" + NOT_SET),
new URIPattern("hive:/:namespace/:dataset"),
managedBuilder);
Registration.register(
new URIPattern("hive://" + NOT_SET),
new URIPattern("hive:/:dataset?namespace=default"),
managedBuilder);
Registration.register(
new URIPattern("hive://" + NOT_SET),
new URIPattern("hive://" + NOT_SET + "?namespace=default"),
managedBuilder);
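    // External URIs that carry a storage path, such as
    // hive:/data/root/ns/table; the leading path segments become the
    // repository root built by ExternalBuilder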
Registration.register(
new URIPattern("hive:/*path?absolute=true"),
new URIPattern("hive:/*path/:namespace/:dataset?absolute=true"),
externalBuilder);
Registration.register(
new URIPattern("hive:*path"),
new URIPattern("hive:*path/:namespace/:dataset"),
externalBuilder);
}
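  /**
   * Creates a new {@code HiveConf} reflectively (so this module does not
   * require a compile-time Hive dependency) from the given base
   * Configuration.
   */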
private static Configuration newHiveConf(Configuration base) {
checkHiveDependencies(); // ensure HIVE_CONF is present
Configuration conf = HIVE_CONF.newInstance(base, HIVE_CONF.getConstructedClass());
// Add everything in base back in to work around a bug in HiveConf
HiveUtils.addResource(conf, base);
return conf;
}
  private static synchronized void checkHiveDependencies() {
if (Loader.HIVE_CONF == null) {
// check that Hive is available by resolving the HiveConf constructor
// this is also needed by newHiveConf(Configuration)
Loader.HIVE_CONF = new DynConstructors.Builder()
.impl("org.apache.hadoop.hive.conf.HiveConf", Configuration.class, Class.class)
.build();
}
}
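  /**
   * Builds the root file-system URI from the match results. For example
   * (values illustrative), a match containing {@code hdfs:host=namenode} and
   * {@code hdfs:port=8020} yields {@code hdfs://namenode:8020/}; when no host
   * is present, the default file system's scheme from the Configuration is
   * used instead.
   */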
  private static URI fileSystemURI(Map<String, String> match, Configuration conf) {
final String userInfo;
if (match.containsKey(URIPattern.USERNAME)) {
if (match.containsKey(URIPattern.PASSWORD)) {
userInfo = match.get(URIPattern.USERNAME) + ":" +
match.get(URIPattern.PASSWORD);
} else {
userInfo = match.get(URIPattern.USERNAME);
}
} else {
userInfo = null;
}
try {
if (match.containsKey(HDFS_HOST) || match.containsKey(OLD_HDFS_HOST)) {
int port = UNSPECIFIED_PORT;
if (match.containsKey(HDFS_PORT) || match.containsKey(OLD_HDFS_PORT)) {
try {
port = Integer.parseInt(first(match, HDFS_PORT, OLD_HDFS_PORT));
} catch (NumberFormatException e) {
port = UNSPECIFIED_PORT;
}
}
return new URI("hdfs", userInfo, first(match, HDFS_HOST, OLD_HDFS_HOST),
port, "/", null, null);
} else {
String defaultScheme;
try {
defaultScheme = FileSystem.get(conf).getUri().getScheme();
} catch (IOException e) {
throw new DatasetIOException("Cannot determine the default FS", e);
}
return new URI(defaultScheme, userInfo, "", UNSPECIFIED_PORT, "/", null, null);
}
} catch (URISyntaxException ex) {
throw new DatasetOperationException("Could not build FS URI", ex);
}
}
/**
* Sets the MetaStore URI in the given Configuration, if there is a host in
* the match arguments. If there is no host, then the conf is not changed.
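   * For example (the host is illustrative), a match for
   * {@code hive://meta.example.com:9083/ns/table} sets
   * {@code hive.metastore.uris} to {@code thrift://meta.example.com:9083}.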
*
* @param conf a Configuration that will be used to connect to the MetaStore
* @param match URIPattern match results
*/
  private static void setMetaStoreURI(
      Configuration conf, Map<String, String> match) {
try {
// If the host is set, construct a new MetaStore URI and set the property
// in the Configuration. Otherwise, do not change the MetaStore URI.
String host = match.get(URIPattern.HOST);
if (host != null && !NOT_SET.equals(host)) {
int port;
try {
port = Integer.parseInt(match.get(URIPattern.PORT));
} catch (NumberFormatException e) {
port = UNSPECIFIED_PORT;
}
conf.set(HIVE_METASTORE_URI_PROP,
new URI("thrift", null, host, port, null, null, null).toString());
}
} catch (URISyntaxException ex) {
throw new DatasetOperationException(
"Could not build metastore URI", ex);
}
}
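  /**
   * Returns the value of the first of the given keys that is present in the
   * map, or null if none match.
   */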
  private static String first(Map<String, String> data, String... keys) {
for (String key : keys) {
if (data.containsKey(key)) {
return data.get(key);
}
}
return null;
}
}