All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.kitesdk.data.spi.hive.Loader Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2013 Cloudera.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.kitesdk.data.spi.hive;

import org.kitesdk.compat.DynConstructors;
import org.kitesdk.data.DatasetIOException;
import org.kitesdk.data.DatasetOperationException;
import org.kitesdk.data.spi.DatasetRepository;
import org.kitesdk.data.spi.DefaultConfiguration;
import org.kitesdk.data.spi.Loadable;
import org.kitesdk.data.spi.OptionBuilder;
import org.kitesdk.data.spi.Registration;
import org.kitesdk.data.spi.URIPattern;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A Loader implementation to register URIs for FileSystemDatasetRepositories.
 */
public class Loader implements Loadable {

  private static final Logger LOG = LoggerFactory.getLogger(Loader.class);

  public static final String HIVE_METASTORE_URI_PROP = "hive.metastore.uris";
  private static final int UNSPECIFIED_PORT = -1;
  private static final String NOT_SET = "not-set";
  private static final String HDFS_HOST = "hdfs:host";
  private static final String HDFS_PORT = "hdfs:port";
  private static final String OLD_HDFS_HOST = "hdfs-host";
  private static final String OLD_HDFS_PORT = "hdfs-port";

  private static DynConstructors.Ctor HIVE_CONF;

  /**
   * This class builds configured instances of
   * {@code FileSystemDatasetRepository} from a Map of options. This is for the
   * URI system.
   */
  private static class ExternalBuilder implements OptionBuilder {

    @Override
    public DatasetRepository getFromOptions(Map match) {
      LOG.debug("External URI options: {}", match);
      final Path root;
      String path = match.get("path");
      if (match.containsKey("absolute")
          && Boolean.valueOf(match.get("absolute"))) {
        root = (path == null || path.isEmpty()) ? new Path("/") : new Path("/", path);
      } else {
        root = (path == null || path.isEmpty()) ? new Path(".") : new Path(path);
      }

      // make a modifiable copy (it may be changed)
      Configuration conf = newHiveConf(DefaultConfiguration.get());
      FileSystem fs;
      try {
        fs = FileSystem.get(fileSystemURI(match, conf), conf);
      } catch (IOException e) {
        // "Incomplete HDFS URI, no host" => add a helpful suggestion
        if (e.getMessage().startsWith("Incomplete")) {
          throw new DatasetIOException("Could not get a FileSystem: " +
              "make sure the default " + match.get(URIPattern.SCHEME) +
              " URI is configured.", e);
        }
        throw new DatasetIOException("Could not get a FileSystem", e);
      }

      // setup the MetaStore URI
      setMetaStoreURI(conf, match);

      return new HiveManagedDatasetRepository.Builder()
          .configuration(conf)
          .rootDirectory(fs.makeQualified(root))
          .build();
    }
  }

  private static class ManagedBuilder implements OptionBuilder {
    @Override
    public DatasetRepository getFromOptions(Map match) {
      LOG.debug("Managed URI options: {}", match);
      // make a modifiable copy and setup the MetaStore URI
      Configuration conf = newHiveConf(DefaultConfiguration.get());
      // sanity check the URI
      setMetaStoreURI(conf, match);
      return new HiveManagedDatasetRepository.Builder()
          .configuration(conf)
          .build();
    }
  }

  @Override
  public void load() {
    checkHiveDependencies();

    OptionBuilder managedBuilder = new ManagedBuilder();
    OptionBuilder externalBuilder = new ExternalBuilder();

    Registration.register(
        new URIPattern("hive"),
        new URIPattern("hive::namespace/:dataset"),
        managedBuilder);
    Registration.register(
        new URIPattern("hive"),
        new URIPattern("hive::dataset?namespace=default"),
        managedBuilder);
    Registration.register(
        new URIPattern("hive"),
        new URIPattern("hive?namespace=default"),
        managedBuilder);

    Registration.register(
        new URIPattern("hive://" + NOT_SET),
        new URIPattern("hive:/:namespace/:dataset"),
        managedBuilder);
    Registration.register(
        new URIPattern("hive://" + NOT_SET),
        new URIPattern("hive:/:dataset?namespace=default"),
        managedBuilder);
    Registration.register(
        new URIPattern("hive://" + NOT_SET),
        new URIPattern("hive://" + NOT_SET + "?namespace=default"),
        managedBuilder);

    Registration.register(
        new URIPattern("hive:/*path?absolute=true"),
        new URIPattern("hive:/*path/:namespace/:dataset?absolute=true"),
        externalBuilder);
    Registration.register(
        new URIPattern("hive:*path"),
        new URIPattern("hive:*path/:namespace/:dataset"),
        externalBuilder);
  }

  private static Configuration newHiveConf(Configuration base) {
    checkHiveDependencies(); // ensure HIVE_CONF is present
    Configuration conf = HIVE_CONF.newInstance(base, HIVE_CONF.getConstructedClass());

    // Add everything in base back in to work around a bug in HiveConf
    HiveUtils.addResource(conf, base);
    return conf;
  }

  private synchronized static void checkHiveDependencies() {
    if (Loader.HIVE_CONF == null) {
      // check that Hive is available by resolving the HiveConf constructor
      // this is also needed by newHiveConf(Configuration)
      Loader.HIVE_CONF = new DynConstructors.Builder()
          .impl("org.apache.hadoop.hive.conf.HiveConf", Configuration.class, Class.class)
          .build();
    }
  }

  private static URI fileSystemURI(Map match, Configuration conf) {
    final String userInfo;
    if (match.containsKey(URIPattern.USERNAME)) {
      if (match.containsKey(URIPattern.PASSWORD)) {
        userInfo = match.get(URIPattern.USERNAME) + ":" +
            match.get(URIPattern.PASSWORD);
      } else {
        userInfo = match.get(URIPattern.USERNAME);
      }
    } else {
      userInfo = null;
    }

    try {
      if (match.containsKey(HDFS_HOST) || match.containsKey(OLD_HDFS_HOST)) {
        int port = UNSPECIFIED_PORT;
        if (match.containsKey(HDFS_PORT) || match.containsKey(OLD_HDFS_PORT)) {
          try {
            port = Integer.parseInt(first(match, HDFS_PORT, OLD_HDFS_PORT));
          } catch (NumberFormatException e) {
            port = UNSPECIFIED_PORT;
          }
        }
        return new URI("hdfs", userInfo, first(match, HDFS_HOST, OLD_HDFS_HOST),
            port, "/", null, null);
      } else {
        String defaultScheme;
        try {
          defaultScheme = FileSystem.get(conf).getUri().getScheme();
        } catch (IOException e) {
          throw new DatasetIOException("Cannot determine the default FS", e);
        }
        return new URI(defaultScheme, userInfo, "", UNSPECIFIED_PORT, "/", null, null);
      }
    } catch (URISyntaxException ex) {
      throw new DatasetOperationException("Could not build FS URI", ex);
    }
  }

  /**
   * Sets the MetaStore URI in the given Configuration, if there is a host in
   * the match arguments. If there is no host, then the conf is not changed.
   *
   * @param conf a Configuration that will be used to connect to the MetaStore
   * @param match URIPattern match results
   */
  private static void setMetaStoreURI(
      Configuration conf, Map match) {
    try {
      // If the host is set, construct a new MetaStore URI and set the property
      // in the Configuration. Otherwise, do not change the MetaStore URI.
      String host = match.get(URIPattern.HOST);
      if (host != null && !NOT_SET.equals(host)) {
        int port;
        try {
          port = Integer.parseInt(match.get(URIPattern.PORT));
        } catch (NumberFormatException e) {
          port = UNSPECIFIED_PORT;
        }
        conf.set(HIVE_METASTORE_URI_PROP,
            new URI("thrift", null, host, port, null, null, null).toString());
      }
    } catch (URISyntaxException ex) {
      throw new DatasetOperationException(
          "Could not build metastore URI", ex);
    }
  }

  private static String first(Map data, String... keys) {
    for (String key : keys) {
      if (data.containsKey(key)) {
        return data.get(key);
      }
    }
    return null;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy