Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.accumulo;
import java.io.IOException;
import java.util.Map;
import java.util.Properties;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.TableExistsException;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.admin.TableOperations;
import org.apache.accumulo.core.client.mapred.AccumuloInputFormat;
import org.apache.accumulo.core.client.mapred.AccumuloOutputFormat;
import org.apache.accumulo.core.client.mapreduce.lib.impl.InputConfigurator;
import org.apache.accumulo.core.client.mapreduce.lib.impl.OutputConfigurator;
import org.apache.accumulo.core.client.security.tokens.AuthenticationToken;
import org.apache.accumulo.fate.Fate;
import org.apache.accumulo.start.Main;
import org.apache.accumulo.trace.instrument.Tracer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableInputFormat;
import org.apache.hadoop.hive.accumulo.mr.HiveAccumuloTableOutputFormat;
import org.apache.hadoop.hive.accumulo.predicate.AccumuloPredicateHandler;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDe;
import org.apache.hadoop.hive.accumulo.serde.AccumuloSerDeParameters;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.HiveStoragePredicateHandler;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.hive.serde2.Deserializer;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.StringUtils;
import org.apache.zookeeper.ZooKeeper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Create table mapping to Accumulo for Hive. Handle predicate pushdown if necessary.
*/
public class AccumuloStorageHandler extends DefaultStorageHandler implements HiveMetaHook,
HiveStoragePredicateHandler {
private static final Logger log = LoggerFactory.getLogger(AccumuloStorageHandler.class);
private static final String DEFAULT_PREFIX = "default";
protected AccumuloPredicateHandler predicateHandler = AccumuloPredicateHandler.getInstance();
protected AccumuloConnectionParameters connectionParams;
protected Configuration conf;
protected HiveAccumuloHelper helper = new HiveAccumuloHelper();
/**
* Push down table properties into the JobConf.
*
* @param desc
* Hive table description
* @param jobProps
* Properties that will be added to the JobConf by Hive
*/
@Override
public void configureTableJobProperties(TableDesc desc, Map jobProps) {
// Should not be getting invoked, configureInputJobProperties or configureOutputJobProperties
// should be invoked instead.
configureInputJobProperties(desc, jobProps);
configureOutputJobProperties(desc, jobProps);
}
protected String getTableName(Table table) throws MetaException {
// Use TBLPROPERTIES
String tableName = table.getParameters().get(AccumuloSerDeParameters.TABLE_NAME);
if (null != tableName) {
return tableName;
}
// Then try SERDEPROPERTIES
tableName = table.getSd().getSerdeInfo().getParameters()
.get(AccumuloSerDeParameters.TABLE_NAME);
if (null != tableName) {
return tableName;
}
// Use the hive table name, ignoring the default database
if (DEFAULT_PREFIX.equals(table.getDbName())) {
return table.getTableName();
} else {
return table.getDbName() + "." + table.getTableName();
}
}
protected String getTableName(TableDesc tableDesc) {
Properties props = tableDesc.getProperties();
String tableName = props.getProperty(AccumuloSerDeParameters.TABLE_NAME);
if (null != tableName) {
return tableName;
}
tableName = props.getProperty(hive_metastoreConstants.META_TABLE_NAME);
if (tableName.startsWith(DEFAULT_PREFIX + ".")) {
return tableName.substring(DEFAULT_PREFIX.length() + 1);
}
return tableName;
}
@Override
public Configuration getConf() {
return conf;
}
@Override
public void setConf(Configuration conf) {
this.conf = conf;
connectionParams = new AccumuloConnectionParameters(conf);
}
@SuppressWarnings("deprecation")
@Override
public Class getSerDeClass() {
return AccumuloSerDe.class;
}
@Override
public HiveMetaHook getMetaHook() {
return this;
}
@Override
public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException {
return null;
}
@Override
public void configureInputJobProperties(TableDesc tableDesc, Map jobProperties) {
Properties props = tableDesc.getProperties();
jobProperties.put(AccumuloSerDeParameters.COLUMN_MAPPINGS,
props.getProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS));
String tableName = props.getProperty(AccumuloSerDeParameters.TABLE_NAME);
if (null == tableName) {
tableName = getTableName(tableDesc);
}
jobProperties.put(AccumuloSerDeParameters.TABLE_NAME,
tableName);
String useIterators = props.getProperty(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY);
if (useIterators != null) {
if (!useIterators.equalsIgnoreCase("true") && !useIterators.equalsIgnoreCase("false")) {
throw new IllegalArgumentException("Expected value of true or false for "
+ AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY);
}
jobProperties.put(AccumuloSerDeParameters.ITERATOR_PUSHDOWN_KEY, useIterators);
}
String storageType = props.getProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE);
if (null != storageType) {
jobProperties.put(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE, storageType);
}
String authValue = props.getProperty(AccumuloSerDeParameters.AUTHORIZATIONS_KEY);
if (null != authValue) {
jobProperties.put(AccumuloSerDeParameters.AUTHORIZATIONS_KEY, authValue);
}
log.info("Computed input job properties of " + jobProperties);
}
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties) {
Properties props = tableDesc.getProperties();
// Adding these job properties will make them available to the OutputFormat in checkOutputSpecs
jobProperties.put(AccumuloSerDeParameters.COLUMN_MAPPINGS,
props.getProperty(AccumuloSerDeParameters.COLUMN_MAPPINGS));
String tableName = props.getProperty(AccumuloSerDeParameters.TABLE_NAME);
if (null == tableName) {
tableName = getTableName(tableDesc);
}
jobProperties.put(AccumuloSerDeParameters.TABLE_NAME, tableName);
if (props.containsKey(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE)) {
jobProperties.put(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE,
props.getProperty(AccumuloSerDeParameters.DEFAULT_STORAGE_TYPE));
}
if (props.containsKey(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY)) {
jobProperties.put(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY,
props.getProperty(AccumuloSerDeParameters.VISIBILITY_LABEL_KEY));
}
}
@SuppressWarnings("rawtypes")
@Override
public Class getInputFormatClass() {
return HiveAccumuloTableInputFormat.class;
}
@Override
@SuppressWarnings("rawtypes")
public Class getOutputFormatClass() {
return HiveAccumuloTableOutputFormat.class;
}
@Override
public void preCreateTable(Table table) throws MetaException {
boolean isExternal = isExternalTable(table);
if (table.getSd().getLocation() != null) {
throw new MetaException("Location can't be specified for Accumulo");
}
Map serdeParams = table.getSd().getSerdeInfo().getParameters();
String columnMapping = serdeParams.get(AccumuloSerDeParameters.COLUMN_MAPPINGS);
if (columnMapping == null) {
throw new MetaException(AccumuloSerDeParameters.COLUMN_MAPPINGS
+ " missing from SERDEPROPERTIES");
}
try {
String tblName = getTableName(table);
Connector connector = connectionParams.getConnector();
TableOperations tableOpts = connector.tableOperations();
// Attempt to create the table, taking EXTERNAL into consideration
if (!tableOpts.exists(tblName)) {
if (!isExternal) {
tableOpts.create(tblName);
} else {
throw new MetaException("Accumulo table " + tblName
+ " doesn't exist even though declared external");
}
} else {
if (!isExternal) {
throw new MetaException("Table " + tblName
+ " already exists in Accumulo. Use CREATE EXTERNAL TABLE to register with Hive.");
}
}
} catch (AccumuloSecurityException e) {
throw new MetaException(StringUtils.stringifyException(e));
} catch (TableExistsException e) {
throw new MetaException(StringUtils.stringifyException(e));
} catch (AccumuloException e) {
throw new MetaException(StringUtils.stringifyException(e));
}
}
protected boolean isExternalTable(Table table) {
return MetaStoreUtils.isExternalTable(table);
}
@Override
public void rollbackCreateTable(Table table) throws MetaException {
// Same as commitDropTable where we always delete the data (accumulo table)
commitDropTable(table, true);
}
@Override
public void commitCreateTable(Table table) throws MetaException {
// do nothing
}
@Override
public void commitDropTable(Table table, boolean deleteData) throws MetaException {
String tblName = getTableName(table);
if (!isExternalTable(table)) {
try {
if (deleteData) {
TableOperations tblOpts = connectionParams.getConnector().tableOperations();
if (tblOpts.exists(tblName)) {
tblOpts.delete(tblName);
}
}
} catch (AccumuloException e) {
throw new MetaException(StringUtils.stringifyException(e));
} catch (AccumuloSecurityException e) {
throw new MetaException(StringUtils.stringifyException(e));
} catch (TableNotFoundException e) {
throw new MetaException(StringUtils.stringifyException(e));
}
}
}
@Override
public void preDropTable(Table table) throws MetaException {
// do nothing
}
@Override
public void rollbackDropTable(Table table) throws MetaException {
// do nothing
}
@SuppressWarnings("deprecation")
@Override
public DecomposedPredicate decomposePredicate(JobConf conf, Deserializer deserializer,
ExprNodeDesc desc) {
if (!(deserializer instanceof AccumuloSerDe)) {
throw new RuntimeException("Expected an AccumuloSerDe but got "
+ deserializer.getClass().getName());
}
AccumuloSerDe serDe = (AccumuloSerDe) deserializer;
if (serDe.getIteratorPushdown()) {
return predicateHandler.decompose(conf, desc);
} else {
log.info("Set to ignore Accumulo iterator pushdown, skipping predicate handler.");
return null;
}
}
@SuppressWarnings("deprecation")
@Override
public void configureJobConf(TableDesc tableDesc, JobConf jobConf) {
try {
Utils.addDependencyJars(jobConf, Tracer.class, Fate.class, Connector.class, Main.class,
ZooKeeper.class, AccumuloStorageHandler.class);
} catch (IOException e) {
log.error("Could not add necessary Accumulo dependencies to classpath", e);
}
Properties tblProperties = tableDesc.getProperties();
AccumuloSerDeParameters serDeParams = null;
try {
serDeParams = new AccumuloSerDeParameters(jobConf, tblProperties, AccumuloSerDe.class.getName());
} catch (SerDeException e) {
log.error("Could not instantiate AccumuloSerDeParameters", e);
return;
}
try {
serDeParams.getRowIdFactory().addDependencyJars(jobConf);
} catch (IOException e) {
log.error("Could not add necessary dependencies for " + serDeParams.getRowIdFactory().getClass(), e);
}
// When Kerberos is enabled, we have to add the Accumulo delegation token to the
// Job so that it gets passed down to the YARN/Tez task.
if (connectionParams.useSasl()) {
try {
// Obtain a delegation token from Accumulo
Connector conn = connectionParams.getConnector();
AuthenticationToken token = helper.getDelegationToken(conn);
// Make sure the Accumulo token is set in the Configuration (only a stub of the Accumulo
// AuthentiationToken is serialized, not the entire token). configureJobConf may be
// called multiple times with the same JobConf which results in an error from Accumulo
// MapReduce API. Catch the error, log a debug message and just keep going
try {
InputConfigurator.setConnectorInfo(AccumuloInputFormat.class, jobConf,
connectionParams.getAccumuloUserName(), token);
} catch (IllegalStateException e) {
// The implementation balks when this method is invoked multiple times
log.debug("Ignoring IllegalArgumentException about re-setting connector information");
}
try {
OutputConfigurator.setConnectorInfo(AccumuloOutputFormat.class, jobConf,
connectionParams.getAccumuloUserName(), token);
} catch (IllegalStateException e) {
// The implementation balks when this method is invoked multiple times
log.debug("Ignoring IllegalArgumentException about re-setting connector information");
}
// Convert the Accumulo token in a Hadoop token
Token accumuloToken = helper.getHadoopToken(token);
log.info("Adding Hadoop Token for Accumulo to Job's Credentials");
// Add the Hadoop token to the JobConf
helper.mergeTokenIntoJobConf(jobConf, accumuloToken);
} catch (Exception e) {
throw new RuntimeException("Failed to obtain DelegationToken for " + connectionParams.getAccumuloUserName(), e);
}
}
}
}