/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.druid;
import com.fasterxml.jackson.core.type.TypeReference;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.druid.data.input.impl.DimensionSchema;
import org.apache.druid.data.input.impl.DimensionsSpec;
import org.apache.druid.data.input.impl.InputRowParser;
import org.apache.druid.data.input.impl.TimestampSpec;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.RetryUtils;
import org.apache.druid.java.util.common.lifecycle.Lifecycle;
import org.apache.druid.java.util.http.client.HttpClient;
import org.apache.druid.java.util.http.client.HttpClientConfig;
import org.apache.druid.java.util.http.client.HttpClientInit;
import org.apache.druid.java.util.http.client.Request;
import org.apache.druid.java.util.http.client.response.StringFullResponseHandler;
import org.apache.druid.java.util.http.client.response.StringFullResponseHolder;
import org.apache.druid.metadata.MetadataStorageConnectorConfig;
import org.apache.druid.metadata.MetadataStorageTablesConfig;
import org.apache.druid.metadata.SQLMetadataConnector;
import org.apache.druid.metadata.storage.derby.DerbyConnector;
import org.apache.druid.metadata.storage.derby.DerbyMetadataStorage;
import org.apache.druid.metadata.storage.mysql.MySQLConnector;
import org.apache.druid.metadata.storage.mysql.MySQLConnectorConfig;
import org.apache.druid.metadata.storage.postgresql.PostgreSQLConnector;
import org.apache.druid.metadata.storage.postgresql.PostgreSQLConnectorConfig;
import org.apache.druid.metadata.storage.postgresql.PostgreSQLTablesConfig;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.Query;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.segment.IndexSpec;
import org.apache.druid.segment.indexing.DataSchema;
import org.apache.druid.segment.indexing.granularity.GranularitySpec;
import org.apache.druid.segment.loading.DataSegmentPusher;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.storage.hdfs.HdfsDataSegmentPusher;
import org.apache.druid.storage.hdfs.HdfsDataSegmentPusherConfig;
import org.apache.druid.timeline.DataSegment;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.TableName;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.druid.conf.DruidConstants;
import org.apache.hadoop.hive.druid.io.DruidOutputFormat;
import org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat;
import org.apache.hadoop.hive.druid.io.DruidRecordWriter;
import org.apache.hadoop.hive.druid.json.KafkaSupervisorReport;
import org.apache.hadoop.hive.druid.json.KafkaSupervisorSpec;
import org.apache.hadoop.hive.druid.security.KerberosHttpClient;
import org.apache.hadoop.hive.druid.serde.DruidSerDe;
import org.apache.hadoop.hive.metastore.DefaultHiveMetaHook;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.StorageHandlerInfo;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.hive.ql.security.authorization.HiveCustomStorageHandlerUtils;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hive.common.util.ShutdownHookManager;
import org.jboss.netty.handler.codec.http.HttpMethod;
import org.jboss.netty.handler.codec.http.HttpResponseStatus;
import org.joda.time.DateTime;
import org.joda.time.Period;
import org.skife.jdbi.v2.exceptions.CallbackFailedException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.stream.Collectors;
import static org.apache.hadoop.hive.druid.DruidStorageHandlerUtils.JSON_MAPPER;
/**
* DruidStorageHandler provides a HiveStorageHandler implementation for Druid.
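* <p>
* The handler wires Hive tables to Druid via {@link DruidQueryBasedInputFormat},
* {@link DruidOutputFormat} and {@link DruidSerDe}, and acts as the metastore hook that
* pre-creates, commits, and rolls back the backing Druid datasource. Such tables are
* typically declared with {@code STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'}.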
*/
@SuppressWarnings({ "rawtypes" }) public class DruidStorageHandler extends DefaultHiveMetaHook
implements HiveStorageHandler {
private static final Logger LOG = LoggerFactory.getLogger(DruidStorageHandler.class);
private static final SessionState.LogHelper CONSOLE = new SessionState.LogHelper(LOG);
public static final String SEGMENTS_DESCRIPTOR_DIR_NAME = "segmentsDescriptorDir";
private static final String INTERMEDIATE_SEGMENT_DIR_NAME = "intermediateSegmentDir";
private static final HttpClient HTTP_CLIENT;
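// ALTER TABLE operation types that are permitted on Druid-backed tables.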
private static final List<String> ALLOWED_ALTER_TYPES =
ImmutableList.of("ADDPROPS", "DROPPROPS", "ADDCOLS");
/** Druid prefix to form the URI for authentication */
private static final String DRUID_PREFIX = "druid:";
/** Druid config for determining the host name */
private static final String DRUID_HOST_NAME = "druid.zk.service.host";
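// Shared HTTP client used for calls to the Druid services (e.g. the overlord). Its Lifecycle is
// started once per JVM here and stopped through a shutdown hook.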
static {
final Lifecycle lifecycle = new Lifecycle();
try {
lifecycle.start();
} catch (Exception e) {
LOG.error("Issues with lifecycle start", e);
}
HTTP_CLIENT = makeHttpClient(lifecycle);
ShutdownHookManager.addShutdownHook(lifecycle::stop);
}
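// Druid metadata store connector and tables config; may be injected through the @VisibleForTesting constructor.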
private SQLMetadataConnector connector;
private MetadataStorageTablesConfig druidMetadataStorageTablesConfig = null;
private String uniqueId = null;
private String rootWorkingDir = null;
private Configuration conf;
public DruidStorageHandler() {
}
@VisibleForTesting public DruidStorageHandler(SQLMetadataConnector connector,
MetadataStorageTablesConfig druidMetadataStorageTablesConfig) {
this.connector = connector;
this.druidMetadataStorageTablesConfig = druidMetadataStorageTablesConfig;
}
@Override public Class<? extends InputFormat> getInputFormatClass() {
return DruidQueryBasedInputFormat.class;
}
@Override public Class<? extends OutputFormat> getOutputFormatClass() {
return DruidOutputFormat.class;
}
@Override public Class<? extends AbstractSerDe> getSerDeClass() {
return DruidSerDe.class;
}
@Override public HiveMetaHook getMetaHook() {
return this;
}
@Override public HiveAuthorizationProvider getAuthorizationProvider() {
return new DefaultHiveAuthorizationProvider();
}
@Override public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> jobProperties) {
}
@Override public void configureInputJobCredentials(TableDesc tableDesc, Map<String, String> jobSecrets) {
}
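/**
* Validates that no LOCATION, PARTITIONED BY or CLUSTERED BY clause is used, ensures the
* druid_segments metadata table exists, and registers the datasource name the table maps to.
*/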
@Override public void preCreateTable(Table table) throws MetaException {
if (!StringUtils.isEmpty(table.getSd().getLocation())) {
throw new MetaException("LOCATION may not be specified for Druid");
}
if (table.getPartitionKeysSize() != 0) {
throw new MetaException("PARTITIONED BY may not be specified for Druid");
}
if (table.getSd().getBucketColsSize() != 0) {
throw new MetaException("CLUSTERED BY may not be specified for Druid");
}
String dataSourceName = table.getParameters().get(Constants.DRUID_DATA_SOURCE);
if (dataSourceName != null) {
// Datasource already exists in Druid; nothing to pre-create.
return;
}
// Create the datasource name from the Hive database and table name.
dataSourceName = TableName.getDbTable(table.getDbName(), table.getTableName());
try {
// NOTE: This just creates the druid_segments table in the Druid metastore.
// It is needed when Hive is started before any of the Druid services,
// in which case the druid_segments table has not been created yet.
getConnector().createSegmentTable();
} catch (Exception e) {
LOG.error("Exception while trying to create druid segments table", e);
throw new MetaException(e.getMessage());
}
Collection<String>
existingDataSources =
DruidStorageHandlerUtils.getAllDataSourceNames(getConnector(), getDruidMetadataStorageTablesConfig());
LOG.debug("pre-create data source with name {}", dataSourceName);
// Check whether the datasource we are about to create already exists in the druid_segments table.
if (existingDataSources.contains(dataSourceName)) {
throw new MetaException(String.format("Data source [%s] already existing", dataSourceName));
}
table.getParameters().put(Constants.DRUID_DATA_SOURCE, dataSourceName);
}
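/** Called when CREATE TABLE fails; cleans up the intermediate working directory used for segment data. */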
@Override public void rollbackCreateTable(Table table) {
cleanWorkingDir();
}
@Override public void commitCreateTable(Table table) throws MetaException {
if (DruidKafkaUtils.isKafkaStreamingTable(table)) {
updateKafkaIngestion(table);
}
// For CTAS queries where the user has explicitly specified the datasource,
// we append the data to the existing Druid datasource.
this.commitInsertTable(table, false);
}
@Override
public URI getURIForAuth(Table table) throws URISyntaxException {
Map<String, String> tableProperties = HiveCustomStorageHandlerUtils.getTableProperties(table);
String host_name = conf.get(DRUID_HOST_NAME) != null ? conf.get(DRUID_HOST_NAME) :
HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
String table_name = tableProperties.get(Constants.DRUID_DATA_SOURCE);
String column_names = tableProperties.get(Constants.DRUID_QUERY_FIELD_NAMES);
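// The resulting URI has the form druid://<host>/<datasource>[/<comma-separated column list>].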
if (column_names != null)
return new URI(DRUID_PREFIX+"//"+host_name+"/"+table_name+"/"+column_names);
else
return new URI(DRUID_PREFIX+"//"+host_name+"/"+table_name);
}
private void updateKafkaIngestion(Table table) {
final String overlordAddress = HiveConf.getVar(getConf(), HiveConf.ConfVars.HIVE_DRUID_OVERLORD_DEFAULT_ADDRESS);
final String
dataSourceName =
Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table, Constants.DRUID_DATA_SOURCE),
"Druid datasource name is null");
final String
kafkaTopic =
Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table,
DruidConstants.KAFKA_TOPIC), "kafka topic is null");
final String
kafkaServers =
Preconditions.checkNotNull(DruidStorageHandlerUtils.getTableProperty(table,
DruidConstants.KAFKA_BOOTSTRAP_SERVERS), "kafka connect string is null");
Properties tableProperties = new Properties();
tableProperties.putAll(table.getParameters());
final GranularitySpec granularitySpec = DruidStorageHandlerUtils.getGranularitySpec(getConf(), tableProperties);
List<FieldSchema> columns = table.getSd().getCols();
List<String> columnNames = new ArrayList<>(columns.size());
List<TypeInfo> columnTypes = new ArrayList<>(columns.size());
for (FieldSchema schema : columns) {
columnNames.add(schema.getName());
columnTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(schema.getType()));
}
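// Split the Hive columns into Druid dimensions and metric aggregators based on their types.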
Pair<List<DimensionSchema>, AggregatorFactory[]>
dimensionsAndAggregates =
DruidStorageHandlerUtils.getDimensionsAndAggregates(columnNames, columnTypes);
if (!columnNames.contains(DruidConstants.DEFAULT_TIMESTAMP_COLUMN)) {
throw new IllegalStateException("Timestamp column (' "
+ DruidConstants.DEFAULT_TIMESTAMP_COLUMN
+ "') not specified in create table; list of columns is : "
+ columnNames);
}
DimensionsSpec dimensionsSpec = new DimensionsSpec(dimensionsAndAggregates.lhs, null, null);
String timestampFormat = DruidStorageHandlerUtils
.getTableProperty(table, DruidConstants.DRUID_TIMESTAMP_FORMAT);
String timestampColumnName = DruidStorageHandlerUtils
.getTableProperty(table, DruidConstants.DRUID_TIMESTAMP_COLUMN);
if (timestampColumnName == null) {
timestampColumnName = DruidConstants.DEFAULT_TIMESTAMP_COLUMN;
}
final TimestampSpec timestampSpec = new TimestampSpec(timestampColumnName, timestampFormat,
null
);
final InputRowParser inputRowParser = DruidKafkaUtils
.getInputRowParser(table, timestampSpec, dimensionsSpec);
final Map<String, Object>
inputParser =
JSON_MAPPER.convertValue(inputRowParser, new TypeReference