/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.parse;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.common.repl.ReplConst;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.exec.repl.util.StringConvertibleObject;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.repl.DumpType;
import org.apache.hadoop.hive.ql.parse.repl.dump.io.DBSerializer;
import org.apache.hadoop.hive.ql.parse.repl.dump.io.JsonWriter;
import org.apache.hadoop.hive.ql.parse.repl.dump.io.ReplicationSpecSerializer;
import org.apache.hadoop.hive.ql.parse.repl.dump.io.TableSerializer;
import org.apache.hadoop.hive.ql.parse.repl.load.MetaData;
import org.apache.hadoop.hive.ql.parse.repl.load.MetadataJson;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.thrift.TException;
import org.json.JSONException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
/**
*
* EximUtil. Utility methods for the export/import semantic
* analyzers.
*
*/
public class EximUtil {
public static final String METADATA_NAME = "_metadata";
public static final String FILES_NAME = "_files";
public static final String FILE_LIST = "_file_list";
public static final String FILE_LIST_EXTERNAL = "_file_list_external";
public static final String FILE_LIST_EXTERNAL_SNAPSHOT_CURRENT = "_file_list_external_current";
public static final String FILE_LIST_EXTERNAL_SNAPSHOT_OLD = "_file_list_external_old";
public static final String DATA_PATH_NAME = "data";
public static final String METADATA_PATH_NAME = "metadata";
private static final Logger LOG = LoggerFactory.getLogger(EximUtil.class);
private static final String DATABASE_PATH_SUFFIX = ".db";
/**
* Wrapper class that bundles the common non-static BaseSemanticAnalyzer members so they
* can be passed into static generic methods without the function signatures
* becoming overwhelming.
*
* Note, however, that since this is constructed with args passed in,
* parts of the context, such as the tasks or inputs, might have been
* overridden with temporary context values, rather than being exactly
* 1:1 equivalent to BaseSemanticAnalyzer.getRootTasks() or BSA.getInputs().
*/
public static class SemanticAnalyzerWrapperContext {
private HiveConf conf;
private Hive db;
private Set<ReadEntity> inputs;
private Set<WriteEntity> outputs;
private List<Task<?>> tasks;
private Logger LOG;
private Context ctx;
private DumpType eventType = DumpType.EVENT_UNKNOWN;
private Task<?> openTxnTask = null;
public HiveConf getConf() {
return conf;
}
public Hive getHive() {
return db;
}
public Set<ReadEntity> getInputs() {
return inputs;
}
public Set<WriteEntity> getOutputs() {
return outputs;
}
public List<Task<?>> getTasks() {
return tasks;
}
public Logger getLOG() {
return LOG;
}
public Context getCtx() {
return ctx;
}
public void setEventType(DumpType eventType) {
this.eventType = eventType;
}
public DumpType getEventType() {
return eventType;
}
public SemanticAnalyzerWrapperContext(HiveConf conf, Hive db,
Set<ReadEntity> inputs,
Set<WriteEntity> outputs,
List<Task<?>> tasks,
Logger LOG, Context ctx){
this.conf = conf;
this.db = db;
this.inputs = inputs;
this.outputs = outputs;
this.tasks = tasks;
this.LOG = LOG;
this.ctx = ctx;
}
public Task<?> getOpenTxnTask() {
return openTxnTask;
}
public void setOpenTxnTask(Task<?> openTxnTask) {
this.openTxnTask = openTxnTask;
}
}
/**
* Wrapper class mapping a source path to a target path when copying managed table data and function binaries.
*/
public static class DataCopyPath implements StringConvertibleObject {
private static final String URI_SEPARATOR = "#";
private ReplicationSpec replicationSpec;
private static boolean nullSrcPathForTest = false;
private Path srcPath;
private Path tgtPath;
public DataCopyPath(ReplicationSpec replicationSpec) {
this.replicationSpec = replicationSpec;
}
public DataCopyPath(ReplicationSpec replicationSpec, Path srcPath, Path tgtPath) {
this.replicationSpec = replicationSpec;
if (srcPath == null) {
throw new IllegalArgumentException("Source path can not be null.");
}
this.srcPath = srcPath;
if (tgtPath == null) {
throw new IllegalArgumentException("Target path can not be null.");
}
this.tgtPath = tgtPath;
}
public Path getSrcPath() {
if (nullSrcPathForTest) {
return null;
}
return srcPath;
}
public Path getTargetPath() {
return tgtPath;
}
@Override
public String toString() {
return "DataCopyPath{"
+ "fullyQualifiedSourcePath=" + srcPath
+ ", fullyQualifiedTargetPath=" + tgtPath
+ '}';
}
public ReplicationSpec getReplicationSpec() {
return replicationSpec;
}
public void setReplicationSpec(ReplicationSpec replicationSpec) {
this.replicationSpec = replicationSpec;
}
/**
* To be used only for testing purposes.
* It is used to make the repl dump operation fail.
*/
public static void setNullSrcPath(HiveConf conf, boolean aNullSrcPath) {
if (conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST)) {
nullSrcPathForTest = aNullSrcPath;
}
}
@Override
public String convertToString() {
StringBuilder objInStr = new StringBuilder();
objInStr.append(srcPath)
.append(URI_SEPARATOR)
.append(tgtPath);
return objInStr.toString();
}
@Override
public void loadFromString(String objectInStr) {
String[] paths = objectInStr.split(URI_SEPARATOR);
this.srcPath = new Path(paths[0]);
this.tgtPath = new Path(paths[1]);
}
private String getEmptyOrString(String str) {
return (str == null) ? "" : str;
}
}
private EximUtil() {
}
/**
* Initialize the URI where the exported data collection is
* to be created for export, or is present for import.
*/
public static URI getValidatedURI(HiveConf conf, String dcPath) throws SemanticException {
try {
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE)
|| conf.getBoolVar(HiveConf.ConfVars.HIVEEXIMTESTMODE);
URI uri = new Path(dcPath).toUri();
FileSystem fs = FileSystem.get(uri, conf);
// Get scheme from FileSystem
String scheme = fs.getScheme();
String authority = uri.getAuthority();
String path = uri.getPath();
LOG.info("Path before norm :" + path);
// generate absolute path relative to home directory
if (!path.startsWith("/")) {
if (testMode) {
path = (new Path(System.getProperty("test.tmp.dir"), path)).toUri().getPath();
} else {
path =
(new Path(new Path("/user/" + System.getProperty("user.name")), path)).toUri()
.getPath();
}
}
// if scheme is specified but not authority then use the default authority
if (StringUtils.isEmpty(authority)) {
URI defaultURI = FileSystem.get(conf).getUri();
authority = defaultURI.getAuthority();
}
LOG.info("Scheme:" + scheme + ", authority:" + authority + ", path:" + path);
Collection<String> eximSchemes =
conf.getStringCollection(HiveConf.ConfVars.HIVE_EXIM_URI_SCHEME_WL.varname);
if (!eximSchemes.contains(scheme)) {
throw new SemanticException(
ErrorMsg.INVALID_PATH
.getMsg("only the following file systems accepted for export/import : "
+ conf.get(HiveConf.ConfVars.HIVE_EXIM_URI_SCHEME_WL.varname)));
}
try {
return new URI(scheme, authority, path, null, null);
} catch (URISyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
}
} catch (IOException e) {
throw new SemanticException(ErrorMsg.IO_ERROR.getMsg() + ": " + e.getMessage(), e);
}
}
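// Illustrative behaviour of getValidatedURI outside test mode, a minimal sketch with
// hypothetical values (default FS hdfs://nn:8020, user "hive"):
//   getValidatedURI(conf, "exports/t1")
//     -> hdfs://nn:8020/user/hive/exports/t1, provided "hdfs" appears in
//        HiveConf.ConfVars.HIVE_EXIM_URI_SCHEME_WL; otherwise a SemanticException
//        (INVALID_PATH) is thrown.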
static void validateTable(org.apache.hadoop.hive.ql.metadata.Table table) throws SemanticException {
if (table.isNonNative()) {
throw new SemanticException(ErrorMsg.EXIM_FOR_NON_NATIVE.getMsg());
}
}
public static String relativeToAbsolutePath(HiveConf conf, String location)
throws SemanticException {
try {
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVETESTMODE)
|| conf.getBoolVar(HiveConf.ConfVars.HIVEEXIMTESTMODE);
if (testMode) {
URI uri = new Path(location).toUri();
FileSystem fs = FileSystem.get(uri, conf);
String scheme = fs.getScheme();
String authority = uri.getAuthority();
String path = uri.getPath();
if (!path.startsWith("/")) {
path = (new Path(System.getProperty("test.tmp.dir"), path)).toUri().getPath();
}
if (StringUtils.isEmpty(scheme)) {
scheme = "pfile";
}
try {
uri = new URI(scheme, authority, path, null, null);
} catch (URISyntaxException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
}
return uri.toString();
} else {
Path path = new Path(location);
if (path.isAbsolute()) {
return location;
}
return path.getFileSystem(conf).makeQualified(path).toString();
}
} catch (IOException e) {
throw new SemanticException(ErrorMsg.IO_ERROR.getMsg() + ": " + e.getMessage(), e);
}
}
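// Illustrative behaviour of relativeToAbsolutePath outside test mode, a minimal sketch
// with hypothetical values:
//   relativeToAbsolutePath(conf, "/tmp/export")  -> "/tmp/export" (already absolute)
//   relativeToAbsolutePath(conf, "exports/t1")   -> the path qualified against its
//       FileSystem and working directory, e.g. "hdfs://nn:8020/user/hive/exports/t1".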
/* major version number should match for backward compatibility */
public static final String METADATA_FORMAT_VERSION = "0.2";
/* If null, then the major version number should match */
public static final String METADATA_FORMAT_FORWARD_COMPATIBLE_VERSION = null;
public static void createDbExportDump(FileSystem fs, Path metadataPath, Database dbObj,
ReplicationSpec replicationSpec, Configuration conf) throws IOException, SemanticException {
updateIfCustomDbLocations(dbObj, conf);
// WARNING NOTE : at this point, createDbExportDump lives only in a world where ReplicationSpec is in replication scope
// If we later make this work for non-repl cases, analysis of this logic might become necessary. Also, this is using
// Replv2 semantics, i.e. with listFiles laziness (no copy at export time)
// Remove all the entries from the parameters which are added by repl tasks internally.
Map<String, String> parameters = dbObj.getParameters();
if (parameters != null) {
Map<String, String> tmpParameters = new HashMap<>(parameters);
tmpParameters.entrySet()
.removeIf(e -> e.getKey().startsWith(ReplConst.BOOTSTRAP_DUMP_STATE_KEY_PREFIX)
|| e.getKey().equals(ReplConst.REPL_TARGET_DB_PROPERTY)
|| e.getKey().equals(ReplConst.SOURCE_OF_REPLICATION)
|| e.getKey().equals(ReplConst.REPL_FIRST_INC_PENDING_FLAG)
|| e.getKey().equals(ReplConst.REPL_FAILOVER_ENDPOINT));
dbObj.setParameters(tmpParameters);
}
try (JsonWriter jsonWriter = new JsonWriter(fs, metadataPath)) {
new DBSerializer(dbObj).writeTo(jsonWriter, replicationSpec);
}
if (parameters != null) {
dbObj.setParameters(parameters);
}
}
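// Illustrative usage, a minimal sketch with hypothetical handles (fs, dumpRoot,
// replicationSpec and hiveConf are assumed to be set up by the caller):
//   Database db = Hive.get(hiveConf).getDatabase("sales");
//   EximUtil.createDbExportDump(fs, new Path(dumpRoot, EximUtil.METADATA_NAME),
//       db, replicationSpec, hiveConf);
//   // serializes the database object as JSON into <dumpRoot>/_metadata, with the
//   // repl-internal parameters stripped from the dumped copy only.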
private static void updateIfCustomDbLocations(Database database, Configuration conf) throws SemanticException {
try {
String whLocation = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL,
MetastoreConf.getVar(conf, MetastoreConf.ConfVars.WAREHOUSE));
Path dbDerivedLoc = new Path(whLocation, database.getName().toLowerCase() + DATABASE_PATH_SUFFIX);
String defaultDbLoc = Utilities.getQualifiedPath((HiveConf) conf, dbDerivedLoc);
database.putToParameters(ReplConst.REPL_IS_CUSTOM_DB_LOC,
Boolean.toString(!defaultDbLoc.equals(database.getLocationUri())));
String whManagedLocation = MetastoreConf.getVar(conf, MetastoreConf.ConfVars.WAREHOUSE);
Path dbDerivedManagedLoc = new Path(whManagedLocation, database.getName().toLowerCase()
+ DATABASE_PATH_SUFFIX);
String defaultDbManagedLoc = Utilities.getQualifiedPath((HiveConf) conf, dbDerivedManagedLoc);
database.getParameters().put(ReplConst.REPL_IS_CUSTOM_DB_MANAGEDLOC, Boolean.toString(
!(database.getManagedLocationUri() == null
||defaultDbManagedLoc.equals(database.getManagedLocationUri()))));
} catch (HiveException ex) {
throw new SemanticException(ex);
}
}
public static void createExportDump(FileSystem fs, Path metadataPath, Table tableHandle,
Iterable<Partition> partitions, ReplicationSpec replicationSpec, HiveConf hiveConf)
throws SemanticException, IOException {
if (replicationSpec == null) {
replicationSpec = new ReplicationSpec(); // instantiate default values if not specified
}
if (tableHandle == null) {
replicationSpec.setNoop(true);
}
try (JsonWriter writer = new JsonWriter(fs, metadataPath)) {
if (replicationSpec.isInReplicationScope()) {
new ReplicationSpecSerializer().writeTo(writer, replicationSpec);
}
new TableSerializer(tableHandle, partitions, hiveConf).writeTo(writer, replicationSpec);
}
}
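// Illustrative usage, a minimal sketch with hypothetical handles; tableHandle and
// partitions come from the export/repl dump caller:
//   EximUtil.createExportDump(fs, new Path(exportRoot, EximUtil.METADATA_NAME),
//       tableHandle, partitions, replicationSpec, hiveConf);
//   // writes the ReplicationSpec header (only when in replication scope) followed by
//   // the serialized table and partitions to <exportRoot>/_metadata.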
public static MetaData getMetaDataFromLocation(String fromLocn, HiveConf conf)
throws SemanticException, IOException {
URI fromURI = getValidatedURI(conf, PlanUtils.stripQuotes(fromLocn));
Path fromPath = new Path(fromURI.getScheme(), fromURI.getAuthority(), fromURI.getPath());
FileSystem fs = FileSystem.get(fromURI, conf);
try {
return readMetaData(fs, new Path(fromPath, EximUtil.METADATA_NAME));
} catch (IOException e) {
throw new SemanticException(ErrorMsg.INVALID_PATH.getMsg(), e);
}
}
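// Illustrative usage, a minimal sketch with a hypothetical dump location:
//   MetaData md = EximUtil.getMetaDataFromLocation("hdfs://nn:8020/repl/dump/db/t1", conf);
//   // md exposes the database/table/partition objects and the ReplicationSpec that
//   // were deserialized from <location>/_metadata.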
public static MetaData readMetaData(FileSystem fs, Path metadataPath)
throws IOException, SemanticException {
String message = readAsString(fs, metadataPath);
try {
return new MetadataJson(message).getMetaData();
} catch (TException | JSONException e) {
throw new SemanticException(ErrorMsg.ERROR_SERIALIZE_METADATA.getMsg(), e);
}
}
public static String readAsString(final FileSystem fs, final Path fromMetadataPath)
throws IOException {
try (FSDataInputStream stream = fs.open(fromMetadataPath)) {
return IOUtils.toString(stream, StandardCharsets.UTF_8);
}
}
/* check the forward and backward compatibility */
public static void doCheckCompatibility(String currVersion,
String version, String fcVersion) throws SemanticException {
if (version == null) {
throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Version number missing"));
}
StringTokenizer st = new StringTokenizer(version, ".");
int data_major = Integer.parseInt(st.nextToken());
StringTokenizer st2 = new StringTokenizer(currVersion, ".");
int code_major = Integer.parseInt(st2.nextToken());
int code_minor = Integer.parseInt(st2.nextToken());
if (code_major > data_major) {
throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Not backward compatible."
+ " Producer version " + version + ", Consumer version " +
currVersion));
} else {
if ((fcVersion == null) || fcVersion.isEmpty()) {
if (code_major < data_major) {
throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Not forward compatible."
+ "Producer version " + version + ", Consumer version " +
currVersion));
}
} else {
StringTokenizer st3 = new StringTokenizer(fcVersion, ".");
int fc_major = Integer.parseInt(st3.nextToken());
int fc_minor = Integer.parseInt(st3.nextToken());
if ((fc_major > code_major) || ((fc_major == code_major) && (fc_minor > code_minor))) {
throw new SemanticException(ErrorMsg.INVALID_METADATA.getMsg("Not forward compatible."
+ "Minimum version " + fcVersion + ", Consumer version " +
currVersion));
}
}
}
}
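// Illustrative outcomes of doCheckCompatibility, a minimal sketch with hypothetical
// versions (arguments are: consumer version, producer version, producer's minimum
// forward-compatible version):
//   doCheckCompatibility("0.2", "0.2", null)  -> ok, same major version
//   doCheckCompatibility("0.2", "1.0", null)  -> SemanticException, not forward compatible
//   doCheckCompatibility("1.2", "0.9", null)  -> SemanticException, not backward compatible
//   doCheckCompatibility("0.2", "0.1", "0.3") -> SemanticException, fcVersion exceeds consumer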
/**
* Return the partition specification from the specified keys and values
*
* @param partCols
* the names of the partition keys
* @param partVals
* the values of the partition keys
*
* @return the partition specification as a map
*/
public static Map<String, String> makePartSpec(List<FieldSchema> partCols, List<String> partVals) {
Map<String, String> partSpec = new LinkedHashMap<>();
for (int i = 0; i < partCols.size(); ++i) {
partSpec.put(partCols.get(i).getName(), partVals.get(i));
}
return partSpec;
}
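// Illustrative usage, a minimal sketch with hypothetical partition columns ds and hr:
//   partCols = [FieldSchema("ds", "string", ...), FieldSchema("hr", "string", ...)]
//   partVals = ["2024-01-01", "07"]
//   makePartSpec(partCols, partVals) -> {ds=2024-01-01, hr=07}, insertion order preserved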
/**
* Compares the schemas by column names, types and order, ignoring comments.
*
* @param newSchema
* the new schema
* @param oldSchema
* the old schema
* @return a boolean indicating match
*/
public static boolean schemaCompare(List<FieldSchema> newSchema, List<FieldSchema> oldSchema) {
Iterator<FieldSchema> newColIter = newSchema.iterator();
for (FieldSchema oldCol : oldSchema) {
FieldSchema newCol = null;
if (newColIter.hasNext()) {
newCol = newColIter.next();
} else {
return false;
}
// not using FieldSchema.equals as comments can be different
if (!oldCol.getName().equals(newCol.getName())
|| !oldCol.getType().equals(newCol.getType())) {
return false;
}
}
if (newColIter.hasNext()) {
return false;
}
return true;
}
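// Illustrative outcomes of schemaCompare, a minimal sketch with hypothetical schemas:
//   [id:int, name:string] vs [id:int, name:string]       -> true (comments are ignored)
//   [id:int, name:string] vs [id:int, name:varchar(10)]  -> false (type differs)
//   [id:int]              vs [id:int, name:string]       -> false (column count differs)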
public static PathFilter getDirectoryFilter(final FileSystem fs) {
// TODO : isn't there a prior impl of an isDirectory utility PathFilter so users don't have to write their own?
return new PathFilter() {
@Override
public boolean accept(Path p) {
try {
return fs.isDirectory(p);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
};
}
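// Illustrative usage, a minimal sketch with a hypothetical dump path:
//   FileStatus[] dbDirs = fs.listStatus(new Path("/repl/dump"),
//       EximUtil.getDirectoryFilter(fs));  // keeps only sub-directories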
}