/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hive.beeline;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.OptionGroup;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.HiveMetaException;
import org.apache.hadoop.hive.metastore.MetaStoreSchemaInfo;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hive.beeline.HiveSchemaHelper.NestedScriptParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableMap;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.net.URI;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
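/**
 * Tool to initialize, upgrade, and validate the Hive metastore schema by
 * running the packaged SQL scripts through BeeLine.
 *
 * Example invocations (option names as defined in initOptions; host and
 * version values are placeholders):
 *
 *   schematool -dbType derby -initSchema
 *   schematool -dbType mysql -info
 *   schematool -dbType mysql -upgradeSchemaFrom &lt;fromVersion&gt; -dryRun
 *   schematool -dbType postgres -validate -servers hdfs://&lt;host&gt;:&lt;port&gt;
 */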
public class HiveSchemaTool {
private String userName = null;
private String passWord = null;
private boolean dryRun = false;
private boolean verbose = false;
private String dbOpts = null;
private URI[] validationServers = null; // servers that database, table, and partition locations may reside on
private final HiveConf hiveConf;
private final String dbType;
private final MetaStoreSchemaInfo metaStoreSchemaInfo;
private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaTool.class.getName());
public HiveSchemaTool(String dbType) throws HiveMetaException {
this(System.getenv("HIVE_HOME"), new HiveConf(HiveSchemaTool.class), dbType);
}
public HiveSchemaTool(String hiveHome, HiveConf hiveConf, String dbType)
throws HiveMetaException {
if (hiveHome == null || hiveHome.isEmpty()) {
throw new HiveMetaException("No Hive home directory provided");
}
this.hiveConf = hiveConf;
this.dbType = dbType;
this.metaStoreSchemaInfo = new MetaStoreSchemaInfo(hiveHome, dbType);
userName = hiveConf.get(ConfVars.METASTORE_CONNECTION_USER_NAME.varname);
try {
passWord = ShimLoader.getHadoopShims().getPassword(hiveConf,
HiveConf.ConfVars.METASTOREPWD.varname);
} catch (IOException err) {
throw new HiveMetaException("Error getting metastore password", err);
}
}
public HiveConf getHiveConf() {
return hiveConf;
}
public void setUserName(String userName) {
this.userName = userName;
}
public void setPassWord(String passWord) {
this.passWord = passWord;
}
public void setDryRun(boolean dryRun) {
this.dryRun = dryRun;
}
public void setVerbose(boolean verbose) {
this.verbose = verbose;
}
public void setDbOpts(String dbOpts) {
this.dbOpts = dbOpts;
}
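/**
 * Set the servers (a comma-separated list of URIs, e.g. HDFS NameNodes)
 * that entity locations are validated against.
 */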
public void setValidationServers(String servers) {
if (StringUtils.isNotEmpty(servers)) {
String[] strServers = servers.split(",");
this.validationServers = new URI[strServers.length];
for (int i = 0; i < validationServers.length; i++) {
validationServers[i] = new Path(strServers[i]).toUri();
}
}
}
private static void printAndExit(Options cmdLineOptions) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("schemaTool", cmdLineOptions);
System.exit(1);
}
Connection getConnectionToMetastore(boolean printInfo)
throws HiveMetaException {
return HiveSchemaHelper.getConnectionToMetastore(userName,
passWord, printInfo, hiveConf);
}
private NestedScriptParser getDbCommandParser(String dbType) {
return HiveSchemaHelper.getDbCommandParser(dbType, dbOpts, userName,
passWord, hiveConf);
}
/**
 * Print the Hive distribution version and the metastore schema version
 * @throws HiveMetaException
 */
public void showInfo() throws HiveMetaException {
Connection metastoreConn = getConnectionToMetastore(true);
String hiveVersion = MetaStoreSchemaInfo.getHiveSchemaVersion();
String dbVersion = getMetaStoreSchemaVersion(metastoreConn);
System.out.println("Hive distribution version:\t " + hiveVersion);
System.out.println("Metastore schema version:\t " + dbVersion);
assertCompatibleVersion(hiveVersion, dbVersion);
}
private String getMetaStoreSchemaVersion(Connection metastoreConn)
throws HiveMetaException {
return getMetaStoreSchemaVersion(metastoreConn, false);
}
// read schema version from metastore
private String getMetaStoreSchemaVersion(Connection metastoreConn,
boolean checkDuplicatedVersion) throws HiveMetaException {
String versionQuery;
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
versionQuery = "select t.\"SCHEMA_VERSION\" from \"VERSION\" t";
} else {
versionQuery = "select t.SCHEMA_VERSION from VERSION t";
}
try (Statement stmt = metastoreConn.createStatement();
ResultSet res = stmt.executeQuery(versionQuery)) {
if (!res.next()) {
throw new HiveMetaException("Could not find version info in metastore VERSION table");
}
String currentSchemaVersion = res.getString(1);
if (checkDuplicatedVersion && res.next()) {
throw new HiveMetaException("Multiple versions were found in metastore.");
}
return currentSchemaVersion;
} catch (SQLException e) {
throw new HiveMetaException("Failed to get schema version.", e);
}
}
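/**
 * Validate that every database, table, partition, and skewed-column location
 * stored in the metastore is well formed and, if validation servers were
 * configured, resides on one of them.
 */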
boolean validateLocations(Connection conn, URI[] defaultServers) throws HiveMetaException {
System.out.println("Validating database/table/partition locations");
boolean rtn;
rtn = checkMetaStoreDBLocation(conn, defaultServers);
rtn = checkMetaStoreTableLocation(conn, defaultServers) && rtn;
rtn = checkMetaStorePartitionLocation(conn, defaultServers) && rtn;
rtn = checkMetaStoreSkewedColumnsLocation(conn, defaultServers) && rtn;
System.out.println((rtn ? "Succeeded" : "Failed") + " in database/table/partition location validation");
return rtn;
}
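// Prefer the entity's name in error messages; fall back to its numeric ID.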
private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException {
String itemName = res.getString(nameInx);
return (itemName == null || itemName.isEmpty()) ? "ID: " + res.getString(idInx) : "Name: " + itemName;
}
private boolean checkMetaStoreDBLocation(Connection conn, URI[] defaultServers)
throws HiveMetaException {
String dbLoc;
boolean isValid = true;
int numOfInvalid = 0;
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
dbLoc = "select dbt.\"DB_ID\", dbt.\"NAME\", dbt.\"DB_LOCATION_URI\" from \"DBS\" dbt";
} else {
dbLoc = "select dbt.DB_ID, dbt.NAME, dbt.DB_LOCATION_URI from DBS dbt";
}
try(Statement stmt = conn.createStatement();
ResultSet res = stmt.executeQuery(dbLoc)) {
while (res.next()) {
String locValue = res.getString(3);
String dbName = getNameOrID(res,2,1);
if (!checkLocation("Database " + dbName, locValue, defaultServers)) {
numOfInvalid++;
}
}
} catch (SQLException e) {
throw new HiveMetaException("Failed to get DB Location Info.", e);
}
if (numOfInvalid > 0) {
isValid = false;
System.err.println("Total number of invalid DB locations is: "+ numOfInvalid);
}
return isValid;
}
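// Table locations are checked in TBL_ID ranges of ~2000 rows per query to
// bound the result set size on large metastores.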
private boolean checkMetaStoreTableLocation(Connection conn, URI[] defaultServers)
throws HiveMetaException {
String tabLoc, tabIDRange;
boolean isValid = true;
int numOfInvalid = 0;
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
tabIDRange = "select max(\"TBL_ID\"), min(\"TBL_ID\") from \"TBLS\" ";
} else {
tabIDRange = "select max(TBL_ID), min(TBL_ID) from TBLS";
}
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
tabLoc = "select tbl.\"TBL_ID\", tbl.\"TBL_NAME\", sd.\"LOCATION\", dbt.\"DB_ID\", dbt.\"NAME\" from \"TBLS\" tbl inner join " +
"\"SDS\" sd on tbl.\"SD_ID\" = sd.\"SD_ID\" and tbl.\"TBL_TYPE\" != '" + TableType.VIRTUAL_VIEW +
"' and tbl.\"TBL_ID\" >= ? and tbl.\"TBL_ID\"<= ? " + "inner join \"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" ";
} else {
tabLoc = "select tbl.TBL_ID, tbl.TBL_NAME, sd.LOCATION, dbt.DB_ID, dbt.NAME from TBLS tbl join SDS sd on tbl.SD_ID = sd.SD_ID and tbl.TBL_TYPE !='"
+ TableType.VIRTUAL_VIEW + "' and tbl.TBL_ID >= ? and tbl.TBL_ID <= ? inner join DBS dbt on tbl.DB_ID = dbt.DB_ID";
}
long maxID = 0, minID = 0;
long rtnSize = 2000;
try {
Statement stmt = conn.createStatement();
ResultSet res = stmt.executeQuery(tabIDRange);
if (res.next()) {
maxID = res.getLong(1);
minID = res.getLong(2);
}
res.close();
stmt.close();
PreparedStatement pStmt = conn.prepareStatement(tabLoc);
while (minID <= maxID) {
pStmt.setLong(1, minID);
pStmt.setLong(2, minID + rtnSize);
res = pStmt.executeQuery();
while (res.next()) {
String locValue = res.getString(3);
String entity = "Database " + getNameOrID(res, 5, 4) +
", Table " + getNameOrID(res,2,1);
if (!checkLocation(entity, locValue, defaultServers)) {
numOfInvalid++;
}
}
res.close();
minID += rtnSize + 1;
}
pStmt.close();
} catch (SQLException e) {
throw new HiveMetaException("Failed to get Table Location Info.", e);
}
if (numOfInvalid > 0) {
isValid = false;
System.err.println("Total number of invalid TABLE locations is: "+ numOfInvalid);
}
return isValid;
}
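// Partition locations are checked in PART_ID ranges, mirroring the batching
// used for tables above.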
private boolean checkMetaStorePartitionLocation(Connection conn, URI[] defaultServers)
throws HiveMetaException {
String partLoc, partIDRange;
boolean isValid = true;
int numOfInvalid = 0;
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
partIDRange = "select max(\"PART_ID\"), min(\"PART_ID\") from \"PARTITIONS\" ";
} else {
partIDRange = "select max(PART_ID), min(PART_ID) from PARTITIONS";
}
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
partLoc = "select pt.\"PART_ID\", pt.\"PART_NAME\", sd.\"LOCATION\", tbl.\"TBL_ID\", tbl.\"TBL_NAME\",dbt.\"DB_ID\", dbt.\"NAME\" from \"PARTITIONS\" pt "
+ "inner join \"SDS\" sd on pt.\"SD_ID\" = sd.\"SD_ID\" and pt.\"PART_ID\" >= ? and pt.\"PART_ID\"<= ? "
+ " inner join \"TBLS\" tbl on pt.\"TBL_ID\" = tbl.\"TBL_ID\" inner join "
+ "\"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" ";
} else {
partLoc = "select pt.PART_ID, pt.PART_NAME, sd.LOCATION, tbl.TBL_ID, tbl.TBL_NAME, dbt.DB_ID, dbt.NAME from PARTITIONS pt "
+ "inner join SDS sd on pt.SD_ID = sd.SD_ID and pt.PART_ID >= ? and pt.PART_ID <= ? "
+ "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID inner join DBS dbt on tbl.DB_ID = dbt.DB_ID ";
}
long maxID = 0, minID = 0;
long rtnSize = 2000;
try {
Statement stmt = conn.createStatement();
ResultSet res = stmt.executeQuery(partIDRange);
if (res.next()) {
maxID = res.getLong(1);
minID = res.getLong(2);
}
res.close();
stmt.close();
PreparedStatement pStmt = conn.prepareStatement(partLoc);
while (minID <= maxID) {
pStmt.setLong(1, minID);
pStmt.setLong(2, minID + rtnSize);
res = pStmt.executeQuery();
while (res.next()) {
String locValue = res.getString(3);
String entity = "Database " + getNameOrID(res,7,6) +
", Table " + getNameOrID(res,5,4) +
", Partition " + getNameOrID(res,2,1);
if (!checkLocation(entity, locValue, defaultServers)) {
numOfInvalid++;
}
}
res.close();
minID += rtnSize + 1;
}
pStmt.close();
} catch (SQLException e) {
throw new HiveMetaException("Failed to get Partiton Location Info.", e);
}
if (numOfInvalid > 0) {
isValid = false;
System.err.println("Total number of invalid PARTITION locations is: "+ numOfInvalid);
}
return isValid;
}
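// Skewed-column locations are checked in STRING_LIST_ID_KID ranges, using
// the same batching scheme as tables and partitions.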
private boolean checkMetaStoreSkewedColumnsLocation(Connection conn, URI[] defaultServers)
throws HiveMetaException {
String skewedColLoc, skewedColIDRange;
boolean isValid = true;
int numOfInvalid = 0;
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
skewedColIDRange = "select max(\"STRING_LIST_ID_KID\"), min(\"STRING_LIST_ID_KID\") from \"SKEWED_COL_VALUE_LOC_MAP\" ";
} else {
skewedColIDRange = "select max(STRING_LIST_ID_KID), min(STRING_LIST_ID_KID) from SKEWED_COL_VALUE_LOC_MAP";
}
if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
skewedColLoc = "select t.\"TBL_NAME\", t.\"TBL_ID\", sk.\"STRING_LIST_ID_KID\", sk.\"LOCATION\" from \"TBLS\" t, \"SDS\" s, \"SKEWED_COL_VALUE_LOC_MAP\" sk "
+ "where sk.\"SD_ID\" = s.\"SD_ID\" and s.\"SD_ID\" = t.\"SD_ID\" and sk.\"STRING_LIST_ID_KID\" >= ? and sk.\"STRING_LIST_ID_KID\" <= ? ";
} else {
skewedColLoc = "select t.TBL_NAME, t.TBL_ID, sk.STRING_LIST_ID_KID, sk.LOCATION from TBLS t, SDS s, SKEWED_COL_VALUE_LOC_MAP sk "
+ "where sk.SD_ID = s.SD_ID and s.SD_ID = t.SD_ID and sk.STRING_LIST_ID_KID >= ? and sk.STRING_LIST_ID_KID <= ? ";
}
long maxID = 0, minID = 0;
long rtnSize = 2000;
try {
Statement stmt = conn.createStatement();
ResultSet res = stmt.executeQuery(skewedColIDRange);
if (res.next()) {
maxID = res.getLong(1);
minID = res.getLong(2);
}
res.close();
stmt.close();
PreparedStatement pStmt = conn.prepareStatement(skewedColLoc);
while (minID <= maxID) {
pStmt.setLong(1, minID);
pStmt.setLong(2, minID + rtnSize);
res = pStmt.executeQuery();
while (res.next()) {
String locValue = res.getString(4);
String entity = "Table " + getNameOrID(res,1,2) +
", String list " + res.getString(3);
if (!checkLocation(entity, locValue, defaultServers)) {
numOfInvalid++;
}
}
res.close();
minID += rtnSize + 1;
}
pStmt.close();
} catch (SQLException e) {
throw new HiveMetaException("Failed to get skewed columns location info.", e);
}
if (numOfInvalid > 0) {
isValid = false;
System.err.println("Total number of invalid SKEWED_COL_VALUE_LOC_MAP locations is: "+ numOfInvalid);
}
return isValid;
}
/**
 * Check if the location is valid for the given entity
 * @param entity a description of the database, table, or partition being checked
 * @param entityLocation the location
 * @param defaultServers the servers whose scheme and authority the location
 * must match; if empty, the server check is skipped
 * @return true if the location is valid
 */
private boolean checkLocation(
String entity,
String entityLocation,
URI[] defaultServers) {
boolean isValid = true;
if (entityLocation == null) {
System.err.println(entity + ", error: empty location");
isValid = false;
} else {
try {
URI currentUri = new Path(entityLocation).toUri();
String scheme = currentUri.getScheme();
if (StringUtils.isEmpty(scheme)) {
System.err.println(entity + ", location: "+ entityLocation + ", error: missing location scheme");
isValid = false;
} else if (ArrayUtils.isNotEmpty(defaultServers) && currentUri.getAuthority() != null) {
String authority = currentUri.getAuthority();
boolean matchServer = false;
for(URI server : defaultServers) {
if (StringUtils.equalsIgnoreCase(server.getScheme(), scheme) &&
StringUtils.equalsIgnoreCase(server.getAuthority(), authority)) {
matchServer = true;
break;
}
}
if (!matchServer) {
System.err.println(entity + ", location: " + entityLocation + ", error: mismatched server");
isValid = false;
}
}
} catch (Exception pe) {
System.err.println(entity + ", error: invalid location " + pe.getMessage());
isValid = false;
}
}
return isValid;
}
// test the connection to the metastore using the configured connection properties
private void testConnectionToMetastore() throws HiveMetaException {
Connection conn = getConnectionToMetastore(true);
try {
conn.close();
} catch (SQLException e) {
throw new HiveMetaException("Failed to close metastore connection", e);
}
}
/**
 * Check if the current schema version in the metastore matches the Hive version
 * @throws HiveMetaException
 */
public void verifySchemaVersion() throws HiveMetaException {
// don't check the version if it's a dry run
if (dryRun) {
return;
}
String newSchemaVersion = getMetaStoreSchemaVersion(
getConnectionToMetastore(false));
// verify that the new version is added to schema
assertCompatibleVersion(MetaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion);
}
private void assertCompatibleVersion(String hiveSchemaVersion, String dbSchemaVersion)
throws HiveMetaException {
if (!MetaStoreSchemaInfo.isVersionCompatible(hiveSchemaVersion, dbSchemaVersion)) {
throw new HiveMetaException("Metastore schema version is not compatible. Hive Version: "
+ hiveSchemaVersion + ", Database Schema Version: " + dbSchemaVersion);
}
}
/**
 * Perform the metastore schema upgrade, reading the current schema version
 * from the metastore.
 * @throws HiveMetaException
 */
public void doUpgrade() throws HiveMetaException {
String fromVersion = getMetaStoreSchemaVersion(
getConnectionToMetastore(false));
if (fromVersion == null || fromVersion.isEmpty()) {
throw new HiveMetaException("Schema version not stored in the metastore. " +
"Metastore schema is too old or corrupt. Try specifying the version manually");
}
doUpgrade(fromVersion);
}
/**
 * Perform metastore schema upgrade
 *
 * @param fromSchemaVer
 * Existing version of the metastore. If null, then read from the metastore
 * @throws HiveMetaException
 */
public void doUpgrade(String fromSchemaVer) throws HiveMetaException {
if (MetaStoreSchemaInfo.getHiveSchemaVersion().equals(fromSchemaVer)) {
System.out.println("No schema upgrade required from version " + fromSchemaVer);
return;
}
// Find the list of scripts to execute for this upgrade
List<String> upgradeScripts =
metaStoreSchemaInfo.getUpgradeScripts(fromSchemaVer);
testConnectionToMetastore();
System.out.println("Starting upgrade metastore schema from version " +
fromSchemaVer + " to " + MetaStoreSchemaInfo.getHiveSchemaVersion());
String scriptDir = metaStoreSchemaInfo.getMetaStoreScriptDir();
try {
for (String scriptFile : upgradeScripts) {
System.out.println("Upgrade script " + scriptFile);
if (!dryRun) {
runPreUpgrade(scriptDir, scriptFile);
runBeeLine(scriptDir, scriptFile);
System.out.println("Completed " + scriptFile);
}
}
} catch (IOException eIO) {
throw new HiveMetaException(
"Upgrade FAILED! Metastore state would be inconsistent !!", eIO);
}
// Revalidate the new version after the upgrade
verifySchemaVersion();
}
/**
 * Initialize the metastore schema to the current version
 *
 * @throws HiveMetaException
 */
public void doInit() throws HiveMetaException {
doInit(MetaStoreSchemaInfo.getHiveSchemaVersion());
// Revalidate the new version after initialization
verifySchemaVersion();
}
/**
 * Initialize the metastore schema
 *
 * @param toVersion
 * If null, then the current Hive version is used
 * @throws HiveMetaException
 */
public void doInit(String toVersion) throws HiveMetaException {
testConnectionToMetastore();
System.out.println("Starting metastore schema initialization to " + toVersion);
String initScriptDir = metaStoreSchemaInfo.getMetaStoreScriptDir();
String initScriptFile = metaStoreSchemaInfo.generateInitFileName(toVersion);
try {
System.out.println("Initialization script " + initScriptFile);
if (!dryRun) {
runBeeLine(initScriptDir, initScriptFile);
System.out.println("Initialization script completed");
}
} catch (IOException e) {
throw new HiveMetaException("Schema initialization FAILED!" +
" Metastore state would be inconsistent !!", e);
}
}
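/**
 * Run all metastore validations: schema version, sequence numbers, schema
 * tables, entity locations, and NULL column values. Exits the process with
 * status 1 if any check fails.
 */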
public void doValidate() throws HiveMetaException {
System.out.println("Starting metastore validation\n");
Connection conn = getConnectionToMetastore(false);
boolean success = true;
try {
if (validateSchemaVersions(conn)) {
System.out.println("[SUCCESS]\n");
} else {
success = false;
System.out.println("[FAIL]\n");
}
if (validateSequences(conn)) {
System.out.println("[SUCCESS]\n");
} else {
success = false;
System.out.println("[FAIL]\n");
}
if (validateSchemaTables(conn)) {
System.out.println("[SUCCESS]\n");
} else {
success = false;
System.out.println("[FAIL]\n");
}
if (validateLocations(conn, this.validationServers)) {
System.out.println("[SUCCESS]\n");
} else {
success = false;
System.out.println("[FAIL]\n");
}
if (validateColumnNullValues(conn)) {
System.out.println("[SUCCESS]\n");
} else {
success = false;
System.out.println("[FAIL]\n");
}
} finally {
if (conn != null) {
try {
conn.close();
} catch (SQLException e) {
throw new HiveMetaException("Failed to close metastore connection", e);
}
}
}
System.out.print("Done with metastore validation: ");
if (!success) {
System.out.println("[FAIL]");
System.exit(1);
} else {
System.out.println("[SUCCESS]");
}
}
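// SEQUENCE_TABLE holds the next ID to hand out for each JDO model class;
// its NEXT_VAL must be at least the max ID already present in the backing
// table, otherwise new inserts could collide with existing rows.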
boolean validateSequences(Connection conn) throws HiveMetaException {
Map<String, Pair<String, String>> seqNameToTable =
new ImmutableMap.Builder<String, Pair<String, String>>()
.put("MDatabase", Pair.of("DBS", "DB_ID"))
.put("MRole", Pair.of("ROLES", "ROLE_ID"))
.put("MGlobalPrivilege", Pair.of("GLOBAL_PRIVS", "USER_GRANT_ID"))
.put("MTable", Pair.of("TBLS","TBL_ID"))
.put("MStorageDescriptor", Pair.of("SDS", "SD_ID"))
.put("MSerDeInfo", Pair.of("SERDES", "SERDE_ID"))
.put("MColumnDescriptor", Pair.of("CDS", "CD_ID"))
.put("MTablePrivilege", Pair.of("TBL_PRIVS", "TBL_GRANT_ID"))
.put("MTableColumnStatistics", Pair.of("TAB_COL_STATS", "CS_ID"))
.put("MPartition", Pair.of("PARTITIONS", "PART_ID"))
.put("MPartitionColumnStatistics", Pair.of("PART_COL_STATS", "CS_ID"))
.put("MFunction", Pair.of("FUNCS", "FUNC_ID"))
.put("MIndex", Pair.of("IDXS", "INDEX_ID"))
.put("MStringList", Pair.of("SKEWED_STRING_LIST", "STRING_LIST_ID"))
.build();
System.out.println("Validating sequence number for SEQUENCE_TABLE");
boolean isValid = true;
try {
Statement stmt = conn.createStatement();
for (String seqName : seqNameToTable.keySet()) {
String tableName = seqNameToTable.get(seqName).getLeft();
String tableKey = seqNameToTable.get(seqName).getRight();
String seqQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ?
("select t.\"NEXT_VAL\" from \"SEQUENCE_TABLE\" t WHERE t.\"SEQUENCE_NAME\"='org.apache.hadoop.hive.metastore.model." + seqName + "'")
: ("select t.NEXT_VAL from SEQUENCE_TABLE t WHERE t.SEQUENCE_NAME='org.apache.hadoop.hive.metastore.model." + seqName + "'");
String maxIdQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ?
("select max(\"" + tableKey + "\") from \"" + tableName + "\"")
: ("select max(" + tableKey + ") from " + tableName);
ResultSet res = stmt.executeQuery(maxIdQuery);
if (res.next()) {
long maxId = res.getLong(1);
if (maxId > 0) {
ResultSet resSeq = stmt.executeQuery(seqQuery);
if (!resSeq.next()) {
isValid = false;
System.err.println("Missing SEQUENCE_NAME " + seqName + " from SEQUENCE_TABLE");
} else if (resSeq.getLong(1) < maxId) {
isValid = false;
System.err.println("NEXT_VAL for " + seqName + " in SEQUENCE_TABLE < max("+ tableKey + ") in " + tableName);
}
}
}
}
System.out.println((isValid ? "Succeeded" :"Failed") + " in sequence number validation for SEQUENCE_TABLE");
return isValid;
} catch(SQLException e) {
throw new HiveMetaException("Failed to validate sequence number for SEQUENCE_TABLE", e);
}
}
boolean validateSchemaVersions(Connection conn) throws HiveMetaException {
System.out.println("Validating schema version");
try {
String newSchemaVersion = getMetaStoreSchemaVersion(conn, true);
assertCompatibleVersion(MetaStoreSchemaInfo.getHiveSchemaVersion(), newSchemaVersion);
} catch (HiveMetaException hme) {
if (hme.getMessage().contains("Metastore schema version is not compatible")
|| hme.getMessage().contains("Multiple versions were found in metastore")
|| hme.getMessage().contains("Could not find version info in metastore VERSION table")) {
System.out.println("Failed in schema version validation: " + hme.getMessage());
return false;
} else {
throw hme;
}
}
System.out.println("Succeeded in schema version validation.");
return true;
}
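/**
 * Compare the tables that exist in the metastore database against the CREATE
 * TABLE statements in the packaged schema script for the detected version.
 */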
boolean validateSchemaTables(Connection conn) throws HiveMetaException {
String version = null;
ResultSet rs = null;
DatabaseMetaData metadata = null;
List<String> dbTables = new ArrayList<String>();
List<String> schemaTables = new ArrayList<String>();
List<String> subScripts = new ArrayList<String>();
Connection hmsConn = getConnectionToMetastore(false);
System.out.println("Validating metastore schema tables");
try {
version = getMetaStoreSchemaVersion(hmsConn);
} catch (HiveMetaException he) {
System.err.println("Failed to determine schema version from Hive Metastore DB," + he.getMessage());
LOG.debug("Failed to determine schema version from Hive Metastore DB," + he.getMessage());
return false;
}
// re-open the hms connection
hmsConn = getConnectionToMetastore(false);
LOG.debug("Validating tables in the schema for version " + version);
try {
metadata = conn.getMetaData();
String[] types = {"TABLE"};
rs = metadata.getTables(null, null, "%", types);
String table = null;
while (rs.next()) {
table = rs.getString("TABLE_NAME");
dbTables.add(table.toLowerCase());
LOG.debug("Found table " + table + " in HMS dbstore");
}
} catch (SQLException e) {
throw new HiveMetaException("Failed to retrieve schema tables from Hive Metastore DB," + e.getMessage());
} finally {
if (rs != null) {
try {
rs.close();
} catch (SQLException e) {
throw new HiveMetaException("Failed to close resultset", e);
}
}
}
// parse the schema script for this dbType to determine the tables that are
// expected to exist in this version of the schema
String baseDir = new File(metaStoreSchemaInfo.getMetaStoreScriptDir()).getParent();
String schemaFile = baseDir + "/" + dbType + "/hive-schema-" + version + "." + dbType + ".sql";
try {
LOG.debug("Parsing schema script " + schemaFile);
subScripts.addAll(findCreateTable(schemaFile, schemaTables));
while (subScripts.size() > 0) {
schemaFile = baseDir + "/" + dbType + "/" + subScripts.remove(0);
LOG.debug("Parsing subscript " + schemaFile);
subScripts.addAll(findCreateTable(schemaFile, schemaTables));
}
} catch (Exception e) {
System.err.println("Exception in parsing schema file. Cause:" + e.getMessage());
System.out.println("Schema table validation failed!!!");
return false;
}
LOG.debug("Schema tables:[ " + Arrays.toString(schemaTables.toArray()) + " ]");
LOG.debug("DB tables:[ " + Arrays.toString(dbTables.toArray()) + " ]");
// now diff the lists
int schemaSize = schemaTables.size();
schemaTables.removeAll(dbTables);
if (schemaTables.size() > 0) {
System.out.println("Table(s) [ " + Arrays.toString(schemaTables.toArray())
+ " ] are missing from the metastore database schema.");
System.out.println("Schema table validation failed!!!");
return false;
} else {
System.out.println("Succeeded in schema table validation.");
return true;
}
}
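// Scan a schema script for CREATE TABLE statements, adding the table names
// to tableList and returning the names of any nested scripts it sources.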
private List<String> findCreateTable(String path, List<String> tableList)
throws Exception {
NestedScriptParser sp = HiveSchemaHelper.getDbCommandParser(dbType);
Matcher matcher = null;
Pattern regexp = null;
List<String> subs = new ArrayList<String>();
int groupNo = 0;
switch (dbType) {
case HiveSchemaHelper.DB_MYSQL:
case HiveSchemaHelper.DB_MSSQL:
regexp = Pattern.compile("(CREATE TABLE) (\\S+).*");
groupNo = 2;
break;
default:
// oracle, derby, and postgres scripts may use CREATE TABLE IF NOT EXISTS
regexp = Pattern.compile("(CREATE TABLE(IF NOT EXISTS)*) (\\S+).*");
groupNo = 3;
break;
}
if (!(new File(path)).exists()) {
throw new Exception(path + " does not exist. Potentially incorrect version in the metastore VERSION table");
}
try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
String line = null;
while ((line = reader.readLine()) != null) {
if (sp.isNestedScript(line)) {
String subScript = sp.getScriptName(line);
LOG.debug("Schema subscript " + subScript + " found");
subs.add(subScript);
continue;
}
line = line.replaceAll("\\(", " ");
line = line.replaceAll("IF NOT EXISTS ", "");
line = line.replaceAll("`","");
line = line.replaceAll("'","");
line = line.replaceAll("\"","");
matcher = regexp.matcher(line);
if (matcher.find()) {
String table = matcher.group(groupNo);
if (dbType.equals("derby"))
table = table.replaceAll("APP.","");
tableList.add(table.toLowerCase());
LOG.debug("Found table " + table + " in the schema");
}
}
} catch (IOException ex) {
throw new Exception(ex.getMessage(), ex);
}
return subs;
}
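// Every managed or external table must reference a storage descriptor; a
// NULL SD_ID indicates a corrupt TBLS entry.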
boolean validateColumnNullValues(Connection conn) throws HiveMetaException {
System.out.println("Validating columns for incorrect NULL values");
boolean isValid = true;
try {
Statement stmt = conn.createStatement();
String tblQuery = getDbCommandParser(dbType).needsQuotedIdentifier() ?
("select t.* from \"TBLS\" t WHERE t.\"SD_ID\" IS NULL and (t.\"TBL_TYPE\"='" + TableType.EXTERNAL_TABLE + "' or t.\"TBL_TYPE\"='" + TableType.MANAGED_TABLE + "')")
: ("select t.* from TBLS t WHERE t.SD_ID IS NULL and (t.TBL_TYPE='" + TableType.EXTERNAL_TABLE + "' or t.TBL_TYPE='" + TableType.MANAGED_TABLE + "')");
ResultSet res = stmt.executeQuery(tblQuery);
while (res.next()) {
long tableId = res.getLong("TBL_ID");
String tableName = res.getString("TBL_NAME");
String tableType = res.getString("TBL_TYPE");
isValid = false;
System.err.println("SD_ID in TBLS should not be NULL for Table Name=" + tableName + ", Table ID=" + tableId + ", Table Type=" + tableType);
}
System.out.println((isValid ? "Succeeded" : "Failed") + " in column validation for incorrect NULL values");
return isValid;
} catch(SQLException e) {
throw new HiveMetaException("Failed to validate columns for incorrect NULL values", e);
}
}
/**
* Run pre-upgrade scripts corresponding to a given upgrade script,
* if any exist. The errors from pre-upgrade are ignored.
* Pre-upgrade scripts typically contain setup statements which
* may fail on some database versions and failure is ignorable.
*
* @param scriptDir upgrade script directory name
* @param scriptFile upgrade script file name
*/
private void runPreUpgrade(String scriptDir, String scriptFile) {
for (int i = 0;; i++) {
String preUpgradeScript =
MetaStoreSchemaInfo.getPreUpgradeScriptName(i, scriptFile);
File preUpgradeScriptFile = new File(scriptDir, preUpgradeScript);
if (!preUpgradeScriptFile.isFile()) {
break;
}
try {
runBeeLine(scriptDir, preUpgradeScript);
System.out.println("Completed " + preUpgradeScript);
} catch (Exception e) {
// Ignore the pre-upgrade script errors
System.err.println("Warning in pre-upgrade script " + preUpgradeScript + ": "
+ e.getMessage());
if (verbose) {
e.printStackTrace();
}
}
}
}
/**
 * Run BeeLine with the given metastore script. Flattens the nested scripts
 * into a single file.
 */
private void runBeeLine(String scriptDir, String scriptFile)
throws IOException, HiveMetaException {
NestedScriptParser dbCommandParser = getDbCommandParser(dbType);
// expand the nested script
String sqlCommands = dbCommandParser.buildCommand(scriptDir, scriptFile);
File tmpFile = File.createTempFile("schematool", ".sql");
tmpFile.deleteOnExit();
// write out the buffer into a file; add BeeLine commands for autocommit and close
try (BufferedWriter out = new BufferedWriter(new FileWriter(tmpFile.getPath()))) {
out.write("!autocommit on" + System.getProperty("line.separator"));
out.write(sqlCommands);
out.write("!closeall" + System.getProperty("line.separator"));
}
runBeeLine(tmpFile.getPath());
}
// Generate the beeline args per hive conf and execute the given script
public void runBeeLine(String sqlScriptFile) throws IOException {
CommandBuilder builder = new CommandBuilder(hiveConf, userName, passWord, sqlScriptFile);
// run the script using Beeline
try (BeeLine beeLine = new BeeLine()) {
if (!verbose) {
beeLine.setOutputStream(new PrintStream(new NullOutputStream()));
beeLine.getOpts().setSilent(true);
}
beeLine.getOpts().setAllowMultiLineCommand(false);
beeLine.getOpts().setIsolation("TRANSACTION_READ_COMMITTED");
// We can be pretty sure that an entire line can be processed as a single command since
// we always add a line separator at the end while calling dbCommandParser.buildCommand.
beeLine.getOpts().setEntireLineAsCommand(true);
LOG.debug("Going to run command <" + builder.buildToLog() + ">");
int status = beeLine.begin(builder.buildToRun(), null);
if (status != 0) {
throw new IOException("Schema script failed, errorcode " + status);
}
}
}
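// Builds the BeeLine argument list from the Hive configuration. The real
// password is used when running; it is replaced by a mask when logging.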
static class CommandBuilder {
private final HiveConf hiveConf;
private final String userName;
private final String password;
private final String sqlScriptFile;
CommandBuilder(HiveConf hiveConf, String userName, String password, String sqlScriptFile) {
this.hiveConf = hiveConf;
this.userName = userName;
this.password = password;
this.sqlScriptFile = sqlScriptFile;
}
String[] buildToRun() throws IOException {
return argsWith(password);
}
String buildToLog() throws IOException {
logScript();
return StringUtils.join(argsWith(BeeLine.PASSWD_MASK), " ");
}
private String[] argsWith(String password) throws IOException {
return new String[] { "-u",
HiveSchemaHelper.getValidConfVar(ConfVars.METASTORECONNECTURLKEY, hiveConf), "-d",
HiveSchemaHelper.getValidConfVar(ConfVars.METASTORE_CONNECTION_DRIVER, hiveConf), "-n",
userName, "-p", password, "-f", sqlScriptFile };
}
private void logScript() throws IOException {
if (LOG.isDebugEnabled()) {
LOG.debug("Going to invoke file that contains:");
try (BufferedReader reader = new BufferedReader(new FileReader(sqlScriptFile))) {
String line;
while ((line = reader.readLine()) != null) {
LOG.debug("script: " + line);
}
}
}
}
}
// Create the required command line options
@SuppressWarnings("static-access")
private static void initOptions(Options cmdLineOptions) {
Option help = new Option("help", "print this message");
Option upgradeOpt = new Option("upgradeSchema", "Schema upgrade");
Option upgradeFromOpt = OptionBuilder.withArgName("upgradeFrom").hasArg().
withDescription("Schema upgrade from a version").
create("upgradeSchemaFrom");
Option initOpt = new Option("initSchema", "Schema initialization");
Option initToOpt = OptionBuilder.withArgName("initTo").hasArg().
withDescription("Schema initialization to a version").
create("initSchemaTo");
Option infoOpt = new Option("info", "Show config and schema details");
Option validateOpt = new Option("validate", "Validate the database");
OptionGroup optGroup = new OptionGroup();
optGroup.addOption(upgradeOpt).addOption(initOpt).
addOption(help).addOption(upgradeFromOpt).
addOption(initToOpt).addOption(infoOpt).addOption(validateOpt);
optGroup.setRequired(true);
Option userNameOpt = OptionBuilder.withArgName("user")
.hasArgs()
.withDescription("Override config file user name")
.create("userName");
Option passwdOpt = OptionBuilder.withArgName("password")
.hasArgs()
.withDescription("Override config file password")
.create("passWord");
Option dbTypeOpt = OptionBuilder.withArgName("databaseType")
.hasArgs().withDescription("Metastore database type")
.create("dbType");
Option dbOpts = OptionBuilder.withArgName("databaseOpts")
.hasArgs().withDescription("Backend DB specific options")
.create("dbOpts");
Option dryRunOpt = new Option("dryRun", "list SQL scripts (no execute)");
Option verboseOpt = new Option("verbose", "only print SQL statements");
Option serversOpt = OptionBuilder.withArgName("serverList")
.hasArgs().withDescription("a comma-separated list of servers used in location validation")
.create("servers");
cmdLineOptions.addOption(help);
cmdLineOptions.addOption(dryRunOpt);
cmdLineOptions.addOption(userNameOpt);
cmdLineOptions.addOption(passwdOpt);
cmdLineOptions.addOption(dbTypeOpt);
cmdLineOptions.addOption(verboseOpt);
cmdLineOptions.addOption(dbOpts);
cmdLineOptions.addOption(serversOpt);
cmdLineOptions.addOptionGroup(optGroup);
}
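// Command-line entry point. A minimal programmatic use, assuming HIVE_HOME
// is set in the environment (illustrative sketch only):
//   HiveSchemaTool tool = new HiveSchemaTool("derby");
//   tool.doInit();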
public static void main(String[] args) {
CommandLineParser parser = new GnuParser();
CommandLine line = null;
String dbType = null;
String schemaVer = null;
Options cmdLineOptions = new Options();
// Argument handling
initOptions(cmdLineOptions);
try {
line = parser.parse(cmdLineOptions, args);
} catch (ParseException e) {
System.err.println("HiveSchemaTool:Parsing failed. Reason: " + e.getLocalizedMessage());
printAndExit(cmdLineOptions);
}
if (line.hasOption("help")) {
HelpFormatter formatter = new HelpFormatter();
formatter.printHelp("schemaTool", cmdLineOptions);
return;
}
if (line.hasOption("dbType")) {
dbType = line.getOptionValue("dbType");
if ((!dbType.equalsIgnoreCase(HiveSchemaHelper.DB_DERBY) &&
!dbType.equalsIgnoreCase(HiveSchemaHelper.DB_MSSQL) &&
!dbType.equalsIgnoreCase(HiveSchemaHelper.DB_MYSQL) &&
!dbType.equalsIgnoreCase(HiveSchemaHelper.DB_POSTGRACE) && !dbType
.equalsIgnoreCase(HiveSchemaHelper.DB_ORACLE))) {
System.err.println("Unsupported dbType " + dbType);
printAndExit(cmdLineOptions);
}
} else {
System.err.println("no dbType supplied");
printAndExit(cmdLineOptions);
}
System.setProperty(HiveConf.ConfVars.METASTORE_SCHEMA_VERIFICATION.varname, "true");
try {
HiveSchemaTool schemaTool = new HiveSchemaTool(dbType);
if (line.hasOption("userName")) {
schemaTool.setUserName(line.getOptionValue("userName"));
}
if (line.hasOption("passWord")) {
schemaTool.setPassWord(line.getOptionValue("passWord"));
}
if (line.hasOption("dryRun")) {
schemaTool.setDryRun(true);
}
if (line.hasOption("verbose")) {
schemaTool.setVerbose(true);
}
if (line.hasOption("dbOpts")) {
schemaTool.setDbOpts(line.getOptionValue("dbOpts"));
}
if (line.hasOption("validate") && line.hasOption("servers")) {
schemaTool.setValidationServers(line.getOptionValue("servers"));
}
if (line.hasOption("info")) {
schemaTool.showInfo();
} else if (line.hasOption("upgradeSchema")) {
schemaTool.doUpgrade();
} else if (line.hasOption("upgradeSchemaFrom")) {
schemaVer = line.getOptionValue("upgradeSchemaFrom");
schemaTool.doUpgrade(schemaVer);
} else if (line.hasOption("initSchema")) {
schemaTool.doInit();
} else if (line.hasOption("initSchemaTo")) {
schemaVer = line.getOptionValue("initSchemaTo");
schemaTool.doInit(schemaVer);
} else if (line.hasOption("validate")) {
schemaTool.doValidate();
} else {
System.err.println("no valid option supplied");
printAndExit(cmdLineOptions);
}
} catch (HiveMetaException e) {
System.err.println(e);
if (e.getCause() != null) {
Throwable t = e.getCause();
System.err.println("Underlying cause: "
+ t.getClass().getName() + " : "
+ t.getMessage());
if (e.getCause() instanceof SQLException) {
System.err.println("SQL Error code: " + ((SQLException)t).getErrorCode());
}
}
if (line.hasOption("verbose")) {
e.printStackTrace();
} else {
System.err.println("Use --verbose for detailed stacktrace.");
}
System.err.println("*** schemaTool failed ***");
System.exit(1);
}
System.out.println("schemaTool completed");
}
}