/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.stats.jdbc;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.SQLRecoverableException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.stats.StatsAggregator;
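/**
 * StatsAggregator implementation backed by a JDBC stats database. Per-task statistics
 * published under a common key prefix are read back through prepared aggregation SELECT
 * statements (one per supported statistic type), with retry and randomized back-off
 * around connection setup, query execution and clean-up.
 *
 * A minimal usage sketch (conf, task, fileID and statType are placeholders supplied by
 * the caller; statType must be one of JDBCStatsUtils.getSupportedStatistics()):
 * <pre>
 *   JDBCStatsAggregator agg = new JDBCStatsAggregator();
 *   if (agg.connect(conf, task)) {
 *     String value = agg.aggregateStats(fileID, statType);
 *     agg.cleanUp(fileID);
 *     agg.closeConnection();
 *   }
 * </pre>
 */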
public class JDBCStatsAggregator implements StatsAggregator {
private Connection conn;
private String connectionString;
private Configuration hiveconf;
private Task sourceTask;
private final Map<String, PreparedStatement> columnMapping;
private final Log LOG = LogFactory.getLog(this.getClass().getName());
private int timeout = 30;
private final String comment = "Hive stats aggregation: " + this.getClass().getName();
private int maxRetries;
private long waitWindow;
private final Random r;
public JDBCStatsAggregator() {
columnMapping = new HashMap<String, PreparedStatement>();
r = new Random();
}
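/**
 * Loads the configured JDBC driver, opens a connection to the stats database and
 * prepares one aggregation SELECT statement per supported statistic type. Connection
 * and statement preparation are retried with a randomized wait window; returns false
 * if the driver cannot be instantiated or the retries are exhausted.
 */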
@Override
public boolean connect(Configuration hiveconf, Task sourceTask) {
this.hiveconf = hiveconf;
timeout = (int) HiveConf.getTimeVar(
hiveconf, HiveConf.ConfVars.HIVE_STATS_JDBC_TIMEOUT, TimeUnit.SECONDS);
connectionString = HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSDBCONNECTIONSTRING);
String driver = HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSJDBCDRIVER);
maxRetries = HiveConf.getIntVar(hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_MAX);
waitWindow = HiveConf.getTimeVar(
hiveconf, HiveConf.ConfVars.HIVE_STATS_RETRIES_WAIT, TimeUnit.MILLISECONDS);
this.sourceTask = sourceTask;
try {
JavaUtils.loadClass(driver).newInstance();
} catch (Exception e) {
LOG.error("Error during instantiating JDBC driver " + driver + ". ", e);
return false;
}
// stats aggregation is non-blocking -- throw an exception on timeout instead of blocking
DriverManager.setLoginTimeout(timeout);
// function pointer for executeWithRetry to setQueryTimeout
Utilities.SQLCommand<Void> setQueryTimeout = new Utilities.SQLCommand<Void>() {
@Override
public Void run(PreparedStatement stmt) throws SQLException {
Utilities.setQueryTimeout(stmt, timeout);
return null;
}
};
// retry connection and statement preparations
for (int failures = 0;; failures++) {
try {
conn = Utilities.connectWithRetry(connectionString, waitWindow, maxRetries);
for (String statType : JDBCStatsUtils.getSupportedStatistics()) {
// prepare statements
PreparedStatement selStmt = Utilities.prepareWithRetry(conn,
JDBCStatsUtils.getSelectAggr(statType, comment), waitWindow, maxRetries);
columnMapping.put(statType, selStmt);
// set query timeout
Utilities.executeWithRetry(setQueryTimeout, selStmt, waitWindow, failures);
}
return true;
} catch (SQLRecoverableException e) {
if (failures > maxRetries) {
LOG.error("Error during JDBC connection and preparing statement: " + e);
return false;
}
long waitTime = Utilities.getRandomWaitTime(waitWindow, failures, r);
try {
Thread.sleep(waitTime);
} catch (InterruptedException e1) {
}
} catch (SQLException e) {
// for SQLTransientException (maxRetries already reached in the Utilities retry functions)
// or SQLNonTransientException, declare a real failure
return false;
}
}
}
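/**
 * Runs the prepared aggregation query for the given statistic type over all rows whose
 * key starts with fileID (matched via an escaped SQL LIKE prefix) and returns the
 * aggregated value as a String. Returns null if the statistic type is unsupported,
 * nothing was published, or an unrecoverable SQL error occurs; on recoverable errors
 * the connection is re-established and the query retried.
 */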
@Override
public String aggregateStats(String fileID, String statType) {
if (!JDBCStatsUtils.isValidStatistic(statType)) {
LOG.warn("Invalid statistic: " + statType + ", supported stats: " +
JDBCStatsUtils.getSupportedStatistics());
return null;
}
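// function pointer for executeWithRetry to run the aggregation query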
Utilities.SQLCommand<ResultSet> execQuery = new Utilities.SQLCommand<ResultSet>() {
@Override
public ResultSet run(PreparedStatement stmt) throws SQLException {
return stmt.executeQuery();
}
};
JDBCStatsUtils.validateRowId(fileID);
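// build a LIKE prefix pattern: escape special characters in fileID and match any suffix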
String keyPrefix = Utilities.escapeSqlLike(fileID) + "%";
for (int failures = 0;; failures++) {
try {
long retval = 0;
PreparedStatement selStmt = columnMapping.get(statType);
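// bind the key prefix pattern and the SQL LIKE escape character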
selStmt.setString(1, keyPrefix);
selStmt.setString(2, Character.toString(Utilities.sqlEscapeChar));
ResultSet result = Utilities.executeWithRetry(execQuery, selStmt, waitWindow, maxRetries);
if (result.next()) {
retval = result.getLong(1);
} else {
LOG.warn("Nothing published. Nothing aggregated.");
return null;
}
return Long.toString(retval);
} catch (SQLRecoverableException e) {
// need to start from scratch (connection)
if (failures >= maxRetries) {
return null;
}
// close the current connection
closeConnection();
long waitTime = Utilities.getRandomWaitTime(waitWindow, failures, r);
try {
Thread.sleep(waitTime);
} catch (InterruptedException iex) {
}
// getting a new connection
if (!connect(hiveconf, sourceTask)) {
// if cannot reconnect, just fail because connect() already handles retries.
LOG.error("Error during publishing aggregation. " + e);
return null;
}
} catch (SQLException e) {
// for SQLTransientException (already handled by the Utilities.*WithRetries() functions)
// and SQLNonTransientException, just declare failure.
LOG.error("Error during stats aggregation. " + e);
return null;
}
}
}
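/**
 * Closes the JDBC connection. When the stats database is Derby, the embedded database
 * is additionally shut down via the ";shutdown=true" connection attribute, which always
 * throws an exception that is caught and ignored.
 */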
@Override
public boolean closeConnection() {
if (conn == null) {
return true;
}
try {
conn.close();
// In the case of Derby, explicitly shut down the embedded database as well
if (HiveConf.getVar(hiveconf, HiveConf.ConfVars.HIVESTATSDBCLASS).equalsIgnoreCase(
"jdbc:derby")) {
try {
// Shutting down Derby always throws an exception, which has to be caught
// and ignored.
DriverManager.getConnection(connectionString + ";shutdown=true");
} catch (Exception e) {
// Do nothing because we know that an exception is thrown anyway.
}
}
return true;
} catch (SQLException e) {
LOG.error("Error during JDBC termination. " + e);
return false;
}
}
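/**
 * Deletes all statistics rows whose key starts with rowID, reconnecting and retrying
 * on recoverable SQL errors. Returns false once retries are exhausted or a
 * non-recoverable SQL error occurs.
 */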
@Override
public boolean cleanUp(String rowID) {
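// function pointer for executeWithRetry to run the delete statement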
Utilities.SQLCommand<Void> execUpdate = new Utilities.SQLCommand<Void>() {
@Override
public Void run(PreparedStatement stmt) throws SQLException {
stmt.executeUpdate();
return null;
}
};
try {
JDBCStatsUtils.validateRowId(rowID);
String keyPrefix = Utilities.escapeSqlLike(rowID) + "%";
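// prepare the DELETE statement and bind the key prefix pattern and escape character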
PreparedStatement delStmt = Utilities.prepareWithRetry(conn,
JDBCStatsUtils.getDeleteAggr(rowID, comment), waitWindow, maxRetries);
delStmt.setString(1, keyPrefix);
delStmt.setString(2, Character.toString(Utilities.sqlEscapeChar));
for (int failures = 0;; failures++) {
try {
Utilities.executeWithRetry(execUpdate, delStmt, waitWindow, maxRetries);
return true;
} catch (SQLRecoverableException e) {
// need to start from scratch (connection)
if (failures >= maxRetries) {
LOG.error("Error during clean-up after " + maxRetries + " retries. " + e);
return false;
}
// close the current connection
closeConnection();
long waitTime = Utilities.getRandomWaitTime(waitWindow, failures, r);
try {
Thread.sleep(waitTime);
} catch (InterruptedException iex) {
}
// getting a new connection
if (!connect(hiveconf, sourceTask)) {
LOG.error("Error during clean-up. " + e);
return false;
}
} catch (SQLException e) {
// for SQLTransientException (already handled by the Utilities.*WithRetries() functions)
// and SQLNonTransientException, just declare failure.
LOG.error("Error during clean-up. " + e);
return false;
}
}
} catch (SQLException e) {
LOG.error("Error during publishing aggregation. " + e);
return false;
}
}
}