/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.mapreduce.index;
import java.io.IOException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.codec.binary.Hex;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.regionserver.ScanInfoUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.jdbc.PhoenixResultSet;
import org.apache.phoenix.mapreduce.PhoenixJobCounters;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.OutputFormat;
import org.apache.phoenix.mapreduce.index.IndexScrutinyTool.SourceTable;
import org.apache.phoenix.mapreduce.util.ConnectionUtil;
import org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil;
import org.apache.phoenix.parse.HintNode.Hint;
import org.apache.phoenix.query.ConnectionQueryServices;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.schema.PTableType;
import org.apache.phoenix.util.ColumnInfo;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.MetaDataUtil;
import org.apache.phoenix.util.PhoenixRuntime;
import org.apache.phoenix.util.QueryUtil;
import org.apache.phoenix.util.SchemaUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Joiner;
/**
* Mapper that reads from the data table and checks the rows against the index table
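* <p>
* Depending on the configured source table, the mapper iterates over either the data table or the
* index table, buffers a batch of source rows, and then queries the other (target) table for the
* corresponding rows to check that they match.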
*/
public class IndexScrutinyMapper extends Mapper<NullWritable, PhoenixIndexDBWritable, Text, Text> {
private static final Logger LOGGER = LoggerFactory.getLogger(IndexScrutinyMapper.class);
protected Connection connection;
private List<ColumnInfo> targetTblColumnMetadata;
private long batchSize;
// holds a batch of rows from the table the mapper is iterating over
// Each row is a pair - the row TS, and the row values
protected List<Pair<Long, List<Object>>> currentBatchValues = new ArrayList<>();
protected String targetTableQuery;
protected int numTargetPkCols;
protected boolean outputInvalidRows;
protected OutputFormat outputFormat = OutputFormat.FILE;
private String qSourceTable;
private String qTargetTable;
private long executeTimestamp;
private int numSourcePkCols;
private final PhoenixIndexDBWritable indxWritable = new PhoenixIndexDBWritable();
private List<ColumnInfo> sourceTblColumnMetadata;
// used to write results to the output table
protected Connection outputConn;
protected PreparedStatement outputUpsertStmt;
private long outputMaxRows;
private MessageDigest md5;
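// TTL configured on the table under scrutiny; rows past their TTL may be legitimately missing from the target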
private long ttl;
private long scnTimestamp;
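// max lookback age in milliseconds; rows older than this window may already have been compacted away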
private long maxLookbackAgeMillis;
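/** Returns the SCN timestamp at which the scrutiny reads are performed. */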
protected long getScrutinyTs(){
return scnTimestamp;
}
@Override
protected void setup(final Context context) throws IOException, InterruptedException {
super.setup(context);
final Configuration configuration = context.getConfiguration();
try {
// get a connection with correct CURRENT_SCN (so incoming writes don't throw off the
// scrutiny)
final Properties overrideProps = new Properties();
String scn = configuration.get(PhoenixConfigurationUtil.CURRENT_SCN_VALUE);
overrideProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, scn);
scnTimestamp = Long.parseLong(scn);
connection = ConnectionUtil.getOutputConnection(configuration, overrideProps);
connection.setAutoCommit(false);
batchSize = PhoenixConfigurationUtil.getScrutinyBatchSize(configuration);
outputInvalidRows =
PhoenixConfigurationUtil.getScrutinyOutputInvalidRows(configuration);
outputFormat = PhoenixConfigurationUtil.getScrutinyOutputFormat(configuration);
executeTimestamp = PhoenixConfigurationUtil.getScrutinyExecuteTimestamp(configuration);
// get the index table and column names
String qDataTable = PhoenixConfigurationUtil.getScrutinyDataTableName(configuration);
final PTable pdataTable = PhoenixRuntime.getTable(connection, qDataTable);
final String qIndexTable =
PhoenixConfigurationUtil.getScrutinyIndexTableName(configuration);
final PTable pindexTable = PhoenixRuntime.getTable(connection, qIndexTable);
// set the target table based on whether we're running the MR over the data or index
// table
SourceTable sourceTable =
PhoenixConfigurationUtil.getScrutinySourceTable(configuration);
SourceTargetColumnNames columnNames =
SourceTable.DATA_TABLE_SOURCE.equals(sourceTable)
? new SourceTargetColumnNames.DataSourceColNames(pdataTable,
pindexTable)
: new SourceTargetColumnNames.IndexSourceColNames(pdataTable,
pindexTable);
qSourceTable = columnNames.getQualifiedSourceTableName();
qTargetTable = columnNames.getQualifiedTargetTableName();
List<String> targetColNames = columnNames.getTargetColNames();
List<String> sourceColNames = columnNames.getSourceColNames();
List<String> targetPkColNames = columnNames.getTargetPkColNames();
String targetPksCsv =
Joiner.on(",").join(SchemaUtil.getEscapedFullColumnNames(targetPkColNames));
numSourcePkCols = columnNames.getSourcePkColNames().size();
numTargetPkCols = targetPkColNames.size();
if (outputInvalidRows && OutputFormat.TABLE.equals(outputFormat)) {
outputConn = ConnectionUtil.getOutputConnection(configuration, new Properties());
String upsertQuery = PhoenixConfigurationUtil.getUpsertStatement(configuration);
this.outputUpsertStmt = outputConn.prepareStatement(upsertQuery);
}
outputMaxRows = PhoenixConfigurationUtil.getScrutinyOutputMax(configuration);
// Create the query against the target table
// Our query projection should be all the index column names (or their data table
// equivalent
// name)
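// The trailing " IN " is completed per batch with the primary key values of the source rows being checked.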
targetTableQuery =
QueryUtil.constructSelectStatement(qTargetTable, columnNames.getCastedTargetColNames(), targetPksCsv,
Hint.NO_INDEX, false) + " IN ";
targetTblColumnMetadata =
PhoenixRuntime.generateColumnInfo(connection, qTargetTable, targetColNames);
sourceTblColumnMetadata =
PhoenixRuntime.generateColumnInfo(connection, qSourceTable, sourceColNames);
LOGGER.info("Target table base query: " + targetTableQuery);
md5 = MessageDigest.getInstance("MD5");
ttl = getTableTtl();
maxLookbackAgeMillis = ScanInfoUtil.getMaxLookbackInMillis(configuration);
} catch (SQLException | NoSuchAlgorithmException e) {
tryClosingResourceSilently(this.outputUpsertStmt);
tryClosingResourceSilently(this.connection);
tryClosingResourceSilently(this.outputConn);
throw new RuntimeException(e);
}
postSetup();
}
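/** Hook for subclasses to perform any additional setup after the mapper has been initialized. */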
protected void postSetup() {
}
private static void tryClosingResourceSilently(AutoCloseable res) {
if (res != null) {
try {
res.close();
} catch (Exception e) {
LOGGER.error("Closing resource: " + res + " failed :", e);
}
}
}
@Override
protected void map(NullWritable key, PhoenixIndexDBWritable record, Context context)
throws IOException, InterruptedException {
try {
final List<Object> values = record.getValues();