
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.execute;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.UPSERT_CF;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.UPSERT_STATUS_CQ;
import static org.apache.phoenix.monitoring.MetricType.DELETE_AGGREGATE_FAILURE_SQL_COUNTER;
import static org.apache.phoenix.monitoring.MetricType.DELETE_AGGREGATE_SUCCESS_SQL_COUNTER;
import static org.apache.phoenix.monitoring.MetricType.UPSERT_AGGREGATE_FAILURE_SQL_COUNTER;
import static org.apache.phoenix.monitoring.MetricType.UPSERT_AGGREGATE_SUCCESS_SQL_COUNTER;
import static org.apache.phoenix.monitoring.MetricType.NUM_METADATA_LOOKUP_FAILURES;
import static org.apache.phoenix.query.QueryServices.INDEX_REGION_OBSERVER_ENABLED_ALL_TABLES_ATTRIB;
import static org.apache.phoenix.query.QueryServices.SOURCE_OPERATION_ATTRIB;
import static org.apache.phoenix.query.QueryServicesOptions.DEFAULT_INDEX_REGION_OBSERVER_ENABLED_ALL_TABLES;
import static org.apache.phoenix.thirdparty.com.google.common.base.Preconditions.checkNotNull;
import static org.apache.phoenix.monitoring.GlobalClientMetrics.GLOBAL_MUTATION_BATCH_FAILED_COUNT;
import static org.apache.phoenix.monitoring.GlobalClientMetrics.GLOBAL_MUTATION_BATCH_SIZE;
import static org.apache.phoenix.monitoring.GlobalClientMetrics.GLOBAL_MUTATION_BYTES;
import static org.apache.phoenix.monitoring.GlobalClientMetrics.GLOBAL_MUTATION_COMMIT_TIME;
import static org.apache.phoenix.monitoring.GlobalClientMetrics.GLOBAL_MUTATION_INDEX_COMMIT_FAILURE_COUNT;
import static org.apache.phoenix.monitoring.GlobalClientMetrics.GLOBAL_MUTATION_SYSCAT_TIME;
import static org.apache.phoenix.query.QueryServices.WILDCARD_QUERY_DYNAMIC_COLS_ATTRIB;
import static org.apache.phoenix.query.QueryServicesOptions.DEFAULT_WILDCARD_QUERY_DYNAMIC_COLS_ATTRIB;
import java.io.IOException;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.Immutable;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.htrace.Span;
import org.apache.htrace.TraceScope;
import org.apache.phoenix.cache.ServerCacheClient.ServerCache;
import org.apache.phoenix.compile.MutationPlan;
import org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants;
import org.apache.phoenix.coprocessorclient.MetaDataProtocol.MetaDataMutationResult;
import org.apache.phoenix.exception.SQLExceptionCode;
import org.apache.phoenix.exception.SQLExceptionInfo;
import org.apache.phoenix.hbase.index.exception.IndexWriteException;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
import org.apache.phoenix.index.IndexMaintainer;
import org.apache.phoenix.index.IndexMetaDataCacheClient;
import org.apache.phoenix.index.PhoenixIndexBuilderHelper;
import org.apache.phoenix.index.PhoenixIndexFailurePolicyHelper;
import org.apache.phoenix.index.PhoenixIndexFailurePolicyHelper.MutateCommand;
import org.apache.phoenix.index.PhoenixIndexMetaData;
import org.apache.phoenix.jdbc.PhoenixConnection;
import org.apache.phoenix.jdbc.PhoenixStatement.Operation;
import org.apache.phoenix.monitoring.GlobalClientMetrics;
import org.apache.phoenix.monitoring.MutationMetricQueue;
import org.apache.phoenix.monitoring.MutationMetricQueue.MutationMetric;
import org.apache.phoenix.monitoring.MutationMetricQueue.NoOpMutationMetricsQueue;
import org.apache.phoenix.monitoring.ReadMetricQueue;
import org.apache.phoenix.monitoring.TableMetricsManager;
import org.apache.phoenix.query.QueryConstants;
import org.apache.phoenix.query.QueryServices;
import org.apache.phoenix.query.QueryServicesOptions;
import org.apache.phoenix.schema.IllegalDataException;
import org.apache.phoenix.schema.MaxMutationSizeBytesExceededException;
import org.apache.phoenix.schema.MaxMutationSizeExceededException;
import org.apache.phoenix.schema.MetaDataClient;
import org.apache.phoenix.schema.PColumn;
import org.apache.phoenix.schema.PIndexState;
import org.apache.phoenix.schema.PMetaData;
import org.apache.phoenix.schema.PName;
import org.apache.phoenix.schema.PRow;
import org.apache.phoenix.schema.PTable;
import org.apache.phoenix.schema.PTableRef;
import org.apache.phoenix.schema.PTableType;
import org.apache.phoenix.schema.RowKeySchema;
import org.apache.phoenix.schema.SortOrder;
import org.apache.phoenix.schema.TableNotFoundException;
import org.apache.phoenix.schema.TableRef;
import org.apache.phoenix.schema.ValueSchema.Field;
import org.apache.phoenix.schema.types.PInteger;
import org.apache.phoenix.schema.types.PLong;
import org.apache.phoenix.schema.types.PTimestamp;
import org.apache.phoenix.thirdparty.com.google.common.base.Strings;
import org.apache.phoenix.trace.util.Tracing;
import org.apache.phoenix.transaction.PhoenixTransactionContext;
import org.apache.phoenix.transaction.PhoenixTransactionContext.PhoenixVisibilityLevel;
import org.apache.phoenix.transaction.TransactionFactory;
import org.apache.phoenix.transaction.TransactionFactory.Provider;
import org.apache.phoenix.util.ClientUtil;
import org.apache.phoenix.util.EncodedColumnsUtil;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.IndexUtil;
import org.apache.phoenix.util.LogUtil;
import org.apache.phoenix.util.PhoenixKeyValueUtil;
import org.apache.phoenix.util.SQLCloseable;
import org.apache.phoenix.util.SchemaUtil;
import org.apache.phoenix.util.SizedUtil;
import org.apache.phoenix.util.TransactionUtil;
import org.apache.phoenix.util.WALAnnotationUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions;
import org.apache.phoenix.thirdparty.com.google.common.base.Predicate;
import org.apache.phoenix.thirdparty.com.google.common.collect.Iterators;
import org.apache.phoenix.thirdparty.com.google.common.collect.Lists;
import org.apache.phoenix.thirdparty.com.google.common.collect.Maps;
import org.apache.phoenix.thirdparty.com.google.common.collect.Sets;
/**
* Tracks the uncommitted state
*/
public class MutationState implements SQLCloseable {
private static final Logger LOGGER = LoggerFactory.getLogger(MutationState.class);
private static final int[] EMPTY_STATEMENT_INDEX_ARRAY = new int[0];
private static final int MAX_COMMIT_RETRIES = 3;
private final PhoenixConnection connection;
private final int maxSize;
private final long maxSizeBytes;
private final long batchSize;
private final long batchSizeBytes;
private long batchCount = 0L;
// For each table, maintain a list of mutation batches. Each element in the
// list is a set of row mutations which can be sent in a single commit batch.
// A regular upsert and a conditional upsert on the same row conflict with
// each other so they are split and sent separately in different commit batches.
private final Map<TableRef, List<MultiRowMutationState>> mutationsMap;
private final Set<String> uncommittedPhysicalNames = Sets.newHashSetWithExpectedSize(10);
private long sizeOffset;
private int numRows = 0;
private int numUpdatedRowsForAutoCommit = 0;
private long estimatedSize = 0;
private int[] uncommittedStatementIndexes = EMPTY_STATEMENT_INDEX_ARRAY;
private boolean isExternalTxContext = false;
private Map<TableRef, List<MultiRowMutationState>> txMutations = Collections.emptyMap();
private PhoenixTransactionContext phoenixTransactionContext = PhoenixTransactionContext.NULL_CONTEXT;
private final MutationMetricQueue mutationMetricQueue;
private ReadMetricQueue readMetricQueue;
private Map<String, Long> timeInExecuteMutationMap = new HashMap<>();
private static boolean allUpsertsMutations = true;
private static boolean allDeletesMutations = true;
private final boolean indexRegionObserverEnabledAllTables;
public static void resetAllMutationState(){
allDeletesMutations = true;
allUpsertsMutations = true;
}
public MutationState(int maxSize, long maxSizeBytes, PhoenixConnection connection) {
this(maxSize, maxSizeBytes, connection, false, null);
}
public MutationState(int maxSize, long maxSizeBytes, PhoenixConnection connection,
PhoenixTransactionContext txContext) {
this(maxSize, maxSizeBytes, connection, false, txContext);
}
public MutationState(MutationState mutationState) {
this(mutationState, mutationState.connection);
}
public MutationState(MutationState mutationState, PhoenixConnection connection) {
this(mutationState.maxSize, mutationState.maxSizeBytes, connection, true, mutationState
.getPhoenixTransactionContext());
}
public MutationState(int maxSize, long maxSizeBytes, PhoenixConnection connection,
long sizeOffset) {
this(maxSize, maxSizeBytes, connection, false, null, sizeOffset);
}
private MutationState(int maxSize, long maxSizeBytes, PhoenixConnection connection,
boolean subTask, PhoenixTransactionContext txContext) {
this(maxSize, maxSizeBytes, connection, subTask, txContext, 0);
}
private MutationState(int maxSize, long maxSizeBytes, PhoenixConnection connection,
boolean subTask, PhoenixTransactionContext txContext, long sizeOffset) {
this(maxSize, maxSizeBytes, connection, Maps.<TableRef, List<MultiRowMutationState>> newHashMapWithExpectedSize(5),
subTask, txContext);
this.sizeOffset = sizeOffset;
}
MutationState(int maxSize, long maxSizeBytes, PhoenixConnection connection,
Map<TableRef, List<MultiRowMutationState>> mutationsMap, boolean subTask, PhoenixTransactionContext txContext) {
this.maxSize = maxSize;
this.maxSizeBytes = maxSizeBytes;
this.connection = connection;
this.batchSize = connection.getMutateBatchSize();
this.batchSizeBytes = connection.getMutateBatchSizeBytes();
this.mutationsMap = mutationsMap;
boolean isMetricsEnabled = connection.isRequestLevelMetricsEnabled();
this.mutationMetricQueue = isMetricsEnabled ? new MutationMetricQueue()
: NoOpMutationMetricsQueue.NO_OP_MUTATION_METRICS_QUEUE;
if (subTask) {
// this code path is only used while running child scans, we can't pass the txContext to child scans
// as it is not thread safe, so we use the tx member variable
phoenixTransactionContext = txContext.newTransactionContext(txContext, subTask);
} else if (txContext != null) {
isExternalTxContext = true;
phoenixTransactionContext = txContext.newTransactionContext(txContext, subTask);
}
this.indexRegionObserverEnabledAllTables = Boolean.parseBoolean(
this.connection.getQueryServices().getConfiguration().get(
INDEX_REGION_OBSERVER_ENABLED_ALL_TABLES_ATTRIB,
DEFAULT_INDEX_REGION_OBSERVER_ENABLED_ALL_TABLES));
}
public MutationState(TableRef table, MultiRowMutationState mutations, long sizeOffset,
int maxSize, long maxSizeBytes, PhoenixConnection connection) throws SQLException {
this(maxSize, maxSizeBytes, connection, false, null, sizeOffset);
if (!mutations.isEmpty()) {
addMutations(this.mutationsMap, table, mutations);
}
this.numRows = mutations.size();
this.estimatedSize = PhoenixKeyValueUtil.getEstimatedRowMutationSizeWithBatch(this.mutationsMap);
throwIfTooBig();
}
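// Example (illustrative sketch, not part of the original class): callers normally reach this
// state through an open PhoenixConnection rather than constructing it directly; the limits
// below are arbitrary and "conn" is assumed to be an open PhoenixConnection.
//
//   MutationState state = new MutationState(10000 /* max rows */, 2L * 1024 * 1024 /* max bytes */, conn);
//   MutationState current = conn.getMutationState(); // the connection's own buffered state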
// add a new batch of row mutations
private void addMutations(Map<TableRef, List<MultiRowMutationState>> mutationMap, TableRef table,
MultiRowMutationState mutations) {
List<MultiRowMutationState> batches = mutationMap.get(table);
if (batches == null) {
batches = Lists.newArrayListWithExpectedSize(1);
}
batches.add(mutations);
mutationMap.put(table, batches);
}
// remove a batch of mutations which have been committed
private void removeMutations(Map<TableRef, List<MultiRowMutationState>> mutationMap, TableRef table) {
List<MultiRowMutationState> batches = mutationMap.get(table);
if (batches == null || batches.isEmpty()) {
mutationMap.remove(table);
return;
}
// mutation batches are committed in FIFO order so always remove from the head
batches.remove(0);
if (batches.isEmpty()) {
mutationMap.remove(table);
}
}
public long getEstimatedSize() {
return estimatedSize;
}
public int getMaxSize() {
return maxSize;
}
public long getMaxSizeBytes() {
return maxSizeBytes;
}
public PhoenixTransactionContext getPhoenixTransactionContext() {
return phoenixTransactionContext;
}
/**
* Commit a write fence when creating an index so that we can detect when a data table transaction is started before
* the create index but completes after it. In this case, we need to rerun the data table transaction after the
* index creation so that the index rows are generated.
*
* @param dataTable
* the data table upon which an index is being added
* @throws SQLException
*/
public void commitDDLFence(PTable dataTable) throws SQLException {
// Is this still useful after PHOENIX-6627?
if (dataTable.isTransactional()) {
try {
phoenixTransactionContext.commitDDLFence(dataTable);
} finally {
// The client expects a transaction to be in progress on the txContext while the
// VisibilityFence.prepareWait() starts a new tx and finishes/aborts it. After it's
// finished, we start a new one here.
// TODO: seems like an autonomous tx capability would be useful here.
phoenixTransactionContext.begin();
}
}
}
public boolean checkpointIfNeccessary(MutationPlan plan) throws SQLException {
if (!phoenixTransactionContext.isTransactionRunning() || plan.getTargetRef() == null
|| plan.getTargetRef().getTable() == null || !plan.getTargetRef().getTable().isTransactional()) { return false; }
Set<TableRef> sources = plan.getSourceRefs();
if (sources.isEmpty()) { return false; }
// For a DELETE statement, we're always querying the table being deleted from. This isn't
// a problem, but it potentially could be if there are other references to the same table
// nested in the DELETE statement (as a sub query or join, for example).
TableRef ignoreForExcludeCurrent = plan.getOperation() == Operation.DELETE && sources.size() == 1 ? plan
.getTargetRef() : null;
boolean excludeCurrent = false;
String targetPhysicalName = plan.getTargetRef().getTable().getPhysicalName().getString();
for (TableRef source : sources) {
if (source.getTable().isTransactional() && !source.equals(ignoreForExcludeCurrent)) {
String sourcePhysicalName = source.getTable().getPhysicalName().getString();
if (targetPhysicalName.equals(sourcePhysicalName)) {
excludeCurrent = true;
break;
}
}
}
// If we're querying the same table we're updating, we must exclude our writes to
// it from being visible.
if (excludeCurrent) {
// If any source tables have uncommitted data prior to last checkpoint,
// then we must create a new checkpoint.
boolean hasUncommittedData = false;
for (TableRef source : sources) {
String sourcePhysicalName = source.getTable().getPhysicalName().getString();
// Tracking uncommitted physical table names is an optimization that prevents us from
// having to do a checkpoint if no data has yet been written. If we're using an
// external transaction context, it's possible that data was already written at the
// current transaction timestamp, so we always checkpoint in that case if we're
// reading and writing to the same table.
if (source.getTable().isTransactional()
&& (isExternalTxContext || uncommittedPhysicalNames.contains(sourcePhysicalName))) {
hasUncommittedData = true;
break;
}
}
phoenixTransactionContext.checkpoint(hasUncommittedData);
if (hasUncommittedData) {
uncommittedPhysicalNames.clear();
}
return true;
}
return false;
}
// Though MutationState is not thread safe in general, this method should be because it may
// be called by TableResultIterator in a multi-threaded manner. Since we do not want to expose
// the Transaction outside of MutationState, this seems reasonable, as the member variables
// would not change as these threads are running. We also clone mutationState to ensure that
// the transaction context won't change due to a commit when auto commit is true.
public Table getHTable(PTable table) throws SQLException {
Table htable = this.getConnection().getQueryServices().getTable(table.getPhysicalName().getBytes());
if (table.isTransactional() && phoenixTransactionContext.isTransactionRunning()) {
// We're only using this table for reading, so we want it wrapped even if it's an index
htable = phoenixTransactionContext.getTransactionalTable(htable, table.isImmutableRows() || table.getType() == PTableType.INDEX);
}
return htable;
}
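// Example (illustrative sketch): reading through the handle returned by getHTable() so that
// transactional visibility rules are applied; assumes a MutationState "state", a PTable
// "pTable", and a row key "rowKey" obtained elsewhere (Get is org.apache.hadoop.hbase.client.Get).
//
//   try (Table htable = state.getHTable(pTable)) {
//     Result result = htable.get(new Get(rowKey));
//   }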
public PhoenixConnection getConnection() {
return connection;
}
public boolean isTransactionStarted() {
return phoenixTransactionContext.isTransactionRunning();
}
public long getInitialWritePointer() {
return phoenixTransactionContext.getTransactionId(); // First write pointer - won't change with checkpointing
}
// For testing
public long getWritePointer() {
return phoenixTransactionContext.getWritePointer();
}
// For testing
public PhoenixVisibilityLevel getVisibilityLevel() {
return phoenixTransactionContext.getVisibilityLevel();
}
public boolean startTransaction(Provider provider) throws SQLException {
if (provider == null) { return false; }
if (!connection.getQueryServices().getProps()
.getBoolean(QueryServices.TRANSACTIONS_ENABLED, QueryServicesOptions.DEFAULT_TRANSACTIONS_ENABLED)) { throw new SQLExceptionInfo.Builder(
SQLExceptionCode.CANNOT_START_TXN_IF_TXN_DISABLED).build().buildException(); }
if (connection.getSCN() != null) { throw new SQLExceptionInfo.Builder(
SQLExceptionCode.CANNOT_START_TRANSACTION_WITH_SCN_SET).build().buildException(); }
if (phoenixTransactionContext == PhoenixTransactionContext.NULL_CONTEXT) {
phoenixTransactionContext = provider.getTransactionProvider().getTransactionContext(connection);
} else {
if (provider != phoenixTransactionContext.getProvider()) { throw new SQLExceptionInfo.Builder(
SQLExceptionCode.CANNOT_MIX_TXN_PROVIDERS)
.setMessage(phoenixTransactionContext.getProvider().name() + " and " + provider.name()).build()
.buildException(); }
}
if (!isTransactionStarted()) {
// Clear any transactional state in case transaction was ended outside
// of Phoenix so we don't carry the old transaction state forward. We
// cannot call reset() here due to the case of having mutations and
// then transitioning from non transactional to transactional (which
// would end up clearing our uncommitted state).
resetTransactionalState();
phoenixTransactionContext.begin();
return true;
}
return false;
}
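// Example (illustrative sketch): starting a client-managed transaction before buffering
// mutations; assumes transactions are enabled on the client (QueryServices.TRANSACTIONS_ENABLED)
// and that the OMID transaction provider is available.
//
//   MutationState state = conn.getMutationState();
//   boolean started = state.startTransaction(TransactionFactory.Provider.OMID);
//   // ... execute UPSERT statements on the connection, then conn.commit() ...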
public static MutationState emptyMutationState(int maxSize, long maxSizeBytes,
PhoenixConnection connection) {
MutationState state = new MutationState(maxSize, maxSizeBytes, connection,
Collections.<TableRef, List<MultiRowMutationState>> emptyMap(), false, null);
state.sizeOffset = 0;
return state;
}
private void throwIfTooBig() throws SQLException {
if (numRows > maxSize) {
int mutationSize = numRows;
resetState();
throw new MaxMutationSizeExceededException(maxSize, mutationSize);
}
if (estimatedSize > maxSizeBytes) {
long mutationSizeByte = estimatedSize;
resetState();
throw new MaxMutationSizeBytesExceededException(maxSizeBytes, mutationSizeByte);
}
}
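// Example (illustrative sketch): the limits checked above come from client-side properties
// backing maxSize and maxSizeBytes; the property names below are assumed to be the standard
// Phoenix ones and are passed when opening the connection.
//
//   Properties props = new Properties();
//   props.setProperty("phoenix.mutate.maxSize", "500000");
//   props.setProperty("phoenix.mutate.maxSizeBytes", String.valueOf(100L * 1024 * 1024));
//   Connection conn = DriverManager.getConnection("jdbc:phoenix:zkhost:2181", props);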
public long getUpdateCount() {
return sizeOffset + numRows;
}
public int getNumUpdatedRowsForAutoCommit() {
return numUpdatedRowsForAutoCommit;
}
public int getNumRows() {
return numRows;
}
private MultiRowMutationState getLastMutationBatch(Map<TableRef, List<MultiRowMutationState>> mutations, TableRef tableRef) {
List<MultiRowMutationState> mutationBatches = mutations.get(tableRef);
if (mutationBatches == null || mutationBatches.isEmpty()) {
return null;
}
return mutationBatches.get(mutationBatches.size() - 1);
}
private void joinMutationState(TableRef tableRef, MultiRowMutationState srcRows,
Map<TableRef, List<MultiRowMutationState>> dstMutations) {
PTable table = tableRef.getTable();
boolean isIndex = table.getType() == PTableType.INDEX;
boolean incrementRowCount = dstMutations == this.mutationsMap;
// we only need to check if the new mutation batch (srcRows) conflicts with the
// last mutation batch since we try to merge it with that only
MultiRowMutationState existingRows = getLastMutationBatch(dstMutations, tableRef);
if (existingRows == null) { // no rows found for this table
// Size new map at batch size as that's what it'll likely grow to.
MultiRowMutationState newRows = new MultiRowMutationState(connection.getMutateBatchSize());
newRows.putAll(srcRows);
addMutations(dstMutations, tableRef, newRows);
if (incrementRowCount && !isIndex) {
numRows += srcRows.size();
// if we added all the rows from newMutationState we can just increment the
// estimatedSize by newMutationState.estimatedSize
estimatedSize += srcRows.estimatedSize;
}
return;
}
// for conflicting rows
MultiRowMutationState conflictingRows = new MultiRowMutationState(connection.getMutateBatchSize());
// Rows for this table already exist, check for conflicts
for (Map.Entry<ImmutableBytesPtr, RowMutationState> rowEntry : srcRows.entrySet()) {
ImmutableBytesPtr key = rowEntry.getKey();
RowMutationState newRowMutationState = rowEntry.getValue();
RowMutationState existingRowMutationState = existingRows.get(key);
if (existingRowMutationState == null) {
existingRows.put(key, newRowMutationState);
if (incrementRowCount && !isIndex) { // Don't count index rows in row count
numRows++;
// increment estimated size by the size of the new row
estimatedSize += newRowMutationState.calculateEstimatedSize();
}
continue;
}
Map<PColumn, byte[]> existingValues = existingRowMutationState.getColumnValues();
Map<PColumn, byte[]> newValues = newRowMutationState.getColumnValues();
if (existingValues != PRow.DELETE_MARKER && newValues != PRow.DELETE_MARKER) {
// Check if we can merge existing column values with new column values
long beforeMergeSize = existingRowMutationState.calculateEstimatedSize();
boolean isMerged = existingRowMutationState.join(rowEntry.getValue());
if (isMerged) {
// decrement estimated size by the size of the old row
estimatedSize -= beforeMergeSize;
// increment estimated size by the size of the new row
estimatedSize += existingRowMutationState.calculateEstimatedSize();
} else {
// cannot merge regular upsert and conditional upsert
// conflicting row is not a new row so no need to increment numRows
conflictingRows.put(key, newRowMutationState);
}
} else {
existingRows.put(key, newRowMutationState);
}
}
if (!conflictingRows.isEmpty()) {
addMutations(dstMutations, tableRef, conflictingRows);
}
}
private void joinMutationState(Map<TableRef, List<MultiRowMutationState>> srcMutations,
Map<TableRef, List<MultiRowMutationState>> dstMutations) {
// Merge newMutation with this one, keeping state from newMutation for any overlaps
for (Map.Entry<TableRef, List<MultiRowMutationState>> entry : srcMutations.entrySet()) {
TableRef tableRef = entry.getKey();
for (MultiRowMutationState srcRows : entry.getValue()) {
// Replace existing entries for the table with new entries
joinMutationState(tableRef, srcRows, dstMutations);
}
}
}
/**
* Combine a newer mutation with this one, where in the event of overlaps, the newer one will take precedence.
* Combine any metrics collected for the newer mutation.
*
* @param newMutationState
* the newer mutation state
*/
public void join(MutationState newMutationState) throws SQLException {
if (this == newMutationState) { // Doesn't make sense
return;
}
phoenixTransactionContext.join(newMutationState.getPhoenixTransactionContext());
this.sizeOffset += newMutationState.sizeOffset;
joinMutationState(newMutationState.mutationsMap, this.mutationsMap);
if (!newMutationState.txMutations.isEmpty()) {
if (txMutations.isEmpty()) {
txMutations = Maps.newHashMapWithExpectedSize(this.mutationsMap.size());
}
joinMutationState(newMutationState.txMutations, this.txMutations);
}
mutationMetricQueue.combineMetricQueues(newMutationState.mutationMetricQueue);
if (readMetricQueue == null) {
readMetricQueue = newMutationState.readMetricQueue;
} else if (readMetricQueue != null && newMutationState.readMetricQueue != null) {
readMetricQueue.combineReadMetrics(newMutationState.readMetricQueue);
}
throwIfTooBig();
}
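// Example (illustrative sketch): partial MutationStates accumulated by parallel tasks can be
// folded into the connection's state with join(); the newer state wins on overlapping rows and
// metrics are merged. "perTaskStates" is an assumed collection built elsewhere.
//
//   MutationState combined = conn.getMutationState();
//   for (MutationState partial : perTaskStates) {
//     combined.join(partial);
//   }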
private static ImmutableBytesPtr getNewRowKeyWithRowTimestamp(ImmutableBytesPtr ptr, long rowTimestamp, PTable table) {
RowKeySchema schema = table.getRowKeySchema();
int rowTimestampColPos = table.getRowTimestampColPos();
Field rowTimestampField = schema.getField(rowTimestampColPos);
byte[] rowTimestampBytes = rowTimestampField.getDataType() == PTimestamp.INSTANCE ?
PTimestamp.INSTANCE.toBytes(new Timestamp(rowTimestamp), rowTimestampField.getSortOrder()) :
PLong.INSTANCE.toBytes(rowTimestamp, rowTimestampField.getSortOrder());
int oldOffset = ptr.getOffset();
int oldLength = ptr.getLength();
// Move the pointer to the start byte of the row timestamp pk
schema.position(ptr, 0, rowTimestampColPos);
byte[] b = ptr.get();
int newOffset = ptr.getOffset();
int length = ptr.getLength();
for (int i = newOffset; i < newOffset + length; i++) {
// modify the underlying bytes array with the bytes of the row timestamp
b[i] = rowTimestampBytes[i - newOffset];
}
// move the pointer back to where it was before.
ptr.set(ptr.get(), oldOffset, oldLength);
return ptr;
}
private Iterator<Pair<PTable, List<Mutation>>> addRowMutations(final TableRef tableRef,
final MultiRowMutationState values, final long mutationTimestamp, final long serverTimestamp,
boolean includeAllIndexes, final boolean sendAll) {
final PTable table = tableRef.getTable();
final List<PTable> indexList = includeAllIndexes ?
Lists.newArrayList(IndexMaintainer.maintainedIndexes(table.getIndexes().iterator())) :
IndexUtil.getClientMaintainedIndexes(table);
final Iterator<PTable> indexes = indexList.iterator();
final List<Mutation> mutationList = Lists.newArrayListWithExpectedSize(values.size());
final List<Mutation> mutationsPertainingToIndex = indexes.hasNext() ? Lists
.newArrayListWithExpectedSize(values.size()) : null;
generateMutations(tableRef, mutationTimestamp, serverTimestamp, values, mutationList,
mutationsPertainingToIndex);
return new Iterator<Pair<PTable, List<Mutation>>>() {
boolean isFirst = true;
Map<byte[], List<Mutation>> indexMutationsMap = null;
@Override
public boolean hasNext() {
return isFirst || indexes.hasNext();
}
@Override
public Pair<PTable, List<Mutation>> next() {
if (isFirst) {
isFirst = false;
return new Pair<>(table, mutationList);
}
PTable index = indexes.next();
List<Mutation> indexMutations = null;
try {
if (!mutationsPertainingToIndex.isEmpty()) {
if (table.isTransactional()) {
if (indexMutationsMap == null) {
PhoenixTxIndexMutationGenerator generator = PhoenixTxIndexMutationGenerator.newGenerator(connection, table,
indexList, mutationsPertainingToIndex.get(0).getAttributesMap());
try (Table htable = connection.getQueryServices().getTable(
table.getPhysicalName().getBytes())) {
Collection<Pair<Mutation, byte[]>> allMutations = generator.getIndexUpdates(htable,
mutationsPertainingToIndex.iterator());
indexMutationsMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
for (Pair<Mutation, byte[]> mutation : allMutations) {
List<Mutation> mutations = indexMutationsMap.get(mutation.getSecond());
if (mutations == null) {
mutations = Lists.newArrayList();
indexMutationsMap.put(mutation.getSecond(), mutations);
}
mutations.add(mutation.getFirst());
}
}
}
indexMutations = indexMutationsMap.get(index.getPhysicalName().getBytes());
} else {
indexMutations = IndexUtil.generateIndexData(table, index, values,
mutationsPertainingToIndex, connection.getKeyValueBuilder(), connection);
}
}
// we may also have to include delete mutations for immutable tables if we are not processing all
// the tables in the mutations map
if (!sendAll) {
TableRef key = new TableRef(index);
List<MultiRowMutationState> multiRowMutationState = mutationsMap.remove(key);
if (multiRowMutationState != null) {
final List<Mutation> deleteMutations = Lists.newArrayList();
// for index table there will only be 1 mutation batch in the list
generateMutations(key, mutationTimestamp, serverTimestamp, multiRowMutationState.get(0), deleteMutations, null);
if (indexMutations == null) {
indexMutations = deleteMutations;
} else {
indexMutations.addAll(deleteMutations);
}
}
}
} catch (SQLException | IOException e) {
throw new IllegalDataException(e);
}
return new Pair<PTable, List<Mutation>>(index,
indexMutations == null ? Collections.<Mutation> emptyList()
: indexMutations);
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
private void generateMutations(final TableRef tableRef, final long mutationTimestamp, final long serverTimestamp,
final MultiRowMutationState values, final List<Mutation> mutationList,
final List<Mutation> mutationsPertainingToIndex) {
final PTable table = tableRef.getTable();
boolean tableWithRowTimestampCol = table.getRowTimestampColPos() != -1;
Iterator<Map.Entry<ImmutableBytesPtr, RowMutationState>> iterator = values.entrySet().iterator();
long timestampToUse = mutationTimestamp;
MultiRowMutationState modifiedValues = new MultiRowMutationState(16);
boolean wildcardIncludesDynamicCols = connection.getQueryServices().getProps().getBoolean(
WILDCARD_QUERY_DYNAMIC_COLS_ATTRIB, DEFAULT_WILDCARD_QUERY_DYNAMIC_COLS_ATTRIB);
while (iterator.hasNext()) {
Map.Entry<ImmutableBytesPtr, RowMutationState> rowEntry = iterator.next();
byte[] onDupKeyBytes = rowEntry.getValue().getOnDupKeyBytes();
boolean hasOnDupKey = onDupKeyBytes != null;
ImmutableBytesPtr key = rowEntry.getKey();
RowMutationState state = rowEntry.getValue();
if (tableWithRowTimestampCol) {
RowTimestampColInfo rowTsColInfo = state.getRowTimestampColInfo();
if (rowTsColInfo.useServerTimestamp()) {
// regenerate the key with this timestamp.
key = getNewRowKeyWithRowTimestamp(key, serverTimestamp, table);
// since we are about to modify the byte[] stored in key (which changes its hashcode)
// we need to remove the entry from the values map and add a new entry with the modified byte[]
modifiedValues.put(key, state);
iterator.remove();
timestampToUse = serverTimestamp;
} else {
if (rowTsColInfo.getTimestamp() != null) {
timestampToUse = rowTsColInfo.getTimestamp();
}
}
}
PRow row = table.newRow(connection.getKeyValueBuilder(), timestampToUse, key, hasOnDupKey);
List<Mutation> rowMutations, rowMutationsPertainingToIndex;
if (rowEntry.getValue().getColumnValues() == PRow.DELETE_MARKER) { // means delete
row.delete();
rowMutations = row.toRowMutations();
String sourceOfDelete = getConnection().getSourceOfOperation();
if (sourceOfDelete != null) {
byte[] sourceOfDeleteBytes = Bytes.toBytes(sourceOfDelete);
// Set the source of operation attribute.
for (Mutation mutation: rowMutations) {
mutation.setAttribute(SOURCE_OPERATION_ATTRIB, sourceOfDeleteBytes);
}
}
// The DeleteCompiler already generates the deletes for indexes, so no need to do it again
rowMutationsPertainingToIndex = Collections.emptyList();
} else {
for (Map.Entry<PColumn, byte[]> valueEntry : rowEntry.getValue().getColumnValues().entrySet()) {
row.setValue(valueEntry.getKey(), valueEntry.getValue());
}
if (wildcardIncludesDynamicCols && row.setAttributesForDynamicColumnsIfReqd()) {
row.setAttributeToProcessDynamicColumnsMetadata();
}
rowMutations = row.toRowMutations();
// Pass through ON DUPLICATE KEY info through mutations
// In the case of the same clause being used on many statements, this will be
// inefficient because we're transmitting the same information for each mutation.
// TODO: use our ServerCache
for (Mutation mutation : rowMutations) {
if (onDupKeyBytes != null) {
mutation.setAttribute(PhoenixIndexBuilderHelper.ATOMIC_OP_ATTRIB, onDupKeyBytes);
}
}
rowMutationsPertainingToIndex = rowMutations;
}
annotateMutationsWithMetadata(table, rowMutations);
mutationList.addAll(rowMutations);
if (mutationsPertainingToIndex != null) mutationsPertainingToIndex.addAll(rowMutationsPertainingToIndex);
}
values.putAll(modifiedValues);
}
private void annotateMutationsWithMetadata(PTable table, List<Mutation> rowMutations) {
if (table == null) {
return;
}
// Annotate each mutation with enough phoenix metadata so that anyone interested can
// deterministically figure out exactly what Phoenix schema object created the mutation
// Server-side we can annotate the HBase WAL with these.
for (Mutation mutation : rowMutations) {
annotateMutationWithMetadata(table, mutation);
}
//only annotate external schema id if the change detection flag is on the table.
if (!table.isChangeDetectionEnabled()) {
return;
}
//annotate each mutation with enough metadata so that anyone interested can
// deterministically figure out exactly what Phoenix schema object created the mutation
// Server-side we can annotate the HBase WAL with these.
for (Mutation mutation : rowMutations) {
annotateMutationWithMetadataWithExternalSchemaId(table, mutation);
}
}
private void annotateMutationWithMetadataWithExternalSchemaId(PTable table, Mutation mutation) {
byte[] externalSchemaRegistryId = table.getExternalSchemaId() != null ?
Bytes.toBytes(table.getExternalSchemaId()) : null;
WALAnnotationUtil.annotateMutation(mutation, externalSchemaRegistryId);
}
private void annotateMutationWithMetadata(PTable table, Mutation mutation) {
byte[] tenantId = table.getTenantId() != null ? table.getTenantId().getBytes() : null;
byte[] schemaName = table.getSchemaName() != null ? table.getSchemaName().getBytes() : null;
byte[] tableName = table.getTableName() != null ? table.getTableName().getBytes() : null;
byte[] tableType = table.getType().getValue().getBytes();
byte[] externalSchemaRegistryId = table.getExternalSchemaId() != null ?
Bytes.toBytes(table.getExternalSchemaId()) : null;
byte[] lastDDLTimestamp =
table.getLastDDLTimestamp() != null ? Bytes.toBytes(table.getLastDDLTimestamp()) : null;
WALAnnotationUtil.annotateMutation(mutation, tenantId, schemaName, tableName, tableType, lastDDLTimestamp);
}
/**
* Get the unsorted list of HBase mutations for the tables with uncommitted data.
*
* @return list of HBase mutations for uncommitted data.
*/
public Iterator<Pair<byte[], List<Mutation>>> toMutations(Long timestamp) {
return toMutations(false, timestamp);
}
public Iterator<Pair<byte[], List<Mutation>>> toMutations() {
return toMutations(false, null);
}
public Iterator<Pair<byte[], List<Mutation>>> toMutations(final boolean includeMutableIndexes) {
return toMutations(includeMutableIndexes, null);
}
public Iterator<Pair<byte[], List<Mutation>>> toMutations(final boolean includeMutableIndexes,
final Long tableTimestamp) {
final Iterator<Map.Entry<TableRef, List<MultiRowMutationState>>> iterator = this.mutationsMap.entrySet().iterator();
if (!iterator.hasNext()) { return Collections.emptyIterator(); }
Long scn = connection.getSCN();
final long serverTimestamp = getTableTimestamp(tableTimestamp, scn);
final long mutationTimestamp = getMutationTimestamp(scn);
return new Iterator<Pair<byte[], List<Mutation>>>() {
private Map.Entry<TableRef, List<MultiRowMutationState>> current = iterator.next();
private int batchOffset = 0;
private Iterator<Pair<byte[], List<Mutation>>> innerIterator = init();
private Iterator<Pair<byte[], List<Mutation>>> init() {
final Iterator<Pair<PTable, List<Mutation>>> mutationIterator =
addRowMutations(current.getKey(), current.getValue().get(batchOffset),
mutationTimestamp, serverTimestamp, includeMutableIndexes, true);
return new Iterator<Pair<byte[], List<Mutation>>>() {
@Override
public boolean hasNext() {
return mutationIterator.hasNext();
}
@Override
public Pair<byte[], List<Mutation>> next() {
Pair<PTable, List<Mutation>> pair = mutationIterator.next();
return new Pair<byte[], List<Mutation>>(pair.getFirst().getPhysicalName()
.getBytes(), pair.getSecond());
}
@Override
public void remove() {
mutationIterator.remove();
}
};
}
@Override
public boolean hasNext() {
return innerIterator.hasNext() ||
batchOffset + 1 < current.getValue().size() ||
iterator.hasNext();
}
@Override
public Pair<byte[], List<Mutation>> next() {
if (!innerIterator.hasNext()) {
++batchOffset;
if (batchOffset == current.getValue().size()) {
current = iterator.next();
batchOffset = 0;
}
innerIterator = init();
}
return innerIterator.next();
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
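// Example (illustrative sketch): inspecting the HBase mutations buffered per physical table
// before commit, e.g. from a test; assumes a MutationState "state" holding uncommitted rows.
//
//   Iterator<Pair<byte[], List<Mutation>>> it = state.toMutations();
//   while (it.hasNext()) {
//     Pair<byte[], List<Mutation>> pair = it.next();
//     System.out.println(Bytes.toString(pair.getFirst()) + " -> " + pair.getSecond().size() + " mutation(s)");
//   }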
public static long getTableTimestamp(final Long tableTimestamp, Long scn) {
return (tableTimestamp != null && tableTimestamp != QueryConstants.UNSET_TIMESTAMP) ? tableTimestamp
: (scn == null ? HConstants.LATEST_TIMESTAMP : scn);
}
public static long getMutationTimestamp(final Long scn) {
return scn == null ? HConstants.LATEST_TIMESTAMP : scn;
}
/**
* Validates that the meta data is valid against the server meta data if we haven't yet done so. Otherwise, for
* every UPSERT VALUES call, we'd need to hit the server to see if the meta data has changed.
*
* @return the server time to use for the upsert
* @throws SQLException
* if the table or any columns no longer exist
*/
private long[] validateAll(Map<TableRef, MultiRowMutationState> commitBatch) throws SQLException {
int i = 0;
long[] timeStamps = new long[commitBatch.size()];
for (Map.Entry<TableRef, MultiRowMutationState> entry : commitBatch.entrySet()) {
TableRef tableRef = entry.getKey();
timeStamps[i++] = validateAndGetServerTimestamp(tableRef, entry.getValue());
}
return timeStamps;
}
private long validateAndGetServerTimestamp(TableRef tableRef, MultiRowMutationState rowKeyToColumnMap)
throws SQLException {
MetaDataClient client = new MetaDataClient(connection);
long serverTimeStamp = tableRef.getTimeStamp();
PTable table = null;
long startTime = EnvironmentEdgeManager.currentTimeMillis();
try {
// If we're auto committing, we've already validated the schema when we got the ColumnResolver,
// so no need to do it again here.
table = tableRef.getTable();
// We generally don't re-resolve SYSTEM tables, but if it relies on ROW_TIMESTAMP, we must
// get the latest timestamp in order to upsert data with the correct server-side timestamp
// in case the ROW_TIMESTAMP is not provided in the UPSERT statement.
boolean hitServerForLatestTimestamp =
table.getRowTimestampColPos() != -1 && table.getType() == PTableType.SYSTEM;
MetaDataMutationResult result = client.updateCache(table.getSchemaName().getString(),
table.getTableName().getString(), hitServerForLatestTimestamp);
PTable resolvedTable = result.getTable();
if (resolvedTable == null) { throw new TableNotFoundException(table.getSchemaName().getString(), table
.getTableName().getString()); }
// Always update tableRef's table, as the one we've cached may be out of date since we executed
// the UPSERT VALUES call and the cache may have been updated before this.
tableRef.setTable(resolvedTable);
List<PTable> indexes = resolvedTable.getIndexes();
for (PTable idxTtable : indexes) {
// If index is still active, but has a non zero INDEX_DISABLE_TIMESTAMP value, then infer that
// our failure mode is block writes on index failure.
if ((idxTtable.getIndexState() == PIndexState.ACTIVE || idxTtable.getIndexState() == PIndexState.PENDING_ACTIVE)
&& idxTtable.getIndexDisableTimestamp() > 0) { throw new SQLExceptionInfo.Builder(
SQLExceptionCode.INDEX_FAILURE_BLOCK_WRITE).setSchemaName(table.getSchemaName().getString())
.setTableName(table.getTableName().getString()).build().buildException(); }
}
long timestamp = result.getMutationTime();
if (timestamp != QueryConstants.UNSET_TIMESTAMP) {
serverTimeStamp = timestamp;
if (result.wasUpdated()) {
List<PColumn> columns = Lists.newArrayListWithExpectedSize(table.getColumns().size());
for (Map.Entry<ImmutableBytesPtr, RowMutationState> rowEntry : rowKeyToColumnMap.entrySet()) {
RowMutationState valueEntry = rowEntry.getValue();
if (valueEntry != null) {
Map<PColumn, byte[]> colValues = valueEntry.getColumnValues();
if (colValues != PRow.DELETE_MARKER) {
for (PColumn column : colValues.keySet()) {
if (!column.isDynamic()) columns.add(column);
}
}
}
}
for (PColumn column : columns) {
if (column != null) {
resolvedTable.getColumnFamily(column.getFamilyName().getString()).getPColumnForColumnName(
column.getName().getString());
}
}
}
}
} catch(Throwable e) {
if (table != null) {
TableMetricsManager.updateMetricsForSystemCatalogTableMethod(table.getTableName().toString(),
NUM_METADATA_LOOKUP_FAILURES, 1);
}
throw e;
} finally {
long endTime = EnvironmentEdgeManager.currentTimeMillis();
GLOBAL_MUTATION_SYSCAT_TIME.update(endTime - startTime);
}
return serverTimeStamp == QueryConstants.UNSET_TIMESTAMP ? HConstants.LATEST_TIMESTAMP : serverTimeStamp;
}
static MutationBytes calculateMutationSize(List<Mutation> mutations,
boolean updateGlobalClientMetrics) {
long byteSize = 0;
long temp;
long deleteSize = 0, deleteCounter = 0;
long upsertsize = 0, upsertCounter = 0;
long atomicUpsertsize = 0;
if (GlobalClientMetrics.isMetricsEnabled()) {
for (Mutation mutation : mutations) {
temp = PhoenixKeyValueUtil.calculateMutationDiskSize(mutation);
byteSize += temp;
if (mutation instanceof Delete) {
deleteSize += temp;
deleteCounter++;
allUpsertsMutations = false;
} else if (mutation instanceof Put) {
upsertsize += temp;
upsertCounter++;
if (mutation.getAttribute(PhoenixIndexBuilderHelper.ATOMIC_OP_ATTRIB) != null) {
atomicUpsertsize += temp;
}
allDeletesMutations = false;
} else {
allUpsertsMutations = false;
allDeletesMutations = false;
}
}
}
if (updateGlobalClientMetrics) {
GLOBAL_MUTATION_BYTES.update(byteSize);
}
return new MutationBytes(deleteCounter, deleteSize, byteSize, upsertCounter, upsertsize, atomicUpsertsize);
}
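// Example (illustrative sketch): the returned MutationBytes breaks a batch down by operation
// type, which the commit path uses to populate mutation metrics.
//
//   MutationBytes mb = calculateMutationSize(mutations, false);
//   long totalBytes = mb.getTotalMutationBytes();
//   long upserts = mb.getUpsertMutationCounter();
//   long deletes = mb.getDeleteMutationCounter();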
public long getBatchSizeBytes() {
return batchSizeBytes;
}
public long getBatchCount() {
return batchCount;
}
public static final class MutationBytes {
private long deleteMutationCounter;
private long deleteMutationBytes;
private long totalMutationBytes;
private long upsertMutationCounter;
private long upsertMutationBytes;
private long atomicUpsertMutationBytes; // needed to calculate atomic upsert commit time
public MutationBytes(long deleteMutationCounter, long deleteMutationBytes, long totalMutationBytes,
long upsertMutationCounter, long upsertMutationBytes, long atomicUpsertMutationBytes) {
this.deleteMutationCounter = deleteMutationCounter;
this.deleteMutationBytes = deleteMutationBytes;
this.totalMutationBytes = totalMutationBytes;
this.upsertMutationCounter = upsertMutationCounter;
this.upsertMutationBytes = upsertMutationBytes;
this.atomicUpsertMutationBytes = atomicUpsertMutationBytes;
}
public long getDeleteMutationCounter() {
return deleteMutationCounter;
}
public long getDeleteMutationBytes() {
return deleteMutationBytes;
}
public long getTotalMutationBytes() {
return totalMutationBytes;
}
public long getUpsertMutationCounter() {
return upsertMutationCounter;
}
public long getUpsertMutationBytes() {
return upsertMutationBytes;
}
public long getAtomicUpsertMutationBytes() { return atomicUpsertMutationBytes; }
}
public enum MutationMetadataType {
TENANT_ID,
SCHEMA_NAME,
LOGICAL_TABLE_NAME,
TIMESTAMP,
TABLE_TYPE,
EXTERNAL_SCHEMA_ID
}
private static class TableInfo {
private final boolean isDataTable;
@Nonnull
private final PName hTableName;
@Nonnull
private final TableRef origTableRef;
private final PTable pTable;
public TableInfo(boolean isDataTable, PName hTableName, TableRef origTableRef, PTable pTable) {
super();
checkNotNull(hTableName);
checkNotNull(origTableRef);
this.isDataTable = isDataTable;
this.hTableName = hTableName;
this.origTableRef = origTableRef;
this.pTable = pTable;
}
public boolean isDataTable() {
return isDataTable;
}
public PName getHTableName() {
return hTableName;
}
public TableRef getOrigTableRef() {
return origTableRef;
}
public PTable getPTable() {
return pTable;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + hTableName.hashCode();
result = prime * result + (isDataTable ? 1231 : 1237);
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null) return false;
if (getClass() != obj.getClass()) return false;
TableInfo other = (TableInfo)obj;
if (!hTableName.equals(other.hTableName)) return false;
if (isDataTable != other.isDataTable) return false;
if (!pTable.equals(other.pTable)) return false;
return true;
}
}
/**
* Split the mutation batches for each table into separate commit batches.
* Each commit batch contains only one mutation batch (MultiRowMutationState) for a table.
* @param tableRefIterator
* @return List of commit batches
*/
private List