org.apache.phoenix.coprocessor.IndexerRegionScanner Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.coprocessor;
import static org.apache.phoenix.hbase.index.write.AbstractParallelWriterIndexCommitter.INDEX_WRITER_KEEP_ALIVE_TIME_CONF_KEY;
import static org.apache.phoenix.query.QueryConstants.AGG_TIMESTAMP;
import static org.apache.phoenix.query.QueryConstants.EMPTY_COLUMN_VALUE_BYTES;
import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN;
import static org.apache.phoenix.query.QueryConstants.SINGLE_COLUMN_FAMILY;
import static org.apache.phoenix.query.QueryConstants.UNGROUPED_AGG_ROW_KEY;
import static org.apache.phoenix.util.ScanUtil.isDummy;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Durability;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.phoenix.cache.ServerCacheClient;
import org.apache.phoenix.compile.ScanRanges;
import org.apache.phoenix.filter.SkipScanFilter;
import org.apache.phoenix.hbase.index.ValueGetter;
import org.apache.phoenix.hbase.index.parallel.EarlyExitFailure;
import org.apache.phoenix.hbase.index.parallel.Task;
import org.apache.phoenix.hbase.index.parallel.TaskBatch;
import org.apache.phoenix.hbase.index.parallel.ThreadPoolBuilder;
import org.apache.phoenix.hbase.index.parallel.ThreadPoolManager;
import org.apache.phoenix.hbase.index.parallel.WaitForCompletionTaskRunner;
import org.apache.phoenix.hbase.index.util.GenericKeyValueBuilder;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
import org.apache.phoenix.index.PhoenixIndexCodec;
import org.apache.phoenix.mapreduce.index.IndexTool;
import org.apache.phoenix.mapreduce.index.IndexVerificationResultRepository;
import org.apache.phoenix.query.KeyRange;
import org.apache.phoenix.schema.types.PLong;
import org.apache.phoenix.schema.types.PVarbinary;
import org.apache.phoenix.util.KeyValueUtil;
import org.apache.phoenix.util.ServerUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Throwables;
import com.google.common.collect.Maps;
public class IndexerRegionScanner extends GlobalIndexRegionScanner {
private static final Logger LOGGER = LoggerFactory.getLogger(IndexerRegionScanner.class);
/**
 * Data-row puts keyed by row-key bytes. It is looked up with the row key of rows read back
 * from the index table, and is only allocated (as a tree map ordered by
 * {@code Bytes.BYTES_COMPARATOR}) when verification is enabled.
 */
protected Map<byte[], Put> indexKeyToDataPutMap;
/** Mutation batch; only allocated by the constructor when verification is not enabled. */
protected UngroupedAggregateRegionObserver.MutationList mutations;
/**
 * Set to {@code true} by the constructor when the scan carries no
 * {@code INDEX_REBUILD_PAGING} attribute.
 */
private boolean partialRebuild = false;
IndexerRegionScanner (final RegionScanner innerScanner, final Region region, final Scan scan,
final RegionCoprocessorEnvironment env,
UngroupedAggregateRegionObserver ungroupedAggregateRegionObserver) throws IOException {
super(innerScanner, region, scan, env, ungroupedAggregateRegionObserver);
indexHTable = hTableFactory.getTable(new ImmutableBytesPtr(indexMaintainer.getIndexTableName()));
indexTableTTL = indexHTable.getTableDescriptor().getColumnFamilies()[0].getTimeToLive();
pool = new WaitForCompletionTaskRunner(ThreadPoolManager.getExecutor(
new ThreadPoolBuilder("IndexVerify",
env.getConfiguration()).setMaxThread(NUM_CONCURRENT_INDEX_VERIFY_THREADS_CONF_KEY,
DEFAULT_CONCURRENT_INDEX_VERIFY_THREADS).setCoreTimeout(
INDEX_WRITER_KEEP_ALIVE_TIME_CONF_KEY), env));
if (scan.getAttribute(BaseScannerRegionObserver.INDEX_REBUILD_PAGING) == null) {
partialRebuild = true;
}
if (verify) {
indexKeyToDataPutMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
verificationResult = new IndexToolVerificationResult(scan);
verificationResultRepository =
new IndexVerificationResultRepository(indexMaintainer.getIndexTableName(), hTableFactory);
} else {
mutations = new UngroupedAggregateRegionObserver.MutationList(maxBatchSize);
}
}
/**
 * Returns the region info reported by the region this scanner was created for.
 *
 * @return the result of {@code region.getRegionInfo()}
 */
@Override
public HRegionInfo getRegionInfo() {
    final HRegionInfo regionInfo = region.getRegionInfo();
    return regionInfo;
}
/**
 * Reports whether the filter is done; this scanner always answers {@code false}.
 *
 * @return {@code false}, unconditionally
 */
@Override
public boolean isFilterDone() {
    return false;
}
/**
 * Closes the wrapped scanner, logs the verification result when verification is enabled,
 * and releases the task pool, the table factory, the index table and (when verifying) the
 * verification result repository.
 *
 * <p>The steps run in the same order as before on the success path. Each release step sits
 * in its own {@code finally} so that a failure in an earlier step (including closing the
 * wrapped scanner) no longer prevents the remaining resources from being released. As
 * before, the verification result is not logged if closing the wrapped scanner fails. When
 * several steps fail, the exception from the last failing step is the one that propagates.
 *
 * @throws IOException if closing a resource or logging the verification result fails
 */
@Override
public void close() throws IOException {
    try {
        innerScanner.close();
        if (verify) {
            verificationResultRepository.logToIndexToolResultTable(verificationResult,
                    IndexTool.IndexVerifyType.ONLY, region.getRegionInfo().getRegionName());
        }
    } finally {
        try {
            this.pool.stop("IndexerRegionScanner is closing");
        } finally {
            try {
                hTableFactory.shutdown();
            } finally {
                try {
                    indexHTable.close();
                } finally {
                    if (verify) {
                        verificationResultRepository.close();
                    }
                }
            }
        }
    }
}
/**
 * Compares one row read from the index table with the index row rebuilt from its data row.
 *
 * <p>The expected index put is built from {@code dataRow} at the data row's maximum
 * timestamp, and the empty column is added to it. Every expected cell is then looked up in
 * {@code indexRow}. The row is valid only if the number of expected cells that matched
 * (same value and same timestamp) equals the number of raw cells in {@code indexRow}.
 *
 * @param indexRow row read from the index table
 * @param dataRow data-table put the index row is expected to be derived from
 * @param verificationPhaseResult counters; the beyond-max-lookback invalid count is
 *        incremented for each mismatching cell whose timestamp is beyond max lookback
 * @return {@code true} if the index row matches the rebuilt row, {@code false} otherwise
 * @throws IOException if building the expected index mutation fails
 */
private boolean verifySingleIndexRow(Result indexRow, final Put dataRow,
        IndexToolVerificationResult.PhaseResult verificationPhaseResult) throws IOException {
    ValueGetter valueGetter = new SimpleValueGetter(dataRow);
    long ts = getMaxTimestamp(dataRow);
    Put indexPut = indexMaintainer.buildUpdateMutation(GenericKeyValueBuilder.INSTANCE,
            valueGetter, new ImmutableBytesWritable(dataRow.getRow()), ts, null, null);
    if (indexPut == null) {
        // This means the data row does not have any covered column values
        indexPut = new Put(indexRow.getRow());
    }
    // Add the empty column
    indexPut.addColumn(indexMaintainer.getEmptyKeyValueFamily().copyBytesIfNecessary(),
            indexMaintainer.getEmptyKeyValueQualifier(), ts, EMPTY_COLUMN_VALUE_BYTES);
    int cellCount = 0;
    long currentTime = EnvironmentEdgeManager.currentTime();
    for (List<Cell> cells : indexPut.getFamilyCellMap().values()) {
        if (cells == null) {
            // Defensive guard: a null cell list stops the examination of any further families.
            break;
        }
        for (Cell expectedCell : cells) {
            byte[] family = CellUtil.cloneFamily(expectedCell);
            byte[] qualifier = CellUtil.cloneQualifier(expectedCell);
            Cell actualCell = indexRow.getColumnLatestCell(family, qualifier);
            if (actualCell == null) {
                // The expected cell is absent from the index row. Tolerate that only when the
                // TTL check on the index table's TTL passes for the expected cell's timestamp
                // (presumably meaning the cell has expired between rebuild and verify).
                if (isTimestampBeforeTTL(indexTableTTL, currentTime, expectedCell.getTimestamp())) {
                    continue;
                }
                return false;
            }
            if (actualCell.getTimestamp() < ts) {
                // Skip older cells since a Phoenix index row is composed of cells with the same timestamp
                continue;
            }
            // Check all columns
            if (!CellUtil.matchingValue(actualCell, expectedCell) || actualCell.getTimestamp() != ts) {
                if (isTimestampBeyondMaxLookBack(maxLookBackInMills, currentTime, actualCell.getTimestamp())) {
                    verificationPhaseResult
                            .setBeyondMaxLookBackInvalidIndexRowCount(verificationPhaseResult
                                    .getBeyondMaxLookBackInvalidIndexRowCount() + 1);
                    continue;
                }
                return false;
            }
            cellCount++;
        }
    }
    // Skipped cells are not counted, so any skip makes this comparison fail unless the
    // index row has correspondingly fewer raw cells.
    return cellCount == indexRow.rawCells().length;
}
/**
 * Verifies the index rows for one batch of keys against their data rows.
 *
 * <p>A point-lookup scan over {@code keys} is run against the index table, restricted to
 * the time range of the enclosing scan. Each returned row is checked with
 * {@link #verifySingleIndexRow} and counted as valid or invalid, and its data row is removed
 * from {@code perTaskDataKeyToDataPutMap}. Whatever remains in that map afterwards is an
 * index row that was expected but not returned; each one is counted as expired, beyond max
 * lookback, or missing.
 *
 * @param keys index row keys to look up
 * @param perTaskDataKeyToDataPutMap data-row puts for this task keyed by data row key;
 *        entries are removed as their index rows are found or classified as expired
 * @param verificationPhaseResult counters updated with the outcome of each row
 * @throws IOException if the index scan fails or a returned index row has no data row
 */
private void verifyIndexRows(List<KeyRange> keys, Map<byte[], Put> perTaskDataKeyToDataPutMap,
        IndexToolVerificationResult.PhaseResult verificationPhaseResult) throws IOException {
    ScanRanges scanRanges = ScanRanges.createPointLookup(keys);
    Scan indexScan = new Scan();
    indexScan.setTimeRange(scan.getTimeRange().getMin(), scan.getTimeRange().getMax());
    scanRanges.initializeScan(indexScan);
    SkipScanFilter skipScanFilter = scanRanges.getSkipScanFilter();
    indexScan.setFilter(skipScanFilter);
    indexScan.setCacheBlocks(false);
    try (ResultScanner resultScanner = indexHTable.getScanner(indexScan)) {
        for (Result result = resultScanner.next(); (result != null); result = resultScanner.next()) {
            Put dataPut = indexKeyToDataPutMap.get(result.getRow());
            if (dataPut == null) {
                // This should never happen
                exceptionMessage = "Index verify failed - Missing data row - " + indexHTable.getName();
                throw new IOException(exceptionMessage);
            }
            if (verifySingleIndexRow(result, dataPut, verificationPhaseResult)) {
                verificationPhaseResult.setValidIndexRowCount(verificationPhaseResult.getValidIndexRowCount() + 1);
            } else {
                verificationPhaseResult.setInvalidIndexRowCount(verificationPhaseResult.getInvalidIndexRowCount() + 1);
            }
            perTaskDataKeyToDataPutMap.remove(dataPut.getRow());
        }
    } catch (Throwable t) {
        // Also catches the IOException raised above; the helper is expected to rethrow it
        // as an IOException tagged with the index table name (NOTE(review): confirm).
        ServerUtil.throwIOException(indexHTable.getName().toString(), t);
    }
    // Check if any expected rows from index(which we didn't get) are already expired due to TTL
    if (!perTaskDataKeyToDataPutMap.isEmpty()) {
        Iterator<Entry<byte[], Put>> itr = perTaskDataKeyToDataPutMap.entrySet().iterator();
        long currentTime = EnvironmentEdgeManager.currentTime();
        while (itr.hasNext()) {
            Entry<byte[], Put> entry = itr.next();
            long ts = getMaxTimestamp(entry.getValue());
            if (isTimestampBeforeTTL(indexTableTTL, currentTime, ts)) {
                // Removed through the iterator so the map can be modified while iterating.
                itr.remove();
                verificationPhaseResult.setExpiredIndexRowCount(verificationPhaseResult.getExpiredIndexRowCount() + 1);
            }
        }
    }
    // Check if any expected rows from index(which we didn't get) are beyond max look back and have been compacted away
    if (!perTaskDataKeyToDataPutMap.isEmpty()) {
        for (Entry<byte[], Put> entry : perTaskDataKeyToDataPutMap.entrySet()) {
            Put put = entry.getValue();
            long ts = getMaxTimestamp(put);
            long currentTime = EnvironmentEdgeManager.currentTime();
            if (isTimestampBeyondMaxLookBack(maxLookBackInMills, currentTime, ts)) {
                verificationPhaseResult.
                        setBeyondMaxLookBackMissingIndexRowCount(verificationPhaseResult.getBeyondMaxLookBackMissingIndexRowCount() + 1);
            } else {
                verificationPhaseResult.setMissingIndexRowCount(
                        verificationPhaseResult.getMissingIndexRowCount() + 1);
            }
        }
    }
}
/**
 * Adds to {@code tasks} a task that verifies the index rows for one batch of keys.
 *
 * <p>When the task runs it first checks the interrupt status of its thread and fails with
 * an {@link IOException} if the thread is interrupted; otherwise it delegates to
 * {@code verifyIndexRows} and returns {@code Boolean.TRUE}.
 *
 * @param tasks batch the new task is added to
 * @param keys index row keys the task looks up
 * @param perTaskDataKeyToDataPutMap data-row puts belonging to this task
 * @param verificationPhaseResult counters the task updates
 */
private void addVerifyTask(TaskBatch<Boolean> tasks, final List<KeyRange> keys,
        final Map<byte[], Put> perTaskDataKeyToDataPutMap,
        final IndexToolVerificationResult.PhaseResult verificationPhaseResult) {
    tasks.add(new Task<Boolean>() {
        @Override
        public Boolean call() throws Exception {
            // Do not start new index lookups once this worker thread has been interrupted.
            if (Thread.currentThread().isInterrupted()) {
                exceptionMessage = "Pool closed, not attempting to verify index rows! " + indexHTable.getName();
                throw new IOException(exceptionMessage);
            }
            verifyIndexRows(keys, perTaskDataKeyToDataPutMap, verificationPhaseResult);
            return Boolean.TRUE;
        }
    });
}
private void parallelizeIndexVerify(IndexToolVerificationResult.PhaseResult verificationPhaseResult) throws IOException {
int taskCount = (indexKeyToDataPutMap.size() + rowCountPerTask - 1) / rowCountPerTask;
TaskBatch tasks = new TaskBatch<>(taskCount);
List |
© 2015 - 2024 Weber Informatics LLC | Privacy Policy