org.apache.phoenix.coprocessor.UncoveredIndexRegionScanner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of phoenix-server-hbase-2.6
Show all versions of phoenix-server-hbase-2.6
Phoenix HBase Server Side JAR
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.phoenix.coprocessor;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.regionserver.Region;
import org.apache.hadoop.hbase.regionserver.RegionScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.phoenix.compile.ScanRanges;
import org.apache.phoenix.execute.TupleProjector;
import org.apache.phoenix.filter.SkipScanFilter;
import org.apache.phoenix.hbase.index.util.ImmutableBytesPtr;
import org.apache.phoenix.index.IndexMaintainer;
import org.apache.phoenix.query.KeyRange;
import org.apache.phoenix.query.QueryConstants;
import org.apache.phoenix.query.QueryServices;
import org.apache.phoenix.query.QueryServicesOptions;
import org.apache.phoenix.schema.SortOrder;
import org.apache.phoenix.schema.tuple.ResultTuple;
import org.apache.phoenix.schema.types.PVarbinary;
import org.apache.phoenix.thirdparty.com.google.common.collect.Maps;
import org.apache.phoenix.util.ByteUtil;
import org.apache.phoenix.util.EnvironmentEdgeManager;
import org.apache.phoenix.util.IndexUtil;
import org.apache.phoenix.util.ScanUtil;
import org.apache.phoenix.util.ServerUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.EMPTY_COLUMN_FAMILY_NAME;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.EMPTY_COLUMN_QUALIFIER_NAME;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.INDEX_PAGE_ROWS;
import static org.apache.phoenix.coprocessorclient.BaseScannerRegionObserverConstants.SERVER_PAGE_SIZE_MS;
import static org.apache.phoenix.query.QueryServices.INDEX_PAGE_SIZE_IN_ROWS;
import static org.apache.phoenix.util.ScanUtil.getDummyResult;
import static org.apache.phoenix.util.ScanUtil.isDummy;
public abstract class UncoveredIndexRegionScanner extends BaseRegionScanner {
private static final Logger LOGGER =
LoggerFactory.getLogger(UncoveredIndexRegionScanner.class);
/**
* The states of the processing a page of index rows
*/
protected enum State {
INITIAL, SCANNING_INDEX, SCANNING_DATA, SCANNING_DATA_INTERRUPTED, READY
}
protected State state = State.INITIAL;
protected final byte[][] viewConstants;
protected final RegionCoprocessorEnvironment env;
protected long pageSizeInRows;
protected final long ageThreshold;
protected byte[] emptyCF;
protected byte[] emptyCQ;
protected final Scan scan;
protected final Scan dataTableScan;
protected final RegionScanner innerScanner;
protected final Region region;
protected final IndexMaintainer indexMaintainer;
protected final TupleProjector tupleProjector;
protected final ImmutableBytesWritable ptr;
protected List> indexRows = null;
protected Map dataRows = null;
protected Iterator> indexRowIterator = null;
protected Map indexToDataRowKeyMap = null;
protected int indexRowCount = 0;
protected final long pageSizeMs;
protected byte[] lastIndexRowKey = null;
private byte[] previousResultRowKey = null;
private final byte[] initStartRowKey;
private final boolean includeInitStartRowKey;
/**
 * Creates a scanner that joins index rows read from {@code innerScanner} with their data
 * table rows.
 *
 * @param innerScanner scanner over the index region
 * @param region the index region being scanned
 * @param scan the index scan; also carries the page size and empty column attributes
 * @param env coprocessor environment, used to read configuration
 * @param dataTableScan template scan for the data table lookups
 * @param tupleProjector projector applied to data rows; may be null
 * @param indexMaintainer maintainer used to map index rows to data rows
 * @param viewConstants view constants passed to the index maintainer
 * @param ptr reusable pointer
 * @param pageSizeMs time budget in milliseconds for server side paging
 * @param queryLimit row limit of the query; {@code -1} leaves the page size uncapped
 */
public UncoveredIndexRegionScanner(final RegionScanner innerScanner,
        final Region region,
        final Scan scan,
        final RegionCoprocessorEnvironment env,
        final Scan dataTableScan,
        final TupleProjector tupleProjector,
        final IndexMaintainer indexMaintainer,
        final byte[][] viewConstants,
        final ImmutableBytesWritable ptr,
        final long pageSizeMs,
        final long queryLimit) {
    super(innerScanner);
    final Configuration config = env.getConfiguration();
    // pageSizeInRows is a long, so the configured value is stored without a narrowing cast
    // (an int cast would silently truncate values above Integer.MAX_VALUE).
    byte[] pageSizeFromScan = scan.getAttribute(INDEX_PAGE_ROWS);
    if (pageSizeFromScan != null) {
        pageSizeInRows = Bytes.toLong(pageSizeFromScan);
    } else {
        pageSizeInRows = config.getLong(INDEX_PAGE_SIZE_IN_ROWS,
                QueryServicesOptions.DEFAULT_INDEX_PAGE_SIZE_IN_ROWS);
    }
    if (queryLimit != -1) {
        // No point in collecting more index rows per page than the query can return.
        pageSizeInRows = Long.min(pageSizeInRows, queryLimit);
    }
    ageThreshold = config.getLong(
            QueryServices.GLOBAL_INDEX_ROW_AGE_THRESHOLD_TO_DELETE_MS_ATTRIB,
            QueryServicesOptions.DEFAULT_GLOBAL_INDEX_ROW_AGE_THRESHOLD_TO_DELETE_MS);
    emptyCF = scan.getAttribute(EMPTY_COLUMN_FAMILY_NAME);
    emptyCQ = scan.getAttribute(EMPTY_COLUMN_QUALIFIER_NAME);
    this.indexMaintainer = indexMaintainer;
    this.viewConstants = viewConstants;
    this.scan = scan;
    this.dataTableScan = dataTableScan;
    this.innerScanner = innerScanner;
    this.region = region;
    this.env = env;
    this.ptr = ptr;
    this.tupleProjector = tupleProjector;
    this.pageSizeMs = pageSizeMs;
    // If scan start rowkey is empty, use region boundaries. Reverse region boundaries
    // for reverse scan.
    this.initStartRowKey =
            ServerUtil.getScanStartRowKeyFromScanOrRegionBoundaries(scan, region);
    this.includeInitStartRowKey = scan.includeStartRow();
}
/**
 * Delegates to the wrapped index scanner.
 *
 * @return the MVCC read point reported by the inner scanner
 */
@Override
public long getMvccReadPoint() {
    final long readPoint = this.innerScanner.getMvccReadPoint();
    return readPoint;
}
/**
 * @return the region info of the region this scanner was created with
 */
@Override
public RegionInfo getRegionInfo() {
    final RegionInfo info = this.region.getRegionInfo();
    return info;
}
/**
 * This scanner never reports its filter as done.
 *
 * @return always {@code false}
 */
@Override
public boolean isFilterDone() {
    final boolean filterDone = false;
    return filterDone;
}
/**
 * Closes the wrapped index scanner.
 *
 * @throws IOException if closing the inner scanner fails
 */
@Override
public void close() throws IOException {
    this.innerScanner.close();
}
/**
 * Delegates to the wrapped index scanner.
 *
 * @return the maximum result size reported by the inner scanner
 */
@Override
public long getMaxResultSize() {
    final long maxResultSize = this.innerScanner.getMaxResultSize();
    return maxResultSize;
}
/**
 * Delegates to the wrapped index scanner.
 *
 * @return the batch setting reported by the inner scanner
 */
@Override
public int getBatch() {
    final int batch = this.innerScanner.getBatch();
    return batch;
}
/**
 * Fetches the data table rows for the index rows of the current page. Called from
 * {@code next} while the state is {@code SCANNING_DATA}.
 *
 * NOTE(review): {@code next} never sets {@code State.READY} itself, so implementations
 * presumably update {@code state} (and populate {@code dataRows}) here; confirm against
 * the subclasses.
 *
 * @param startTime time in milliseconds at which the enclosing {@code next} call started
 * @throws IOException if the data table rows cannot be read
 */
protected abstract void scanDataTableRows(long startTime) throws IOException;
/**
 * Builds a point-lookup scan on the data table for the given data row keys, leaving out
 * the keys whose rows are already present in {@code dataRows}.
 *
 * @param dataRowKeys data table row keys to look up
 * @return the scan to run against the data table, or {@code null} when every requested
 *         row has already been fetched
 * @throws IOException declared for callers; propagated from the scan setup
 */
protected Scan prepareDataTableScan(Collection<byte[]> dataRowKeys) throws IOException {
    List<KeyRange> keys = new ArrayList<>(dataRowKeys.size());
    for (byte[] dataRowKey : dataRowKeys) {
        // If the data table scan was interrupted because of paging we retry the scan
        // but on retry we should only fetch data table rows which we haven't already
        // fetched.
        if (!dataRows.containsKey(new ImmutableBytesPtr(dataRowKey))) {
            keys.add(PVarbinary.INSTANCE.getKeyRange(dataRowKey, SortOrder.ASC));
        }
    }
    if (keys.isEmpty()) {
        LOGGER.info("All data rows have already been fetched");
        return null;
    }
    ScanRanges scanRanges = ScanRanges.createPointLookup(keys);
    Scan dataScan = new Scan(dataTableScan);
    // Read the data table with the same time range as the index scan.
    dataScan.setTimeRange(scan.getTimeRange().getMin(), scan.getTimeRange().getMax());
    scanRanges.initializeScan(dataScan);
    SkipScanFilter skipScanFilter = scanRanges.getSkipScanFilter();
    dataScan.setFilter(new SkipScanFilter(skipScanFilter, false, true));
    dataScan.setAttribute(SERVER_PAGE_SIZE_MS, Bytes.toBytes(pageSizeMs));
    return dataScan;
}
/**
 * Collects index rows from the inner scanner into {@code indexRows} until the page is
 * full ({@code pageSizeInRows}), the scanner has no more rows, or the time budget
 * ({@code pageSizeMs}) is used up. For every collected row the data row key is built and
 * recorded in {@code indexToDataRowKeyMap}.
 *
 * @param result scratch/output list; on return it is either empty or holds a dummy result
 * @param startTime time in milliseconds at which the enclosing {@code next} call started
 * @param actualStartKey when non-null, rows whose key sorts before this key are skipped
 * @param offset number of leading row key bytes to drop before building the data row key
 * @return {@code true} when a dummy result was produced or the inner scanner reported
 *         more rows; otherwise the last value returned by the inner scanner
 * @throws IOException if the inner scanner fails
 */
protected boolean scanIndexTableRows(List<Cell> result,
        final long startTime,
        final byte[] actualStartKey,
        final int offset) throws IOException {
    boolean hasMore = false;
    if (actualStartKey != null) {
        // Skip forward to the first row at or after actualStartKey. That row is left in
        // result and picked up by the collection loop below.
        do {
            hasMore = innerScanner.nextRaw(result);
            if (result.isEmpty()) {
                return hasMore;
            }
            if (ScanUtil.isDummy(result)) {
                return true;
            }
            Cell firstCell = result.get(0);
            if (Bytes.compareTo(firstCell.getRowArray(), firstCell.getRowOffset(),
                    firstCell.getRowLength(), actualStartKey, 0, actualStartKey.length) < 0) {
                result.clear();
                if (EnvironmentEdgeManager.currentTimeMillis() - startTime >= pageSizeMs) {
                    // Out of time while skipping: hand back a dummy for the skipped row.
                    byte[] rowKey = CellUtil.cloneRow(firstCell);
                    ScanUtil.getDummyResult(rowKey, result);
                    return true;
                }
            } else {
                break;
            }
        } while (hasMore);
    }
    do {
        List<Cell> row = new ArrayList<>();
        if (result.isEmpty()) {
            hasMore = innerScanner.nextRaw(row);
        } else {
            // Consume the row left behind by the skip loop above.
            row.addAll(result);
            result.clear();
        }
        if (!row.isEmpty()) {
            if (isDummy(row)) {
                result.addAll(row);
                // We got a dummy request from lower layers. This means that
                // the scan took more than pageSizeMs. Just return true here.
                // The client will drop this dummy request and continue to scan.
                // Then the lower layer scanner will continue
                // wherever it stopped due to this dummy request
                return true;
            }
            Cell firstCell = row.get(0);
            byte[] indexRowKey = firstCell.getRowArray();
            ptr.set(indexRowKey, firstCell.getRowOffset() + offset,
                    firstCell.getRowLength() - offset);
            lastIndexRowKey = ptr.copyBytes();
            // The map is keyed by the full index row key; with a zero offset that is the
            // copy just made, otherwise the row key is cloned from the cell.
            indexToDataRowKeyMap.put(offset == 0 ? lastIndexRowKey :
                    CellUtil.cloneRow(firstCell), indexMaintainer.buildDataRowKey(
                    new ImmutableBytesWritable(lastIndexRowKey),
                    viewConstants));
            indexRows.add(row);
            indexRowCount++;
            if (hasMore && (EnvironmentEdgeManager.currentTimeMillis() - startTime)
                    >= pageSizeMs) {
                getDummyResult(lastIndexRowKey, result);
                // We do not need to change the state, State.SCANNING_INDEX
                // since we will continue scanning the index table after
                // the client drops the dummy request and then calls the next
                // method on its ResultScanner within ScanningResultIterator
                return true;
            }
        }
    } while (hasMore && indexRowCount < pageSizeInRows);
    return hasMore;
}
/**
 * Collects a page of index rows without skipping any leading rows and without dropping
 * any row key prefix (no start key, zero offset).
 *
 * @param result scratch/output list; on return it is either empty or holds a dummy result
 * @param startTime time in milliseconds at which the enclosing {@code next} call started
 * @return the value returned by the four-argument overload
 * @throws IOException if the inner scanner fails
 */
protected boolean scanIndexTableRows(List<Cell> result,
        final long startTime) throws IOException {
    return scanIndexTableRows(result, startTime, null, 0);
}
/**
 * Checks the index row against the given data row via the index maintainer.
 *
 * When the check passes but the data row's max timestamp differs from the index row
 * timestamp, the empty column of the index row is written with the verified marker at the
 * index timestamp, together with a single-version row delete when the index row is older
 * than {@code ageThreshold}. When the check fails, the index row is deleted if the index
 * maintainer considers it aged enough.
 *
 * @param dataRow the data table row fetched for the index row
 * @param indexRowKey the index row key
 * @param indexTimestamp timestamp of the index row
 * @return {@code true} when the index row matches the data row, {@code false} otherwise
 * @throws IOException if building the put or mutating the region fails
 */
private boolean verifyIndexRowAndRepairIfNecessary(Result dataRow, byte[] indexRowKey,
        long indexTimestamp)
        throws IOException {
    // Rebuild the data row as a Put so the index maintainer can check it.
    final Put dataPut = new Put(dataRow.getRow());
    for (Cell dataCell : dataRow.rawCells()) {
        dataPut.add(dataCell);
    }

    if (!indexMaintainer.checkIndexRow(indexRowKey, dataPut)) {
        if (indexMaintainer.isAgedEnough(IndexUtil.getMaxTimestamp(dataPut), ageThreshold)) {
            region.delete(indexMaintainer.createDelete(indexRowKey,
                    IndexUtil.getMaxTimestamp(dataPut), false));
        }
        return false;
    }

    if (IndexUtil.getMaxTimestamp(dataPut) == indexTimestamp) {
        // Timestamps agree: nothing to write.
        return true;
    }

    final Put verifiedPut = new Put(indexRowKey);
    verifiedPut.addColumn(emptyCF, emptyCQ, indexTimestamp, QueryConstants.VERIFIED_BYTES);
    final boolean pastAgeThreshold =
            (EnvironmentEdgeManager.currentTimeMillis() - indexTimestamp) > ageThreshold;
    final Mutation[] mutations = pastAgeThreshold
            ? new Mutation[]{verifiedPut, indexMaintainer.buildRowDeleteMutation(indexRowKey,
                    IndexMaintainer.DeleteType.SINGLE_VERSION, indexTimestamp)}
            : new Mutation[]{verifiedPut};
    region.batchMutate(mutations);
    return true;
}
/**
 * Moves to the next index row of the current page and writes the joined row into
 * {@code result}. The index row cells are added first; when a data row was fetched for it
 * and (for uncovered indexes) verification passes, the projected data row is appended as
 * one cell. {@code result} is cleared when the index row must not be returned.
 *
 * @param result output list for the joined row; may be left empty
 * @return {@code true} if an index row was consumed from the page (even if
 *         {@code result} ended up empty), {@code false} if the page is exhausted
 * @throws IOException if verification or a region mutation fails
 */
private boolean getNextCoveredIndexRow(List<Cell> result) throws IOException {
    if (!indexRowIterator.hasNext()) {
        return false;
    }
    List<Cell> indexRow = indexRowIterator.next();
    result.addAll(indexRow);
    try {
        Cell firstCell = indexRow.get(0);
        byte[] indexRowKey = CellUtil.cloneRow(firstCell);
        long ts = firstCell.getTimestamp();
        Result dataRow = dataRows.get(new ImmutableBytesPtr(
                indexToDataRowKeyMap.get(indexRowKey)));
        if (dataRow != null) {
            if (!indexMaintainer.isUncovered()
                    || verifyIndexRowAndRepairIfNecessary(dataRow, indexRowKey, ts)) {
                if (tupleProjector != null) {
                    IndexUtil.addTupleAsOneCell(result, new ResultTuple(dataRow),
                            tupleProjector, ptr);
                }
            } else {
                // Verification failed: do not return this index row.
                result.clear();
            }
        } else if (indexMaintainer.isUncovered()) {
            // Since we also scan the empty column for uncovered global indexes, this means
            // the data row does not exist. Delete the index row if the index is an
            // uncovered global index
            if (indexMaintainer.isAgedEnough(ts, ageThreshold)) {
                region.delete(indexMaintainer.createDelete(indexRowKey, ts, false));
            }
            result.clear();
        }
        // Otherwise: the data row satisfying the scan does not exist. This could be because
        // the data row may not include the columns corresponding to the uncovered
        // index columns either. Just return the index row. Nothing to do here
    } catch (Throwable e) {
        LOGGER.error("Exception in UncoveredIndexRegionScanner for region "
                + region.getRegionInfo().getRegionNameAsString(), e);
        throw e;
    }
    return true;
}
/**
 * A page of index rows are scanned and then their corresponding data table rows are retrieved
 * from the data table regions in parallel. These data rows are then joined with index rows.
 * The join is for adding uncovered columns to index rows.
 *
 * This implementation conforms to server paging such that if the server side operation takes
 * more than pageSizeMs, a dummy result is returned to signal the client that there is more
 * work to do on the server side. This is done to prevent RPC timeouts.
 *
 * @param result list that receives the cells of the next joined row, or a dummy result
 *               when the page is not ready yet
 * @return boolean to indicate if there are more rows to scan
 * @throws IOException if scanning the index or data table fails
 */
@Override
public boolean next(List<Cell> result) throws IOException {
    long startTime = EnvironmentEdgeManager.currentTimeMillis();
    boolean hasMore;
    region.startRegionOperation();
    try {
        synchronized (innerScanner) {
            // The previous page has been fully handed out: start a new one.
            if (state == State.READY && !indexRowIterator.hasNext()) {
                state = State.INITIAL;
            }
            if (state == State.INITIAL) {
                indexRowCount = 0;
                indexRows = new ArrayList<>();
                dataRows = Maps.newConcurrentMap();
                indexToDataRowKeyMap = Maps.newTreeMap(Bytes.BYTES_COMPARATOR);
                state = State.SCANNING_INDEX;
            }
            if (state == State.SCANNING_INDEX) {
                hasMore = scanIndexTableRows(result, startTime);
                if (isDummy(result)) {
                    // Stay in SCANNING_INDEX; the index scan resumes on the next call.
                    updateDummyWithPrevRowKey(result, initStartRowKey, includeInitStartRowKey,
                            scan);
                    return hasMore;
                }
                state = State.SCANNING_DATA;
            }
            if (state == State.SCANNING_DATA) {
                scanDataTableRows(startTime);
                indexRowIterator = indexRows.iterator();
            }
            if (state == State.READY) {
                boolean moreRows = getNextCoveredIndexRow(result);
                if (!result.isEmpty()) {
                    // Remembered so that later dummy results can carry this row key.
                    previousResultRowKey = CellUtil.cloneRow(result.get(0));
                }
                return moreRows;
            } else {
                // The page is not ready yet: return a dummy so the client calls again.
                updateDummyWithPrevRowKey(result, initStartRowKey, includeInitStartRowKey,
                        scan);
                return true;
            }
        }
    } catch (Throwable e) {
        LOGGER.error("Exception in UncoveredIndexRegionScanner for region "
                + region.getRegionInfo().getRegionNameAsString(), e);
        throw e;
    } finally {
        region.closeRegionOperation();
    }
}
/**
 * Replaces the contents of {@code result} with a dummy cell whose row key is based on
 * either the previous rowkey returned to the client or, when nothing has been returned
 * yet, the start rowkey and the start rowkey include param.
 *
 * @param result result to add the dummy cell to; it is cleared first.
 * @param initStartRowKey scan start rowkey.
 * @param includeInitStartRowKey scan start rowkey included.
 * @param scan scan object (not read by this method).
 */
private void updateDummyWithPrevRowKey(List<Cell> result, byte[] initStartRowKey,
        boolean includeInitStartRowKey, Scan scan) {
    result.clear();
    if (previousResultRowKey != null) {
        getDummyResult(previousResultRowKey, result);
        return;
    }
    if (!includeInitStartRowKey || initStartRowKey.length == 0) {
        getDummyResult(initStartRowKey, result);
        return;
    }
    // In order to generate largest possible rowkey that is less than
    // initStartRowKey, we need to check size of the region name that can be
    // used by hbase client for meta lookup, in case meta cache is expired at client.
    // Once we know regionLookupInMetaLen, use it to generate largest possible
    // rowkey that is lower than initStartRowKey by using
    // ByteUtil#previousKeyWithLength function, which appends "\\xFF" bytes to
    // prev rowkey upto the length provided. e.g. for the given key
    // "\\x01\\xC1\\x06", the previous key with length 5 would be
    // "\\x01\\xC1\\x05\\xFF\\xFF" by padding 2 bytes "\\xFF".
    // The length of the largest scan start rowkey should not exceed
    // HConstants#MAX_ROW_LENGTH.
    int regionLookupInMetaLen =
            RegionInfo.createRegionName(region.getTableDescriptor().getTableName(),
                    new byte[1], HConstants.NINES, false).length;
    int maxPrevKeyLen = HConstants.MAX_ROW_LENGTH - 1 - regionLookupInMetaLen;
    byte[] prevKey;
    if (Bytes.compareTo(initStartRowKey, initStartRowKey.length - 1,
            1, ByteUtil.ZERO_BYTE, 0, 1) == 0) {
        // If initStartRowKey has last byte as "\\x00", we can discard the last
        // byte and send the key as dummy rowkey.
        prevKey = new byte[initStartRowKey.length - 1];
        System.arraycopy(initStartRowKey, 0, prevKey, 0, prevKey.length);
    } else if (initStartRowKey.length < maxPrevKeyLen) {
        // Extend the start key to maxPrevKeyLen bytes, then take the previous key of
        // that length.
        prevKey = ByteUtil.previousKeyWithLength(ByteUtil.concat(initStartRowKey,
                new byte[maxPrevKeyLen - initStartRowKey.length]), maxPrevKeyLen);
    } else {
        // The start key is already at the length limit; use it unchanged.
        prevKey = initStartRowKey;
    }
    getDummyResult(prevKey, result);
}
}
| | | | | | | |
© 2015 - 2025 Weber Informatics LLC | Privacy Policy