All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.org.apache.hadoop.hbase.regionserver.querymatcher.NewVersionBehaviorTracker Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
* Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.querymatcher;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.NavigableMap;
import java.util.NavigableSet;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;

/**
 * A tracker both implementing ColumnTracker and DeleteTracker, used for mvcc-sensitive scanning.
 * We should make sure in one QueryMatcher the ColumnTracker and DeleteTracker is the same instance.
 */
@InterfaceAudience.Private
public class NewVersionBehaviorTracker implements ColumnTracker, DeleteTracker {

  // Qualifier, timestamp, mvcc and type of the last cell recorded by prepare().
  private byte[] lastCqArray;
  private int lastCqLength;
  private int lastCqOffset;
  private long lastCqTs;
  private long lastCqMvcc;
  private byte lastCqType;

  // Index into 'columns' of the requested column currently being matched.
  private int columnIndex;
  // Number of versions counted by checkVersions() for the current qualifier.
  private int countCurrentCol;

  protected int maxVersions;
  private int resultMaxVersions;
  // Requested qualifiers in iteration order of the set given to the constructor; null means
  // no explicit column list, in which case checkColumn() includes every column.
  private byte[][] columns;
  private int minVersions;
  private long oldestStamp;
  private CellComparator comparator;

  // These two maps have same structure.
  // Each node is a versions deletion (DeleteFamily or DeleteColumn). Key is the mvcc of the marker,
  // value is a data structure which contains infos we need that happens before this node's mvcc and
  // after the previous node's mvcc. The last node is a special node whose key is max_long that
  // saves infos after last deletion. See DeleteVersionsNode's comments for details.
  // The delColMap is constructed and used for each cq, and the delFamMap is constructed when cq is
  // null and saving family-level delete markers. Each time the cq is changed, we should
  // reconstruct delColMap as a deep copy of delFamMap.
  protected NavigableMap<Long, DeleteVersionsNode> delColMap = new TreeMap<>();
  protected NavigableMap<Long, DeleteVersionsNode> delFamMap = new TreeMap<>();

  /**
   * Note maxVersion and minVersion must set according to cf's conf, not user's scan parameter.
   *
   * @param columns           columns specified by the user in the query; when null or empty every
   *                          column is included by {@link #checkColumn(Cell, byte)}
   * @param comparartor       the cell comparator
   * @param minVersion        The minimum number of versions to keep(used when TTL is set).
   * @param maxVersion        The maximum number of versions in CF's conf
   * @param resultMaxVersions maximum versions to return per column, which may be different from
   *                          maxVersion
   * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL
   */
  public NewVersionBehaviorTracker(NavigableSet<byte[]> columns, CellComparator comparartor,
      int minVersion, int maxVersion, int resultMaxVersions, long oldestUnexpiredTS) {
    this.maxVersions = maxVersion;
    this.minVersions = minVersion;
    this.resultMaxVersions = resultMaxVersions;
    this.oldestStamp = oldestUnexpiredTS;
    if (columns != null && columns.size() > 0) {
      this.columns = new byte[columns.size()][];
      int i = 0;
      for (byte[] column : columns) {
        this.columns[i++] = column;
      }
    }
    this.comparator = comparartor;
    // reset() invokes the overridable resetInternal(); an override runs before the subclass
    // constructor body, so it must not depend on subclass fields being initialized.
    reset();
  }

  @Override
  public void beforeShipped() throws IOException {
    // Do nothing
  }

  /**
   * A data structure which contains infos we need that happens before this node's mvcc and
   * after the previous node's mvcc. A node means there is a version deletion at the mvcc and ts.
   */
  protected class DeleteVersionsNode {
    public long ts;
    public long mvcc;

    // <timestamp, set<mvcc>>
    // Key is ts of version deletes, value is its mvccs.
    // We may delete more than one time for a version.
    private Map<Long, SortedSet<Long>> deletesMap = new HashMap<>();

    // <mvcc, set<mvcc>>
    // Key is mvcc of version deletes, value is mvcc of visible puts before the delete effect.
    private NavigableMap<Long, SortedSet<Long>> mvccCountingMap = new TreeMap<>();

    /**
     * Creates a node for a deletion at the given timestamp and mvcc. The counting map always
     * starts with one entry keyed by {@code Long.MAX_VALUE}, so that
     * {@code ceilingEntry} lookups in {@link #addVersionDelete(Cell)} always find an entry.
     */
    protected DeleteVersionsNode(long ts, long mvcc) {
      this.ts = ts;
      this.mvcc = mvcc;
      mvccCountingMap.put(Long.MAX_VALUE, new TreeSet<Long>());
    }

    /**
     * Creates the special last node: ts is {@code Long.MIN_VALUE} and mvcc is
     * {@code Long.MAX_VALUE}.
     */
    protected DeleteVersionsNode() {
      this(Long.MIN_VALUE, Long.MAX_VALUE);
    }

    /**
     * Records a version delete marker in this node.
     *
     * @param cell the delete marker; its timestamp and sequence id (mvcc) are recorded
     */
    public void addVersionDelete(Cell cell) {
      SortedSet<Long> set = deletesMap.get(cell.getTimestamp());
      if (set == null) {
        set = new TreeSet<>();
        deletesMap.put(cell.getTimestamp(), set);
      }
      set.add(cell.getSequenceId());
      // The init set should be the puts whose mvcc is smaller than this Delete. Because
      // there may be some Puts masked by them. The Puts whose mvcc is larger than this Delete can
      // not be copied to this node because we may delete one version and the oldest put may not be
      // masked.
      SortedSet<Long> nextValue = mvccCountingMap.ceilingEntry(cell.getSequenceId()).getValue();
      SortedSet<Long> thisValue = new TreeSet<>(nextValue.headSet(cell.getSequenceId()));
      mvccCountingMap.put(cell.getSequenceId(), thisValue);
    }

    /**
     * @return a new node with the same ts and mvcc whose maps hold independent copies of every
     *         set held by this node
     */
    protected DeleteVersionsNode getDeepCopy() {
      DeleteVersionsNode node = new DeleteVersionsNode(ts, mvcc);
      for (Map.Entry<Long, SortedSet<Long>> e : deletesMap.entrySet()) {
        node.deletesMap.put(e.getKey(), new TreeSet<>(e.getValue()));
      }
      for (Map.Entry<Long, SortedSet<Long>> e : mvccCountingMap.entrySet()) {
        node.mvccCountingMap.put(e.getKey(), new TreeSet<>(e.getValue()));
      }
      return node;
    }
  }

  /**
   * Reset the map if it is different with the last Cell.
   * Save the cq array/offset/length for next Cell.
   *
   * @return If this put has duplicate ts with last cell, return the mvcc of last cell.
   *         Else return MAX_VALUE.
   */
  protected long prepare(Cell cell) {
    boolean matchCq =
        PrivateCellUtil.matchingQualifier(cell, lastCqArray, lastCqOffset, lastCqLength);
    if (!matchCq) {
      // The last cell is family-level delete and this is not, or the cq is changed,
      // we should construct delColMap as a deep copy of delFamMap.
      delColMap.clear();
      for (Map.Entry<Long, DeleteVersionsNode> e : delFamMap.entrySet()) {
        delColMap.put(e.getKey(), e.getValue().getDeepCopy());
      }
      countCurrentCol = 0;
    }
    if (matchCq && !PrivateCellUtil.isDelete(lastCqType) && lastCqType == cell.getTypeByte()
        && lastCqTs == cell.getTimestamp()) {
      // Put with duplicate timestamp, ignore.
      // The remembered "last cell" state is deliberately left untouched on this path.
      return lastCqMvcc;
    }
    lastCqArray = cell.getQualifierArray();
    lastCqOffset = cell.getQualifierOffset();
    lastCqLength = cell.getQualifierLength();
    lastCqTs = cell.getTimestamp();
    lastCqMvcc = cell.getSequenceId();
    lastCqType = cell.getTypeByte();
    return Long.MAX_VALUE;
  }

  // DeleteTracker

  /**
   * Records a delete marker. Family-level markers go to delFamMap, column-level markers go to
   * delColMap.
   *
   * @param cell the delete marker to record
   * @throws AssertionError if the cell's type is not one of the four delete marker types
   */
  @Override
  public void add(Cell cell) {
    prepare(cell);
    byte type = cell.getTypeByte();
    switch (Type.codeToType(type)) {
      // By the order of seen. We put null cq at first.
      case DeleteFamily: // Delete all versions of all columns of the specified family
        delFamMap.put(cell.getSequenceId(),
            new DeleteVersionsNode(cell.getTimestamp(), cell.getSequenceId()));
        break;
      case DeleteFamilyVersion: // Delete all columns of the specified family and specified version
        delFamMap.ceilingEntry(cell.getSequenceId()).getValue().addVersionDelete(cell);
        break;

      // These two kinds of markers are mix with Puts.
      case DeleteColumn: // Delete all versions of the specified column
        delColMap.put(cell.getSequenceId(),
            new DeleteVersionsNode(cell.getTimestamp(), cell.getSequenceId()));
        break;
      case Delete: // Delete the specified version of the specified column.
        delColMap.ceilingEntry(cell.getSequenceId()).getValue().addVersionDelete(cell);
        break;
      default:
        throw new AssertionError("Unknown delete marker type for " + cell);
    }
  }

  /**
   * This method is not idempotent, we will save some info to judge VERSION_MASKED.
   *
   * @param cell - current cell to check if deleted by a previously seen delete
   * @return We don't distinguish DeleteColumn and DeleteFamily. We only return code for column.
   */
  @Override
  public DeleteResult isDeleted(Cell cell) {
    long duplicateMvcc = prepare(cell);

    // Visit every deletion node whose mvcc is >= this cell's mvcc, in ascending mvcc order.
    for (DeleteVersionsNode node : delColMap.tailMap(cell.getSequenceId()).values()) {
      // Smallest mvcc of a version delete at this cell's timestamp that is >= the cell's mvcc;
      // stays MAX_VALUE when there is none.
      long deleteMvcc = Long.MAX_VALUE;
      SortedSet<Long> deleteVersionMvccs = node.deletesMap.get(cell.getTimestamp());
      if (deleteVersionMvccs != null) {
        SortedSet<Long> tail = deleteVersionMvccs.tailSet(cell.getSequenceId());
        if (!tail.isEmpty()) {
          deleteMvcc = tail.first();
        }
      }
      SortedMap<Long, SortedSet<Long>> subMap = node.mvccCountingMap
          .subMap(cell.getSequenceId(), true, Math.min(duplicateMvcc, deleteMvcc), true);
      for (Map.Entry<Long, SortedSet<Long>> seg : subMap.entrySet()) {
        if (seg.getValue().size() >= maxVersions) {
          return DeleteResult.VERSION_MASKED;
        }
        // Side effect: the cell is counted as a visible put in this segment.
        seg.getValue().add(cell.getSequenceId());
      }
      if (deleteMvcc < Long.MAX_VALUE) {
        return DeleteResult.VERSION_DELETED;
      }

      if (cell.getTimestamp() <= node.ts) {
        return DeleteResult.COLUMN_DELETED;
      }
    }
    if (duplicateMvcc < Long.MAX_VALUE) {
      return DeleteResult.VERSION_MASKED;
    }
    return DeleteResult.NOT_DELETED;
  }

  /**
   * @return true only when both maps hold exactly one node, keyed by {@code Long.MAX_VALUE},
   *         and each such node's counting map holds exactly one entry. Note that reset() leaves
   *         delColMap with no nodes, so this returns false until prepare() has repopulated it.
   */
  @Override
  public boolean isEmpty() {
    return delColMap.size() == 1 && delColMap.get(Long.MAX_VALUE).mvccCountingMap.size() == 1
        && delFamMap.size() == 1 && delFamMap.get(Long.MAX_VALUE).mvccCountingMap.size() == 1;
  }

  @Override
  public void update() {
    // ignore
  }

  // ColumnTracker

  /**
   * Matches the cell's qualifier against the requested columns, advancing past requested
   * columns that compare smaller than the cell's qualifier.
   *
   * @return INCLUDE when no column list was given or the qualifier equals the current requested
   *         column, SEEK_NEXT_COL when the qualifier compares smaller than the current requested
   *         column, SEEK_NEXT_ROW when the requested columns are exhausted
   */
  @Override
  public MatchCode checkColumn(Cell cell, byte type) throws IOException {
    if (columns == null) {
      return MatchCode.INCLUDE;
    }

    while (!done()) {
      int c = CellUtil.compareQualifiers(cell,
          columns[columnIndex], 0, columns[columnIndex].length);
      if (c < 0) {
        return MatchCode.SEEK_NEXT_COL;
      }

      if (c == 0) {
        // We drop old version in #isDeleted, so here we must return INCLUDE.
        return MatchCode.INCLUDE;
      }

      columnIndex++;
    }
    // No more columns left, we are done with this query
    return MatchCode.SEEK_NEXT_ROW;
  }

  /**
   * Counts the cell against the per-column limits. The count is compared with minVersions when
   * the timestamp is older than oldestStamp, and with resultMaxVersions in all cases.
   *
   * @param ignoreCount when true the cell is included without being counted
   */
  @Override
  public MatchCode checkVersions(Cell cell, long timestamp, byte type,
      boolean ignoreCount) throws IOException {
    assert !PrivateCellUtil.isDelete(type);
    // We drop old version in #isDeleted, so here we won't SKIP because of versioning. But we should
    // consider TTL.
    if (ignoreCount) {
      return MatchCode.INCLUDE;
    }
    countCurrentCol++;
    if (timestamp < this.oldestStamp) {
      if (countCurrentCol == minVersions) {
        return MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
      }
      if (countCurrentCol > minVersions) {
        // This may not be reached, only for safety.
        return MatchCode.SEEK_NEXT_COL;
      }
    }

    if (countCurrentCol == resultMaxVersions) {
      // We have enough number of versions for user's requirement.
      return MatchCode.INCLUDE_AND_SEEK_NEXT_COL;
    }
    if (countCurrentCol > resultMaxVersions) {
      // This may not be reached, only for safety
      return MatchCode.SEEK_NEXT_COL;
    }
    return MatchCode.INCLUDE;
  }

  /**
   * Clears both delete maps and all remembered last-cell and counting state, then calls
   * {@link #resetInternal()}.
   */
  @Override
  public void reset() {
    delColMap.clear();
    delFamMap.clear();
    lastCqArray = null;
    lastCqLength = 0;
    lastCqOffset = 0;
    lastCqTs = Long.MIN_VALUE;
    lastCqMvcc = 0;
    lastCqType = 0;
    columnIndex = 0;
    countCurrentCol = 0;
    resetInternal();
  }

  /**
   * Installs the special last node (key {@code Long.MAX_VALUE}) into delFamMap.
   */
  protected void resetInternal() {
    delFamMap.put(Long.MAX_VALUE, new DeleteVersionsNode());
  }

  @Override
  public boolean done() {
    return columns != null && columnIndex >= columns.length;
  }

  /**
   * @return a ColumnCount for the requested column currently being matched, or null when no
   *         column list was given or the list is exhausted
   */
  @Override
  public ColumnCount getColumnHint() {
    if (columns != null) {
      if (columnIndex < columns.length) {
        return new ColumnCount(columns[columnIndex]);
      }
    }
    return null;
  }

  @Override
  public MatchCode getNextRowOrNextColumn(Cell cell) {
    // TODO maybe we can optimize.
    return MatchCode.SEEK_NEXT_COL;
  }

  @Override
  public boolean isDone(long timestamp) {
    // We can not skip Cells with small ts.
    return false;
  }

  @Override
  public CellComparator getCellComparator() {
    return this.comparator;
  }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy