All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.org.apache.hadoop.hbase.regionserver.querymatcher.ExplicitColumnTracker Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.querymatcher;

import java.io.IOException;
import java.util.NavigableSet;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode;

/**
 * This class is used for the tracking and enforcement of columns and numbers of versions during the
 * course of a Get or Scan operation, when explicit column qualifiers have been asked for in the
 * query. With a little magic (see {@link ScanQueryMatcher}), we can use this matcher for both scans
 * and gets. The main difference is 'next' and 'done' collapse for the scan case (since we see all
 * columns in order), and we only reset between rows.
 * 

* This class is utilized by {@link ScanQueryMatcher} mainly through two methods: *

    *
  • {@link #checkColumn} is called when a Put satisfies all other conditions of the query.
  • *
  • {@link #getNextRowOrNextColumn} is called whenever ScanQueryMatcher believes that the current * column should be skipped (by timestamp, filter etc.)
  • *
*

* These two methods returns a * {@link org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher.MatchCode} to define * what action should be taken. *

* This class is NOT thread-safe as queries are never multi-threaded */ @InterfaceAudience.Private public class ExplicitColumnTracker implements ColumnTracker { private final int maxVersions; private final int minVersions; /** * Contains the list of columns that the ExplicitColumnTracker is tracking. Each ColumnCount * instance also tracks how many versions of the requested column have been returned. */ private final ColumnCount[] columns; private int index; private ColumnCount column; /** * Keeps track of the latest timestamp included for current column. Used to eliminate duplicates. */ private long latestTSOfCurrentColumn; private long oldestStamp; /** * Default constructor. * @param columns columns specified user in query * @param minVersions minimum number of versions to keep * @param maxVersions maximum versions to return per column * @param oldestUnexpiredTS the oldest timestamp we are interested in, based on TTL */ public ExplicitColumnTracker(NavigableSet columns, int minVersions, int maxVersions, long oldestUnexpiredTS) { this.maxVersions = maxVersions; this.minVersions = minVersions; this.oldestStamp = oldestUnexpiredTS; this.columns = new ColumnCount[columns.size()]; int i = 0; for (byte[] column : columns) { this.columns[i++] = new ColumnCount(column); } reset(); } /** * Done when there are no more columns to match against. */ @Override public boolean done() { return this.index >= columns.length; } @Override public ColumnCount getColumnHint() { return this.column; } /** * {@inheritDoc} */ @Override public ScanQueryMatcher.MatchCode checkColumn(Cell cell, byte type) { // delete markers should never be passed to an // *Explicit*ColumnTracker assert !PrivateCellUtil.isDelete(type); do { // No more columns left, we are done with this query if (done()) { return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row } // No more columns to match against, done with storefile if (this.column == null) { return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row } // Compare specific column to current column int ret = CellUtil.compareQualifiers(cell, column.getBuffer(), column.getOffset(), column.getLength()); // Column Matches. Return include code. The caller would call checkVersions // to limit the number of versions. if (ret == 0) { return ScanQueryMatcher.MatchCode.INCLUDE; } resetTS(); if (ret < 0) { // The current KV is smaller than the column the ExplicitColumnTracker // is interested in, so seek to that column of interest. return ScanQueryMatcher.MatchCode.SEEK_NEXT_COL; } // The current KV is bigger than the column the ExplicitColumnTracker // is interested in. That means there is no more data for the column // of interest. Advance the ExplicitColumnTracker state to next // column of interest, and check again. if (ret > 0) { ++this.index; if (done()) { // No more to match, do not include, done with this row. return ScanQueryMatcher.MatchCode.SEEK_NEXT_ROW; // done_row } // This is the recursive case. this.column = this.columns[this.index]; } } while (true); } @Override public ScanQueryMatcher.MatchCode checkVersions(Cell cell, long timestamp, byte type, boolean ignoreCount) throws IOException { assert !PrivateCellUtil.isDelete(type); if (ignoreCount) { return ScanQueryMatcher.MatchCode.INCLUDE; } // Check if it is a duplicate timestamp if (sameAsPreviousTS(timestamp)) { // If duplicate, skip this Key return ScanQueryMatcher.MatchCode.SKIP; } int count = this.column.increment(); if (count >= maxVersions || (count >= minVersions && isExpired(timestamp))) { // Done with versions for this column ++this.index; resetTS(); if (done()) { // We have served all the requested columns. this.column = null; return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW; } // We are done with current column; advance to next column // of interest. this.column = this.columns[this.index]; return ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_COL; } setTS(timestamp); return ScanQueryMatcher.MatchCode.INCLUDE; } // Called between every row. @Override public void reset() { this.index = 0; this.column = this.columns[this.index]; for (ColumnCount col : this.columns) { col.setCount(0); } resetTS(); } private void resetTS() { latestTSOfCurrentColumn = HConstants.LATEST_TIMESTAMP; } private void setTS(long timestamp) { latestTSOfCurrentColumn = timestamp; } private boolean sameAsPreviousTS(long timestamp) { return timestamp == latestTSOfCurrentColumn; } private boolean isExpired(long timestamp) { return timestamp < oldestStamp; } @Override public void doneWithColumn(Cell cell) { while (this.column != null) { int compare = CellUtil.compareQualifiers(cell, column.getBuffer(), column.getOffset(), column.getLength()); resetTS(); if (compare >= 0) { ++this.index; if (done()) { // Will not hit any more columns in this storefile this.column = null; } else { this.column = this.columns[this.index]; } if (compare > 0) { continue; } } return; } } @Override public MatchCode getNextRowOrNextColumn(Cell cell) { doneWithColumn(cell); if (getColumnHint() == null) { return MatchCode.SEEK_NEXT_ROW; } else { return MatchCode.SEEK_NEXT_COL; } } @Override public boolean isDone(long timestamp) { return minVersions <= 0 && isExpired(timestamp); } @Override public void beforeShipped() throws IOException { // do nothing } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy