All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.regionserver.StoreFileScanner Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.PriorityQueue;
import java.util.concurrent.atomic.LongAdder;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.TimeRange;
import org.apache.hadoop.hbase.io.hfile.HFileScanner;
import org.apache.hadoop.hbase.regionserver.querymatcher.ScanQueryMatcher;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;

/**
 * KeyValueScanner adaptor over the Reader. It also provides hooks into bloom filter things.
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.PHOENIX)
@InterfaceStability.Evolving
public class StoreFileScanner implements KeyValueScanner {
  // the reader it comes from:
  // (its reference count is incremented by the constructor)
  private final StoreFileReader reader;
  // Underlying HFile scanner; all positioning (seekTo/reseekTo/next/seekBefore) goes through it.
  private final HFileScanner hfs;
  // Cell this scanner is currently positioned on, as returned by peek().
  // null implies the scanner is at its end (or has been closed).
  private Cell cur = null;
  // Set by close() so that repeated close() calls return immediately.
  private boolean closed = false;

  // State for lazy seeking: requestSeek() may record a seek without performing it.
  // realSeekDone is false while such a seek is pending and is set to true once
  // seek()/reseek()/seekToPreviousRow() has run.
  private boolean realSeekDone;
  // Recorded by requestSeek(): true if the pending seek must be done with reseek(),
  // false if it must be done with seek(). Consumed by enforceSeek().
  private boolean delayedReseek;
  // Recorded by requestSeek(): the key the pending seek has to go to. Consumed by enforceSeek().
  private Cell delayedSeekKV;

  // If true, cells whose sequence id is greater than readPt are skipped.
  private final boolean enforceMVCC;
  // Constructor-supplied flag: true if the underlying store file reader has MVCC info.
  private final boolean hasMVCCInfo;
  // When set, skipKVsNewerThanReadpoint() gives up (returns false) as soon as skipping
  // moves it onto a row after the one it started on. Only set (temporarily) by
  // seekToPreviousRow(), i.e. only used for reversed scans.
  private boolean stopSkippingKVsIfNextRow = false;

  // Seek counter for tests. Stays null, and is therefore not updated, until instrument()
  // has been called.
  private static LongAdder seekCount;

  // Optimization hint supplied to the constructor: true if we can be sure there is no null
  // column. requestSeek() only consults the delete family Bloom filter when this is true.
  private final boolean canOptimizeForNonNullColumn;

  // MVCC read point: with enforceMVCC set, cells with a larger sequence id are not returned.
  private final long readPt;

  // Order of this scanner relative to other scanners when duplicate key-value is found.
  // Higher values means scanner has newer data.
  private final long scannerOrder;

  /**
   * Builds a {@link KeyValueScanner} that reads through the given {@link HFileScanner}. As its
   * last step the constructor increments the reference count of {@code reader}.
   * @param reader                      The store file reader the scanner belongs to.
   * @param hfs                         The HFile scanner that does the actual seeking and
   *                                    iteration.
   * @param useMVCC                     If true, scanner will filter out updates with MVCC larger
   *                                    than {@code readPt}.
   * @param hasMVCC                     Set to true if underlying store file reader has MVCC info.
   * @param readPt                      MVCC value to use to filter out the updates newer than this
   *                                    scanner.
   * @param scannerOrder                Order of the scanner relative to other scanners. See
   *                                    {@link KeyValueScanner#getScannerOrder()}.
   * @param canOptimizeForNonNullColumn {@code true} if we can make sure there is no null column,
   *                                    otherwise {@code false}. This is a hint for optimization.
   */
  public StoreFileScanner(StoreFileReader reader, HFileScanner hfs, boolean useMVCC,
    boolean hasMVCC, long readPt, long scannerOrder, boolean canOptimizeForNonNullColumn) {
    // Data source and the low-level scanner over it.
    this.reader = reader;
    this.hfs = hfs;

    // MVCC handling.
    this.enforceMVCC = useMVCC;
    this.hasMVCCInfo = hasMVCC;
    this.readPt = readPt;

    // Heap ordering and optimization hints.
    this.scannerOrder = scannerOrder;
    this.canOptimizeForNonNullColumn = canOptimizeForNonNullColumn;

    // Register this scanner as a user of the reader.
    reader.incrementRefCount();
  }

  /**
   * Return a list of scanners corresponding to the given set of store files. Delegates to the
   * overload that takes a {@link ScanQueryMatcher}, passing {@code null} as the matcher.
   * @param files         the store files to create scanners for
   * @param cacheBlocks   forwarded to the scanners that are created
   * @param usePread      if true pread scanners are created, otherwise stream scanners
   * @param isCompaction  forwarded to the stream scanners that are created
   * @param useDropBehind forwarded to the stream scanners that are created
   * @param readPt        MVCC read point handed to every scanner
   * @return one scanner per store file; an empty list if {@code files} is empty
   * @throws IOException if a reader or a scanner cannot be opened
   */
  public static List<StoreFileScanner> getScannersForStoreFiles(Collection<HStoreFile> files,
    boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean useDropBehind, long readPt)
    throws IOException {
    return getScannersForStoreFiles(files, cacheBlocks, usePread, isCompaction, useDropBehind, null,
      readPt);
  }

  /**
   * Return a list of scanners corresponding to the given set of store files, and set the
   * ScanQueryMatcher-derived optimization hint on each store file scanner.
   * <p>
   * The reader of every file is initialized first, because the sort by
   * {@code StoreFileComparators.SEQ_ID} needs the file metadata. Scanners are then created in
   * that sorted order and the position in the order is passed on as the scanner order. If
   * creating any scanner fails, the scanners created so far are closed before the exception
   * propagates.
   * @param files        the store files to create scanners for
   * @param cacheBlocks  forwarded to the scanners that are created
   * @param usePread     if true pread scanners are created, otherwise stream scanners
   * @param isCompaction forwarded to the stream scanners that are created
   * @param canUseDrop   forwarded to the stream scanners that are created
   * @param matcher      may be null; when present and it has no null column in the query, the
   *                     scanners are told they can optimize for non-null columns
   * @param readPt       MVCC read point handed to every scanner
   * @return one scanner per store file; an empty list if {@code files} is empty
   * @throws IOException if a reader or a scanner cannot be opened
   */
  public static List<StoreFileScanner> getScannersForStoreFiles(Collection<HStoreFile> files,
    boolean cacheBlocks, boolean usePread, boolean isCompaction, boolean canUseDrop,
    ScanQueryMatcher matcher, long readPt) throws IOException {
    if (files.isEmpty()) {
      return Collections.emptyList();
    }
    List<StoreFileScanner> scanners = new ArrayList<>(files.size());
    boolean canOptimizeForNonNullColumn = matcher != null && !matcher.hasNullColumnInQuery();
    PriorityQueue<HStoreFile> sortedFiles =
      new PriorityQueue<>(files.size(), StoreFileComparators.SEQ_ID);
    for (HStoreFile file : files) {
      // The sort function needs metadata so we need to open reader first before sorting the list.
      file.initReader();
      sortedFiles.add(file);
    }
    boolean succ = false;
    try {
      for (int i = 0, n = files.size(); i < n; i++) {
        // Files come out of the queue in SEQ_ID order; i becomes the scanner order.
        HStoreFile sf = sortedFiles.remove();
        StoreFileScanner scanner;
        if (usePread) {
          scanner = sf.getPreadScanner(cacheBlocks, readPt, i, canOptimizeForNonNullColumn);
        } else {
          scanner = sf.getStreamScanner(canUseDrop, cacheBlocks, isCompaction, readPt, i,
            canOptimizeForNonNullColumn);
        }
        scanners.add(scanner);
      }
      succ = true;
    } finally {
      if (!succ) {
        // Do not leak the scanners that were already opened.
        for (StoreFileScanner scanner : scanners) {
          scanner.close();
        }
      }
    }
    return scanners;
  }

  /**
   * Get scanners for compaction. We will create a separated reader for each store file to avoid
   * contention with normal read request.
   * <p>
   * The files are sorted by {@code StoreFileComparators.SEQ_ID} and a stream scanner is created
   * for each, using its position in that order as the scanner order. If creating any scanner
   * fails, the scanners created so far are closed before the exception propagates.
   * @param files            the store files taking part in the compaction
   * @param canUseDropBehind forwarded to the stream scanners that are created
   * @param readPt           MVCC read point handed to every scanner
   * @return one stream scanner per store file
   * @throws IOException if a scanner cannot be opened
   */
  public static List<StoreFileScanner> getScannersForCompaction(Collection<HStoreFile> files,
    boolean canUseDropBehind, long readPt) throws IOException {
    List<StoreFileScanner> scanners = new ArrayList<>(files.size());
    List<HStoreFile> sortedFiles = new ArrayList<>(files);
    Collections.sort(sortedFiles, StoreFileComparators.SEQ_ID);
    boolean succ = false;
    try {
      for (int i = 0, n = sortedFiles.size(); i < n; i++) {
        // cacheBlocks = false, isCompaction = true, canOptimizeForNonNullColumn = false
        scanners.add(
          sortedFiles.get(i).getStreamScanner(canUseDropBehind, false, true, readPt, i, false));
      }
      succ = true;
    } finally {
      if (!succ) {
        // Do not leak the scanners that were already opened.
        for (StoreFileScanner scanner : scanners) {
          scanner.close();
        }
      }
    }
    return scanners;
  }

  /** Returns a description made of the underlying HFile scanner and the current cell. */
  @Override
  public String toString() {
    StringBuilder description = new StringBuilder("StoreFileScanner[");
    description.append(hfs.toString());
    description.append(", cur=").append(cur);
    description.append("]");
    return description.toString();
  }

  /** Returns the cell the scanner is positioned on without advancing; null when at the end. */
  @Override
  public Cell peek() {
    return this.cur;
  }

  /**
   * Returns the current cell and moves the scanner on to the following one, skipping cells that
   * are newer than the read point when the file has MVCC info or is bulk loaded.
   * @return the cell the scanner was positioned on before the call, or null if it was already at
   *         the end
   * @throws IOException if the underlying scanner fails; a {@link FileNotFoundException} is
   *                     rethrown unchanged, any other IOException is wrapped
   */
  @Override
  public Cell next() throws IOException {
    final Cell previous = cur;
    // cur == null implies 'end': nothing to advance.
    if (previous == null) {
      return null;
    }

    try {
      hfs.next();
      setCurrentCell(hfs.getCell());
      if (hasMVCCInfo || this.reader.isBulkLoaded()) {
        skipKVsNewerThanReadpoint();
      }
    } catch (FileNotFoundException fnfe) {
      throw fnfe;
    } catch (IOException ioe) {
      throw new IOException("Could not iterate " + this, ioe);
    }
    return previous;
  }

  /**
   * Positions the scanner at the first cell at or after {@code key}, then skips cells newer than
   * the read point when the file has MVCC info or is bulk loaded. Whatever the outcome, the real
   * seek is marked as done afterwards.
   * @param key the key to seek to
   * @return true if the scanner ends up on a cell, false if there is none
   * @throws IOException if the underlying scanner fails; a {@link FileNotFoundException} is
   *                     rethrown unchanged, any other IOException is wrapped
   */
  @Override
  public boolean seek(Cell key) throws IOException {
    if (seekCount != null) {
      seekCount.increment();
    }

    try {
      try {
        boolean positioned = seekAtOrAfter(hfs, key);
        if (!positioned) {
          this.cur = null;
          return false;
        }

        setCurrentCell(hfs.getCell());

        // MVCC filtering is needed when the file carries MVCC info or was bulk loaded.
        if (hasMVCCInfo || this.reader.isBulkLoaded()) {
          return skipKVsNewerThanReadpoint();
        }
        return true;
      } finally {
        realSeekDone = true;
      }
    } catch (FileNotFoundException fnfe) {
      throw fnfe;
    } catch (IOException wrapped) {
      throw new IOException("Could not seek " + this + " to key " + key, wrapped);
    }
  }

  /**
   * Like {@link #seek(Cell)}, but positions the underlying scanner through
   * {@link #reseekAtOrAfter(HFileScanner, Cell)}. Whatever the outcome, the real seek is marked
   * as done afterwards.
   * @param key the key to reseek to
   * @return true if the scanner ends up on a cell, false if there is none
   * @throws IOException if the underlying scanner fails; a {@link FileNotFoundException} is
   *                     rethrown unchanged, any other IOException is wrapped
   */
  @Override
  public boolean reseek(Cell key) throws IOException {
    if (seekCount != null) {
      seekCount.increment();
    }

    try {
      try {
        boolean positioned = reseekAtOrAfter(hfs, key);
        if (!positioned) {
          this.cur = null;
          return false;
        }
        setCurrentCell(hfs.getCell());

        // MVCC filtering is needed when the file carries MVCC info or was bulk loaded.
        if (hasMVCCInfo || this.reader.isBulkLoaded()) {
          return skipKVsNewerThanReadpoint();
        }
        return true;
      } finally {
        realSeekDone = true;
      }
    } catch (FileNotFoundException fnfe) {
      throw fnfe;
    } catch (IOException wrapped) {
      throw new IOException("Could not reseek " + this + " to key " + key, wrapped);
    }
  }

  /**
   * Makes {@code newVal} the current cell. For a bulk loaded file whose reader does not skip
   * resetting sequence ids, the cell's sequence id is overwritten with the reader's sequence id.
   * @param newVal the new current cell; may be null
   * @throws IOException if the sequence id cannot be set on the cell
   */
  protected void setCurrentCell(Cell newVal) throws IOException {
    this.cur = newVal;
    if (newVal == null) {
      return;
    }
    if (this.reader.isBulkLoaded() && !this.reader.isSkipResetSeqId()) {
      PrivateCellUtil.setSequenceId(newVal, this.reader.getSequenceID());
    }
  }

  /**
   * Advances past every cell whose sequence id is greater than the read point. Nothing is
   * skipped unless MVCC enforcement is on.
   * @return true if the scanner is left on a cell; false if the file is exhausted, or if
   *         {@code stopSkippingKVsIfNextRow} is set and skipping moved onto a row after the one
   *         it started on
   * @throws IOException if the underlying scanner fails
   */
  protected boolean skipKVsNewerThanReadpoint() throws IOException {
    // Remember where we started so a row change can be detected.
    final Cell firstCell = cur;
    if (enforceMVCC) {
      // Ignore all key-values that are newer than our current readPoint.
      while (cur != null && cur.getSequenceId() > readPt) {
        boolean advanced = hfs.next();
        setCurrentCell(hfs.getCell());
        if (
          advanced && this.stopSkippingKVsIfNextRow
            && getComparator().compareRows(cur, firstCell) > 0
        ) {
          return false;
        }
      }
    }
    return cur != null;
  }

  /**
   * Closes the underlying HFile scanner and tells the reader that this scanner has completed its
   * read. Calling it again is a no-op.
   * <p>
   * The reader is notified in a {@code finally} block so that the reference taken in the
   * constructor is still given back when closing the HFile scanner throws.
   */
  @Override
  public void close() {
    if (closed) {
      return;
    }
    // Mark as closed up front: a second call must not notify the reader a second time, even if
    // the first call ended with an exception.
    closed = true;
    cur = null;
    try {
      this.hfs.close();
    } finally {
      // reader is final and was already dereferenced by the constructor, so it cannot be null.
      this.reader.readCompleted();
    }
  }

  /**
   * Positions {@code s} on the first cell at or after {@code k}.
   * @param s the scanner to position
   * @param k the key to seek to
   * @return false if not found or if k is after the end
   * @throws IOException if the scanner fails
   */
  public static boolean seekAtOrAfter(HFileScanner s, Cell k) throws IOException {
    final int cmp = s.seekTo(k);
    if (cmp == 0) {
      // Seeked to the exact key
      return true;
    }
    if (cmp > 0) {
      // Passed KV is larger than current KV in file, if there is a next
      // it is the "after", if not then this scanner is done.
      return s.next();
    }
    if (cmp == HConstants.INDEX_KEY_MAGIC) {
      // using faked key
      return true;
    }
    // Passed KV is smaller than first KV in file, work from start of file
    return s.seekTo();
  }

  /**
   * Reseek counterpart of {@link #seekAtOrAfter(HFileScanner, Cell)}: positions {@code s} on the
   * first cell at or after {@code k} using {@code reseekTo}.
   * @param s the scanner to position
   * @param k the key to reseek to
   * @return false if there is no cell at or after {@code k}
   * @throws IOException if the scanner fails
   */
  static boolean reseekAtOrAfter(HFileScanner s, Cell k) throws IOException {
    final int cmp = s.reseekTo(k);
    if (cmp > 0) {
      // passed KV is larger than current KV in file, if there is a next
      // it is after, if not then this scanner is done.
      return s.next();
    }
    if (cmp == HConstants.INDEX_KEY_MAGIC) {
      // using faked key
      return true;
    }
    // If up to now scanner is not seeked yet, this means passed KV is smaller
    // than first KV in file, and it is the first time we seek on this file.
    // So we also need to work from the start of file.
    return s.isSeeked() || s.seekTo();
  }

  /**
   * Returns the order of this scanner relative to other scanners, as given to the constructor.
   * @see KeyValueScanner#getScannerOrder()
   */
  @Override
  public long getScannerOrder() {
    return this.scannerOrder;
  }

  /**
   * Pretend we have done a seek but don't do it yet, if possible. The hope is that we find
   * requested columns in more recent files and won't have to seek in older files. Creates a fake
   * key/value with the given row/column and the highest (most recent) possible timestamp we might
   * get from this file. When users of such "lazy scanner" need to know the next KV precisely (e.g.
   * when this scanner is at the top of the heap), they run {@link #enforceSeek()}.
   * 

* Note that this function does guarantee that the current KV of this scanner will be advanced to * at least the given KV. Because of this, it does have to do a real seek in cases when the seek * timestamp is older than the highest timestamp of the file, e.g. when we are trying to seek to * the next row/column and use OLDEST_TIMESTAMP in the seek key. */ @Override public boolean requestSeek(Cell kv, boolean forward, boolean useBloom) throws IOException { if (kv.getFamilyLength() == 0) { useBloom = false; } boolean haveToSeek = true; if (useBloom) { // check ROWCOL Bloom filter first. if (reader.getBloomFilterType() == BloomType.ROWCOL) { haveToSeek = reader.passesGeneralRowColBloomFilter(kv); } else if ( canOptimizeForNonNullColumn && ((PrivateCellUtil.isDeleteFamily(kv) || PrivateCellUtil.isDeleteFamilyVersion(kv))) ) { // if there is no such delete family kv in the store file, // then no need to seek. haveToSeek = reader.passesDeleteFamilyBloomFilter(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength()); } } delayedReseek = forward; delayedSeekKV = kv; if (haveToSeek) { // This row/column might be in this store file (or we did not use the // Bloom filter), so we still need to seek. realSeekDone = false; long maxTimestampInFile = reader.getMaxTimestamp(); long seekTimestamp = kv.getTimestamp(); if (seekTimestamp > maxTimestampInFile) { // Create a fake key that is not greater than the real next key. // (Lower timestamps correspond to higher KVs.) // To understand this better, consider that we are asked to seek to // a higher timestamp than the max timestamp in this file. We know that // the next point when we have to consider this file again is when we // pass the max timestamp of this file (with the same row/column). setCurrentCell(PrivateCellUtil.createFirstOnRowColTS(kv, maxTimestampInFile)); } else { // This will be the case e.g. 
when we need to seek to the next // row/column, and we don't know exactly what they are, so we set the // seek key's timestamp to OLDEST_TIMESTAMP to skip the rest of this // row/column. enforceSeek(); } return cur != null; } // Multi-column Bloom filter optimization. // Create a fake key/value, so that this scanner only bubbles up to the top // of the KeyValueHeap in StoreScanner after we scanned this row/column in // all other store files. The query matcher will then just skip this fake // key/value and the store scanner will progress to the next column. This // is obviously not a "real real" seek, but unlike the fake KV earlier in // this method, we want this to be propagated to ScanQueryMatcher. setCurrentCell(PrivateCellUtil.createLastOnRowCol(kv)); realSeekDone = true; return true; } StoreFileReader getReader() { return reader; } CellComparator getComparator() { return reader.getComparator(); } @Override public boolean realSeekDone() { return realSeekDone; } @Override public void enforceSeek() throws IOException { if (realSeekDone) return; if (delayedReseek) { reseek(delayedSeekKV); } else { seek(delayedSeekKV); } } @Override public boolean isFileScanner() { return true; } @Override public Path getFilePath() { return reader.getHFileReader().getPath(); } // Test methods static final long getSeekCount() { return seekCount.sum(); } static final void instrument() { seekCount = new LongAdder(); } @Override public boolean shouldUseScanner(Scan scan, HStore store, long oldestUnexpiredTS) { // if the file has no entries, no need to validate or create a scanner. 
byte[] cf = store.getColumnFamilyDescriptor().getName(); TimeRange timeRange = scan.getColumnFamilyTimeRange().get(cf); if (timeRange == null) { timeRange = scan.getTimeRange(); } return reader.passesTimerangeFilter(timeRange, oldestUnexpiredTS) && reader.passesKeyRangeFilter(scan) && reader.passesBloomFilter(scan, scan.getFamilyMap().get(cf)); } @Override public boolean seekToPreviousRow(Cell originalKey) throws IOException { try { try { boolean keepSeeking = false; Cell key = originalKey; do { Cell seekKey = PrivateCellUtil.createFirstOnRow(key); if (seekCount != null) seekCount.increment(); if (!hfs.seekBefore(seekKey)) { this.cur = null; return false; } Cell curCell = hfs.getCell(); Cell firstKeyOfPreviousRow = PrivateCellUtil.createFirstOnRow(curCell); if (seekCount != null) seekCount.increment(); if (!seekAtOrAfter(hfs, firstKeyOfPreviousRow)) { this.cur = null; return false; } setCurrentCell(hfs.getCell()); this.stopSkippingKVsIfNextRow = true; boolean resultOfSkipKVs; try { resultOfSkipKVs = skipKVsNewerThanReadpoint(); } finally { this.stopSkippingKVsIfNextRow = false; } if (!resultOfSkipKVs || getComparator().compareRows(cur, firstKeyOfPreviousRow) > 0) { keepSeeking = true; key = firstKeyOfPreviousRow; continue; } else { keepSeeking = false; } } while (keepSeeking); return true; } finally { realSeekDone = true; } } catch (FileNotFoundException e) { throw e; } catch (IOException ioe) { throw new IOException("Could not seekToPreviousRow " + this + " to key " + originalKey, ioe); } } @Override public boolean seekToLastRow() throws IOException { Optional lastRow = reader.getLastRowKey(); if (!lastRow.isPresent()) { return false; } Cell seekKey = PrivateCellUtil.createFirstOnRow(lastRow.get()); if (seek(seekKey)) { return true; } else { return seekToPreviousRow(seekKey); } } @Override public boolean backwardSeek(Cell key) throws IOException { seek(key); if (cur == null || getComparator().compareRows(cur, key) > 0) { return seekToPreviousRow(key); } return 
true; } @Override public Cell getNextIndexedKey() { return hfs.getNextIndexedKey(); } @Override public void shipped() throws IOException { this.hfs.shipped(); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy