All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.org.apache.hadoop.hbase.filter.TimestampsFilter Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.filter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.TreeSet;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.PrivateCellUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.shaded.protobuf.generated.FilterProtos;

import org.apache.hbase.thirdparty.com.google.common.base.Preconditions;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;

/**
 * Filter that returns only cells whose timestamp (version) is
 * in the specified list of timestamps (versions).
 * 

* Note: Use of this filter overrides any time range/time stamp * options specified using {@link org.apache.hadoop.hbase.client.Get#setTimeRange(long, long)}, * {@link org.apache.hadoop.hbase.client.Scan#setTimeRange(long, long)}, * {@link org.apache.hadoop.hbase.client.Get#setTimestamp(long)}, * or {@link org.apache.hadoop.hbase.client.Scan#setTimestamp(long)}. */ @InterfaceAudience.Public public class TimestampsFilter extends FilterBase { private final boolean canHint; TreeSet timestamps; private static final int MAX_LOG_TIMESTAMPS = 5; // Used during scans to hint the scan to stop early // once the timestamps fall below the minTimestamp. long minTimestamp = Long.MAX_VALUE; /** * Constructor for filter that retains only the specified timestamps in the list. * @param timestamps */ public TimestampsFilter(List timestamps) { this(timestamps, false); } /** * Constructor for filter that retains only those * cells whose timestamp (version) is in the specified * list of timestamps. * * @param timestamps list of timestamps that are wanted. * @param canHint should the filter provide a seek hint? This can skip * past delete tombstones, so it should only be used when that * is not an issue ( no deletes, or don't care if data * becomes visible) */ public TimestampsFilter(List timestamps, boolean canHint) { for (Long timestamp : timestamps) { Preconditions.checkArgument(timestamp >= 0, "must be positive %s", timestamp); } this.canHint = canHint; this.timestamps = new TreeSet<>(timestamps); init(); } /** * @return the list of timestamps */ public List getTimestamps() { List list = new ArrayList<>(timestamps.size()); list.addAll(timestamps); return list; } private void init() { if (this.timestamps.size() > 0) { minTimestamp = this.timestamps.first(); } } /** * Gets the minimum timestamp requested by filter. * @return minimum timestamp requested by filter. */ public long getMin() { return minTimestamp; } @Override public boolean filterRowKey(Cell cell) throws IOException { // Impl in FilterBase might do unnecessary copy for Off heap backed Cells. return false; } @Deprecated @Override public ReturnCode filterKeyValue(final Cell c) { return filterCell(c); } @Override public ReturnCode filterCell(final Cell c) { if (this.timestamps.contains(c.getTimestamp())) { return ReturnCode.INCLUDE; } else if (c.getTimestamp() < minTimestamp) { // The remaining versions of this column are guaranteed // to be lesser than all of the other values. return ReturnCode.NEXT_COL; } return canHint ? ReturnCode.SEEK_NEXT_USING_HINT : ReturnCode.SKIP; } /** * Pick the next cell that the scanner should seek to. Since this can skip any number of cells * any of which can be a delete this can resurect old data. * * The method will only be used if canHint was set to true while creating the filter. * * @throws IOException This will never happen. */ @Override public Cell getNextCellHint(Cell currentCell) throws IOException { if (!canHint) { return null; } Long nextTimestampObject = timestamps.lower(currentCell.getTimestamp()); if (nextTimestampObject == null) { // This should only happen if the current column's // timestamp is below the last one in the list. // // It should never happen as the filterCell should return NEXT_COL // but it's always better to be extra safe and protect against future // behavioral changes. return PrivateCellUtil.createLastOnRowCol(currentCell); } // Since we know the nextTimestampObject isn't null here there must still be // timestamps that can be included. Cast the Long to a long and return the // a cell with the current row/cf/col and the next found timestamp. long nextTimestamp = nextTimestampObject; return PrivateCellUtil.createFirstOnRowColTS(currentCell, nextTimestamp); } public static Filter createFilterFromArguments(ArrayList filterArguments) { ArrayList timestamps = new ArrayList<>(filterArguments.size()); for (int i = 0; ibytes * @see #toByteArray */ public static TimestampsFilter parseFrom(final byte[] pbBytes) throws DeserializationException { FilterProtos.TimestampsFilter proto; try { proto = FilterProtos.TimestampsFilter.parseFrom(pbBytes); } catch (InvalidProtocolBufferException e) { throw new DeserializationException(e); } return new TimestampsFilter(proto.getTimestampsList(), proto.hasCanHint() && proto.getCanHint()); } /** * @param o the other filter to compare with * @return true if and only if the fields of the filter that are serialized * are equal to the corresponding fields in other. Used for testing. */ @Override boolean areSerializedFieldsEqual(Filter o) { if (o == this) return true; if (!(o instanceof TimestampsFilter)) return false; TimestampsFilter other = (TimestampsFilter)o; return this.getTimestamps().equals(other.getTimestamps()); } @Override public String toString() { return toString(MAX_LOG_TIMESTAMPS); } protected String toString(int maxTimestamps) { StringBuilder tsList = new StringBuilder(); int count = 0; for (Long ts : this.timestamps) { if (count >= maxTimestamps) { break; } ++count; tsList.append(ts.toString()); if (count < this.timestamps.size() && count < maxTimestamps) { tsList.append(", "); } } return String.format("%s (%d/%d): [%s] canHint: [%b]", this.getClass().getSimpleName(), count, this.timestamps.size(), tsList.toString(), canHint); } @Override public boolean equals(Object obj) { return obj instanceof Filter && areSerializedFieldsEqual((Filter) obj); } @Override public int hashCode() { return Objects.hash(getTimestamps()); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy