All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.bigtable.hbase.adapters.read.ScanAdapter Maven / Gradle / Ivy

Go to download

This project contains artifacts that adapt the Bigtable client to work with HBase.

There is a newer version: 2.14.8
Show newest version
/*
 * Copyright 2015 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.cloud.bigtable.hbase.adapters.read;

import static com.google.cloud.bigtable.data.v2.models.Filters.FILTERS;

import com.google.api.core.InternalApi;
import com.google.bigtable.v2.RowSet;
import com.google.cloud.bigtable.data.v2.models.Filters;
import com.google.cloud.bigtable.data.v2.models.Filters.ChainFilter;
import com.google.cloud.bigtable.data.v2.models.Filters.InterleaveFilter;
import com.google.cloud.bigtable.data.v2.models.Filters.TimestampRangeFilter;
import com.google.cloud.bigtable.data.v2.models.Query;
import com.google.cloud.bigtable.hbase.BigtableExtendedScan;
import com.google.cloud.bigtable.hbase.BigtableFixedProtoScan;
import com.google.cloud.bigtable.hbase.adapters.filters.FilterAdapter;
import com.google.cloud.bigtable.hbase.adapters.filters.FilterAdapterContext;
import com.google.cloud.bigtable.hbase.util.RowKeyWrapper;
import com.google.cloud.bigtable.hbase.util.TimestampConverter;
import com.google.common.base.Optional;
import com.google.common.collect.BoundType;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import com.google.common.collect.TreeRangeSet;
import com.google.protobuf.ByteString;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.io.TimeRange;

/**
 * An adapter for {@link Scan} operation that makes use of the proto filter language.
 *
 * 

For internal use only - public for technical reasons. */ @InternalApi("For internal usage only") public class ScanAdapter implements ReadOperationAdapter { private static final int UNSET_MAX_RESULTS_PER_COLUMN_FAMILY = -1; private static final boolean OPEN_CLOSED_AVAILABLE = isOpenClosedAvailable(); private static final boolean LIMIT_AVAILABLE = isLimitAvailable(); private static final boolean REVERSED_AVAILABLE = isReversedAvailable(); /** * HBase supports include(Stop|Start)Row only at 1.4.0+, so check to make sure that the HBase * runtime dependency supports this feature. Specifically, Beam uses HBase 1.2.0. */ private static boolean isOpenClosedAvailable() { try { new Scan().includeStopRow(); return true; } catch (NoSuchMethodError e) { return false; } } private static boolean isLimitAvailable() { try { new Scan().setLimit(1); return true; } catch (NoSuchMethodError e) { return false; } } private static boolean isReversedAvailable() { try { new Scan().setReversed(true); return true; } catch (NoSuchMethodError e) { return false; } } private final FilterAdapter filterAdapter; private final RowRangeAdapter rowRangeAdapter; /** * Constructor for ScanAdapter. * * @param filterAdapter a {@link FilterAdapter} object. * @param rowRangeAdapter a {@link RowRangeAdapter} object. */ public ScanAdapter(FilterAdapter filterAdapter, RowRangeAdapter rowRangeAdapter) { this.filterAdapter = filterAdapter; this.rowRangeAdapter = rowRangeAdapter; } /** * throwIfUnsupportedScan. * * @param scan a {@link Scan} object. */ public void throwIfUnsupportedScan(Scan scan) { if (scan.getFilter() != null) { filterAdapter.throwIfUnsupportedFilter(scan, scan.getFilter()); } if (scan.getMaxResultsPerColumnFamily() != UNSET_MAX_RESULTS_PER_COLUMN_FAMILY) { throw new UnsupportedOperationException( "Limiting of max results per column family is not supported."); } } /** * Given a {@link Scan}, build a {@link Filters.Filter} that include matching columns * * @param scan a {@link Scan} object. 
* @param hooks a {@link ReadHooks} object. * @return a {@link Filters.Filter} object. */ public Filters.Filter buildFilter(Scan scan, ReadHooks hooks) { ChainFilter chain = FILTERS.chain(); buildStartFilter(scan).forEach(chain::filter); Optional userFilter = createUserFilter(scan, hooks); if (userFilter.isPresent()) { chain.filter(userFilter.get()); } Optional colFamilyTimeFilter = createColFamilyTimeRange(scan); if (colFamilyTimeFilter.isPresent()) { chain.filter(colFamilyTimeFilter.get()); } return chain; } private List buildStartFilter(Scan scan) { List filterList = new ArrayList<>(); Optional familyFilter = createColumnFamilyFilter(scan); if (familyFilter.isPresent()) { filterList.add(familyFilter.get()); } boolean hasTimeRange = false; if (scan.getTimeRange() != null && !scan.getTimeRange().isAllTime()) { filterList.add(createTimeRangeFilter(scan.getTimeRange())); hasTimeRange = true; } // maxVersions should appear as early as possible, but it must appear after timeRange if (scan.getMaxVersions() != Integer.MAX_VALUE) { int i = hasTimeRange ? 
filterList.size() : 0; filterList.add(i, createColumnLimitFilter(scan.getMaxVersions())); } return filterList; } /** {@inheritDoc} */ @Override public Query adapt(Scan scan, ReadHooks readHooks, Query query) { if (scan instanceof BigtableFixedProtoScan) { return Query.fromProto(((BigtableFixedProtoScan) scan).getRequest()); } else { throwIfUnsupportedScan(scan); query.reversed(scan.isReversed()); toByteStringRange(scan, query); query.filter(buildFilter(scan, readHooks)); if (LIMIT_AVAILABLE && scan.getLimit() > 0) { query.limit(scan.getLimit()); } return query; } } private void toByteStringRange(Scan scan, Query query) { RangeSet rangeSet = narrowRange(getRangeSet(scan), scan.getFilter()); rowRangeAdapter.rangeSetToByteStringRange(rangeSet, query); } private RangeSet getRangeSet(Scan scan) { if (scan instanceof BigtableExtendedScan) { RowSet rowSet = ((BigtableExtendedScan) scan).getRowSet(); return rowRangeAdapter.rowSetToRangeSet(rowSet); } else { RangeSet rangeSet = TreeRangeSet.create(); if (scan.isGetScan()) { rangeSet.add(Range.singleton(new RowKeyWrapper(ByteString.copyFrom(scan.getStartRow())))); return rangeSet; } ByteString startRow; BoundType startBound; ByteString stopRow; BoundType stopBound; // For reverse scans, HBase wants the lexicographically greater key to be the start. But // java-bigtable keeps the bounds the same as forward scans. So this will flip the ranges for // reverse scans. Please note that prior to hbase 1.4 the only range bound that was available // was [start, stop). if (REVERSED_AVAILABLE && scan.isReversed()) { startRow = ByteString.copyFrom(scan.getStopRow()); startBound = (!OPEN_CLOSED_AVAILABLE || !scan.includeStopRow()) ? BoundType.OPEN : BoundType.CLOSED; stopRow = ByteString.copyFrom(scan.getStartRow()); stopBound = (!OPEN_CLOSED_AVAILABLE || scan.includeStartRow()) ? 
BoundType.CLOSED : BoundType.OPEN; } else { startRow = ByteString.copyFrom(scan.getStartRow()); startBound = (!OPEN_CLOSED_AVAILABLE || scan.includeStartRow()) ? BoundType.CLOSED : BoundType.OPEN; stopRow = ByteString.copyFrom(scan.getStopRow()); stopBound = (!OPEN_CLOSED_AVAILABLE || !scan.includeStopRow()) ? BoundType.OPEN : BoundType.CLOSED; } rangeSet.add(rowRangeAdapter.boundedRange(startBound, startRow, stopBound, stopRow)); return rangeSet; } } private static ByteString quoteRegex(byte[] unquoted) { try { return ReaderExpressionHelper.quoteRegularExpression(unquoted); } catch (IOException e) { throw new IllegalStateException("IOException when writing to ByteArrayOutputStream", e); } } private Optional createUserFilter(Scan scan, ReadHooks hooks) { if (scan.getFilter() == null) { return Optional.absent(); } try { return filterAdapter.adaptFilter(new FilterAdapterContext(scan, hooks), scan.getFilter()); } catch (IOException ioe) { throw new RuntimeException("Failed to adapt filter", ioe); } } private RangeSet narrowRange(RangeSet rangeSet, Filter filter) { if (filter == null) { return rangeSet; } RangeSet filterRangeSet = filterAdapter.getIndexScanHint(filter); if (filterRangeSet.encloses(Range.all())) { return rangeSet; } rangeSet.removeAll(filterRangeSet.complement()); return rangeSet; } private Filters.Filter createColumnQualifierFilter(byte[] unquotedQualifier) { return FILTERS.qualifier().regex(quoteRegex(unquotedQualifier)); } private Filters.Filter createFamilyFilter(byte[] familyName) { return FILTERS.family().exactMatch(new String(familyName)); } private Filters.Filter createColumnLimitFilter(int maxVersionsPerColumn) { return FILTERS.limit().cellsPerColumn(maxVersionsPerColumn); } private Filters.Filter createTimeRangeFilter(TimeRange timeRange) { TimestampRangeFilter rangeBuilder = FILTERS.timestamp().range(); rangeBuilder.startClosed(convertUnits(timeRange.getMin())); if (timeRange.getMax() != Long.MAX_VALUE) { 
rangeBuilder.endOpen(convertUnits(timeRange.getMax())); } return rangeBuilder; } private long convertUnits(long hbaseUnits) { return TimestampConverter.hbase2bigtable(hbaseUnits); } private Optional createColumnFamilyFilter(Scan scan) { if (!scan.hasFamilies()) { return Optional.absent(); } // Build a filter of the form: // (fam1 | (qual1 + qual2 + qual3)) + (fam2 | qual1) + (fam3) InterleaveFilter interleave = FILTERS.interleave(); Map> familyMap = scan.getFamilyMap(); for (Map.Entry> entry : familyMap.entrySet()) { Filters.Filter familyFilter = createFamilyFilter(entry.getKey()); NavigableSet qualifiers = entry.getValue(); // Add a qualifier filter for each specified qualifier: if (qualifiers != null) { InterleaveFilter columnFilters = FILTERS.interleave(); for (byte[] qualifier : qualifiers) { columnFilters.filter(createColumnQualifierFilter(qualifier)); } // Build filter of the form "family | (qual1 + qual2 + qual3)" interleave.filter(FILTERS.chain().filter(familyFilter).filter(columnFilters)); } else { interleave.filter(familyFilter); } } return Optional.of(interleave); } private Optional createColFamilyTimeRange(Scan scan) { if (scan.getColumnFamilyTimeRange().isEmpty()) { return Optional.absent(); } // Builds filter of the form // ("family1" & "rangeStart, rangeEnd") | ("family2" & "rangeStart2, rangeEnd2") InterleaveFilter interleave = FILTERS.interleave(); Map range = scan.getColumnFamilyTimeRange(); for (Map.Entry entry : range.entrySet()) { interleave.filter( FILTERS .chain() .filter(createFamilyFilter(entry.getKey())) .filter(createTimeRangeFilter(entry.getValue()))); } return Optional.of(interleave); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy