/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.prestosql.plugin.accumulo.index;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimap;
import io.airlift.concurrent.BoundedExecutor;
import io.airlift.log.Logger;
import io.airlift.units.Duration;
import io.prestosql.plugin.accumulo.model.AccumuloColumnConstraint;
import io.prestosql.plugin.accumulo.model.TabletSplitMetadata;
import io.prestosql.plugin.accumulo.serializers.AccumuloRowSerializer;
import io.prestosql.spi.PrestoException;
import io.prestosql.spi.connector.ConnectorSession;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.hadoop.io.Text;
import javax.annotation.PreDestroy;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletionService;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import static com.google.common.base.Preconditions.checkArgument;
import static io.airlift.concurrent.Threads.daemonThreadsNamed;
import static io.prestosql.plugin.accumulo.AccumuloClient.getRangesFromDomain;
import static io.prestosql.plugin.accumulo.AccumuloErrorCode.UNEXPECTED_ACCUMULO_ERROR;
import static io.prestosql.plugin.accumulo.conf.AccumuloSessionProperties.getIndexCardinalityCachePollingDuration;
import static io.prestosql.plugin.accumulo.conf.AccumuloSessionProperties.getIndexSmallCardThreshold;
import static io.prestosql.plugin.accumulo.conf.AccumuloSessionProperties.getIndexThreshold;
import static io.prestosql.plugin.accumulo.conf.AccumuloSessionProperties.getNumIndexRowsPerSplit;
import static io.prestosql.plugin.accumulo.conf.AccumuloSessionProperties.isIndexMetricsEnabled;
import static io.prestosql.plugin.accumulo.conf.AccumuloSessionProperties.isIndexShortCircuitEnabled;
import static io.prestosql.plugin.accumulo.conf.AccumuloSessionProperties.isOptimizeIndexEnabled;
import static io.prestosql.plugin.accumulo.index.Indexer.CARDINALITY_CQ_AS_TEXT;
import static io.prestosql.plugin.accumulo.index.Indexer.METRICS_TABLE_ROWID_AS_TEXT;
import static io.prestosql.plugin.accumulo.index.Indexer.METRICS_TABLE_ROWS_CF_AS_TEXT;
import static io.prestosql.plugin.accumulo.index.Indexer.getIndexTableName;
import static io.prestosql.plugin.accumulo.index.Indexer.getMetricsTableName;
import static io.prestosql.spi.StandardErrorCode.FUNCTION_IMPLEMENTATION_ERROR;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.Executors.newCachedThreadPool;
/**
* Class to assist the Presto connector, and possibly external applications,
* in leveraging the secondary index built by the {@link Indexer}.
* Leverages the {@link ColumnCardinalityCache} to assist in retrieving row IDs.
* Currently tightly coupled to the Presto connector APIs.
*/
public class IndexLookup
{
private static final Logger LOG = Logger.get(IndexLookup.class);
private static final Range METRICS_TABLE_ROWID_RANGE = new Range(METRICS_TABLE_ROWID_AS_TEXT);
private final ColumnCardinalityCache cardinalityCache;
private final Connector connector;
private final ExecutorService coreExecutor;
private final BoundedExecutor executorService;
@Inject
public IndexLookup(Connector connector, ColumnCardinalityCache cardinalityCache)
{
this.connector = requireNonNull(connector, "connector is null");
this.cardinalityCache = requireNonNull(cardinalityCache, "cardinalityCache is null");
// Create a bounded executor with a pool size at 4x number of processors
this.coreExecutor = newCachedThreadPool(daemonThreadsNamed("cardinality-lookup-%s"));
this.executorService = new BoundedExecutor(coreExecutor, 4 * Runtime.getRuntime().availableProcessors());
}
@PreDestroy
public void shutdown()
{
coreExecutor.shutdownNow();
}
/**
* Scans the index table, applying the index based on the given column constraints to return a set of tablet splits.
*
* If this function returns true, the output parameter tabletSplits contains a list of TabletSplitMetadata objects.
* These in turn contain a collection of Ranges containing the exact row IDs determined using the index.
*
* If this function returns false, the secondary index should not be used. In this case,
* either the accumulo session has disabled secondary indexing,
* or the number of row IDs that would be used by the secondary index is greater than the configured threshold
* (again retrieved from the session).
*
* @param schema Schema name
* @param table Table name
* @param session Current client session
* @param constraints All column constraints (this method will filter out those on non-indexed columns)
* @param rowIdRanges Collection of Accumulo ranges based on any predicate against a record key
* @param tabletSplits Output parameter containing the bundles of row IDs determined by the use of the index.
* @param serializer Instance of a row serializer
* @param auths Scan-time authorizations
* @return True if the tablet splits are valid and should be used, false otherwise
* @throws Exception If something bad happens. What are the odds?
*/
public boolean applyIndex(
String schema,
String table,
ConnectorSession session,
Collection<AccumuloColumnConstraint> constraints,
Collection<Range> rowIdRanges,
List<TabletSplitMetadata> tabletSplits,
AccumuloRowSerializer serializer,
Authorizations auths)
throws Exception
{
// Early out if index is disabled
if (!isOptimizeIndexEnabled(session)) {
LOG.debug("Secondary index is disabled");
return false;
}
LOG.debug("Secondary index is enabled");
// Collect Accumulo ranges for each indexed column constraint
Multimap<AccumuloColumnConstraint, Range> constraintRanges = getIndexedConstraintRanges(constraints, serializer);
// If there are no constraints on an indexed column, we again will bail out
if (constraintRanges.isEmpty()) {
LOG.debug("Query contains no constraints on indexed columns, skipping secondary index");
return false;
}
// If metrics are not enabled
if (!isIndexMetricsEnabled(session)) {
LOG.debug("Use of index metrics is disabled");
// Get the ranges via the index table
List<Range> indexRanges = getIndexRanges(getIndexTableName(schema, table), constraintRanges, rowIdRanges, auths);
if (!indexRanges.isEmpty()) {
// Bin the ranges into TabletMetadataSplits and return true to use the tablet splits
binRanges(getNumIndexRowsPerSplit(session), indexRanges, tabletSplits);
LOG.debug("Number of splits for %s.%s is %d with %d ranges", schema, table, tabletSplits.size(), indexRanges.size());
}
else {
LOG.debug("Query would return no results, returning empty list of splits");
}
return true;
}
else {
LOG.debug("Use of index metrics is enabled");
// Get ranges using the metrics
return getRangesWithMetrics(session, schema, table, constraintRanges, rowIdRanges, tabletSplits, auths);
}
}
private static Multimap<AccumuloColumnConstraint, Range> getIndexedConstraintRanges(Collection<AccumuloColumnConstraint> constraints, AccumuloRowSerializer serializer)
{
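// Convert each indexed column's predicate domain into Accumulo Range objects over the index table;
// for example, a simple range predicate on an indexed column typically maps to a single bounded Range of serialized values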
ImmutableListMultimap.Builder<AccumuloColumnConstraint, Range> builder = ImmutableListMultimap.builder();
for (AccumuloColumnConstraint columnConstraint : constraints) {
if (columnConstraint.isIndexed()) {
for (Range range : getRangesFromDomain(columnConstraint.getDomain(), serializer)) {
builder.put(columnConstraint, range);
}
}
else {
LOG.warn("Query contains constraint on non-indexed column %s. Is it worth indexing?", columnConstraint.getName());
}
}
return builder.build();
}
private boolean getRangesWithMetrics(
ConnectorSession session,
String schema,
String table,
Multimap<AccumuloColumnConstraint, Range> constraintRanges,
Collection<Range> rowIdRanges,
List<TabletSplitMetadata> tabletSplits,
Authorizations auths)
throws Exception
{
String metricsTable = getMetricsTableName(schema, table);
long numRows = getNumRowsInTable(metricsTable, auths);
// Get the cardinalities from the metrics table
Multimap<Long, AccumuloColumnConstraint> cardinalities;
if (isIndexShortCircuitEnabled(session)) {
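// Short circuit: provide a non-zero early-return threshold (numRows * small-cardinality threshold)
// so the cardinality lookup may return early; the else branch disables this by passing 0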
cardinalities = cardinalityCache.getCardinalities(
schema,
table,
auths,
constraintRanges,
(long) (numRows * getIndexSmallCardThreshold(session)),
getIndexCardinalityCachePollingDuration(session));
}
else {
// disable short circuit using 0
cardinalities = cardinalityCache.getCardinalities(schema, table, auths, constraintRanges, 0, new Duration(0, TimeUnit.MILLISECONDS));
}
Optional<Entry<Long, AccumuloColumnConstraint>> entry = cardinalities.entries().stream().findFirst();
if (!entry.isPresent()) {
return false;
}
Entry<Long, AccumuloColumnConstraint> lowestCardinality = entry.get();
String indexTable = getIndexTableName(schema, table);
double threshold = getIndexThreshold(session);
List<Range> indexRanges;
// If the smallest cardinality in our list is above the lowest cardinality threshold,
// we should look at intersecting the row ID ranges to try and get under the threshold.
if (smallestCardAboveThreshold(session, numRows, lowestCardinality.getKey())) {
// If we only have one column, we can skip the intersection process and just check the index threshold
if (cardinalities.size() == 1) {
long numEntries = lowestCardinality.getKey();
double ratio = ((double) numEntries / (double) numRows);
LOG.debug("Use of index would scan %s of %s rows, ratio %s. Threshold %2f, Using for index table? %s", numEntries, numRows, ratio, threshold, ratio < threshold);
if (ratio >= threshold) {
return false;
}
}
// Else, get the intersection of all row IDs for all column constraints
LOG.debug("%d indexed columns, intersecting ranges", constraintRanges.size());
indexRanges = getIndexRanges(indexTable, constraintRanges, rowIdRanges, auths);
LOG.debug("Intersection results in %d ranges from secondary index", indexRanges.size());
}
else {
// Else, we don't need to intersect the columns and we can just use the column with the lowest cardinality,
// so get all those row IDs in a set of ranges.
LOG.debug("Not intersecting columns, using column with lowest cardinality ");
ImmutableMultimap.Builder<AccumuloColumnConstraint, Range> lcBldr = ImmutableMultimap.builder();
lcBldr.putAll(lowestCardinality.getValue(), constraintRanges.get(lowestCardinality.getValue()));
indexRanges = getIndexRanges(indexTable, lcBldr.build(), rowIdRanges, auths);
}
if (indexRanges.isEmpty()) {
LOG.debug("Query would return no results, returning empty list of splits");
return true;
}
// Okay, we now check how many rows we would scan by using the index vs. the overall number
// of rows
long numEntries = indexRanges.size();
double ratio = (double) numEntries / (double) numRows;
LOG.debug("Use of index would scan %d of %d rows, ratio %s. Threshold %2f, Using for table? %b", numEntries, numRows, ratio, threshold, ratio < threshold, table);
// If the percentage of scanned rows, the ratio, less than the configured threshold
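// For example, 1,000 index entries against 100,000 rows gives a ratio of 0.01, which would fall
// below a threshold of, say, 0.2, so the index would be used (illustrative numbers only)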
if (ratio < threshold) {
// Bin the ranges into TabletMetadataSplits and return true to use the tablet splits
binRanges(getNumIndexRowsPerSplit(session), indexRanges, tabletSplits);
LOG.debug("Number of splits for %s.%s is %d with %d ranges", schema, table, tabletSplits.size(), indexRanges.size());
return true;
}
else {
// We are going to do too much work to use the secondary index, so return false
return false;
}
}
private static boolean smallestCardAboveThreshold(ConnectorSession session, long numRows, long smallestCardinality)
{
double ratio = ((double) smallestCardinality / (double) numRows);
double threshold = getIndexSmallCardThreshold(session);
LOG.debug("Smallest cardinality is %d, num rows is %d, ratio is %2f with threshold of %f", smallestCardinality, numRows, ratio, threshold);
return ratio > threshold;
}
private long getNumRowsInTable(String metricsTable, Authorizations auths)
throws TableNotFoundException
{
// Create scanner against the metrics table, pulling the special column and the rows column
Scanner scanner = connector.createScanner(metricsTable, auths);
scanner.setRange(METRICS_TABLE_ROWID_RANGE);
scanner.fetchColumn(METRICS_TABLE_ROWS_CF_AS_TEXT, CARDINALITY_CQ_AS_TEXT);
// Scan the entry and get the number of rows
long numRows = -1;
for (Entry<Key, Value> entry : scanner) {
if (numRows > 0) {
throw new PrestoException(FUNCTION_IMPLEMENTATION_ERROR, "Should have received only one entry when scanning for number of rows in metrics table");
}
numRows = Long.parseLong(entry.getValue().toString());
}
scanner.close();
LOG.debug("Number of rows in table is %d", numRows);
return numRows;
}
private List<Range> getIndexRanges(String indexTable, Multimap<AccumuloColumnConstraint, Range> constraintRanges, Collection<Range> rowIDRanges, Authorizations auths)
{
Set<Range> finalRanges = new HashSet<>();
// For each column/constraint pair we submit a task to scan the index ranges
List<Future<Set<Range>>> tasks = new ArrayList<>();
CompletionService<Set<Range>> executor = new ExecutorCompletionService<>(executorService);
for (Entry<AccumuloColumnConstraint, Collection<Range>> constraintEntry : constraintRanges.asMap().entrySet()) {
tasks.add(executor.submit(() -> {
// Create a batch scanner against the index table, setting the ranges
BatchScanner scan = connector.createBatchScanner(indexTable, auths, 10);
scan.setRanges(constraintEntry.getValue());
// Fetch the column family for this specific column
scan.fetchColumnFamily(new Text(Indexer.getIndexColumnFamily(constraintEntry.getKey().getFamily().getBytes(UTF_8), constraintEntry.getKey().getQualifier().getBytes(UTF_8)).array()));
// For each entry in the scanner
Text tmpQualifier = new Text();
Set<Range> columnRanges = new HashSet<>();
for (Entry<Key, Value> entry : scan) {
entry.getKey().getColumnQualifier(tmpQualifier);
// Add to our column ranges if it is in one of the row ID ranges
if (inRange(tmpQualifier, rowIDRanges)) {
columnRanges.add(new Range(tmpQualifier));
}
}
LOG.debug("Retrieved %d ranges for index column %s", columnRanges.size(), constraintEntry.getKey().getName());
scan.close();
return columnRanges;
}));
}
tasks.forEach(future ->
{
try {
// If finalRanges is empty, we have not yet added any column ranges
if (finalRanges.isEmpty()) {
finalRanges.addAll(future.get());
}
else {
// Retain only the row IDs for this column that have already been added
// This is your set intersection operation!
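// For example, if one column yielded row IDs {r1, r2, r3} and this column yields {r2, r3, r4},
// only {r2, r3} satisfy both constraints (AND semantics); the IDs here are illustrative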
finalRanges.retainAll(future.get());
}
}
catch (ExecutionException | InterruptedException e) {
if (e instanceof InterruptedException) {
Thread.currentThread().interrupt();
}
throw new PrestoException(UNEXPECTED_ACCUMULO_ERROR, "Exception when getting index ranges", e.getCause());
}
});
return ImmutableList.copyOf(finalRanges);
}
private static void binRanges(int numRangesPerBin, List<Range> splitRanges, List<TabletSplitMetadata> prestoSplits)
{
checkArgument(numRangesPerBin > 0, "number of ranges per bin must be greater than zero");
int toAdd = splitRanges.size();
int fromIndex = 0;
int toIndex = Math.min(toAdd, numRangesPerBin);
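// For example, 25 ranges with numRangesPerBin = 10 produce three splits of 10, 10, and 5 ranges (illustrative numbers only)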
do {
// Add the sublist of range handles
// Use an empty location because we are binning multiple Ranges spread across many tablet servers
prestoSplits.add(new TabletSplitMetadata(Optional.empty(), splitRanges.subList(fromIndex, toIndex)));
toAdd -= toIndex - fromIndex;
fromIndex = toIndex;
toIndex += Math.min(toAdd, numRangesPerBin);
}
while (toAdd > 0);
}
/**
* Gets a Boolean value indicating if the given value is in one of the Ranges in the given collection
*
* @param text Text object to check against the Range collection
* @param ranges Ranges to look into
* @return True if the text object is in one of the ranges, false otherwise
*/
private static boolean inRange(Text text, Collection<Range> ranges)
{
Key kCq = new Key(text);
return ranges.stream().anyMatch(r -> !r.beforeStartKey(kCq) && !r.afterEndKey(kCq));
}
}