All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.accumulo.tserver.compaction.strategies.TooManyDeletesCompactionStrategy Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.accumulo.tserver.compaction.strategies;

import static org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.DELETES_STAT;
import static org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer.TOTAL_STAT;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Predicate;

import org.apache.accumulo.core.client.rfile.RFile.WriterOptions;
import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
import org.apache.accumulo.core.client.summary.Summary;
import org.apache.accumulo.core.client.summary.summarizers.DeletesSummarizer;
import org.apache.accumulo.core.metadata.schema.DataFileValue;
import org.apache.accumulo.server.fs.FileRef;
import org.apache.accumulo.tserver.compaction.CompactionPlan;
import org.apache.accumulo.tserver.compaction.DefaultCompactionStrategy;
import org.apache.accumulo.tserver.compaction.MajorCompactionRequest;

/**
 * This compaction strategy works in concert with the {@link DeletesSummarizer}. Using the
 * statistics from DeleteSummarizer this strategy will compact all files in a table when the number
 * of deletes/non-deletes exceeds a threshold.
 *
 * 

* This strategy has two options. First the {@value #THRESHOLD_OPT} option allows setting the point * at which a compaction will be triggered. This options defaults to {@value #THRESHOLD_OPT_DEFAULT} * and must be in the range (0.0, 1.0]. The second option is {@value #PROCEED_ZERO_NO_SUMMARY_OPT} * which determines if the strategy should proceed when a bulk imported file has no summary * information. * *

* If the delete summarizer was configured on a table that already had files, then those files will * have not summary information. This strategy can still proceed in this situation. It will fall * back to using Accumulo's estimated entries per file in this case. For the files without summary * information the estimated number of deletes will be zero. This fall back method will * underestimate deletes which will not lead to false positives, except for the case of bulk * imported files. Accumulo estimates that bulk imported files have zero entires. The second option * {@value #PROCEED_ZERO_NO_SUMMARY_OPT} determines if this strategy should proceed when it sees * bulk imported files that do not have summary data. This option defaults to * {@value #PROCEED_ZERO_NO_SUMMARY_OPT_DEFAULT}. * *

* Bulk files can be generated with summary information by calling * {@code AccumuloFileOutputFormat#setSummarizers(JobConf, SummarizerConfiguration...)} or * {@link WriterOptions#withSummarizers(SummarizerConfiguration...)} * *

* When this strategy does not decide to compact based on the number of deletes, then it will defer * the decision to the {@link DefaultCompactionStrategy}. * *

* Configuring this compaction strategy for a table will cause it to always queue compactions, even * though it may not decide to compact. These queued compactions may show up on the Accumulo monitor * page. This is because summary data can not be read until after compaction is queued and dequeued. * When the compaction is dequeued it can then decide not to compact. See ACCUMULO-4573 * * @since 2.0.0 */ public class TooManyDeletesCompactionStrategy extends DefaultCompactionStrategy { private boolean shouldCompact = false; private double threshold; private boolean proceed_bns; /** * This option should be a floating point number between 1 and 0. */ public static final String THRESHOLD_OPT = "threshold"; /** * The default threshold. */ public static final String THRESHOLD_OPT_DEFAULT = ".25"; public static final String PROCEED_ZERO_NO_SUMMARY_OPT = "proceed_zero_no_summary"; public static final String PROCEED_ZERO_NO_SUMMARY_OPT_DEFAULT = "false"; @Override public void init(Map options) { this.threshold = Double.parseDouble(options.getOrDefault(THRESHOLD_OPT, THRESHOLD_OPT_DEFAULT)); if (threshold <= 0.0 || threshold > 1.0) { throw new IllegalArgumentException( "Threshold must be in range (0.0, 1.0], saw : " + threshold); } this.proceed_bns = Boolean.parseBoolean( options.getOrDefault(PROCEED_ZERO_NO_SUMMARY_OPT, PROCEED_ZERO_NO_SUMMARY_OPT_DEFAULT)); } @Override public boolean shouldCompact(MajorCompactionRequest request) { Collection configuredSummarizers = SummarizerConfiguration.fromTableProperties(request.getTableProperties()); // check if delete summarizer is configured for table if (configuredSummarizers.stream().map(sc -> sc.getClassName()) .anyMatch(cn -> cn.equals(DeletesSummarizer.class.getName()))) { // This is called before gatherInformation, so need to always queue for compaction until // context // can be gathered. Also its not safe to request summary // information here as its a blocking operation. Blocking operations are not allowed in // shouldCompact. return true; } else { return super.shouldCompact(request); } } @Override public void gatherInformation(MajorCompactionRequest request) throws IOException { super.gatherInformation(request); Predicate summarizerPredicate = conf -> conf.getClassName().equals(DeletesSummarizer.class.getName()) && conf.getOptions().isEmpty(); long total = 0; long deletes = 0; for (Entry entry : request.getFiles().entrySet()) { Collection

summaries = request.getSummaries(Collections.singleton(entry.getKey()), summarizerPredicate); if (summaries.size() == 1) { Summary summary = summaries.iterator().next(); total += summary.getStatistics().get(TOTAL_STAT); deletes += summary.getStatistics().get(DELETES_STAT); } else { long numEntries = entry.getValue().getNumEntries(); if (numEntries == 0 && !proceed_bns) { shouldCompact = false; return; } else { // no summary data so use Accumulo's estimate of total entries in file total += entry.getValue().getNumEntries(); } } } long nonDeletes = total - deletes; if (nonDeletes >= 0) { // check nonDeletes >= 0 because if this is not true then its clear evidence that the // estimates are off double ratio = deletes / (double) nonDeletes; shouldCompact = ratio >= threshold; } else { shouldCompact = false; } } @Override public CompactionPlan getCompactionPlan(MajorCompactionRequest request) { if (shouldCompact) { CompactionPlan cp = new CompactionPlan(); cp.inputFiles.addAll(request.getFiles().keySet()); return cp; } // fall back to default return super.getCompactionPlan(request); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy