// org.apache.accumulo.tserver.compaction.strategies.ConfigurableCompactionStrategy (Maven / Gradle / Ivy)
// Go to download
// Show more of this group; show more artifacts with this name
// Show all versions of accumulo-tserver; show documentation
// The tablet server for Apache Accumulo to host tablets of data tables.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.tserver.compaction.strategies;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.accumulo.core.client.summary.SummarizerConfiguration;
import org.apache.accumulo.core.client.summary.Summary;
import org.apache.accumulo.core.compaction.CompactionSettings;
import org.apache.accumulo.core.conf.ConfigurationCopy;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.metadata.schema.DataFileValue;
import org.apache.accumulo.core.sample.impl.SamplerConfigurationImpl;
import org.apache.accumulo.server.fs.FileRef;
import org.apache.accumulo.tserver.compaction.CompactionPlan;
import org.apache.accumulo.tserver.compaction.CompactionStrategy;
import org.apache.accumulo.tserver.compaction.MajorCompactionRequest;
import org.apache.accumulo.tserver.compaction.WriteParameters;
import org.apache.hadoop.fs.Path;
/**
 * The compaction strategy used by the shell compact command.
 *
 * <p>
 * The strategy is configured through {@link #init(Map)} with options whose keys are
 * {@link CompactionSettings} names. Options fall into three groups:
 * <ul>
 * <li>file selection tests ({@code SF_*}), each of which adds a per-file criterion;</li>
 * <li>{@code MIN_FILES_OPT}, the minimum number of selected files required before a compaction is
 * requested (defaults to 1);</li>
 * <li>{@code OUTPUT_*} options, which are forwarded to the {@link WriteParameters} attached to the
 * resulting plan.</li>
 * </ul>
 *
 * <p>
 * A file is selected when every configured test accepts it, or when no tests are configured at
 * all.
 */
public class ConfigurableCompactionStrategy extends CompactionStrategy {

  /**
   * A single per-file selection criterion.
   */
  private abstract static class Test {
    // Do any work that blocks in this method. This method is not always called before
    // shouldCompact(). See CompactionStrategy javadocs.
    void gatherInformation(MajorCompactionRequest request) {}

    /**
     * @param file
     *          the candidate file and its metadata
     * @param request
     *          the compaction request the file belongs to
     * @return true if this test selects the file for compaction
     */
    abstract boolean shouldCompact(Entry<FileRef,DataFileValue> file,
        MajorCompactionRequest request);
  }

  /**
   * Selects files based on their summary data: files missing a summary for a configured
   * summarizer and/or files whose summary reports extra data, depending on the two flags passed to
   * the constructor.
   */
  private static class SummaryTest extends Test {

    private final boolean selectExtraSummary;
    private final boolean selectNoSummary;

    private boolean summaryConfigured = true;
    private boolean gatherCalled = false;

    // files that do not need compaction
    private Set<FileRef> okFiles = Collections.emptySet();

    public SummaryTest(boolean selectExtraSummary, boolean selectNoSummary) {
      this.selectExtraSummary = selectExtraSummary;
      this.selectNoSummary = selectNoSummary;
    }

    /**
     * Reads the summaries of every file in the request and records the files that do not need
     * compaction. If the table has no summarizers configured, only that fact is recorded.
     */
    @Override
    void gatherInformation(MajorCompactionRequest request) {
      gatherCalled = true;
      Collection<SummarizerConfiguration> configs =
          SummarizerConfiguration.fromTableProperties(request.getTableProperties());
      if (configs.isEmpty()) {
        summaryConfigured = false;
      } else {
        // Use a set for the membership checks done by the summary selector below.
        Set<SummarizerConfiguration> configsSet = configs instanceof Set
            ? (Set<SummarizerConfiguration>) configs : new HashSet<>(configs);
        okFiles = new HashSet<>();

        for (FileRef fref : request.getFiles().keySet()) {
          // Index this file's summaries by the summarizer that produced them.
          Map<SummarizerConfiguration,Summary> sMap = new HashMap<>();
          Collection<Summary> summaries = request.getSummaries(Collections.singletonList(fref),
              conf -> configsSet.contains(conf));
          for (Summary summary : summaries) {
            sMap.put(summary.getSummarizerConfiguration(), summary);
          }

          boolean needsCompaction = false;
          for (SummarizerConfiguration sc : configs) {
            Summary summary = sMap.get(sc);

            // The file has no summary for a configured summarizer.
            if (summary == null && selectNoSummary) {
              needsCompaction = true;
              break;
            }

            // The file's summary reports extra data.
            if (summary != null && summary.getFileStatistics().getExtra() > 0
                && selectExtraSummary) {
              needsCompaction = true;
              break;
            }
          }

          if (!needsCompaction) {
            okFiles.add(fref);
          }
        }
      }
    }

    /**
     * Without gathered information this selects every file whenever the table has at least one
     * summarizer configured. With gathered information it selects every file that was not recorded
     * as OK, and nothing at all if no summarizer was configured.
     */
    @Override
    public boolean shouldCompact(Entry<FileRef,DataFileValue> file,
        MajorCompactionRequest request) {

      if (!gatherCalled) {
        Collection<SummarizerConfiguration> configs =
            SummarizerConfiguration.fromTableProperties(request.getTableProperties());
        return !configs.isEmpty();
      }

      if (!summaryConfigured) {
        return false;
      }

      // It's possible the set of files could change between gather and now. So this will default
      // to compacting any files that are unknown.
      return !okFiles.contains(file.getKey());
    }
  }

  /**
   * Selects files that have no sample data for the table's sampler configuration.
   */
  private static class NoSampleTest extends Test {

    private Set<FileRef> filesWithSample = Collections.emptySet();
    private boolean samplingConfigured = true;
    private boolean gatherCalled = false;

    /**
     * Opens every file in the request and records the ones that return a sample for the table's
     * sampler configuration. If no sampler configuration is available, only that fact is
     * recorded.
     *
     * @throws UncheckedIOException
     *           if a file cannot be read
     */
    @Override
    void gatherInformation(MajorCompactionRequest request) {
      gatherCalled = true;

      SamplerConfigurationImpl sc = SamplerConfigurationImpl
          .newSamplerConfig(new ConfigurationCopy(request.getTableProperties()));
      if (sc == null) {
        samplingConfigured = false;
      } else {
        filesWithSample = new HashSet<>();
        for (FileRef fref : request.getFiles().keySet()) {
          try (FileSKVIterator reader = request.openReader(fref)) {
            if (reader.getSample(sc) != null) {
              filesWithSample.add(fref);
            }
          } catch (IOException e) {
            throw new UncheckedIOException(e);
          }
        }
      }
    }

    /**
     * Without gathered information this selects every file whenever a sampler configuration is
     * available. With gathered information it selects every file not recorded as having a sample,
     * and nothing at all if no sampler configuration was available.
     */
    @Override
    public boolean shouldCompact(Entry<FileRef,DataFileValue> file,
        MajorCompactionRequest request) {

      if (!gatherCalled) {
        SamplerConfigurationImpl sc = SamplerConfigurationImpl
            .newSamplerConfig(new ConfigurationCopy(request.getTableProperties()));
        return sc != null;
      }

      if (!samplingConfigured) {
        return false;
      }

      return !filesWithSample.contains(file.getKey());
    }
  }

  /**
   * Base for tests that compare a file's size against a configured size.
   */
  private abstract static class FileSizeTest extends Test {
    private final long esize;

    /**
     * @param s
     *          the size to compare against, as a decimal long
     * @throws NumberFormatException
     *           if {@code s} is not a parsable long
     */
    private FileSizeTest(String s) {
      this.esize = Long.parseLong(s);
    }

    @Override
    public boolean shouldCompact(Entry<FileRef,DataFileValue> file,
        MajorCompactionRequest request) {
      return shouldCompact(file.getValue().getSize(), esize);
    }

    /**
     * @param fsize
     *          the size reported for the file
     * @param esize
     *          the configured size
     * @return true if a file of size {@code fsize} is selected
     */
    public abstract boolean shouldCompact(long fsize, long esize);
  }

  /**
   * Base for tests that match a regular expression against a string derived from a file's path.
   * The whole string must match the pattern.
   */
  private abstract static class PatternPathTest extends Test {
    private final Pattern pattern;

    /**
     * @param p
     *          the regular expression to compile
     */
    private PatternPathTest(String p) {
      this.pattern = Pattern.compile(p);
    }

    @Override
    public boolean shouldCompact(Entry<FileRef,DataFileValue> file,
        MajorCompactionRequest request) {
      return pattern.matcher(getInput(file.getKey().path())).matches();
    }

    /**
     * @param path
     *          the path of the candidate file
     * @return the string the pattern is matched against
     */
    public abstract String getInput(Path path);
  }

  private final List<Test> tests = new ArrayList<>();
  // When true a file must pass every test; nothing in this class ever sets it to false.
  private boolean andTest = true;
  private int minFiles = 1;
  private final WriteParameters writeParams = new WriteParameters();

  /**
   * Configures the strategy from the given options.
   *
   * @param options
   *          option values keyed by {@link CompactionSettings} constant name
   * @throws IllegalArgumentException
   *           if a key is not a {@link CompactionSettings} name, names a setting this strategy
   *           does not handle, or a numeric value cannot be parsed
   */
  @Override
  public void init(Map<String,String> options) {

    boolean selectNoSummary = false;
    boolean selectExtraSummary = false;

    for (Entry<String,String> entry : options.entrySet()) {

      switch (CompactionSettings.valueOf(entry.getKey())) {
        case SF_EXTRA_SUMMARY:
          selectExtraSummary = true;
          break;
        case SF_NO_SUMMARY:
          selectNoSummary = true;
          break;
        case SF_NO_SAMPLE:
          tests.add(new NoSampleTest());
          break;
        case SF_LT_ESIZE_OPT:
          tests.add(new FileSizeTest(entry.getValue()) {
            @Override
            public boolean shouldCompact(long fsize, long esize) {
              return fsize < esize;
            }
          });
          break;
        case SF_GT_ESIZE_OPT:
          tests.add(new FileSizeTest(entry.getValue()) {
            @Override
            public boolean shouldCompact(long fsize, long esize) {
              return fsize > esize;
            }
          });
          break;
        case SF_NAME_RE_OPT:
          tests.add(new PatternPathTest(entry.getValue()) {
            @Override
            public String getInput(Path path) {
              return path.getName();
            }
          });
          break;
        case SF_PATH_RE_OPT:
          tests.add(new PatternPathTest(entry.getValue()) {
            @Override
            public String getInput(Path path) {
              return path.toString();
            }
          });
          break;
        case MIN_FILES_OPT:
          minFiles = Integer.parseInt(entry.getValue());
          break;
        case OUTPUT_COMPRESSION_OPT:
          writeParams.setCompressType(entry.getValue());
          break;
        case OUTPUT_BLOCK_SIZE_OPT:
          writeParams.setBlockSize(Long.parseLong(entry.getValue()));
          break;
        case OUTPUT_INDEX_BLOCK_SIZE_OPT:
          writeParams.setIndexBlockSize(Long.parseLong(entry.getValue()));
          break;
        case OUTPUT_HDFS_BLOCK_SIZE_OPT:
          writeParams.setHdfsBlockSize(Long.parseLong(entry.getValue()));
          break;
        case OUTPUT_REPLICATION_OPT:
          writeParams.setReplication(Integer.parseInt(entry.getValue()));
          break;
        default:
          throw new IllegalArgumentException("Unknown option " + entry.getKey());
      }
    }

    // Both summary options are served by one test, created once all options have been read.
    if (selectExtraSummary || selectNoSummary) {
      tests.add(new SummaryTest(selectExtraSummary, selectNoSummary));
    }
  }

  /**
   * Applies the configured tests to every file in the request.
   *
   * @return the files that passed; every file when no tests are configured
   */
  private List<FileRef> getFilesToCompact(MajorCompactionRequest request) {
    List<FileRef> filesToCompact = new ArrayList<>();

    for (Entry<FileRef,DataFileValue> entry : request.getFiles().entrySet()) {
      boolean compact = false;
      for (Test test : tests) {
        if (andTest) {
          // All tests must pass, so stop at the first failure.
          compact = test.shouldCompact(entry, request);
          if (!compact) {
            break;
          }
        } else {
          compact |= test.shouldCompact(entry, request);
        }
      }

      if (compact || tests.isEmpty()) {
        filesToCompact.add(entry.getKey());
      }
    }
    return filesToCompact;
  }

  /**
   * @return true if at least the configured minimum number of files is selected
   */
  @Override
  public boolean shouldCompact(MajorCompactionRequest request) {
    return getFilesToCompact(request).size() >= minFiles;
  }

  @Override
  public void gatherInformation(MajorCompactionRequest request) {
    // Gather any information that requires blocking calls here. This is only called before
    // getCompactionPlan() is called.
    for (Test test : tests) {
      test.gatherInformation(request);
    }
  }

  /**
   * @return a plan whose input is the selected files and whose write parameters are the ones
   *         configured in {@link #init(Map)}, or null if fewer than the configured minimum number
   *         of files is selected
   */
  @Override
  public CompactionPlan getCompactionPlan(MajorCompactionRequest request) {
    List<FileRef> filesToCompact = getFilesToCompact(request);
    if (filesToCompact.size() >= minFiles) {
      CompactionPlan plan = new CompactionPlan();
      plan.inputFiles.addAll(filesToCompact);
      plan.writeParameters = writeParams;
      return plan;
    }
    return null;
  }
}