org.apache.accumulo.tserver.tablet.Compactor (accumulo-tserver)
The tablet server for Apache Accumulo to host tablets of data tables.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.tserver.tablet;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.conf.IterConfigUtil;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.file.FileSKVWriter;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
import org.apache.accumulo.core.iterators.system.DeletingIterator;
import org.apache.accumulo.core.iterators.system.MultiIterator;
import org.apache.accumulo.core.iterators.system.TimeSettingIterator;
import org.apache.accumulo.core.metadata.schema.DataFileValue;
import org.apache.accumulo.core.util.LocalityGroupUtil;
import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
import org.apache.accumulo.core.util.ratelimit.RateLimiter;
import org.apache.accumulo.server.ServerContext;
import org.apache.accumulo.server.fs.FileRef;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.accumulo.server.problems.ProblemReport;
import org.apache.accumulo.server.problems.ProblemReportingIterator;
import org.apache.accumulo.server.problems.ProblemReports;
import org.apache.accumulo.server.problems.ProblemType;
import org.apache.accumulo.tserver.InMemoryMap;
import org.apache.accumulo.tserver.MinorCompactionReason;
import org.apache.accumulo.tserver.TabletIteratorEnvironment;
import org.apache.accumulo.tserver.compaction.MajorCompactionReason;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.htrace.Trace;
import org.apache.htrace.TraceScope;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
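/**
 * Runs a single compaction: merges a set of input files (and, for minor
 * compactions, an optional in-memory map) into one output file, applying the
 * configured iterators and locality groups along the way.
 */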
public class Compactor implements Callable<CompactionStats> {
private static final Logger log = LoggerFactory.getLogger(Compactor.class);
private static final AtomicLong nextCompactorID = new AtomicLong(0);
public static class CompactionCanceledException extends Exception {
private static final long serialVersionUID = 1L;
}
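/**
 * Callbacks supplied by the caller: used to check whether the compaction
 * should keep running and to obtain the read and write rate limiters.
 */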
public interface CompactionEnv {
boolean isCompactionEnabled();
IteratorScope getIteratorScope();
RateLimiter getReadLimiter();
RateLimiter getWriteLimiter();
}
private final Map<FileRef,DataFileValue> filesToCompact;
private final InMemoryMap imm;
private final FileRef outputFile;
private final boolean propogateDeletes;
private final AccumuloConfiguration acuTableConf;
private final CompactionEnv env;
private final VolumeManager fs;
protected final KeyExtent extent;
private final List<IteratorSetting> iterators;
// things to report
private String currentLocalityGroup = "";
private final long startTime;
private int reason;
private final AtomicLong entriesRead = new AtomicLong(0);
private final AtomicLong entriesWritten = new AtomicLong(0);
private final DateFormat dateFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS");
// a unique id to identify a compactor
private final long compactorID = nextCompactorID.getAndIncrement();
protected volatile Thread thread;
private final ServerContext context;
public long getCompactorID() {
return compactorID;
}
private synchronized void setLocalityGroup(String name) {
this.currentLocalityGroup = name;
}
public synchronized String getCurrentLocalityGroup() {
return currentLocalityGroup;
}
private void clearStats() {
entriesRead.set(0);
entriesWritten.set(0);
}
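// registry of live compactions, used to report progress via getRunningCompactions()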
protected static final Set<Compactor> runningCompactions =
Collections.synchronizedSet(new HashSet<>());
public static List<CompactionInfo> getRunningCompactions() {
ArrayList<CompactionInfo> compactions = new ArrayList<>();
synchronized (runningCompactions) {
for (Compactor compactor : runningCompactions) {
compactions.add(new CompactionInfo(compactor));
}
}
return compactions;
}
public Compactor(ServerContext context, Tablet tablet, Map<FileRef,DataFileValue> files,
InMemoryMap imm, FileRef outputFile, boolean propogateDeletes, CompactionEnv env,
List<IteratorSetting> iterators, int reason, AccumuloConfiguration tableConfiguation) {
this.context = context;
this.extent = tablet.getExtent();
this.fs = tablet.getTabletServer().getFileSystem();
this.acuTableConf = tableConfiguation;
this.filesToCompact = files;
this.imm = imm;
this.outputFile = outputFile;
this.propogateDeletes = propogateDeletes;
this.env = env;
this.iterators = iterators;
this.reason = reason;
startTime = System.currentTimeMillis();
}
public VolumeManager getFileSystem() {
return fs;
}
KeyExtent getExtent() {
return extent;
}
String getOutputFile() {
return outputFile.toString();
}
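// reason is stored as an enum ordinal; it indexes MajorCompactionReason or
// MinorCompactionReason depending on the kind of compaction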
MajorCompactionReason getMajorCompactionReason() {
return MajorCompactionReason.values()[reason];
}
protected Map<String,Set<ByteSequence>> getLocalityGroups(AccumuloConfiguration acuTableConf)
throws IOException {
try {
return LocalityGroupUtil.getLocalityGroups(acuTableConf);
} catch (LocalityGroupConfigurationError e) {
throw new IOException(e);
}
}
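/**
 * Runs the compaction: writes each named locality group, then the default
 * group for all remaining column families, closes the output file, and
 * returns the stats. If anything fails, the partial output file is deleted.
 */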
@Override
public CompactionStats call() throws IOException, CompactionCanceledException {
FileSKVWriter mfw = null;
CompactionStats majCStats = new CompactionStats();
boolean remove = runningCompactions.add(this);
clearStats();
final Path outputFilePath = outputFile.path();
final String outputFilePathName = outputFilePath.toString();
String oldThreadName = Thread.currentThread().getName();
String newThreadName = "MajC compacting " + extent + " started "
+ dateFormatter.format(new Date()) + " file: " + outputFile;
Thread.currentThread().setName(newThreadName);
thread = Thread.currentThread();
try {
FileOperations fileFactory = FileOperations.getInstance();
FileSystem ns = this.fs.getVolumeByPath(outputFilePath).getFileSystem();
mfw = fileFactory.newWriterBuilder()
.forFile(outputFilePathName, ns, ns.getConf(), context.getCryptoService())
.withTableConfiguration(acuTableConf).withRateLimiter(env.getWriteLimiter()).build();
Map<String,Set<ByteSequence>> lGroups = getLocalityGroups(acuTableConf);
long t1 = System.currentTimeMillis();
HashSet<ByteSequence> allColumnFamilies = new HashSet<>();
if (mfw.supportsLocalityGroups()) {
for (Entry<String,Set<ByteSequence>> entry : lGroups.entrySet()) {
setLocalityGroup(entry.getKey());
compactLocalityGroup(entry.getKey(), entry.getValue(), true, mfw, majCStats);
allColumnFamilies.addAll(entry.getValue());
}
}
setLocalityGroup("");
compactLocalityGroup(null, allColumnFamilies, false, mfw, majCStats);
long t2 = System.currentTimeMillis();
FileSKVWriter mfwTmp = mfw;
mfw = null; // set this to null so we do not try to close it again in finally if the close
// fails
try {
mfwTmp.close(); // if the close fails it will cause the compaction to fail
} catch (IOException ex) {
if (!fs.deleteRecursively(outputFile.path())) {
if (fs.exists(outputFile.path())) {
log.error("Unable to delete {}", outputFile);
}
}
throw ex;
}
log.debug(String.format(
"Compaction %s %,d read | %,d written | %,6d entries/sec"
+ " | %,6.3f secs | %,12d bytes | %9.3f byte/sec",
extent, majCStats.getEntriesRead(), majCStats.getEntriesWritten(),
(int) (majCStats.getEntriesRead() / ((t2 - t1) / 1000.0)), (t2 - t1) / 1000.0,
mfwTmp.getLength(), mfwTmp.getLength() / ((t2 - t1) / 1000.0)));
majCStats.setFileSize(mfwTmp.getLength());
return majCStats;
} catch (IOException | RuntimeException e) {
log.error("{}", e.getMessage(), e);
throw e;
} finally {
Thread.currentThread().setName(oldThreadName);
if (remove) {
thread = null;
runningCompactions.remove(this);
}
try {
if (mfw != null) {
// compaction must not have finished successfully, so close its output file
try {
mfw.close();
} finally {
if (!fs.deleteRecursively(outputFile.path()))
if (fs.exists(outputFile.path()))
log.error("Unable to delete {}", outputFile);
}
}
} catch (IOException | RuntimeException e) {
log.warn("{}", e.getMessage(), e);
}
}
}
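/**
 * Opens a reader for each input file, wrapping it so read problems are
 * reported and, where a time is set, timestamps are rewritten. If any file
 * fails to open, all readers opened so far are closed before the exception
 * propagates.
 */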
private List<SortedKeyValueIterator<Key,Value>>
openMapDataFiles(ArrayList<FileSKVIterator> readers) throws IOException {
List<SortedKeyValueIterator<Key,Value>> iters = new ArrayList<>(filesToCompact.size());
for (FileRef mapFile : filesToCompact.keySet()) {
try {
FileOperations fileFactory = FileOperations.getInstance();
FileSystem fs = this.fs.getVolumeByPath(mapFile.path()).getFileSystem();
FileSKVIterator reader;
reader = fileFactory.newReaderBuilder()
.forFile(mapFile.path().toString(), fs, fs.getConf(), context.getCryptoService())
.withTableConfiguration(acuTableConf).withRateLimiter(env.getReadLimiter()).build();
readers.add(reader);
SortedKeyValueIterator<Key,Value> iter = new ProblemReportingIterator(context,
extent.getTableId(), mapFile.path().toString(), false, reader);
if (filesToCompact.get(mapFile).isTimeSet()) {
iter = new TimeSettingIterator(iter, filesToCompact.get(mapFile).getTime());
}
iters.add(iter);
} catch (Throwable e) {
ProblemReports.getInstance(context).report(new ProblemReport(extent.getTableId(),
ProblemType.FILE_READ, mapFile.path().toString(), e));
log.warn("Some problem opening map file {} {}", mapFile, e.getMessage(), e);
// failed to open some map file... close the ones that were opened
for (FileSKVIterator reader : readers) {
try {
reader.close();
} catch (Throwable e2) {
log.warn("Failed to close map file", e2);
}
}
readers.clear();
if (e instanceof IOException)
throw (IOException) e;
throw new IOException("Failed to open map data files", e);
}
}
return iters;
}
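/**
 * Compacts one locality group: stacks the counting, deleting,
 * column-family-skipping, and user-configured iterators over the merged
 * input, then streams the surviving entries to the writer. Throws
 * CompactionCanceledException if the environment disables the compaction
 * mid-write.
 */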
private void compactLocalityGroup(String lgName, Set<ByteSequence> columnFamilies,
boolean inclusive, FileSKVWriter mfw, CompactionStats majCStats)
throws IOException, CompactionCanceledException {
ArrayList<FileSKVIterator> readers = new ArrayList<>(filesToCompact.size());
try (TraceScope span = Trace.startSpan("compact")) {
long entriesCompacted = 0;
List<SortedKeyValueIterator<Key,Value>> iters = openMapDataFiles(readers);
if (imm != null) {
iters.add(imm.compactionIterator());
}
CountingIterator citr =
new CountingIterator(new MultiIterator(iters, extent.toDataRange()), entriesRead);
SortedKeyValueIterator<Key,Value> delIter =
DeletingIterator.wrap(citr, propogateDeletes, DeletingIterator.getBehavior(acuTableConf));
ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);
TabletIteratorEnvironment iterEnv;
if (env.getIteratorScope() == IteratorScope.majc)
iterEnv = new TabletIteratorEnvironment(context, IteratorScope.majc, !propogateDeletes,
acuTableConf, getExtent().getTableId(), getMajorCompactionReason());
else if (env.getIteratorScope() == IteratorScope.minc)
iterEnv = new TabletIteratorEnvironment(context, IteratorScope.minc, acuTableConf,
getExtent().getTableId());
else
throw new IllegalArgumentException();
SortedKeyValueIterator<Key,Value> itr = iterEnv.getTopLevelIterator(IterConfigUtil
.convertItersAndLoad(env.getIteratorScope(), cfsi, acuTableConf, iterators, iterEnv));
itr.seek(extent.toDataRange(), columnFamilies, inclusive);
if (!inclusive) {
mfw.startDefaultLocalityGroup();
} else {
mfw.startNewLocalityGroup(lgName, columnFamilies);
}
try (TraceScope write = Trace.startSpan("write")) {
while (itr.hasTop() && env.isCompactionEnabled()) {
mfw.append(itr.getTopKey(), itr.getTopValue());
itr.next();
entriesCompacted++;
if (entriesCompacted % 1024 == 0) {
// Periodically update stats; we do not want to do this too often since it's volatile
entriesWritten.addAndGet(1024);
}
}
if (itr.hasTop() && !env.isCompactionEnabled()) {
// cancel major compaction operation
try {
try {
mfw.close();
} catch (IOException e) {
log.error("{}", e.getMessage(), e);
}
fs.deleteRecursively(outputFile.path());
} catch (Exception e) {
log.warn("Failed to delete Canceled compaction output file {}", outputFile, e);
}
throw new CompactionCanceledException();
}
} finally {
CompactionStats lgMajcStats = new CompactionStats(citr.getCount(), entriesCompacted);
majCStats.add(lgMajcStats);
}
} finally {
// close the map files that were opened
for (FileSKVIterator reader : readers) {
try {
reader.close();
} catch (Throwable e) {
log.warn("Failed to close map file", e);
}
}
}
}
Collection<FileRef> getFilesToCompact() {
return filesToCompact.keySet();
}
boolean hasIMM() {
return imm != null;
}
boolean willPropogateDeletes() {
return propogateDeletes;
}
long getEntriesRead() {
return entriesRead.get();
}
long getEntriesWritten() {
return entriesWritten.get();
}
long getStartTime() {
return startTime;
}
Iterable<IteratorSetting> getIterators() {
return this.iterators;
}
MinorCompactionReason getMinCReason() {
return MinorCompactionReason.values()[reason];
}
}