All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.accumulo.server.tabletserver.Compactor Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.server.tabletserver;

import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.KeyExtent;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.data.thrift.IterInfo;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.file.FileSKVWriter;
import org.apache.accumulo.core.iterators.IteratorEnvironment;
import org.apache.accumulo.core.iterators.IteratorUtil;
import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope;
import org.apache.accumulo.core.iterators.SortedKeyValueIterator;
import org.apache.accumulo.core.iterators.WrappingIterator;
import org.apache.accumulo.core.iterators.system.ColumnFamilySkippingIterator;
import org.apache.accumulo.core.iterators.system.DeletingIterator;
import org.apache.accumulo.core.iterators.system.MultiIterator;
import org.apache.accumulo.core.iterators.system.TimeSettingIterator;
import org.apache.accumulo.core.tabletserver.thrift.ActiveCompaction;
import org.apache.accumulo.core.tabletserver.thrift.CompactionReason;
import org.apache.accumulo.core.tabletserver.thrift.CompactionType;
import org.apache.accumulo.core.util.LocalityGroupUtil;
import org.apache.accumulo.core.util.LocalityGroupUtil.LocalityGroupConfigurationError;
import org.apache.accumulo.core.util.MetadataTable.DataFileValue;
import org.apache.accumulo.server.conf.TableConfiguration;
import org.apache.accumulo.server.problems.ProblemReport;
import org.apache.accumulo.server.problems.ProblemReportingIterator;
import org.apache.accumulo.server.problems.ProblemReports;
import org.apache.accumulo.server.problems.ProblemType;
import org.apache.accumulo.server.tabletserver.Tablet.MajorCompactionReason;
import org.apache.accumulo.server.tabletserver.Tablet.MinorCompactionReason;
import org.apache.accumulo.trace.instrument.Span;
import org.apache.accumulo.trace.instrument.Trace;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;

public class Compactor implements Callable {

  public class CountingIterator extends WrappingIterator {

    private long count;

    @Override
    public CountingIterator deepCopy(IteratorEnvironment env) {
      return new CountingIterator(this, env);
    }

    private CountingIterator(CountingIterator other, IteratorEnvironment env) {
      setSource(other.getSource().deepCopy(env));
      count = 0;
    }

    public CountingIterator(SortedKeyValueIterator source) {
      this.setSource(source);
      count = 0;
    }

    @Override
    public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) {
      throw new UnsupportedOperationException();
    }

    @Override
    public void next() throws IOException {
      super.next();
      count++;
      if (count % 1024 == 0) {
        entriesRead.addAndGet(1024);
      }
    }

    public long getCount() {
      return count;
    }
  }

  private static final Logger log = Logger.getLogger(Compactor.class);

  static class CompactionCanceledException extends Exception {
    private static final long serialVersionUID = 1L;
  }

  static interface CompactionEnv {
    boolean isCompactionEnabled();

    IteratorScope getIteratorScope();
  }

  private Map filesToCompact;
  private InMemoryMap imm;
  private String outputFile;
  private boolean propogateDeletes;
  private TableConfiguration acuTableConf;
  private CompactionEnv env;
  private Configuration conf;
  private FileSystem fs;
  protected KeyExtent extent;
  private List iterators;

  // things to report
  private String currentLocalityGroup = "";
  private long startTime;

  private MajorCompactionReason reason;
  protected MinorCompactionReason mincReason;

  private AtomicLong entriesRead = new AtomicLong(0);
  private AtomicLong entriesWritten = new AtomicLong(0);
  private DateFormat dateFormatter = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss.SSS");

  private synchronized void setLocalityGroup(String name) {
    this.currentLocalityGroup = name;
  }

  private void clearStats() {
    entriesRead.set(0);
    entriesWritten.set(0);
  }

  protected static final Set runningCompactions = Collections.synchronizedSet(new HashSet());

  public static class CompactionInfo {

    private Compactor compactor;
    private String localityGroup;
    private long entriesRead;
    private long entriesWritten;

    CompactionInfo(Compactor compactor) {
      this.localityGroup = compactor.currentLocalityGroup;
      this.entriesRead = compactor.entriesRead.get();
      this.entriesWritten = compactor.entriesWritten.get();
      this.compactor = compactor;
    }

    public ActiveCompaction toThrift() {

      CompactionType type;

      if (compactor.imm != null)
        if (compactor.filesToCompact.size() > 0)
          type = CompactionType.MERGE;
        else
          type = CompactionType.MINOR;
      else if (!compactor.propogateDeletes)
        type = CompactionType.FULL;
      else
        type = CompactionType.MAJOR;

      CompactionReason reason;

      if (compactor.imm != null) {
        switch (compactor.mincReason) {
          case USER:
            reason = CompactionReason.USER;
            break;
          case CLOSE:
            reason = CompactionReason.CLOSE;
            break;
          case SYSTEM:
          default:
            reason = CompactionReason.SYSTEM;
            break;
        }
      } else {
        switch (compactor.reason) {
          case USER:
            reason = CompactionReason.USER;
            break;
          case CHOP:
            reason = CompactionReason.CHOP;
            break;
          case IDLE:
            reason = CompactionReason.IDLE;
            break;
          case NORMAL:
          default:
            reason = CompactionReason.SYSTEM;
            break;
        }
      }

      List iiList = new ArrayList();
      Map> iterOptions = new HashMap>();

      for (IteratorSetting iterSetting : compactor.iterators) {
        iiList.add(new IterInfo(iterSetting.getPriority(), iterSetting.getIteratorClass(), iterSetting.getName()));
        iterOptions.put(iterSetting.getName(), iterSetting.getOptions());
      }

      return new ActiveCompaction(compactor.extent.toThrift(), System.currentTimeMillis() - compactor.startTime, new ArrayList(
          compactor.filesToCompact.keySet()), compactor.outputFile, type, reason, localityGroup, entriesRead, entriesWritten, iiList, iterOptions);
    }
  }

  public static List getRunningCompactions() {
    ArrayList compactions = new ArrayList();

    synchronized (runningCompactions) {
      for (Compactor compactor : runningCompactions) {
        compactions.add(new CompactionInfo(compactor));
      }
    }

    return compactions;
  }

  Compactor(Configuration conf, FileSystem fs, Map files, InMemoryMap imm, String outputFile, boolean propogateDeletes,
      TableConfiguration acuTableConf, KeyExtent extent, CompactionEnv env, List iterators, MajorCompactionReason reason) {
    this.extent = extent;
    this.conf = conf;
    this.fs = fs;
    this.filesToCompact = files;
    this.imm = imm;
    this.outputFile = outputFile;
    this.propogateDeletes = propogateDeletes;
    this.acuTableConf = acuTableConf;
    this.env = env;
    this.iterators = iterators;
    this.reason = reason;

    startTime = System.currentTimeMillis();
  }

  Compactor(Configuration conf, FileSystem fs, Map files, InMemoryMap imm, String outputFile, boolean propogateDeletes,
      TableConfiguration acuTableConf, KeyExtent extent, CompactionEnv env) {
    this(conf, fs, files, imm, outputFile, propogateDeletes, acuTableConf, extent, env, new ArrayList(), null);
  }

  public FileSystem getFileSystem() {
    return fs;
  }

  KeyExtent getExtent() {
    return extent;
  }

  String getOutputFile() {
    return outputFile;
  }

  @Override
  public CompactionStats call() throws IOException, CompactionCanceledException {

    FileSKVWriter mfw = null;

    CompactionStats majCStats = new CompactionStats();

    boolean remove = runningCompactions.add(this);

    clearStats();

    String oldThreadName = Thread.currentThread().getName();
    String newThreadName = "MajC compacting " + extent.toString() + " started " + dateFormatter.format(new Date()) + " file: " + outputFile;
    Thread.currentThread().setName(newThreadName);
    try {
      FileOperations fileFactory = FileOperations.getInstance();
      mfw = fileFactory.openWriter(outputFile, fs, conf, acuTableConf);

      Map> lGroups;
      try {
        lGroups = LocalityGroupUtil.getLocalityGroups(acuTableConf);
      } catch (LocalityGroupConfigurationError e) {
        throw new IOException(e);
      }

      long t1 = System.currentTimeMillis();

      HashSet allColumnFamilies = new HashSet();

      if (mfw.supportsLocalityGroups()) {
        for (Entry> entry : lGroups.entrySet()) {
          setLocalityGroup(entry.getKey());
          compactLocalityGroup(entry.getKey(), entry.getValue(), true, mfw, majCStats);
          allColumnFamilies.addAll(entry.getValue());
        }
      }

      setLocalityGroup("");
      compactLocalityGroup(null, allColumnFamilies, false, mfw, majCStats);

      long t2 = System.currentTimeMillis();

      FileSKVWriter mfwTmp = mfw;
      mfw = null; // set this to null so we do not try to close it again in finally if the close fails
      mfwTmp.close(); // if the close fails it will cause the compaction to fail

      // Verify the file, since hadoop 0.20.2 sometimes lies about the success of close()
      try {
        FileSKVIterator openReader = fileFactory.openReader(outputFile, false, fs, conf, acuTableConf);
        openReader.close();
      } catch (IOException ex) {
        log.error("Verification of successful compaction fails!!! " + extent + " " + outputFile, ex);
        throw ex;
      }

      log.debug(String.format("Compaction %s %,d read | %,d written | %,6d entries/sec | %6.3f secs", extent, majCStats.getEntriesRead(),
          majCStats.getEntriesWritten(), (int) (majCStats.getEntriesRead() / ((t2 - t1) / 1000.0)), (t2 - t1) / 1000.0));

      majCStats.setFileSize(fileFactory.getFileSize(outputFile, fs, conf, acuTableConf));
      return majCStats;
    } catch (IOException e) {
      log.error(e, e);
      throw e;
    } catch (RuntimeException e) {
      log.error(e, e);
      throw e;
    } finally {
      Thread.currentThread().setName(oldThreadName);
      if (remove)
        runningCompactions.remove(this);

      try {
        if (mfw != null) {
          // compaction must not have finished successfully, so close its output file
          try {
            mfw.close();
          } finally {
            Path path = new Path(outputFile);
            if (!fs.delete(path, true))
              if (fs.exists(path))
                log.error("Unable to delete " + outputFile);
          }
        }
      } catch (IOException e) {
        log.warn(e, e);
      } catch (RuntimeException exception) {
        log.warn(exception, exception);
      }
    }
  }

  private List> openMapDataFiles(String lgName, ArrayList readers) throws IOException {

    List> iters = new ArrayList>(filesToCompact.size());

    for (String mapFile : filesToCompact.keySet()) {
      try {

        FileOperations fileFactory = FileOperations.getInstance();

        FileSKVIterator reader;

        reader = fileFactory.openReader(mapFile, false, fs, conf, acuTableConf);

        readers.add(reader);

        SortedKeyValueIterator iter = new ProblemReportingIterator(extent.getTableId().toString(), mapFile, false, reader);

        if (filesToCompact.get(mapFile).isTimeSet()) {
          iter = new TimeSettingIterator(iter, filesToCompact.get(mapFile).getTime());
        }

        iters.add(iter);

      } catch (Throwable e) {

        ProblemReports.getInstance().report(new ProblemReport(extent.getTableId().toString(), ProblemType.FILE_READ, mapFile, e));

        log.warn("Some problem opening map file " + mapFile + " " + e.getMessage(), e);
        // failed to open some map file... close the ones that were opened
        for (FileSKVIterator reader : readers) {
          try {
            reader.close();
          } catch (Throwable e2) {
            log.warn("Failed to close map file", e2);
          }
        }

        readers.clear();

        if (e instanceof IOException)
          throw (IOException) e;
        throw new IOException("Failed to open map data files", e);
      }
    }

    return iters;
  }

  private void compactLocalityGroup(String lgName, Set columnFamilies, boolean inclusive, FileSKVWriter mfw, CompactionStats majCStats)
      throws IOException, CompactionCanceledException {
    ArrayList readers = new ArrayList(filesToCompact.size());
    Span span = Trace.start("compact");
    try {
      long entriesCompacted = 0;
      List> iters = openMapDataFiles(lgName, readers);

      if (imm != null) {
        iters.add(imm.compactionIterator());
      }

      CountingIterator citr = new CountingIterator(new MultiIterator(iters, extent.toDataRange()));
      DeletingIterator delIter = new DeletingIterator(citr, propogateDeletes);
      ColumnFamilySkippingIterator cfsi = new ColumnFamilySkippingIterator(delIter);

      // if(env.getIteratorScope() )

      TabletIteratorEnvironment iterEnv;
      if (env.getIteratorScope() == IteratorScope.majc)
        iterEnv = new TabletIteratorEnvironment(IteratorScope.majc, !propogateDeletes, acuTableConf);
      else if (env.getIteratorScope() == IteratorScope.minc)
        iterEnv = new TabletIteratorEnvironment(IteratorScope.minc, acuTableConf);
      else
        throw new IllegalArgumentException();

      SortedKeyValueIterator itr = iterEnv.getTopLevelIterator(IteratorUtil.loadIterators(env.getIteratorScope(), cfsi, extent, acuTableConf,
          iterators, iterEnv));

      itr.seek(extent.toDataRange(), columnFamilies, inclusive);

      if (!inclusive) {
        mfw.startDefaultLocalityGroup();
      } else {
        mfw.startNewLocalityGroup(lgName, columnFamilies);
      }

      Span write = Trace.start("write");
      try {
        while (itr.hasTop() && env.isCompactionEnabled()) {
          mfw.append(itr.getTopKey(), itr.getTopValue());
          itr.next();
          entriesCompacted++;

          if (entriesCompacted % 1024 == 0) {
            // Periodically update stats, do not want to do this too often since its volatile
            entriesWritten.addAndGet(1024);
          }
        }

        if (itr.hasTop() && !env.isCompactionEnabled()) {
          // cancel major compaction operation
          try {
            try {
              mfw.close();
            } catch (IOException e) {
              log.error(e, e);
            }
            fs.delete(new Path(outputFile), true);
          } catch (Exception e) {
            log.warn("Failed to delete Canceled compaction output file " + outputFile, e);
          }
          throw new CompactionCanceledException();
        }

      } finally {
        CompactionStats lgMajcStats = new CompactionStats(citr.getCount(), entriesCompacted);
        majCStats.add(lgMajcStats);
        write.stop();
      }

    } finally {
      // close sequence files opened
      for (FileSKVIterator reader : readers) {
        try {
          reader.close();
        } catch (Throwable e) {
          log.warn("Failed to close map file", e);
        }
      }
      span.stop();
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy