All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.accumulo.server.tabletserver.LargestFirstMemoryManager Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.server.tabletserver;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.accumulo.core.client.Instance;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.client.impl.Tables;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.data.impl.KeyExtent;
import org.apache.accumulo.server.conf.ServerConfiguration;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * The LargestFirstMemoryManager attempts to keep memory between 80% and 90% full. It adapts over time the point at which it should start a compaction based on
 * how full memory gets between successive calls. It will also flush idle tablets based on a per-table configurable idle time. It will only attempt to flush
 * tablets up to 20% of all memory. And, as the name of the class would suggest, it flushes the tablet with the highest memory footprint. However, it actually
 * chooses the tablet as a function of its size doubled for every 15 minutes of idle time.
 */
public class LargestFirstMemoryManager implements MemoryManager {

  private static final Logger log = LoggerFactory.getLogger(LargestFirstMemoryManager.class);
  private static final long ZERO_TIME = System.currentTimeMillis();
  private static final int TSERV_MINC_MAXCONCURRENT_NUMWAITING_MULTIPLIER = 2;
  private static final double MAX_FLUSH_AT_ONCE_PERCENT = 0.20;

  private long maxMemory = -1;
  private int maxConcurrentMincs;
  private int numWaitingMultiplier;
  private long prevIngestMemory;
  // The fraction of memory that needs to be used before we begin flushing.
  private double compactionThreshold;
  private long maxObserved;
  private final HashMap mincIdleThresholds = new HashMap();
  private ServerConfiguration config = null;

  private static class TabletInfo {
    final KeyExtent extent;
    final long memTableSize;
    final long idleTime;
    final long load;

    public TabletInfo(KeyExtent extent, long memTableSize, long idleTime, long load) {
      this.extent = extent;
      this.memTableSize = memTableSize;
      this.idleTime = idleTime;
      this.load = load;
    }
  }

  // A little map that will hold the "largest" N tablets, where largest is a result of the timeMemoryLoad function
  private static class LargestMap {
    final int max;
    final TreeMap> map = new TreeMap>();

    LargestMap(int n) {
      max = n;
    }

    public boolean put(Long key, TabletInfo value) {
      if (map.size() == max) {
        if (key.compareTo(map.firstKey()) < 0)
          return false;
        try {
          add(key, value);
          return true;
        } finally {
          map.remove(map.firstKey());
        }
      } else {
        add(key, value);
        return true;
      }
    }

    private void add(Long key, TabletInfo value) {
      List lst = map.get(key);
      if (lst != null) {
        lst.add(value);
      } else {
        lst = new ArrayList();
        lst.add(value);
        map.put(key, lst);
      }
    }

    public boolean isEmpty() {
      return map.isEmpty();
    }

    public Entry> lastEntry() {
      return map.lastEntry();
    }

    public void remove(Long key) {
      map.remove(key);
    }
  }

  LargestFirstMemoryManager(long maxMemory, int maxConcurrentMincs, int numWaitingMultiplier) {
    this();
    this.maxMemory = maxMemory;
    this.maxConcurrentMincs = maxConcurrentMincs;
    this.numWaitingMultiplier = numWaitingMultiplier;
  }

  @Override
  public void init(ServerConfiguration conf) {
    this.config = conf;
    maxMemory = conf.getConfiguration().getMemoryInBytes(Property.TSERV_MAXMEM);
    maxConcurrentMincs = conf.getConfiguration().getCount(Property.TSERV_MINC_MAXCONCURRENT);
    numWaitingMultiplier = TSERV_MINC_MAXCONCURRENT_NUMWAITING_MULTIPLIER;
  }

  public LargestFirstMemoryManager() {
    prevIngestMemory = 0;
    compactionThreshold = 0.5;
    maxObserved = 0;
  }

  protected long getMinCIdleThreshold(KeyExtent extent) {
    Text tableId = extent.getTableId();
    if (!mincIdleThresholds.containsKey(tableId))
      mincIdleThresholds.put(tableId, config.getTableConfiguration(tableId.toString()).getTimeInMillis(Property.TABLE_MINC_COMPACT_IDLETIME));
    return mincIdleThresholds.get(tableId);
  }

  protected boolean tableExists(Instance instance, String tableId) {
    return Tables.exists(instance, tableId);
  }

  @Override
  public MemoryManagementActions getMemoryManagementActions(List tablets) {
    if (maxMemory < 0)
      throw new IllegalStateException("need to initialize " + LargestFirstMemoryManager.class.getName());

    final Instance instance = config.getInstance();
    final int maxMinCs = maxConcurrentMincs * numWaitingMultiplier;

    mincIdleThresholds.clear();
    final MemoryManagementActions result = new MemoryManagementActions();
    result.tabletsToMinorCompact = new ArrayList();

    LargestMap largestMemTablets = new LargestMap(maxMinCs);
    final LargestMap largestIdleMemTablets = new LargestMap(maxConcurrentMincs);
    final long now = currentTimeMillis();

    long ingestMemory = 0;
    long compactionMemory = 0;
    int numWaitingMincs = 0;

    // find the largest and most idle tablets
    for (TabletState ts : tablets) {
      // Make sure that the table still exists
      if (!tableExists(instance, ts.getExtent().getTableId().toString())) {
        log.trace("Ignoring extent for deleted table: {}", ts.getExtent());
        continue;
      }

      final long memTabletSize = ts.getMemTableSize();
      final long minorCompactingSize = ts.getMinorCompactingMemTableSize();
      final long idleTime = now - Math.max(ts.getLastCommitTime(), ZERO_TIME);
      final long timeMemoryLoad = timeMemoryLoad(memTabletSize, idleTime);
      ingestMemory += memTabletSize;
      if (minorCompactingSize == 0 && memTabletSize > 0) {
        TabletInfo tabletInfo = new TabletInfo(ts.getExtent(), memTabletSize, idleTime, timeMemoryLoad);
        try {
          // If the table was deleted, getMinCIdleThreshold will throw an exception
          if (idleTime > getMinCIdleThreshold(ts.getExtent())) {
            largestIdleMemTablets.put(timeMemoryLoad, tabletInfo);
          }
        } catch (IllegalArgumentException e) {
          Throwable cause = e.getCause();
          if (null != cause && cause instanceof TableNotFoundException) {
            log.trace("Ignoring extent for deleted table: {}", ts.getExtent());

            // The table might have been deleted during the iteration of the tablets
            // We just want to eat this exception, do nothing with this tablet, and continue
            continue;
          }

          throw e;
        }
        // Only place the tablet into largestMemTablets map when the table still exists
        largestMemTablets.put(timeMemoryLoad, tabletInfo);
      }

      compactionMemory += minorCompactingSize;
      if (minorCompactingSize > 0)
        numWaitingMincs++;
    }

    if (ingestMemory + compactionMemory > maxObserved) {
      maxObserved = ingestMemory + compactionMemory;
    }

    final long memoryChange = ingestMemory - prevIngestMemory;
    prevIngestMemory = ingestMemory;

    boolean startMinC = false;

    if (numWaitingMincs < maxMinCs) {
      // based on previous ingest memory increase, if we think that the next increase will
      // take us over the threshold for non-compacting memory, then start a minor compaction
      // or if the idle time of the chosen tablet is greater than the threshold, start a minor compaction
      if (memoryChange >= 0 && ingestMemory + memoryChange > compactionThreshold * maxMemory) {
        startMinC = true;
      } else if (!largestIdleMemTablets.isEmpty()) {
        startMinC = true;
        // switch largestMemTablets to largestIdleMemTablets
        largestMemTablets = largestIdleMemTablets;
        log.debug("IDLE minor compaction chosen");
      }
    }

    if (startMinC) {
      long toBeCompacted = compactionMemory;
      outer: for (int i = numWaitingMincs; i < maxMinCs && !largestMemTablets.isEmpty(); /* empty */) {
        Entry> lastEntry = largestMemTablets.lastEntry();
        for (TabletInfo largest : lastEntry.getValue()) {
          toBeCompacted += largest.memTableSize;
          result.tabletsToMinorCompact.add(largest.extent);
          log.debug(String.format("COMPACTING %s  total = %,d ingestMemory = %,d", largest.extent.toString(), (ingestMemory + compactionMemory), ingestMemory));
          log.debug(String.format("chosenMem = %,d chosenIT = %.2f load %,d", largest.memTableSize, largest.idleTime / 1000.0, largest.load));
          if (toBeCompacted > ingestMemory * MAX_FLUSH_AT_ONCE_PERCENT)
            break outer;
          i++;
        }
        largestMemTablets.remove(lastEntry.getKey());
      }
    } else if (memoryChange < 0) {
      // before idle mincs, starting a minor compaction meant that memoryChange >= 0.
      // we thought we might want to remove the "else" if that changed,
      // however it seems performing idle compactions shouldn't make the threshold
      // change more often, so it is staying for now.
      // also, now we have the case where memoryChange < 0 due to an idle compaction, yet
      // we are still adjusting the threshold. should this be tracked and prevented?

      // memory change < 0 means a minor compaction occurred
      // we want to see how full the memory got during the compaction
      // (the goal is for it to have between 80% and 90% memory utilization)
      // and adjust the compactionThreshold accordingly

      log.debug(String.format("BEFORE compactionThreshold = %.3f maxObserved = %,d", compactionThreshold, maxObserved));
      if (compactionThreshold < 0.82 && maxObserved < 0.8 * maxMemory) {
        // 0.82 * 1.1 is about 0.9, which is our desired max threshold
        compactionThreshold *= 1.1;
      } else if (compactionThreshold > 0.056 && maxObserved > 0.9 * maxMemory) {
        // 0.056 * 0.9 is about 0.05, which is our desired min threshold
        compactionThreshold *= 0.9;
      }
      maxObserved = 0;

      log.debug(String.format("AFTER compactionThreshold = %.3f", compactionThreshold));
    }

    return result;
  }

  protected long currentTimeMillis() {
    return System.currentTimeMillis();
  }

  @Override
  public void tabletClosed(KeyExtent extent) {}

  // The load function: memory times the idle time, doubling every 15 mins
  static long timeMemoryLoad(long mem, long time) {
    double minutesIdle = time / 60000.0;

    return (long) (mem * Math.pow(2, minutesIdle / 15.0));
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy