org.apache.accumulo.server.util.TableDiskUsage

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.server.util;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.conf.DefaultConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.data.impl.KeyExtent;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.NumUtil;
import org.apache.accumulo.server.cli.ClientOpts;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.accumulo.server.fs.VolumeManagerImpl;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.beust.jcommander.Parameter;
import com.google.common.base.Joiner;

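/**
 * Computes the disk usage of a set of Accumulo tables, accounting for files that are
 * shared between tables (e.g. after a clone or bulk import). Each table is assigned a
 * small internal id, each file is mapped to a bitset of the tables that reference it,
 * and file sizes are aggregated per distinct bitset, so shared bytes are reported once
 * per group of sharing tables rather than double-counted.
 */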
public class TableDiskUsage {

  private static final Logger log = LoggerFactory.getLogger(TableDiskUsage.class);
  private int nextInternalId = 0;
  private Map<String,Integer> internalIds = new HashMap<>();
  private Map<Integer,String> externalIds = new HashMap<>();
  private Map<String,Integer[]> tableFiles = new HashMap<>();
  private Map<String,Long> fileSizes = new HashMap<>();

  void addTable(String tableId) {
    if (internalIds.containsKey(tableId))
      throw new IllegalArgumentException("Already added table " + tableId);

    // Keep an internal counter for each table added
    int iid = nextInternalId++;

    // Store the table id to the internal id
    internalIds.put(tableId, iid);
    // Store the internal id to the table id
    externalIds.put(iid, tableId);
  }

  void linkFileAndTable(String tableId, String file) {
    // get the internal id for this table
    int internalId = internalIds.get(tableId);

    // Initialize a bitset for tables (internal IDs) that reference this file
    Integer[] tables = tableFiles.get(file);
    if (tables == null) {
      tables = new Integer[internalIds.size()];
      for (int i = 0; i < tables.length; i++)
        tables[i] = 0;
      tableFiles.put(file, tables);
    }

    // Update the bitset to track that this table has seen this file
    tables[internalId] = 1;
  }

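  // Record the size in bytes of a file found in HDFS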
  void addFileSize(String file, long size) {
    fileSizes.put(file, size);
  }

  Map<List<String>,Long> calculateUsage() {

    // Map from a bitset of tables (as a List<Integer>) to the total size of the files
    // shared by exactly that set of tables
    Map<List<Integer>,Long> usage = new HashMap<>();

    if (log.isTraceEnabled()) {
      log.trace("fileSizes " + fileSizes);
    }
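    // For example, with three tables added, a file referenced by the first and third
    // tables has the bitset [1, 0, 1]; the sizes of all files with exactly that bitset
    // are summed together under that key.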
    // For each file w/ referenced-table bitset
    for (Entry<String,Integer[]> entry : tableFiles.entrySet()) {
      if (log.isTraceEnabled()) {
        log.trace("file " + entry.getKey() + " table bitset " + Arrays.toString(entry.getValue()));
      }
      List<Integer> key = Arrays.asList(entry.getValue());
      Long size = fileSizes.get(entry.getKey());

      Long tablesUsage = usage.get(key);
      if (tablesUsage == null)
        tablesUsage = 0L;

      tablesUsage += size;

      usage.put(key, tablesUsage);

    }

    Map<List<String>,Long> externalUsage = new HashMap<>();

    for (Entry<List<Integer>,Long> entry : usage.entrySet()) {
      List<String> externalKey = new ArrayList<>();
      List<Integer> key = entry.getKey();
      // table bitset
      for (int i = 0; i < key.size(); i++)
        if (key.get(i) != 0)
          // Convert by internal id to the table id
          externalKey.add(externalIds.get(i));

      // list of table ids and size of files shared across the tables
      externalUsage.put(externalKey, entry.getValue());
    }

    // Mapping from each distinct set of referencing tables to the total size of the files shared by exactly that set
    return externalUsage;
  }

  public interface Printer {
    void print(String line);
  }

  public static void printDiskUsage(AccumuloConfiguration acuConf, Collection<String> tables, VolumeManager fs, Connector conn, boolean humanReadable)
      throws TableNotFoundException, IOException {
    printDiskUsage(acuConf, tables, fs, conn, new Printer() {
      @Override
      public void print(String line) {
        System.out.println(line);
      }
    }, humanReadable);
  }

  public static Map<TreeSet<String>,Long> getDiskUsage(AccumuloConfiguration acuConf, Set<String> tableIds, VolumeManager fs, Connector conn)
      throws IOException {
    TableDiskUsage tdu = new TableDiskUsage();

    // Add each tableID
    for (String tableId : tableIds)
      tdu.addTable(tableId);

    HashSet<String> tablesReferenced = new HashSet<>(tableIds);
    HashSet<String> emptyTableIds = new HashSet<>();
    HashSet<String> nameSpacesReferenced = new HashSet<>();

    // For each table ID
    for (String tableId : tableIds) {
      Scanner mdScanner = null;
      try {
        mdScanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
      } catch (TableNotFoundException e) {
        throw new RuntimeException(e);
      }
      mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
      mdScanner.setRange(new KeyExtent(new Text(tableId), null, null).toMetadataRange());

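      // No file entries in the metadata table means the table has no data on disk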
      if (!mdScanner.iterator().hasNext()) {
        emptyTableIds.add(tableId);
      }

      // Read each file referenced by that table
      for (Entry<Key,Value> entry : mdScanner) {
        String file = entry.getKey().getColumnQualifier().toString();
        String[] parts = file.split("/");
        // the filename
        String uniqueName = parts[parts.length - 1];
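        // Entries containing ':' are full URIs and entries starting with "../" are
        // relative paths into another table's directory; either form can reference a
        // file owned by a different table (e.g. a clone source or bulk-imported file)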
        if (file.contains(":") || file.startsWith("../")) {
          String ref = parts[parts.length - 3];
          // Track any tables which are referenced externally by the current table
          if (!ref.equals(tableId)) {
            tablesReferenced.add(ref);
          }
          if (file.contains(":") && parts.length > 3) {
            List<String> base = Arrays.asList(Arrays.copyOf(parts, parts.length - 3));
            nameSpacesReferenced.add(Joiner.on("/").join(base));
          }
        }

        // add this file to this table
        tdu.linkFileAndTable(tableId, uniqueName);
      }
    }

    // For each table seen (provided by the user, or referenced by a table the user provided)
    for (String tableId : tablesReferenced) {
      for (String tableDir : nameSpacesReferenced) {
        // Find each file and add its size
        FileStatus[] files = fs.globStatus(new Path(tableDir + "/" + tableId + "/*/*"));
        if (files != null) {
          for (FileStatus fileStatus : files) {
            // Assumes that all filenames are unique
            String name = fileStatus.getPath().getName();
            tdu.addFileSize(name, fileStatus.getLen());
          }
        }
      }
    }

    // Invert tableId->tableName
    HashMap<String,String> reverseTableIdMap = new HashMap<>();
    for (Entry<String,String> entry : conn.tableOperations().tableIdMap().entrySet())
      reverseTableIdMap.put(entry.getValue(), entry.getKey());

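    // Sort the result by the sets of table names sharing files: compare the names
    // element by element, and on a tie order the smaller set first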
    TreeMap<TreeSet<String>,Long> usage = new TreeMap<>(new Comparator<TreeSet<String>>() {

      @Override
      public int compare(TreeSet<String> o1, TreeSet<String> o2) {
        int len1 = o1.size();
        int len2 = o2.size();

        int min = Math.min(len1, len2);

        Iterator<String> iter1 = o1.iterator();
        Iterator<String> iter2 = o2.iterator();

        int count = 0;

        while (count < min) {
          String s1 = iter1.next();
          String s2 = iter2.next();

          int cmp = s1.compareTo(s2);

          if (cmp != 0)
            return cmp;

          count++;
        }

        return len1 - len2;
      }
    });

    for (Entry<List<String>,Long> entry : tdu.calculateUsage().entrySet()) {
      TreeSet<String> tableNames = new TreeSet<>();
      // Convert size shared by each table id into size shared by each table name
      for (String tableId : entry.getKey())
        tableNames.add(reverseTableIdMap.get(tableId));

      // Map the set of table names to the total size of the files they share
      usage.put(tableNames, entry.getValue());
    }

    if (!emptyTableIds.isEmpty()) {
      TreeSet<String> emptyTables = new TreeSet<>();
      for (String tableId : emptyTableIds) {
        emptyTables.add(reverseTableIdMap.get(tableId));
      }
      usage.put(emptyTables, 0L);
    }

    return usage;
  }

  public static void printDiskUsage(AccumuloConfiguration acuConf, Collection<String> tables, VolumeManager fs, Connector conn, Printer printer,
      boolean humanReadable) throws TableNotFoundException, IOException {

    HashSet<String> tableIds = new HashSet<>();

    // Get the table ID for each table name on which 'du' was requested
    for (String tableName : tables) {
      String tableId = conn.tableOperations().tableIdMap().get(tableName);
      if (tableId == null)
        throw new TableNotFoundException(null, tableName, "Table " + tableName + " not found");

      tableIds.add(tableId);
    }

    Map<TreeSet<String>,Long> usage = getDiskUsage(acuConf, tableIds, fs, conn);

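    // Human-readable output abbreviates sizes with unit suffixes (via
    // NumUtil.bigNumberForSize); otherwise exact byte counts are printed with
    // thousands separators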
    String valueFormat = humanReadable ? "%9s" : "%,24d";
    for (Entry<TreeSet<String>,Long> entry : usage.entrySet()) {
      Object value = humanReadable ? NumUtil.bigNumberForSize(entry.getValue()) : entry.getValue();
      printer.print(String.format(valueFormat + " %s", value, entry.getKey()));
    }
  }

  static class Opts extends ClientOpts {
    @Parameter(description = " <table> { <table> ... } ")
    List<String> tables = new ArrayList<>();
  }

  public static void main(String[] args) throws Exception {
    VolumeManager fs = VolumeManagerImpl.get();
    Opts opts = new Opts();
    opts.parseArgs(TableDiskUsage.class.getName(), args);
    Connector conn = opts.getConnector();
    org.apache.accumulo.server.util.TableDiskUsage.printDiskUsage(DefaultConfiguration.getInstance(), opts.tables, fs, conn, false);
  }
}
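
// Example invocation (a sketch, not part of this file): the class can be run through
// the 'accumulo' launcher script, with the table names as the positional arguments
// parsed into Opts.tables; connection flags such as username and password come from
// ClientOpts and vary by Accumulo version:
//
//   accumulo org.apache.accumulo.server.util.TableDiskUsage -u root -p secret table1 table2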