/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.accumulo.server.util;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.conf.AccumuloConfiguration;
import org.apache.accumulo.core.conf.DefaultConfiguration;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.data.impl.KeyExtent;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.DataFileColumnFamily;
import org.apache.accumulo.core.security.Authorizations;
import org.apache.accumulo.core.util.NumUtil;
import org.apache.accumulo.server.cli.ClientOpts;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.accumulo.server.fs.VolumeManagerImpl;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.beust.jcommander.Parameter;
import com.google.common.base.Joiner;
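/**
 * Computes the disk usage of Accumulo tables, accounting for files that are
 * shared between tables (for example, after a clone or bulk import). Each file
 * referenced in the metadata table is tagged with the set of tables that
 * reference it, so sizes are reported per unique combination of tables instead
 * of being double-counted.
 */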
public class TableDiskUsage {
private static final Logger log = LoggerFactory.getLogger(TableDiskUsage.class);
private int nextInternalId = 0;
private Map<String,Integer> internalIds = new HashMap<>();
private Map<Integer,String> externalIds = new HashMap<>();
private Map<String,Integer[]> tableFiles = new HashMap<>();
private Map<String,Long> fileSizes = new HashMap<>();
void addTable(String tableId) {
if (internalIds.containsKey(tableId))
throw new IllegalArgumentException("Already added table " + tableId);
// Keep an internal counter for each table added
int iid = nextInternalId++;
// Store the table id to the internal id
internalIds.put(tableId, iid);
// Store the internal id to the table id
externalIds.put(iid, tableId);
}
void linkFileAndTable(String tableId, String file) {
// get the internal id for this table
int internalId = internalIds.get(tableId);
// Initialize a bitset for tables (internal IDs) that reference this file
Integer[] tables = tableFiles.get(file);
if (tables == null) {
tables = new Integer[internalIds.size()];
for (int i = 0; i < tables.length; i++)
tables[i] = 0;
tableFiles.put(file, tables);
}
// Update the bitset to track that this table has seen this file
tables[internalId] = 1;
}
void addFileSize(String file, long size) {
fileSizes.put(file, size);
}
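/**
 * Rolls the per-file bitsets up into shared usage. For example, if tables A and
 * B both reference file f1 (100 bytes) and only A references f2 (50 bytes),
 * the result maps [A, B] -> 100 and [A] -> 50.
 */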
Map<List<String>,Long> calculateUsage() {
// Map each table bitset to the total size of files shared by exactly that set of tables
Map<List<Integer>,Long> usage = new HashMap<>();
if (log.isTraceEnabled()) {
log.trace("fileSizes " + fileSizes);
}
// For each file w/ referenced-table bitset
for (Entry<String,Integer[]> entry : tableFiles.entrySet()) {
if (log.isTraceEnabled()) {
log.trace("file " + entry.getKey() + " table bitset " + Arrays.toString(entry.getValue()));
}
List<Integer> key = Arrays.asList(entry.getValue());
Long size = fileSizes.get(entry.getKey());
Long tablesUsage = usage.get(key);
if (tablesUsage == null)
tablesUsage = 0L;
tablesUsage += size;
usage.put(key, tablesUsage);
}
Map<List<String>,Long> externalUsage = new HashMap<>();
for (Entry<List<Integer>,Long> entry : usage.entrySet()) {
List<String> externalKey = new ArrayList<>();
List<Integer> key = entry.getKey();
// table bitset
for (int i = 0; i < key.size(); i++)
if (key.get(i) != 0)
// Convert by internal id to the table id
externalKey.add(externalIds.get(i));
// list of table ids and size of files shared across the tables
externalUsage.put(externalKey, entry.getValue());
}
// Mapping of each set of tables that share files to the total size of the files they share
return externalUsage;
}
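/**
 * Output sink for report lines, letting callers send the report somewhere
 * other than stdout.
 */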
public interface Printer {
void print(String line);
}
public static void printDiskUsage(AccumuloConfiguration acuConf, Collection<String> tables, VolumeManager fs, Connector conn, boolean humanReadable)
throws TableNotFoundException, IOException {
printDiskUsage(acuConf, tables, fs, conn, new Printer() {
@Override
public void print(String line) {
System.out.println(line);
}
}, humanReadable);
}
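/**
 * Scans the metadata table for every file referenced by the given table ids,
 * looks up each file's size via the volume manager, and returns a sorted map
 * from each set of table names to the total bytes shared by exactly that set.
 * Tables referenced by the requested tables (e.g. via cloned files) are pulled
 * into the result as well, and empty tables are reported with a size of 0.
 */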
public static Map<TreeSet<String>,Long> getDiskUsage(AccumuloConfiguration acuConf, Set<String> tableIds, VolumeManager fs, Connector conn)
throws IOException {
TableDiskUsage tdu = new TableDiskUsage();
// Add each tableID
for (String tableId : tableIds)
tdu.addTable(tableId);
HashSet<String> tablesReferenced = new HashSet<>(tableIds);
HashSet<String> emptyTableIds = new HashSet<>();
HashSet<String> nameSpacesReferenced = new HashSet<>();
// For each table ID
for (String tableId : tableIds) {
Scanner mdScanner = null;
try {
mdScanner = conn.createScanner(MetadataTable.NAME, Authorizations.EMPTY);
} catch (TableNotFoundException e) {
throw new RuntimeException(e);
}
mdScanner.fetchColumnFamily(DataFileColumnFamily.NAME);
mdScanner.setRange(new KeyExtent(new Text(tableId), null, null).toMetadataRange());
if (!mdScanner.iterator().hasNext()) {
emptyTableIds.add(tableId);
}
// Read each file referenced by that table
for (Entry<Key,Value> entry : mdScanner) {
String file = entry.getKey().getColumnQualifier().toString();
String[] parts = file.split("/");
// the filename
String uniqueName = parts[parts.length - 1];
if (file.contains(":") || file.startsWith("../")) {
String ref = parts[parts.length - 3];
// Track any tables which are referenced externally by the current table
if (!ref.equals(tableId)) {
tablesReferenced.add(ref);
}
if (file.contains(":") && parts.length > 3) {
List<String> base = Arrays.asList(Arrays.copyOf(parts, parts.length - 3));
nameSpacesReferenced.add(Joiner.on("/").join(base));
}
}
// add this file to this table
tdu.linkFileAndTable(tableId, uniqueName);
}
}
// For each table seen (provided by the user, or referenced by a table the user provided)
for (String tableId : tablesReferenced) {
for (String tableDir : nameSpacesReferenced) {
// Find each file and add its size
FileStatus[] files = fs.globStatus(new Path(tableDir + "/" + tableId + "/*/*"));
if (files != null) {
for (FileStatus fileStatus : files) {
// Assumes that all filenames are unique
String name = fileStatus.getPath().getName();
tdu.addFileSize(name, fileStatus.getLen());
}
}
}
}
// Invert tableId->tableName
HashMap<String,String> reverseTableIdMap = new HashMap<>();
for (Entry<String,String> entry : conn.tableOperations().tableIdMap().entrySet())
reverseTableIdMap.put(entry.getValue(), entry.getKey());
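// Order results by table-name set: compare element by element, with the
// shorter set first when one set is a prefix of the other.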
TreeMap<TreeSet<String>,Long> usage = new TreeMap<>(new Comparator<TreeSet<String>>() {
@Override
public int compare(TreeSet<String> o1, TreeSet<String> o2) {
int len1 = o1.size();
int len2 = o2.size();
int min = Math.min(len1, len2);
Iterator<String> iter1 = o1.iterator();
Iterator<String> iter2 = o2.iterator();
int count = 0;
while (count < min) {
String s1 = iter1.next();
String s2 = iter2.next();
int cmp = s1.compareTo(s2);
if (cmp != 0)
return cmp;
count++;
}
return len1 - len2;
}
});
for (Entry<List<String>,Long> entry : tdu.calculateUsage().entrySet()) {
TreeSet<String> tableNames = new TreeSet<>();
// Convert size shared by each table id into size shared by each table name
for (String tableId : entry.getKey())
tableNames.add(reverseTableIdMap.get(tableId));
// Map the set of table names to the size of their shared files
usage.put(tableNames, entry.getValue());
}
if (!emptyTableIds.isEmpty()) {
TreeSet<String> emptyTables = new TreeSet<>();
for (String tableId : emptyTableIds) {
emptyTables.add(reverseTableIdMap.get(tableId));
}
usage.put(emptyTables, 0L);
}
return usage;
}
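/**
 * Resolves the given table names to table ids, computes shared disk usage, and
 * prints one line per set of tables, formatted either as raw bytes or in
 * human-readable units depending on the humanReadable flag.
 */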
public static void printDiskUsage(AccumuloConfiguration acuConf, Collection<String> tables, VolumeManager fs, Connector conn, Printer printer,
boolean humanReadable) throws TableNotFoundException, IOException {
HashSet<String> tableIds = new HashSet<>();
// Get table IDs for all tables requested to be 'du'
for (String tableName : tables) {
String tableId = conn.tableOperations().tableIdMap().get(tableName);
if (tableId == null)
throw new TableNotFoundException(null, tableName, "Table " + tableName + " not found");
tableIds.add(tableId);
}
Map<TreeSet<String>,Long> usage = getDiskUsage(acuConf, tableIds, fs, conn);
String valueFormat = humanReadable ? "%9s" : "%,24d";
for (Entry<TreeSet<String>,Long> entry : usage.entrySet()) {
Object value = humanReadable ? NumUtil.bigNumberForSize(entry.getValue()) : entry.getValue();
printer.print(String.format(valueFormat + " %s", value, entry.getKey()));
}
}
static class Opts extends ClientOpts {
@Parameter(description = " <table> { <table> ... } ")
List<String> tables = new ArrayList<>();
}
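// Command-line entry point. In a typical install this is run through the
// accumulo launcher, e.g. (a sketch; exact flags come from ClientOpts and may
// vary by version):
//   accumulo org.apache.accumulo.server.util.TableDiskUsage -u root -p secret table1 table2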
public static void main(String[] args) throws Exception {
VolumeManager fs = VolumeManagerImpl.get();
Opts opts = new Opts();
opts.parseArgs(TableDiskUsage.class.getName(), args);
Connector conn = opts.getConnector();
org.apache.accumulo.server.util.TableDiskUsage.printDiskUsage(DefaultConfiguration.getInstance(), opts.tables, fs, conn, false);
}
}