org.apache.accumulo.server.client.BulkImporter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of accumulo-server-base Show documentation
Show all versions of accumulo-server-base Show documentation
A common base library for Apache Accumulo servers.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.accumulo.server.client;
import static java.util.concurrent.TimeUnit.MINUTES;
import static org.apache.accumulo.core.util.UtilWaitThread.sleepUninterruptibly;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.clientImpl.ClientContext;
import org.apache.accumulo.core.clientImpl.TabletLocator;
import org.apache.accumulo.core.clientImpl.TabletLocator.TabletLocation;
import org.apache.accumulo.core.clientImpl.bulk.BulkImport;
import org.apache.accumulo.core.clientImpl.thrift.ClientService;
import org.apache.accumulo.core.clientImpl.thrift.ThriftSecurityException;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.data.ByteSequence;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.TableId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.dataImpl.thrift.TKeyExtent;
import org.apache.accumulo.core.file.FileOperations;
import org.apache.accumulo.core.file.FileSKVIterator;
import org.apache.accumulo.core.metadata.MetadataTable;
import org.apache.accumulo.core.rpc.ThriftUtil;
import org.apache.accumulo.core.rpc.clients.ThriftClientTypes;
import org.apache.accumulo.core.spi.crypto.CryptoService;
import org.apache.accumulo.core.tabletserver.thrift.TabletClientService;
import org.apache.accumulo.core.trace.TraceUtil;
import org.apache.accumulo.core.util.HostAndPort;
import org.apache.accumulo.core.util.StopWatch;
import org.apache.accumulo.core.util.threads.ThreadPools;
import org.apache.accumulo.server.ServerContext;
import org.apache.accumulo.server.conf.TableConfiguration;
import org.apache.accumulo.server.fs.VolumeManager;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.thrift.TServiceClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class BulkImporter {
private static final Logger log = LoggerFactory.getLogger(BulkImporter.class);
public static List bulkLoad(ServerContext context, long tid, String tableId,
List files, boolean setTime) throws IOException {
AssignmentStats stats = new BulkImporter(context, tid, tableId, setTime).importFiles(files);
List result = new ArrayList<>();
for (Path p : stats.completeFailures.keySet()) {
result.add(p.toString());
}
return result;
}
private StopWatch timer;
private enum Timers {
EXAMINE_MAP_FILES, QUERY_METADATA, IMPORT_MAP_FILES, SLEEP, TOTAL
}
private final ServerContext context;
private TableId tableId;
private long tid;
private boolean setTime;
private TableConfiguration tableConf;
public BulkImporter(ServerContext context, long tid, String tableId, boolean setTime) {
this.context = context;
this.tid = tid;
this.tableId = TableId.of(tableId);
this.setTime = setTime;
this.tableConf = context.getTableConfiguration(this.tableId);
}
public AssignmentStats importFiles(List files) {
int numThreads = context.getConfiguration().getCount(Property.TSERV_BULK_PROCESS_THREADS);
int numAssignThreads =
context.getConfiguration().getCount(Property.TSERV_BULK_ASSIGNMENT_THREADS);
timer = new StopWatch<>(Timers.class);
timer.start(Timers.TOTAL);
final VolumeManager fs = context.getVolumeManager();
Set paths = new HashSet<>();
for (String file : files) {
paths.add(new Path(file));
}
AssignmentStats assignmentStats = new AssignmentStats(paths.size());
final Map> completeFailures =
Collections.synchronizedSortedMap(new TreeMap<>());
ClientService.Client client = null;
final TabletLocator locator = TabletLocator.getLocator(context, tableId);
try {
final Map> assignments =
Collections.synchronizedSortedMap(new TreeMap<>());
timer.start(Timers.EXAMINE_MAP_FILES);
ExecutorService threadPool = ThreadPools.getServerThreadPools()
.createFixedThreadPool(numThreads, "findOverlapping", false);
for (Path path : paths) {
final Path mapFile = path;
Runnable getAssignments = () -> {
List tabletsToAssignMapFileTo = Collections.emptyList();
try {
tabletsToAssignMapFileTo =
findOverlappingTablets(context, fs, locator, mapFile, tableConf.getCryptoService());
} catch (Exception ex) {
log.warn("Unable to find tablets that overlap file " + mapFile, ex);
}
log.debug("Map file {} found to overlap {} tablets", mapFile,
tabletsToAssignMapFileTo.size());
if (tabletsToAssignMapFileTo.isEmpty()) {
List empty = Collections.emptyList();
completeFailures.put(mapFile, empty);
} else {
assignments.put(mapFile, tabletsToAssignMapFileTo);
}
};
threadPool.execute(getAssignments);
}
threadPool.shutdown();
while (!threadPool.isTerminated()) {
try {
threadPool.awaitTermination(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
timer.stop(Timers.EXAMINE_MAP_FILES);
assignmentStats.attemptingAssignments(assignments);
Map> assignmentFailures =
assignMapFiles(fs, assignments, paths, numAssignThreads, numThreads);
assignmentStats.assignmentsFailed(assignmentFailures);
Map failureCount = new TreeMap<>();
for (Entry> entry : assignmentFailures.entrySet()) {
failureCount.put(entry.getKey(), 1);
}
long sleepTime = 2_000;
while (!assignmentFailures.isEmpty()) {
sleepTime = Math.min(sleepTime * 2, MINUTES.toMillis(1));
locator.invalidateCache();
// assumption about assignment failures is that it caused by a split
// happening or a missing location
//
// for splits we need to find children key extents that cover the
// same key range and are contiguous (no holes, no overlap)
timer.start(Timers.SLEEP);
sleepUninterruptibly(sleepTime, TimeUnit.MILLISECONDS);
timer.stop(Timers.SLEEP);
log.debug("Trying to assign {} map files that previously failed on some key extents",
assignmentFailures.size());
assignments.clear();
// for failed key extents, try to find children key extents to
// assign to
for (Entry> entry : assignmentFailures.entrySet()) {
Iterator keListIter = entry.getValue().iterator();
List tabletsToAssignMapFileTo = new ArrayList<>();
while (keListIter.hasNext()) {
KeyExtent ke = keListIter.next();
timer.start(Timers.QUERY_METADATA);
try {
tabletsToAssignMapFileTo.addAll(findOverlappingTablets(context, fs, locator,
entry.getKey(), ke, tableConf.getCryptoService()));
keListIter.remove();
} catch (Exception ex) {
log.warn("Exception finding overlapping tablets, will retry tablet " + ke, ex);
}
timer.stop(Timers.QUERY_METADATA);
}
if (!tabletsToAssignMapFileTo.isEmpty()) {
assignments.put(entry.getKey(), tabletsToAssignMapFileTo);
}
}
assignmentStats.attemptingAssignments(assignments);
Map> assignmentFailures2 =
assignMapFiles(fs, assignments, paths, numAssignThreads, numThreads);
assignmentStats.assignmentsFailed(assignmentFailures2);
// merge assignmentFailures2 into assignmentFailures
for (Entry> entry : assignmentFailures2.entrySet()) {
assignmentFailures.get(entry.getKey()).addAll(entry.getValue());
Integer fc = failureCount.get(entry.getKey());
if (fc == null) {
fc = 0;
}
failureCount.put(entry.getKey(), fc + 1);
}
// remove map files that have no more key extents to assign
assignmentFailures.values().removeIf(List::isEmpty);
Set> failureIter = failureCount.entrySet();
for (Entry entry : failureIter) {
int retries = context.getConfiguration().getCount(Property.TSERV_BULK_RETRY);
if (entry.getValue() > retries && assignmentFailures.get(entry.getKey()) != null) {
log.error("Map file {} failed more than {} times, giving up.", entry.getKey(), retries);
completeFailures.put(entry.getKey(), assignmentFailures.get(entry.getKey()));
assignmentFailures.remove(entry.getKey());
}
}
}
assignmentStats.assignmentsAbandoned(completeFailures);
Set failedFailures = processFailures(completeFailures);
assignmentStats.unrecoveredMapFiles(failedFailures);
timer.stop(Timers.TOTAL);
printReport(paths);
return assignmentStats;
} finally {
if (client != null) {
ThriftUtil.close(client, context);
}
}
}
private void printReport(Set paths) {
long totalTime = 0;
for (Timers t : Timers.values()) {
if (t == Timers.TOTAL) {
continue;
}
totalTime += timer.get(t);
}
List files = new ArrayList<>();
for (Path path : paths) {
files.add(path.getName());
}
Collections.sort(files);
log.debug("BULK IMPORT TIMING STATISTICS");
log.debug("Files: {}", files);
log.debug(String.format("Examine map files : %,10.2f secs %6.2f%s",
timer.getSecs(Timers.EXAMINE_MAP_FILES),
100.0 * timer.get(Timers.EXAMINE_MAP_FILES) / timer.get(Timers.TOTAL), "%"));
log.debug(String.format("Query %-14s : %,10.2f secs %6.2f%s", MetadataTable.NAME,
timer.getSecs(Timers.QUERY_METADATA),
100.0 * timer.get(Timers.QUERY_METADATA) / timer.get(Timers.TOTAL), "%"));
log.debug(String.format("Import Map Files : %,10.2f secs %6.2f%s",
timer.getSecs(Timers.IMPORT_MAP_FILES),
100.0 * timer.get(Timers.IMPORT_MAP_FILES) / timer.get(Timers.TOTAL), "%"));
log.debug(
String.format("Sleep : %,10.2f secs %6.2f%s", timer.getSecs(Timers.SLEEP),
100.0 * timer.get(Timers.SLEEP) / timer.get(Timers.TOTAL), "%"));
log.debug(String.format("Misc : %,10.2f secs %6.2f%s",
(timer.get(Timers.TOTAL) - totalTime) / 1000.0,
100.0 * (timer.get(Timers.TOTAL) - totalTime) / timer.get(Timers.TOTAL), "%"));
log.debug(String.format("Total : %,10.2f secs", timer.getSecs(Timers.TOTAL)));
}
private Set processFailures(Map> completeFailures) {
// we should check if map file was not assigned to any tablets, then we
// should just move it; not currently being done?
Set>> es = completeFailures.entrySet();
if (completeFailures.isEmpty()) {
return Collections.emptySet();
}
log.debug("The following map files failed ");
for (Entry> entry : es) {
List extents = entry.getValue();
for (KeyExtent keyExtent : extents) {
log.debug("\t{} -> {}", entry.getKey(), keyExtent);
}
}
return Collections.emptySet();
}
private static class AssignmentInfo {
public AssignmentInfo(KeyExtent keyExtent, Long estSize) {
this.ke = keyExtent;
this.estSize = estSize;
}
KeyExtent ke;
long estSize;
}
private static List extentsOf(List locations) {
List result = new ArrayList<>(locations.size());
for (TabletLocation tl : locations) {
result.add(tl.tablet_extent);
}
return result;
}
private Map> estimateSizes(final VolumeManager vm,
Map> assignments, Collection paths, int numThreads) {
long t1 = System.currentTimeMillis();
final Map mapFileSizes = new TreeMap<>();
try {
for (Path path : paths) {
FileSystem fs = vm.getFileSystemByPath(path);
mapFileSizes.put(path, fs.getContentSummary(path).getLength());
}
} catch (IOException e) {
log.error("Failed to get map files in for {}: {}", paths, e.getMessage(), e);
throw new RuntimeException(e);
}
final Map> ais = Collections.synchronizedMap(new TreeMap<>());
ExecutorService threadPool = ThreadPools.getServerThreadPools()
.createFixedThreadPool(numThreads, "estimateSizes", false);
for (final Entry> entry : assignments.entrySet()) {
if (entry.getValue().size() == 1) {
TabletLocation tabletLocation = entry.getValue().get(0);
// if the tablet completely contains the map file, there is no
// need to estimate its
// size
ais.put(entry.getKey(), Collections.singletonList(
new AssignmentInfo(tabletLocation.tablet_extent, mapFileSizes.get(entry.getKey()))));
continue;
}
Runnable estimationTask = () -> {
Map estimatedSizes = null;
try {
Path mapFile = entry.getKey();
FileSystem ns = context.getVolumeManager().getFileSystemByPath(mapFile);
estimatedSizes = BulkImport.estimateSizes(context.getConfiguration(), mapFile,
mapFileSizes.get(entry.getKey()), extentsOf(entry.getValue()), ns, null,
tableConf.getCryptoService());
} catch (IOException e) {
log.warn("Failed to estimate map file sizes {}", e.getMessage());
}
if (estimatedSizes == null) {
// estimation failed, do a simple estimation
estimatedSizes = new TreeMap<>();
long estSize =
(long) (mapFileSizes.get(entry.getKey()) / (double) entry.getValue().size());
for (TabletLocation tl : entry.getValue()) {
estimatedSizes.put(tl.tablet_extent, estSize);
}
}
List assignmentInfoList = new ArrayList<>(estimatedSizes.size());
for (Entry entry2 : estimatedSizes.entrySet()) {
assignmentInfoList.add(new AssignmentInfo(entry2.getKey(), entry2.getValue()));
}
ais.put(entry.getKey(), assignmentInfoList);
};
threadPool.execute(estimationTask);
}
threadPool.shutdown();
while (!threadPool.isTerminated()) {
try {
threadPool.awaitTermination(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
log.error("Encountered InterruptedException while waiting for the threadPool to terminate.",
e);
throw new RuntimeException(e);
}
}
long t2 = System.currentTimeMillis();
log.debug(String.format("Estimated map files sizes in %6.2f secs", (t2 - t1) / 1000.0));
return ais;
}
private static Map locationsOf(Map> assignments) {
Map result = new HashMap<>();
for (List entry : assignments.values()) {
for (TabletLocation tl : entry) {
result.put(tl.tablet_extent, tl.tablet_location);
}
}
return result;
}
private Map> assignMapFiles(VolumeManager fs,
Map> assignments, Collection paths, int numThreads,
int numMapThreads) {
timer.start(Timers.EXAMINE_MAP_FILES);
Map> assignInfo =
estimateSizes(fs, assignments, paths, numMapThreads);
timer.stop(Timers.EXAMINE_MAP_FILES);
Map> ret;
timer.start(Timers.IMPORT_MAP_FILES);
ret = assignMapFiles(assignInfo, locationsOf(assignments), numThreads);
timer.stop(Timers.IMPORT_MAP_FILES);
return ret;
}
private class AssignmentTask implements Runnable {
final Map> assignmentFailures;
HostAndPort location;
private Map> assignmentsPerTablet;
public AssignmentTask(Map> assignmentFailures, String location,
Map> assignmentsPerTablet) {
this.assignmentFailures = assignmentFailures;
this.location = HostAndPort.fromString(location);
this.assignmentsPerTablet = assignmentsPerTablet;
}
private void handleFailures(Collection failures, String message) {
failures.forEach(ke -> {
List mapFiles = assignmentsPerTablet.get(ke);
synchronized (assignmentFailures) {
mapFiles.forEach(pathSize -> assignmentFailures
.computeIfAbsent(pathSize.path, k -> new ArrayList<>()).add(ke));
}
log.info("Could not assign {} map files to tablet {} because : {}. Will retry ...",
mapFiles.size(), ke, message);
});
}
@Override
public void run() {
HashSet uniqMapFiles = new HashSet<>();
for (List mapFiles : assignmentsPerTablet.values()) {
for (PathSize ps : mapFiles) {
uniqMapFiles.add(ps.path);
}
}
log.debug("Assigning {} map files to {} tablets at {}", uniqMapFiles.size(),
assignmentsPerTablet.size(), location);
try {
List failures = assignMapFiles(context, location, assignmentsPerTablet);
handleFailures(failures, "Not Serving Tablet");
} catch (AccumuloException | AccumuloSecurityException e) {
handleFailures(assignmentsPerTablet.keySet(), e.getMessage());
}
}
}
private static class PathSize {
public PathSize(Path mapFile, long estSize) {
this.path = mapFile;
this.estSize = estSize;
}
Path path;
long estSize;
@Override
public String toString() {
return path + " " + estSize;
}
}
private Map> assignMapFiles(Map> assignments,
Map locations, int numThreads) {
// group assignments by tablet
Map> assignmentsPerTablet = new TreeMap<>();
assignments.forEach((mapFile, tabletsToAssignMapFileTo) -> tabletsToAssignMapFileTo
.forEach(assignmentInfo -> assignmentsPerTablet
.computeIfAbsent(assignmentInfo.ke, k -> new ArrayList<>())
.add(new PathSize(mapFile, assignmentInfo.estSize))));
// group assignments by tabletserver
Map> assignmentFailures = Collections.synchronizedMap(new TreeMap<>());
TreeMap>> assignmentsPerTabletServer = new TreeMap<>();
assignmentsPerTablet.forEach((ke, pathSizes) -> {
String location = locations.get(ke);
if (location == null) {
synchronized (assignmentFailures) {
pathSizes.forEach(pathSize -> assignmentFailures
.computeIfAbsent(pathSize.path, k -> new ArrayList<>()).add(ke));
}
log.warn(
"Could not assign {} map files to tablet {} because it had no location, will retry ...",
pathSizes.size(), ke);
} else {
assignmentsPerTabletServer.computeIfAbsent(location, k -> new TreeMap<>()).put(ke,
pathSizes);
}
});
ExecutorService threadPool =
ThreadPools.getServerThreadPools().createFixedThreadPool(numThreads, "submit", false);
for (Entry>> entry : assignmentsPerTabletServer
.entrySet()) {
String location = entry.getKey();
threadPool.execute(new AssignmentTask(assignmentFailures, location, entry.getValue()));
}
threadPool.shutdown();
while (!threadPool.isTerminated()) {
try {
threadPool.awaitTermination(60, TimeUnit.SECONDS);
} catch (InterruptedException e) {
log.error(
"Encountered InterruptedException while waiting for the thread pool to terminate.", e);
throw new RuntimeException(e);
}
}
return assignmentFailures;
}
private List assignMapFiles(ClientContext context, HostAndPort location,
Map> assignmentsPerTablet)
throws AccumuloException, AccumuloSecurityException {
try {
long timeInMillis = context.getConfiguration().getTimeInMillis(Property.TSERV_BULK_TIMEOUT);
TabletClientService.Iface client =
ThriftUtil.getClient(ThriftClientTypes.TABLET_SERVER, location, context, timeInMillis);
try {
HashMap> files =
new HashMap<>();
for (Entry> entry : assignmentsPerTablet.entrySet()) {
HashMap tabletFiles =
new HashMap<>();
files.put(entry.getKey(), tabletFiles);
for (PathSize pathSize : entry.getValue()) {
org.apache.accumulo.core.dataImpl.thrift.MapFileInfo mfi =
new org.apache.accumulo.core.dataImpl.thrift.MapFileInfo(pathSize.estSize);
tabletFiles.put(pathSize.path.toString(), mfi);
}
}
log.debug("Asking {} to bulk load {}", location, files);
List failures =
client.bulkImport(TraceUtil.traceInfo(), context.rpcCreds(), tid,
files.entrySet().stream()
.collect(Collectors.toMap(entry -> entry.getKey().toThrift(), Entry::getValue)),
setTime);
return failures.stream().map(KeyExtent::fromThrift).collect(Collectors.toList());
} finally {
ThriftUtil.returnClient((TServiceClient) client, context);
}
} catch (ThriftSecurityException e) {
throw new AccumuloSecurityException(e.user, e.code, e);
} catch (Exception t) {
log.error("Encountered unknown exception in assignMapFiles.", t);
throw new AccumuloException(t);
}
}
public static List findOverlappingTablets(ServerContext context, VolumeManager fs,
TabletLocator locator, Path file, CryptoService cs) throws Exception {
return findOverlappingTablets(context, fs, locator, file, null, null, cs);
}
public static List findOverlappingTablets(ServerContext context, VolumeManager fs,
TabletLocator locator, Path file, KeyExtent failed, CryptoService cs) throws Exception {
locator.invalidateCache(failed);
Text start = getStartRowForExtent(failed);
return findOverlappingTablets(context, fs, locator, file, start, failed.endRow(), cs);
}
protected static Text getStartRowForExtent(KeyExtent extent) {
Text start = extent.prevEndRow();
if (start != null) {
start = new Text(start);
// ACCUMULO-3967 We want the first possible key in this tablet, not the following row from the
// previous tablet
start.append(byte0, 0, 1);
}
return start;
}
static final byte[] byte0 = {0};
public static List findOverlappingTablets(ServerContext context, VolumeManager vm,
TabletLocator locator, Path file, Text startRow, Text endRow, CryptoService cs)
throws Exception {
List result = new ArrayList<>();
Collection columnFamilies = Collections.emptyList();
String filename = file.toString();
// log.debug(filename + " finding overlapping tablets " + startRow + " -> " + endRow);
FileSystem fs = vm.getFileSystemByPath(file);
try (FileSKVIterator reader =
FileOperations.getInstance().newReaderBuilder().forFile(filename, fs, fs.getConf(), cs)
.withTableConfiguration(context.getConfiguration()).seekToBeginning().build()) {
Text row = startRow;
if (row == null) {
row = new Text();
}
while (true) {
// log.debug(filename + " Seeking to row " + row);
reader.seek(new Range(row, null), columnFamilies, false);
if (!reader.hasTop()) {
// log.debug(filename + " not found");
break;
}
row = reader.getTopKey().getRow();
TabletLocation tabletLocation = locator.locateTablet(context, row, false, true);
// log.debug(filename + " found row " + row + " at location " + tabletLocation);
result.add(tabletLocation);
row = tabletLocation.tablet_extent.endRow();
if (row != null && (endRow == null || row.compareTo(endRow) < 0)) {
row = new Text(row);
row.append(byte0, 0, byte0.length);
} else {
break;
}
}
}
// log.debug(filename + " to be sent to " + result);
return result;
}
public static class AssignmentStats {
private Map counts;
private int numUniqueMapFiles;
private Map> completeFailures = null;
private Set failedFailures = null;
AssignmentStats(int fileCount) {
counts = new HashMap<>();
numUniqueMapFiles = fileCount;
}
void attemptingAssignments(Map> assignments) {
for (Entry> entry : assignments.entrySet()) {
for (TabletLocation tl : entry.getValue()) {
Integer count = getCount(tl.tablet_extent);
counts.put(tl.tablet_extent, count + 1);
}
}
}
void assignmentsFailed(Map> assignmentFailures) {
for (Entry> entry : assignmentFailures.entrySet()) {
for (KeyExtent ke : entry.getValue()) {
Integer count = getCount(ke);
counts.put(ke, count - 1);
}
}
}
void assignmentsAbandoned(Map> completeFailures) {
this.completeFailures = completeFailures;
}
private Integer getCount(KeyExtent parent) {
Integer count = counts.get(parent);
if (count == null) {
count = 0;
}
return count;
}
void unrecoveredMapFiles(Set failedFailures) {
this.failedFailures = failedFailures;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
int totalAssignments = 0;
int tabletsImportedTo = 0;
int min = Integer.MAX_VALUE, max = Integer.MIN_VALUE;
for (Entry entry : counts.entrySet()) {
totalAssignments += entry.getValue();
if (entry.getValue() > 0) {
tabletsImportedTo++;
}
if (entry.getValue() < min) {
min = entry.getValue();
}
if (entry.getValue() > max) {
max = entry.getValue();
}
}
double stddev = 0;
for (Entry entry : counts.entrySet()) {
stddev += Math.pow(entry.getValue() - totalAssignments / (double) counts.size(), 2);
}
stddev = stddev / counts.size();
stddev = Math.sqrt(stddev);
Set failedTablets = new HashSet<>();
for (List ft : completeFailures.values()) {
failedTablets.addAll(ft);
}
sb.append("BULK IMPORT ASSIGNMENT STATISTICS\n");
sb.append(String.format("# of map files : %,10d%n", numUniqueMapFiles));
sb.append(String.format("# map files with failures : %,10d %6.2f%s%n",
completeFailures.size(), completeFailures.size() * 100.0 / numUniqueMapFiles, "%"));
sb.append(String.format("# failed failed map files : %,10d %s%n", failedFailures.size(),
failedFailures.isEmpty() ? "" : " <-- THIS IS BAD"));
sb.append(String.format("# of tablets : %,10d%n", counts.size()));
sb.append(String.format("# tablets imported to : %,10d %6.2f%s%n", tabletsImportedTo,
tabletsImportedTo * 100.0 / counts.size(), "%"));
sb.append(String.format("# tablets with failures : %,10d %6.2f%s%n", failedTablets.size(),
failedTablets.size() * 100.0 / counts.size(), "%"));
sb.append(String.format("min map files per tablet : %,10d%n", min));
sb.append(String.format("max map files per tablet : %,10d%n", max));
sb.append(String.format("avg map files per tablet : %,10.2f (std dev = %.2f)%n",
totalAssignments / (double) counts.size(), stddev));
return sb.toString();
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy