package com.google.cloud.hadoop.fs.gcs;

import static com.google.common.util.concurrent.Futures.immediateVoidFuture;
import static java.lang.Integer.parseInt;
import static java.lang.Long.parseLong;
import static java.util.Collections.newSetFromMap;
import static java.util.concurrent.Executors.newSingleThreadExecutor;
import static java.util.concurrent.Executors.newSingleThreadScheduledExecutor;
import static java.util.concurrent.TimeUnit.SECONDS;
import static java.util.stream.Collectors.toMap;

import com.google.common.collect.ImmutableList;
import com.google.common.flogger.GoogleLogger;
import com.google.common.util.concurrent.Futures;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.LongSummaryStatistics;
import java.util.Map;
import java.util.Optional;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystem.Statistics;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * File system operations benchmark tool.
 *
 * <p>Usage:
 *
 * <pre>{@code
 * hadoop jar /usr/lib/hadoop/lib/gcs-connector.jar com.google.cloud.hadoop.fs.gcs.FsBenchmark \
 *     {read,random-read,write} --file=gs://<bucket>/<object_path> [--no-warmup] [--verbose]
 * }</pre>
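 *
 * <p>For example, a hypothetical multi-threaded read benchmark (bucket and object names below
 * are illustrative placeholders, not part of the original docs):
 *
 * <pre>{@code
 * hadoop jar /usr/lib/hadoop/lib/gcs-connector.jar com.google.cloud.hadoop.fs.gcs.FsBenchmark \
 *     read --file=gs://my-bucket/benchmark/test-object.bin --read-size=1048576 --num-threads=4
 * }</pre>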
 *
 * <p>For the write benchmark, the {@code --file} parameter takes a GCS directory location where
 * temp files will be created. Please clean up the directory after the test.
 */
public class FsBenchmark extends Configured implements Tool {

  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  public static void main(String[] args) throws Exception {
    // Let ToolRunner handle generic command-line options.
    int result = ToolRunner.run(new FsBenchmark(), args);
    System.exit(result);
  }

  private FsBenchmark() {
    super(new Configuration());
  }

  @Override
  public int run(String[] args) throws IOException {
    String cmd = args[0];
    // Parse the remaining "--key=value" arguments into a map; flags without '=' map to "".
    Map<String, String> cmdArgs =
        ImmutableList.copyOf(args).subList(1, args.length).stream()
            .collect(
                toMap(
                    arg -> arg.split("=")[0],
                    arg -> arg.contains("=") ? arg.split("=")[1] : "",
                    (u, v) -> {
                      throw new IllegalStateException(String.format("Duplicate key %s", u));
                    },
                    HashMap::new));
    URI testUri = new Path(cmdArgs.getOrDefault("--file", cmdArgs.get("--bucket"))).toUri();
    FileSystem fs = FileSystem.get(testUri, getConf());
    int res = 0;
    try {
      res = runWithInstrumentation(fs, cmd, cmdArgs);
    } catch (Throwable e) {
      logger.atSevere().withCause(e).log(
          "Failed to execute '%s' command with arguments: %s", cmd, cmdArgs);
    }
    System.out.println(res == 0 ? "Success!" : "Failure!");
    return res;
  }

  /** Helper that runs a command with try/catch, periodic progress reporting, and final stats. */
  private int runWithInstrumentation(FileSystem fs, String cmd, Map<String, String> cmdArgs) {
    Statistics statistics = FileSystem.getStatistics().get(fs.getScheme());
    Optional<ScheduledExecutorService> progressReporter = Optional.empty();
    Future<?> statsFuture = immediateVoidFuture();
    if (cmdArgs.containsKey("--verbose")) {
      progressReporter = Optional.of(newSingleThreadScheduledExecutor());
      statsFuture =
          progressReporter
              .get()
              .scheduleWithFixedDelay(
                  () -> System.out.printf("Progress stats: %s%n", statistics),
                  parseLong(cmdArgs.getOrDefault("--verbose-delay-seconds", "5")),
                  parseLong(cmdArgs.getOrDefault("--verbose-interval-seconds", "15")),
                  SECONDS);
    }
    try {
      return runInternal(fs, cmd, cmdArgs);
    } finally {
      statsFuture.cancel(/* mayInterruptIfRunning= */ true);
      progressReporter.ifPresent(ExecutorService::shutdownNow);
      System.out.printf("Final stats: %s%n", statistics);
    }
  }
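
  // Illustrative note (not in the original source): run() splits each trailing argument on '=',
  // so a hypothetical command line "read --file=gs://my-bucket/obj --num-threads=4 --verbose"
  // yields cmd="read" and cmdArgs of {"--file"="gs://my-bucket/obj", "--num-threads"="4",
  // "--verbose"=""}; value-less flags map to the empty string.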

  /**
   * Dispatches the requested benchmark command:
   *
   * <ul>
   *   <li>{@code write} - benchmarks write performance by fully writing temp files under the
   *       given directory, optionally in multiple threads.
   *   <li>{@code read} - benchmarks sequential read performance, optionally in multiple threads.
   *   <li>{@code random-read} - benchmarks random seek-and-read performance, optionally in
   *       multiple threads.
   * </ul>
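   *
   * <p>For example (hypothetical placeholder paths, not from the original docs), {@code
   * random-read --file=gs://<bucket>/<object> --num-reads=1000} performs 1000 positioned reads
   * per opened stream.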
   */
  private int runInternal(FileSystem fs, String cmd, Map<String, String> cmdArgs) {
    switch (cmd) {
      case "write":
        return benchmarkWrite(fs, cmdArgs);
      case "read":
        return benchmarkRead(fs, cmdArgs);
      case "random-read":
        return benchmarkRandomRead(fs, cmdArgs);
    }
    throw new IllegalArgumentException("Unknown command: " + cmd);
  }

  private int benchmarkWrite(FileSystem fs, Map<String, String> args) {
    if (args.size() < 1) {
      System.err.println(
          "Usage: write"
              + " --file=gs://${BUCKET}/path/to/test/dir/"
              + " [--total-size=<total_size>]"
              + " [--write-size=<write_size>]"
              + " [--num-writes=<num_writes>]"
              + " [--num-threads=<num_threads>]");
      return 1;
    }
    Path testFile = new Path(args.get("--file"));
    benchmarkWrite(
        fs,
        testFile,
        parseInt(args.getOrDefault("--write-size", String.valueOf(1024))),
        parseInt(args.getOrDefault("--num-writes", String.valueOf(1))),
        parseInt(args.getOrDefault("--num-threads", String.valueOf(1))),
        parseLong(args.getOrDefault("--total-size", String.valueOf(10 * 1024))));
    return 0;
  }

  private void benchmarkWrite(
      FileSystem fs, Path testFile, int writeSize, int numWrites, int numThreads, long totalSize) {
    System.out.printf(
        "Running write test using %d bytes writes to fully write '%s' file %d times in %d"
            + " threads%n",
        writeSize, testFile, numWrites, numThreads);
    Set<LongSummaryStatistics> writeFileBytesList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> writeFileTimeNsList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> writeCallBytesList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> writeCallTimeNsList = newSetFromMap(new ConcurrentHashMap<>());
    String tempFilenameKey = UUID.randomUUID().toString().substring(0, 6);
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    CountDownLatch initLatch = new CountDownLatch(numThreads);
    CountDownLatch startLatch = new CountDownLatch(1);
    CountDownLatch stopLatch = new CountDownLatch(numThreads);
    List<Future<Void>> futures = new ArrayList<>(numThreads);
    for (int i = 0; i < numThreads; i++) {
      int fileCounter = i;
      futures.add(
          executor.submit(
              () -> {
                LongSummaryStatistics writeFileBytes = newLongSummaryStatistics(writeFileBytesList);
                LongSummaryStatistics writeFileTimeNs =
                    newLongSummaryStatistics(writeFileTimeNsList);
                LongSummaryStatistics writeCallBytes = newLongSummaryStatistics(writeCallBytesList);
                LongSummaryStatistics writeCallTimeNs =
                    newLongSummaryStatistics(writeCallTimeNsList);
                byte[] writeBuffer = new byte[writeSize];
                Random r = new Random();
                r.nextBytes(writeBuffer);
                // Each thread writes its own temp file under the test directory.
                String tempFileName =
                    String.format("/test-%s-%03d.bin", tempFilenameKey, fileCounter);
                Path testFileToIO = new Path(testFile.toString() + tempFileName);
                initLatch.countDown();
                startLatch.await();
                try {
                  for (int j = 0; j < numWrites; j++) {
                    try (FSDataOutputStream output = fs.create(testFileToIO)) {
                      long writeStart = System.nanoTime();
                      long fileBytesWrite = 0;
                      do {
                        long writeCallStart = System.nanoTime();
                        output.write(writeBuffer);
                        fileBytesWrite += writeSize;
                        writeCallBytes.accept(writeSize);
                        writeCallTimeNs.accept(System.nanoTime() - writeCallStart);
                      } while (fileBytesWrite < totalSize);
                      writeFileBytes.accept(fileBytesWrite);
                      writeFileTimeNs.accept(System.nanoTime() - writeStart);
                    }
                  }
                } finally {
                  stopLatch.countDown();
                }
                return null;
              }));
    }
    executor.shutdown();
    awaitUnchecked(initLatch);
    long startTimeNs = System.nanoTime();
    startLatch.countDown();
    awaitUnchecked(stopLatch);
    long runtimeNs = System.nanoTime() - startTimeNs;

    // Verify that all threads completed without errors.
    futures.forEach(Futures::getUnchecked);

    printTimeStats("Write call time", writeCallTimeNsList);
    printSizeStats("Write call data", writeCallBytesList);
    printThroughputStats("Write call throughput", writeCallTimeNsList, writeCallBytesList);
    printTimeStats("Write file time", writeFileTimeNsList);
    printSizeStats("Write file data", writeFileBytesList);
    printThroughputStats("Write file throughput", writeFileTimeNsList, writeFileBytesList);
    System.out.printf(
        "Write average throughput (MiB/s): %.3f%n",
        bytesToMebibytes(combineStats(writeFileBytesList).getSum()) / nanosToSeconds(runtimeNs));
  }
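
  // Worked example (illustrative, not from the original source): with the defaults above
  // (--write-size=1024, --total-size=10240, --num-writes=1, --num-threads=1), the single thread
  // issues ceil(10240 / 1024) = 10 write() calls to fully write its temp file once; the final
  // call may overshoot --total-size because the loop checks the limit only after each write.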

  private int benchmarkRead(FileSystem fs, Map<String, String> args) {
    if (args.size() < 1) {
      System.err.println(
          "Usage: read"
              + " --file=gs://${BUCKET}/path/to/test/object"
              + " [--read-size=<read_size>]"
              + " [--num-reads=<num_reads>]"
              + " [--num-threads=<num_threads>]");
      return 1;
    }
    Path testFile = new Path(args.get("--file"));
    warmup(
        args,
        () ->
            benchmarkRead(
                fs, testFile, /* readSize= */ 1024, /* numReads= */ 1, /* numThreads= */ 2));
    benchmarkRead(
        fs,
        testFile,
        parseInt(args.getOrDefault("--read-size", String.valueOf(1024))),
        parseInt(args.getOrDefault("--num-reads", String.valueOf(1))),
        parseInt(args.getOrDefault("--num-threads", String.valueOf(1))));
    return 0;
  }

  private void benchmarkRead(
      FileSystem fs, Path testFile, int readSize, int numReads, int numThreads) {
    System.out.printf(
        "Running read test using %d bytes reads to fully read '%s' file %d times in %d threads%n",
        readSize, testFile, numReads, numThreads);
    Set<LongSummaryStatistics> readFileBytesList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> readFileTimeNsList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> readCallBytesList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> readCallTimeNsList = newSetFromMap(new ConcurrentHashMap<>());
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    CountDownLatch initLatch = new CountDownLatch(numThreads);
    CountDownLatch startLatch = new CountDownLatch(1);
    CountDownLatch stopLatch = new CountDownLatch(numThreads);
    List<Future<Void>> futures = new ArrayList<>(numThreads);
    for (int i = 0; i < numThreads; i++) {
      futures.add(
          executor.submit(
              () -> {
                LongSummaryStatistics readFileBytes = newLongSummaryStatistics(readFileBytesList);
                LongSummaryStatistics readFileTimeNs = newLongSummaryStatistics(readFileTimeNsList);
                LongSummaryStatistics readCallBytes = newLongSummaryStatistics(readCallBytesList);
                LongSummaryStatistics readCallTimeNs = newLongSummaryStatistics(readCallTimeNsList);
                byte[] readBuffer = new byte[readSize];
                initLatch.countDown();
                startLatch.await();
                try {
                  for (int j = 0; j < numReads; j++) {
                    try (FSDataInputStream input = fs.open(testFile)) {
                      long readStart = System.nanoTime();
                      long fileBytesRead = 0;
                      int bytesRead;
                      do {
                        long readCallStart = System.nanoTime();
                        bytesRead = input.read(readBuffer);
                        if (bytesRead > 0) {
                          fileBytesRead += bytesRead;
                          readCallBytes.accept(bytesRead);
                        }
                        readCallTimeNs.accept(System.nanoTime() - readCallStart);
                      } while (bytesRead >= 0);
                      readFileBytes.accept(fileBytesRead);
                      readFileTimeNs.accept(System.nanoTime() - readStart);
                    }
                  }
                } finally {
                  stopLatch.countDown();
                }
                return null;
              }));
    }
    executor.shutdown();
    awaitUnchecked(initLatch);
    long startTimeNs = System.nanoTime();
    startLatch.countDown();
    awaitUnchecked(stopLatch);
    long runtimeNs = System.nanoTime() - startTimeNs;

    // Verify that all threads completed without errors.
    futures.forEach(Futures::getUnchecked);

    printTimeStats("Read call time", readCallTimeNsList);
    printSizeStats("Read call data", readCallBytesList);
    printThroughputStats("Read call throughput", readCallTimeNsList, readCallBytesList);
    printTimeStats("Read file time", readFileTimeNsList);
    printSizeStats("Read file data", readFileBytesList);
    printThroughputStats("Read file throughput", readFileTimeNsList, readFileBytesList);
    System.out.printf(
        "Read average throughput (MiB/s): %.3f%n",
        bytesToMebibytes(combineStats(readFileBytesList).getSum()) / nanosToSeconds(runtimeNs));
  }
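
  // Illustrative note (not in the original source): the read loop above always drains the
  // object to EOF (it loops while read() >= 0), so --read-size only controls the buffer
  // granularity. For a hypothetical 100 MiB object with --read-size=1048576, each full pass
  // records roughly 100 read-call samples, and --num-reads sets how many passes each thread makes.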

  private int benchmarkRandomRead(FileSystem fs, Map<String, String> args) {
    if (args.size() < 1) {
      System.err.println(
          "Usage: random-read"
              + " --file=gs://${BUCKET}/path/to/test/object"
              + " [--num-open=<num_open>]"
              + " [--read-size=<read_size>]"
              + " [--num-reads=<num_reads>]"
              + " [--num-threads=<num_threads>]");
      return 1;
    }
    Path testFile = new Path(args.get("--file"));
    warmup(
        args,
        () ->
            benchmarkRandomRead(
                fs,
                testFile,
                /* numOpen= */ 5,
                /* readSize= */ 1024,
                /* numReads= */ 20,
                /* numThreads= */ 5));
    benchmarkRandomRead(
        fs,
        testFile,
        parseInt(args.getOrDefault("--num-open", String.valueOf(1))),
        parseInt(args.getOrDefault("--read-size", String.valueOf(1024))),
        parseInt(args.getOrDefault("--num-reads", String.valueOf(100))),
        parseInt(args.getOrDefault("--num-threads", String.valueOf(1))));
    return 0;
  }

  private void benchmarkRandomRead(
      FileSystem fs, Path testFile, int numOpen, int readSize, int numReads, int numThreads) {
    System.out.printf(
        "Running random read test that reads %d bytes from '%s' file %d times per %d open"
            + " operations in %d threads%n",
        readSize, testFile, numReads, numOpen, numThreads);
    Set<LongSummaryStatistics> openLatencyNsList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> seekLatencyNsList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> readLatencyNsList = newSetFromMap(new ConcurrentHashMap<>());
    Set<LongSummaryStatistics> closeLatencyNsList = newSetFromMap(new ConcurrentHashMap<>());
    ExecutorService executor = Executors.newFixedThreadPool(numThreads);
    CountDownLatch initLatch = new CountDownLatch(numThreads);
    CountDownLatch startLatch = new CountDownLatch(1);
    CountDownLatch stopLatch = new CountDownLatch(numThreads);
    List<Future<Void>> futures = new ArrayList<>(numThreads);
    for (int i = 0; i < numThreads; i++) {
      futures.add(
          executor.submit(
              () -> {
                FileStatus fileStatus = fs.getFileStatus(testFile);
                long fileSize = fileStatus.getLen();
                long maxReadPositionExclusive = fileSize - readSize + 1;
                LongSummaryStatistics openLatencyNs = newLongSummaryStatistics(openLatencyNsList);
                LongSummaryStatistics seekLatencyNs = newLongSummaryStatistics(seekLatencyNsList);
                LongSummaryStatistics readLatencyNs = newLongSummaryStatistics(readLatencyNsList);
                LongSummaryStatistics closeLatencyNs = newLongSummaryStatistics(closeLatencyNsList);
                ThreadLocalRandom random = ThreadLocalRandom.current();
                byte[] readBuffer = new byte[readSize];
                initLatch.countDown();
                startLatch.await();
                try {
                  for (int j = 0; j < numOpen; j++) {
                    try {
                      // One random position is drawn per open and reused for all its reads.
                      long seekPos = random.nextLong(maxReadPositionExclusive);
                      long openStart = System.nanoTime();
                      FSDataInputStream input = fs.open(testFile);
                      openLatencyNs.accept(System.nanoTime() - openStart);
                      try {
                        for (int k = 0; k < numReads; k++) {
                          long seekStart = System.nanoTime();
                          input.seek(seekPos);
                          seekLatencyNs.accept(System.nanoTime() - seekStart);
                          long readStart = System.nanoTime();
                          int numRead = input.read(readBuffer);
                          readLatencyNs.accept(System.nanoTime() - readStart);
                          if (numRead != readSize) {
                            System.err.printf(
                                "Read %d bytes from %d bytes at offset %d!%n",
                                numRead, readSize, seekPos);
                          }
                        }
                      } finally {
                        long closeStart = System.nanoTime();
                        input.close();
                        closeLatencyNs.accept(System.nanoTime() - closeStart);
                      }
                    } catch (Throwable e) {
                      logger.atSevere().withCause(e).log(
                          "Failed random read from '%s'", testFile);
                    }
                  }
                } finally {
                  stopLatch.countDown();
                }
                return null;
              }));
    }
    executor.shutdown();
    awaitUnchecked(initLatch);
    long startTime = System.nanoTime();
    startLatch.countDown();
    awaitUnchecked(stopLatch);
    double runtimeSeconds = nanosToSeconds(System.nanoTime() - startTime);
    long operations = combineStats(readLatencyNsList).getCount();

    // Verify that all threads completed without errors.
    futures.forEach(Futures::getUnchecked);

    printTimeStats("Open latency ", combineStats(openLatencyNsList));
    printTimeStats("Seek latency ", combineStats(seekLatencyNsList));
    printTimeStats("Read latency ", combineStats(readLatencyNsList));
    printTimeStats("Close latency", combineStats(closeLatencyNsList));
    System.out.printf(
        "Average QPS: %.3f (%d in total %.3fs)%n",
        operations / runtimeSeconds, operations, runtimeSeconds);
  }
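
  // Worked example (illustrative, not from the original source): "Average QPS" divides the total
  // number of read() calls across all threads by wall-clock runtime. A hypothetical run with
  // --num-threads=8, --num-open=10, --num-reads=100 that finishes in 4 seconds reports
  // 8 * 10 * 100 / 4 = 2000 QPS.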

  private static void warmup(Map<String, String> args, Runnable warmupFn) {
    if (args.containsKey("--no-warmup")) {
      System.out.println("=== Skipping warmup ===");
      return;
    }
    System.out.println("=== Running warmup ===");
    ExecutorService warmupExecutor = newSingleThreadExecutor();
    try {
      warmupExecutor.submit(warmupFn).get();
    } catch (ExecutionException | InterruptedException e) {
      if (e instanceof InterruptedException) {
        Thread.currentThread().interrupt();
      }
      throw new RuntimeException("Benchmark warmup failed", e);
    } finally {
      warmupExecutor.shutdownNow();
    }
    System.out.println("=== Finished warmup ===\n");
  }

  /** Creates a new stats accumulator and registers it in the given cross-thread collection. */
  private static LongSummaryStatistics newLongSummaryStatistics(
      Collection<LongSummaryStatistics> statsList) {
    LongSummaryStatistics stats = new LongSummaryStatistics();
    statsList.add(stats);
    return stats;
  }

  private static void awaitUnchecked(CountDownLatch latch) {
    try {
      latch.await();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      throw new RuntimeException("CountDownLatch.await interrupted", e);
    }
  }

  private static void printTimeStats(String name, Collection<LongSummaryStatistics> timeStats) {
    printTimeStats(name, combineStats(timeStats));
  }

  private static void printTimeStats(String name, LongSummaryStatistics timeStats) {
    System.out.printf(
        "%s (ms): min=%.5f, average=%.5f, max=%.5f (count=%d)%n",
        name,
        nanosToMillis(timeStats.getMin()),
        nanosToMillis(timeStats.getAverage()),
        nanosToMillis(timeStats.getMax()),
        timeStats.getCount());
  }

  private static void printSizeStats(String name, Collection<LongSummaryStatistics> sizeStats) {
    printSizeStats(name, combineStats(sizeStats));
  }

  private static void printSizeStats(String name, LongSummaryStatistics sizeStats) {
    System.out.printf(
        "%s (MiB): min=%.5f, average=%.5f, max=%.5f (count=%d)%n",
        name,
        bytesToMebibytes(sizeStats.getMin()),
        bytesToMebibytes(sizeStats.getAverage()),
        bytesToMebibytes(sizeStats.getMax()),
        sizeStats.getCount());
  }

  private static void printThroughputStats(
      String name,
      Collection<LongSummaryStatistics> timeStats,
      Collection<LongSummaryStatistics> sizeStats) {
    printThroughputStats(name, combineStats(timeStats), combineStats(sizeStats).getAverage());
  }

  private static void printThroughputStats(
      String name, LongSummaryStatistics timeStats, double bytesProcessed) {
    // Min throughput corresponds to max time, and max throughput to min time.
    System.out.printf(
        "%s (MiB/s): min=%.3f, average=%.3f, max=%.3f (count=%d)%n",
        name,
        bytesToMebibytes(bytesProcessed) / nanosToSeconds(timeStats.getMax()),
        bytesToMebibytes(bytesProcessed) / nanosToSeconds(timeStats.getAverage()),
        bytesToMebibytes(bytesProcessed) / nanosToSeconds(timeStats.getMin()),
        timeStats.getCount());
  }

  private static LongSummaryStatistics combineStats(Collection<LongSummaryStatistics> stats) {
    return stats.stream()
        .collect(
            LongSummaryStatistics::new,
            LongSummaryStatistics::combine,
            LongSummaryStatistics::combine);
  }

  private static double nanosToMillis(double nanos) {
    return nanos / 1_000_000.0;
  }

  private static double nanosToSeconds(double nanos) {
    return nanos / 1_000_000_000.0;
  }

  private static double bytesToMebibytes(double bytes) {
    return bytes / 1024 / 1024;
  }
}