All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.accumulo.examples.simple.client.RandomBatchScanner Maven / Gradle / Ivy

There is a newer version: 1.10.4
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.accumulo.examples.simple.client;

import static org.apache.accumulo.examples.simple.client.RandomBatchWriter.abs;

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.TimeUnit;

import org.apache.accumulo.core.cli.BatchScannerOpts;
import org.apache.accumulo.core.cli.ClientOnRequiredTable;
import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.client.AccumuloSecurityException;
import org.apache.accumulo.core.client.BatchScanner;
import org.apache.accumulo.core.client.Connector;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.beust.jcommander.Parameter;

/**
 * Simple example for reading random batches of data from Accumulo. See docs/examples/README.batch
 * for instructions.
 */
public class RandomBatchScanner {
  private static final Logger log = LoggerFactory.getLogger(RandomBatchScanner.class);

  /**
   * Generate a number of ranges, each covering a single random row.
   *
   * @param num
   *          the number of ranges to generate
   * @param min
   *          the minimum row that will be generated
   * @param max
   *          the maximum row that will be generated
   * @param r
   *          a random number generator
   * @param ranges
   *          a set in which to store the generated ranges
   * @param expectedRows
   *          a map in which to store the rows covered by the ranges (initially mapped to false)
   */
  static void generateRandomQueries(int num, long min, long max, Random r, HashSet ranges,
      HashMap expectedRows) {
    log.info(String.format("Generating %,d random queries...", num));
    while (ranges.size() < num) {
      long rowid = (abs(r.nextLong()) % (max - min)) + min;

      Text row1 = new Text(String.format("row_%010d", rowid));

      Range range = new Range(new Text(row1));
      ranges.add(range);
      expectedRows.put(row1, false);
    }

    log.info("finished");
  }

  /**
   * Prints a count of the number of rows mapped to false.
   *
   * @return boolean indicating "were all the rows found?"
   */
  private static boolean checkAllRowsFound(HashMap expectedRows) {
    int count = 0;
    boolean allFound = true;
    for (Entry entry : expectedRows.entrySet())
      if (!entry.getValue())
        count++;

    if (count > 0) {
      log.warn("Did not find " + count + " rows");
      allFound = false;
    }
    return allFound;
  }

  /**
   * Generates a number of random queries, verifies that the key/value pairs returned were in the
   * queried ranges and that the values were generated by
   * {@link RandomBatchWriter#createValue(long, int)}. Prints information about the results.
   *
   * @param num
   *          the number of queries to generate
   * @param min
   *          the min row to query
   * @param max
   *          the max row to query
   * @param evs
   *          the expected size of the values
   * @param r
   *          a random number generator
   * @param tsbr
   *          a batch scanner
   * @return boolean indicating "did the queries go fine?"
   */
  static boolean doRandomQueries(int num, long min, long max, int evs, Random r,
      BatchScanner tsbr) {

    HashSet ranges = new HashSet<>(num);
    HashMap expectedRows = new java.util.HashMap<>();

    generateRandomQueries(num, min, max, r, ranges, expectedRows);

    tsbr.setRanges(ranges);

    CountingVerifyingReceiver receiver = new CountingVerifyingReceiver(expectedRows, evs);

    long t1 = System.currentTimeMillis();

    for (Entry entry : tsbr) {
      receiver.receive(entry.getKey(), entry.getValue());
    }

    long t2 = System.currentTimeMillis();

    log.info(String.format("%6.2f lookups/sec %6.2f secs%n", num / ((t2 - t1) / 1000.0),
        ((t2 - t1) / 1000.0)));
    log.info(String.format("num results : %,d%n", receiver.count));

    return checkAllRowsFound(expectedRows);
  }

  public static class Opts extends ClientOnRequiredTable {
    @Parameter(names = "--min", description = "miniumum row that will be generated")
    long min = 0;
    @Parameter(names = "--max", description = "maximum ow that will be generated")
    long max = 0;
    @Parameter(names = "--num", required = true, description = "number of ranges to generate")
    int num = 0;
    @Parameter(names = "--size", required = true, description = "size of the value to write")
    int size = 0;
    @Parameter(names = "--seed", description = "seed for pseudo-random number generator")
    Long seed = null;
  }

  /**
   * Scans over a specified number of entries to Accumulo using a {@link BatchScanner}. Completes
   * scans twice to compare times for a fresh query with those for a repeated query which has cached
   * metadata and connections already established.
   */
  public static void main(String[] args)
      throws AccumuloException, AccumuloSecurityException, TableNotFoundException {
    Opts opts = new Opts();
    BatchScannerOpts bsOpts = new BatchScannerOpts();
    opts.parseArgs(RandomBatchScanner.class.getName(), args, bsOpts);

    Connector connector = opts.getConnector();
    BatchScanner batchReader = connector.createBatchScanner(opts.getTableName(), opts.auths,
        bsOpts.scanThreads);
    batchReader.setTimeout(bsOpts.scanTimeout, TimeUnit.MILLISECONDS);

    Random r;
    if (opts.seed == null)
      r = new Random();
    else
      r = new Random(opts.seed);

    // do one cold
    boolean status = doRandomQueries(opts.num, opts.min, opts.max, opts.size, r, batchReader);

    System.gc();
    System.gc();
    System.gc();

    if (opts.seed == null)
      r = new Random();
    else
      r = new Random(opts.seed);

    // do one hot (connections already established, metadata table cached)
    status = status && doRandomQueries(opts.num, opts.min, opts.max, opts.size, r, batchReader);

    batchReader.close();
    if (!status) {
      System.exit(1);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy