All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.benchmark.byTask.feeds.ContentItemsSource Maven / Gradle / Ivy

There is a newer version: 9.11.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.byTask.feeds;


import java.io.Closeable;
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ArrayList;

import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.Format;

/**
 * Base class for source of data for benchmarking.
 * <p>
 * Keeps track of various statistics, such as how many data items were
 * generated, size in bytes etc.
 * <p>
 * Supports the following configuration parameters:
 * <ul>
 * <li><b>content.source.forever</b> - specifies whether to generate items
 * forever (<b>default=true</b>).
 * <li><b>content.source.verbose</b> - specifies whether messages should be
 * output by the content source (<b>default=false</b>).
 * <li><b>content.source.encoding</b> - specifies which encoding to use when
 * reading the files of that content source. Certain implementations may define
 * a default value if this parameter is not specified. (<b>default=null</b>).
 * <li><b>content.source.log.step</b> - specifies for how many items a
 * message should be logged. If set to 0 it means no logging should occur.
 * <b>NOTE:</b> if verbose is set to false, logging should not occur even if
 * logStep is not 0 (<b>default=0</b>).
 * </ul>
 */
public abstract class ContentItemsSource implements Closeable {

  /** Bytes generated since the last call to {@link #resetInputs()}. */
  private long bytesCount;
  /** Bytes generated over the whole lifetime of this source. */
  private long totalBytesCount;
  /** Items generated since the last call to {@link #resetInputs()}. */
  private int itemCount;
  /** Items generated over the whole lifetime of this source. */
  private int totalItemCount;
  private Config config;

  // Totals reported by the previous printStatistics() call; used so that a
  // "total" line is only emitted again once the total has actually grown.
  private int lastPrintedNumUniqueTexts = 0;
  private long lastPrintedNumUniqueBytes = 0;

  /** Number of statistics reports printed so far. */
  private int printNum = 0;

  /** Value of <code>content.source.forever</code>, set by {@link #setConfig(Config)}. */
  protected boolean forever;
  /** Value of <code>content.source.log.step</code>, set by {@link #setConfig(Config)}. */
  protected int logStep;
  /** Value of <code>content.source.verbose</code>, set by {@link #setConfig(Config)}. */
  protected boolean verbose;
  /** Value of <code>content.source.encoding</code>, set by {@link #setConfig(Config)}. */
  protected String encoding;

  /** update count of bytes generated by this source */
  protected final synchronized void addBytes(long numBytes) {
    bytesCount += numBytes;
    totalBytesCount += numBytes;
  }

  /** update count of items generated by this source */
  protected final synchronized void addItem() {
    ++itemCount;
    ++totalItemCount;
  }

  /**
   * A convenience method for collecting all the files of a content source from
   * a given directory. The collected {@link Path} instances are stored in the
   * given <code>files</code>.
   *
   * @param dir the directory whose file tree is walked
   * @param files receives the real path of every readable file visited
   * @throws IOException if walking the tree or resolving a real path fails
   */
  protected final void collectFiles(Path dir, final ArrayList<Path> files) throws IOException {
    // The visitor must be parameterized with Path: with a raw SimpleFileVisitor
    // the visitFile(Path, ...) method below would not override anything.
    Files.walkFileTree(dir, new SimpleFileVisitor<Path>() {
      @Override
      public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
        if (Files.isReadable(file)) {
          files.add(file.toRealPath());
        }
        return FileVisitResult.CONTINUE;
      }
    });
  }

  /**
   * Returns true whether it's time to log a message (depending on verbose and
   * the number of items generated).
   */
  protected final boolean shouldLog() {
    return verbose && logStep > 0 && itemCount % logStep == 0;
  }

  /** Called when reading from this content source is no longer required. */
  @Override
  public abstract void close() throws IOException;

  /** Returns the number of bytes generated since last reset. */
  public final long getBytesCount() {
    return bytesCount;
  }

  /** Returns the number of generated items since last reset. */
  public final int getItemsCount() {
    return itemCount;
  }

  /** Returns the {@link Config} last passed to {@link #setConfig(Config)}. */
  public final Config getConfig() {
    return config;
  }

  /** Returns the total number of bytes that were generated by this source. */
  public final long getTotalBytesCount() {
    return totalBytesCount;
  }

  /** Returns the total number of generated items. */
  public final int getTotalItemsCount() {
    return totalItemCount;
  }

  /**
   * Resets the input for this content source, so that the test would behave as
   * if it was just started, input-wise.
   * <p>
   * <b>NOTE:</b> the default implementation resets the number of bytes and
   * items generated since the last reset, so it's important to call
   * super.resetInputs in case you override this method.
   *
   * @throws IOException not thrown by this implementation; declared so that
   *         overriding methods may throw it
   */
  public void resetInputs() throws IOException {
    bytesCount = 0;
    itemCount = 0;
  }

  /**
   * Sets the {@link Config} for this content source. If you override this
   * method, you must call super.setConfig.
   *
   * @param config the configuration from which the <code>content.source.*</code>
   *        parameters are read
   */
  public void setConfig(Config config) {
    this.config = config;
    forever = config.get("content.source.forever", true);
    logStep = config.get("content.source.log.step", 0);
    verbose = config.get("content.source.verbose", false);
    encoding = config.get("content.source.encoding", null);
  }

  /**
   * Prints a statistics report for this source to {@link System#out}.
   * <p>
   * Nothing is printed unless <code>verbose</code> is set. A "total" line is
   * included only when that total has grown since the previous report, and the
   * "since last inputs reset" lines only when at least one item was generated
   * since the last reset. If no line qualifies, nothing is printed and the
   * report counter is not advanced.
   *
   * @param itemsName the name used for the items in the printed messages
   */
  public void printStatistics(String itemsName) {
    if (!verbose) {
      return;
    }
    boolean print = false;
    String col = " ";
    StringBuilder sb = new StringBuilder();
    String newline = System.getProperty("line.separator");
    sb.append("------------> ").append(getClass().getSimpleName())
        .append(" statistics (").append(printNum).append("): ").append(newline);

    int nut = getTotalItemsCount();
    if (nut > lastPrintedNumUniqueTexts) {
      print = true;
      sb.append("total count of ").append(itemsName).append(": ")
          .append(Format.format(0,nut,col)).append(newline);
      lastPrintedNumUniqueTexts = nut;
    }

    long nub = getTotalBytesCount();
    if (nub > lastPrintedNumUniqueBytes) {
      print = true;
      sb.append("total bytes of ").append(itemsName).append(": ")
          .append(Format.format(0,nub,col)).append(newline);
      lastPrintedNumUniqueBytes = nub;
    }

    if (getItemsCount() > 0) {
      print = true;
      sb.append("num ").append(itemsName).append(" added since last inputs reset: ")
          .append(Format.format(0,getItemsCount(),col)).append(newline);
      sb.append("total bytes added for ").append(itemsName).append(" since last inputs reset: ")
          .append(Format.format(0,getBytesCount(),col)).append(newline);
    }

    if (print) {
      System.out.println(sb.append(newline).toString());
      printNum++;
    }
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy