// org.HdrHistogram.HistogramLogReader Maven / Gradle / Ivy
// Show all versions of driver-cql-shaded Show documentation
/**
* Written by Gil Tene of Azul Systems, and released to the public domain,
* as explained at http://creativecommons.org/publicdomain/zero/1.0/
*
* @author Gil Tene
*/
package org.HdrHistogram;
import java.io.*;
import java.util.zip.DataFormatException;
/**
* A histogram log reader.
*
* Histogram logs are used to capture full fidelity, per-time-interval
* histograms of a recorded value.
*
* For example, a histogram log can be used to capture high fidelity
* reaction-time logs for some measured system or subsystem component.
* Such a log would capture a full reaction time histogram for each
* logged interval, and could be used to later reconstruct a full
* HdrHistogram of the measured reaction time behavior for any arbitrary
* time range within the log, by adding [only] the relevant interval
* histograms.
*
* Histogram log format:
* A histogram log file consists of text lines. Lines beginning with
* the "#" character are optional and treated as comments. Lines
* containing the legend (starting with "Timestamp") are also optional
* and ignored in parsing the histogram log. All other lines must
* be valid interval description lines. Text fields are delimited by
* commas, spaces.
*
* A valid interval description line contains an optional Tag=tagString
* text field, followed by an interval description.
*
* A valid interval description must contain exactly four text fields:
*
* - StartTimestamp: The first field must contain a number parse-able as a Double value,
* representing the start timestamp of the interval in seconds.
* - intervalLength: The second field must contain a number parse-able as a Double value,
* representing the length of the interval in seconds.
* - Interval_Max: The third field must contain a number parse-able as a Double value,
* which generally represents the maximum value of the interval histogram.
* - Interval_Compressed_Histogram: The fourth field must contain a text field
* parse-able as a Base64 text representation of a compressed HdrHistogram.
*
* The log file may contain an optional indication of a starting time. Starting time
* is indicated using a special comment starting with "#[StartTime: " and followed
* by a number parse-able as a double, representing the start time (in seconds)
* that may be added to timestamps in the file to determine an absolute
* timestamp (e.g. since the epoch) for each interval.
*/
public class HistogramLogReader implements Closeable {

    /**
     * One 365-day year expressed in seconds. Used as the threshold when deducing whether
     * the timestamps in the log are absolute or relative to the start time.
     */
    private static final double SECONDS_PER_YEAR = 365 * 24 * 3600.0;

    private final HistogramLogScanner scanner;

    // Time references: either reported by the scanner or deduced from the first interval seen.
    private double startTimeSec = 0.0;
    private boolean observedStartTime = false;
    private double baseTimeSec = 0.0;
    private boolean observedBaseTime = false;

    // Parameters of the read request currently being served; consulted by the handler.
    private boolean absolute;
    private double rangeStartTimeSec;
    private double rangeEndTimeSec;

    // Hand-off slot: the handler stores the selected histogram here for the pending request.
    private EncodableHistogram nextHistogram;

    /**
     * Scanner callbacks. Following the convention used throughout this handler, a callback
     * returns {@code false} to let scanning continue and {@code true} to stop it.
     */
    private final HistogramLogScanner.EventHandler handler = new HistogramLogScanner.EventHandler() {

        /** Comments carry no information for this reader; scanning continues. */
        @Override
        public boolean onComment(String comment) {
            return false;
        }

        /** Records the base time (seconds since epoch) and continues scanning. */
        @Override
        public boolean onBaseTime(double secondsSinceEpoch) {
            observedBaseTime = true;
            baseTimeSec = secondsSinceEpoch;
            return false;
        }

        /** Records the start time (seconds since epoch) and continues scanning. */
        @Override
        public boolean onStartTime(double secondsSinceEpoch) {
            observedStartTime = true;
            startTimeSec = secondsSinceEpoch;
            return false;
        }

        /**
         * Decides whether the interval just scanned satisfies the pending request.
         * Both {@code timestamp} and {@code length} are expected to be in seconds.
         * Intervals before the requested range are skipped; an interval past the range,
         * or one that fails to decode, stops the scan without producing a result.
         */
        @Override
        public boolean onHistogram(String tag, double timestamp, double length,
                HistogramLogScanner.EncodableHistogramSupplier lazyReader) {
            HistogramLogReader.this.deduceTimeReferences(timestamp);

            final double absoluteStartSec = timestamp + baseTimeSec;
            final double comparedSec = absolute ? absoluteStartSec : absoluteStartSec - startTimeSec;

            if (comparedSec < rangeStartTimeSec) {
                // Not yet inside the requested range: move on to the next line.
                return false;
            }
            if (comparedSec > rangeEndTimeSec) {
                // Beyond the requested range: stop here.
                return true;
            }

            final EncodableHistogram decoded;
            try {
                decoded = lazyReader.read();
            } catch (DataFormatException ignored) {
                // Undecodable payload: stop, leaving no result so the caller receives null.
                return true;
            }

            // Histogram timestamps are stored in milliseconds.
            final double absoluteEndSec = absoluteStartSec + length;
            decoded.setStartTimeStamp((long) (absoluteStartSec * 1000.0));
            decoded.setEndTimeStamp((long) (absoluteEndSec * 1000.0));
            decoded.setTag(tag);

            nextHistogram = decoded;
            return true;
        }

        /**
         * A NoSuchElementException is tolerated (scanning stops, and the pending request
         * may yield null). Anything else is rethrown, wrapped in a RuntimeException when
         * it is not already one.
         */
        @Override
        public boolean onException(Throwable t) {
            if (t instanceof java.util.NoSuchElementException) {
                return true;
            }
            if (t instanceof RuntimeException) {
                throw (RuntimeException) t;
            }
            throw new RuntimeException(t);
        }
    };

    /**
     * Constructs a new HistogramLogReader that produces intervals read from the specified file name.
     * @param inputFileName The name of the file to read from
     * @throws java.io.FileNotFoundException when unable to find inputFileName
     */
    public HistogramLogReader(final String inputFileName) throws FileNotFoundException {
        this(new File(inputFileName));
    }

    /**
     * Constructs a new HistogramLogReader that produces intervals read from the specified InputStream.
     * @param inputStream The InputStream to read from
     */
    public HistogramLogReader(final InputStream inputStream) {
        scanner = new HistogramLogScanner(inputStream);
    }

    /**
     * Constructs a new HistogramLogReader that produces intervals read from the specified file.
     * @param inputFile The File to read from
     * @throws java.io.FileNotFoundException when unable to find inputFile
     */
    public HistogramLogReader(final File inputFile) throws FileNotFoundException {
        scanner = new HistogramLogScanner(inputFile);
    }

    /**
     * Returns the latest start time found in the log so far (or 0.0 if none has been
     * found). When no explicit start time has been reported by the time the first
     * interval is processed, that interval's timestamp is used as the start time.
     * Assuming the "#[StartTime:" comment line precedes the intervals recorded in the
     * file, this can be used after each interval is read to determine the offset of
     * that interval's timestamp from the epoch.
     * @return latest start time found in the file (or 0.0 if none found)
     */
    public double getStartTimeSec() {
        return startTimeSec;
    }

    /**
     * Read the next interval histogram from the log, if the interval falls within a
     * (non-absolute) time range.
     *
     * Returns a histogram object if an interval line was found whose start timestamp,
     * taken as an offset from the log's start time, falls between startTimeSec and
     * endTimeSec; returns null if no such interval line is found.
     *
     * Timestamps are assumed to appear in order in the log file, and as such
     * this method will return a null upon encountering a timestamp larger than
     * endTimeSec.
     *
     * The returned histogram has its start and end timestamps set (in milliseconds)
     * to the absolute times derived from the interval's logged timestamp and length.
     *
     * Upon encountering any unexpected format errors in reading the next
     * interval from the file, this method will return a null. Use {@link #hasNext} to determine
     * whether or not additional intervals may be available for reading in the log input.
     *
     * @param startTimeSec The (non-absolute time) start of the expected
     * time range, in seconds.
     * @param endTimeSec The (non-absolute time) end of the expected time
     * range, in seconds.
     * @return a histogram, or a null if no appropriate interval found
     */
    public EncodableHistogram nextIntervalHistogram(final double startTimeSec,
                                                    final double endTimeSec) {
        return nextIntervalHistogram(startTimeSec, endTimeSec, false);
    }

    /**
     * Read the next interval histogram from the log, if the interval falls within an
     * absolute time range.
     *
     * Returns a histogram object if an interval line was found with an
     * associated absolute start timestamp value that falls between
     * absoluteStartTimeSec and absoluteEndTimeSec, or null if no such
     * interval line is found.
     *
     * Timestamps are assumed to appear in order in the log file, and as such
     * this method will return a null upon encountering a timestamp larger than
     * absoluteEndTimeSec.
     *
     * The returned histogram has its start and end timestamps set (in milliseconds)
     * to the absolute times derived from the interval's logged timestamp and length.
     * The start time is indicated in the log with a "#[StartTime: " comment followed
     * by the start time in seconds.
     *
     * Upon encountering any unexpected format errors in reading the next
     * interval from the file, this method will return a null. Use {@link #hasNext} to determine
     * whether or not additional intervals may be available for reading in the log input.
     *
     * @param absoluteStartTimeSec The (absolute time) start of the expected
     * time range, in seconds.
     * @param absoluteEndTimeSec The (absolute time) end of the expected
     * time range, in seconds.
     * @return A histogram, or a null if no appropriate interval found
     */
    public EncodableHistogram nextAbsoluteIntervalHistogram(final double absoluteStartTimeSec,
                                                            final double absoluteEndTimeSec) {
        return nextIntervalHistogram(absoluteStartTimeSec, absoluteEndTimeSec, true);
    }

    /**
     * Read the next interval histogram from the log. Returns a histogram object if
     * an interval line was found, or null if not.
     *
     * Upon encountering any unexpected format errors in reading the next interval
     * from the input, this method will return a null. Use {@link #hasNext} to determine
     * whether or not additional intervals may be available for reading in the log input.
     * @return a histogram, or a null if no appropriately formatted interval was found
     */
    public EncodableHistogram nextIntervalHistogram() {
        // Widest possible absolute range: from 0 up to Long.MAX_VALUE seconds.
        return nextIntervalHistogram(0.0, (double) Long.MAX_VALUE, true);
    }

    /**
     * Shared implementation of the public read methods: publishes the request parameters
     * for the handler, runs the scanner, and then collects (and clears) whatever
     * histogram the handler left behind.
     */
    private EncodableHistogram nextIntervalHistogram(final double rangeStartTimeSec,
                                                     final double rangeEndTimeSec, boolean absolute) {
        this.absolute = absolute;
        this.rangeStartTimeSec = rangeStartTimeSec;
        this.rangeEndTimeSec = rangeEndTimeSec;

        scanner.process(handler);

        final EncodableHistogram found = nextHistogram;
        nextHistogram = null;
        return found;
    }

    /**
     * Fills in any time reference not yet established, using the timestamp (in seconds)
     * of the interval currently being processed. Does nothing once both the start time
     * and the base time have been observed.
     */
    private void deduceTimeReferences(final double logTimeStampInSec) {
        if (!observedStartTime) {
            // No explicit start time noted: adopt the first observed interval time.
            startTimeSec = logTimeStampInSec;
            observedStartTime = true;
        }
        if (observedBaseTime) {
            return;
        }
        // No explicit base time noted. If the log timestamp lies more than a year before
        // the start time, the log's timestamps are assumed to be relative to the start
        // time; otherwise they are taken to be absolute already.
        final boolean relativeToStart = logTimeStampInSec < startTimeSec - SECONDS_PER_YEAR;
        baseTimeSec = relativeToStart ? startTimeSec : 0.0;
        observedBaseTime = true;
    }

    /**
     * Indicates whether or not additional intervals may exist in the log
     * @return true if additional intervals may exist in the log
     */
    public boolean hasNext() {
        return scanner.hasNextLine();
    }

    /** Closes the underlying log scanner. */
    @Override
    public void close() {
        scanner.close();
    }
}