All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.sadikovi.netflowlib.Buffers Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2016 sadikovi
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.github.sadikovi.netflowlib;

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.nio.ByteOrder;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.zip.Inflater;
import java.util.zip.InflaterInputStream;

import com.github.sadikovi.netflowlib.record.RecordMaterializer;
import com.github.sadikovi.netflowlib.util.FilterIterator;
import com.github.sadikovi.netflowlib.util.ReadAheadInputStream;
import com.github.sadikovi.netflowlib.util.SafeIterator;
import com.github.sadikovi.netflowlib.util.WrappedByteBuf;

/**
 * Collection of all record buffers supported by the NetFlow reader. Each buffer is a nested
 * class that exposes records through an iterator.
 */
public final class Buffers {
  // Class only groups nested buffer classes, private constructor prevents instantiation
  private Buffers() { }

  /**
   * [[RecordBuffer]] is a base class for all buffers, it exposes records through iterator and
   * keeps buffer length constants.
   */
  public static abstract class RecordBuffer implements Iterable {
    // min length of the buffer in bytes, usually 32768
    public static final int MIN_BUFFER_LENGTH = 32768;
    // length of buffer in bytes ~3Mb (option 1)
    public static final int BUFFER_LENGTH_1 = 3698688;
    // length of buffer in bytes ~1Mb (option 2)
    public static final int BUFFER_LENGTH_2 = 1048576;

    /** Return iterator of records for this buffer. */
    public abstract Iterator iterator();

    /**
     * Return description of the buffer that includes name of the concrete class.
     */
    @Override
    public String toString() {
      // Anonymous and local subclasses do not have canonical name (it is null), use binary name
      // for them instead of printing "null"
      String className = getClass().getCanonicalName();
      if (className == null) {
        className = getClass().getName();
      }
      return "Record buffer: " + className;
    }
  }

  /**
   * [[EmptyRecordBuffer]] is a buffer without any records, it is introduced for [[SkipScan]]
   * strategy when entire file needs to be skipped. Iterator it returns is always empty.
   */
  public static final class EmptyRecordBuffer extends RecordBuffer {
    public EmptyRecordBuffer() { }

    @Override
    public Iterator iterator() {
      return new NoRecordsIterator();
    }

    /**
     * Iterator that never yields a record: `hasNext()` is always false, `next()` and `remove()`
     * always fail.
     */
    private static final class NoRecordsIterator implements Iterator {
      @Override
      public boolean hasNext() {
        return false;
      }

      @Override
      public Object[] next() {
        throw new NoSuchElementException("Empty iterator");
      }

      @Override
      public void remove() {
        throw new UnsupportedOperationException("Remove operation is not supported");
      }
    }
  }

  /**
   * [[ScanRecordBuffer]] is used when full scan is required, since there is no filtering on a
   * result from [[RecordMaterializer]], though can be used in [[FilterScan]] too.
   *
   * Every iterator returned by `iterator()` reads from the same underlying stream and reuses the
   * same record byte array. Stream is closed (and inflater is released) as soon as `hasNext()`
   * finds that no more data is available.
   */
  public static class ScanRecordBuffer extends RecordBuffer {
    /**
     * @param in stream to read records from
     * @param recordMaterializer materializer that converts raw record bytes into a record
     * @param recordSize size of a single record in bytes
     * @param byteOrder byte order used to wrap record bytes
     * @param isCompressed whether or not `in` should be read through inflater
     * @param bufferLength buffer size for compressed stream, not used when `isCompressed` is false
     * @param ignoreCorrupt when true, iterator is wrapped into [[SafeIterator]]
     */
    public ScanRecordBuffer(
        DataInputStream in,
        RecordMaterializer recordMaterializer,
        int recordSize,
        ByteOrder byteOrder,
        boolean isCompressed,
        int bufferLength,
        boolean ignoreCorrupt) {
      if (isCompressed) {
        inflater = new Inflater();
        // InflaterInputStream is replaced with ReadAheadInputStream to allow to resolve EOF before
        // actual record reading, we also wrap read ahead stream into buffered input stream
        stream = new BufferedInputStream(
          new ReadAheadInputStream(in, inflater, bufferLength), bufferLength);
        compression = true;
      } else {
        inflater = null;
        stream = new BufferedInputStream(in);
        compression = false;
      }

      this.recordMaterializer = recordMaterializer;
      this.recordSize = recordSize;
      this.ignoreCorrupt = ignoreCorrupt;
      recordBytes = new byte[recordSize];
      buffer = WrappedByteBuf.init(recordBytes, byteOrder);
      numBytesRead = 0;
      released = false;
    }

    @Override
    public Iterator iterator() {
      Iterator iter = new Iterator() {
        @Override
        public boolean hasNext() {
          // Once resources are released there is nothing left to read, this also avoids touching
          // stream reference that is dropped when closing it failed
          if (released) {
            return false;
          }

          // `stream.available()` returns either [0, 1] in case of compressed stream and
          // number of bytes left in case of uncompressed stream. When it fails then we
          // reach EOF.
          boolean hasNext;
          try {
            // `ReadAheadInputStream` allows to check compressed stream availability correctly,
            // even for empty stream.
            hasNext = stream.available() > 0;
          } catch (IOException io) {
            hasNext = false;
          }

          if (!hasNext) {
            release();
          }

          return hasNext;
        }

        @Override
        public Object[] next() {
          if (stream == null) {
            // Closing stream failed earlier and reference was dropped, report it the same way as
            // any other read past the end of data
            throw new IllegalArgumentException("Unexpected EOF");
          }

          try {
            numBytesRead = stream.read(recordBytes, 0, recordSize);
            if (numBytesRead < 0) {
              throw new IOException("EOF, " + numBytesRead + " bytes read");
            } else if (numBytesRead < recordSize) {
              // We have to read entire record when there is no compression, anything else is
              // considered failure. When stream is compressed we can read less, but then we need
              // to buffer up remaining data.
              if (!compression) {
                throw new IllegalArgumentException(
                  "Failed to read record: " + numBytesRead + " < " + recordSize);
              }

              int remaining = recordSize - numBytesRead;
              int addBytes = readRemaining(numBytesRead, remaining);
              if (addBytes != remaining) {
                throw new IllegalArgumentException(
                  "Failed to read record: " + addBytes + " != " + remaining);
              }
            }
          } catch (IOException io) {
            throw new IllegalArgumentException("Unexpected EOF", io);
          }

          return recordMaterializer.processRecord(buffer);
        }

        @Override
        public void remove() {
          throw new UnsupportedOperationException("Remove operation is not supported");
        }
      };

      // when ignoring corrupt records, wrap it into iterator with safe termination on failures
      return ignoreCorrupt ? new SafeIterator(iter) : iter;
    }

    /**
     * Read rest of the record into `recordBytes` starting at `offset`. Keeps reading until
     * `length` bytes are collected or stream reports EOF, because a single read is allowed to
     * return fewer bytes than requested.
     *
     * @return number of bytes read, or result of the first read if it did not return any data
     */
    private int readRemaining(int offset, int length) throws IOException {
      int total = 0;
      while (total < length) {
        int bytes = stream.read(recordBytes, offset + total, length - total);
        if (bytes <= 0) {
          return total > 0 ? total : bytes;
        }
        total += bytes;
      }
      return total;
    }

    /**
     * Close stream and release inflater, runs at most once. Failure to close stream is not
     * propagated, stream reference is dropped instead.
     */
    private void release() {
      if (released) {
        return;
      }
      released = true;

      try {
        stream.close();
      } catch (IOException io) {
        stream = null;
      } finally {
        // Inflater is created by this buffer and handed to the stream; streams do not normally
        // end an inflater they did not create (presumably true for ReadAheadInputStream as
        // well), so native memory is freed here explicitly instead of waiting for GC
        if (inflater != null) {
          inflater.end();
        }
        buffer = null;
      }
    }

    @Override
    public String toString() {
      // Anonymous and local subclasses do not have canonical name (it is null), use binary name
      // for them instead of printing "null"
      String className = getClass().getCanonicalName();
      if (className == null) {
        className = getClass().getName();
      }
      return "Record buffer: " + className + "[compression: " + compression +
        ", record size: " + recordSize + ", ignoreCorrupt: " + ignoreCorrupt + "]";
    }

    // Whether or not input stream is compressed
    private final boolean compression;
    // Inflater used by compressed stream, null when there is no compression; reference is kept to
    // release it when stream is closed
    private final Inflater inflater;
    // Buffered stream on top of either raw input stream or ReadAheadInputStream, set to null when
    // closing it fails
    private BufferedInputStream stream;
    // Array of bytes for a record, updated partially when compression buffer needs to be refilled
    private final byte[] recordBytes;
    // Buffer for the record, wraps `recordBytes`
    private WrappedByteBuf buffer;
    // Number of bytes returned by the first read of the current record
    private int numBytesRead;
    // Size of record, depends on NetFlow format
    private final int recordSize;
    // Record materializer to process individual record
    private final RecordMaterializer recordMaterializer;
    // Ignore corrupt records and terminate iterator once encountered
    private final boolean ignoreCorrupt;
    // Whether or not stream and inflater have already been released
    private boolean released;
  }

  /**
   * [[FilterRecordBuffer]] is a [[ScanRecordBuffer]] for the case when result of
   * [[RecordMaterializer]] has to be filtered. Records are read the same way as for full scan,
   * but iterator is wrapped into [[FilterIterator]], so all "null" records, e.g. records that
   * failed predicate requirement, would be skipped.
   */
  public static final class FilterRecordBuffer extends ScanRecordBuffer {
    public FilterRecordBuffer(
        DataInputStream in,
        RecordMaterializer recordMaterializer,
        int recordSize,
        ByteOrder byteOrder,
        boolean isCompressed,
        int bufferLength,
        boolean ignoreCorrupt) {
      super(
        in,
        recordMaterializer,
        recordSize,
        byteOrder,
        isCompressed,
        bufferLength,
        ignoreCorrupt);
    }

    @Override
    public Iterator iterator() {
      // iterator of the parent buffer yields every materialized record, filtering is done on top
      Iterator scanIterator = super.iterator();
      return new FilterIterator(scanIterator);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy