All downloads are free. Search and download functionality uses the official Maven repository.

com.twitter.hbc.common.DelimitedStreamReader Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2013 Twitter, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/

package com.twitter.hbc.common;

import com.google.common.base.Preconditions;

import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;

/**
 * Reads text from a byte stream either line by line ({@link #readLine()}) or in pieces of a
 * requested byte length ({@link #read(int)}), decoding the bytes with the supplied charset.
 *
 * <p>Only for charsets whose byte representations of \n and \r are 10 and 13 (ASCII compatible
 * encodings): line terminators are located by scanning the raw bytes before decoding.
 *
 * <p>A single returned string is decoded from at most 500000 bytes; longer lines are rejected with
 * an {@link IOException}. The reader keeps mutable buffer state and performs no synchronization.
 */
public class DelimitedStreamReader {

  private final InputStream inputStream;

  // read-ahead buffer filled from the stream; unread bytes are buffer[offset, end)
  private final byte[] buffer;
  // bytes of the line/message currently being assembled: strBuffer[0, strBufferIndex)
  private byte[] strBuffer;
  private int strBufferIndex;
  private final Charset charset;

  private int offset;
  private int end; // first invalid byte

  private static final int DEFAULT_READ_COUNT = 64;
  private static final int MAX_ALLOWABLE_BUFFER_SIZE = 500000;
  // Hard cap on accumulated bytes. The extra byte leaves room for a '\r' that was copied at the end
  // of one read chunk and is trimmed once the '\n' shows up at the start of the next chunk.
  private static final int MAX_PENDING_BYTES = MAX_ALLOWABLE_BUFFER_SIZE + 1;

  private static final byte CR = 13;
  private static final byte LF = 10;

  /**
   * @param stream stream to read from; must not be null
   * @param charset charset used to decode the bytes; must not be null
   * @param bufferSize size in bytes of the internal read-ahead buffer; must be positive
   * @throws IllegalArgumentException if bufferSize is not positive
   * @throws NullPointerException if stream or charset is null
   */
  public DelimitedStreamReader(InputStream stream, Charset charset, int bufferSize) {
    if (bufferSize <= 0) {
      throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
    }
    if (stream == null) {
      throw new NullPointerException("stream");
    }
    if (charset == null) {
      throw new NullPointerException("charset");
    }
    this.inputStream = stream;
    this.charset = charset;

    // computed in long so that very large bufferSize values cannot overflow into a negative size
    this.strBuffer = new byte[(int) Math.min(2L * bufferSize, MAX_PENDING_BYTES)];

    buffer = new byte[bufferSize];
    offset = 0;
    end = 0;
  }

  /**
   * Reads the next line, with its terminator (\n or \r\n) removed.
   *
   * @return the decoded line without the trailing \n / \r\n
   * @throws IOException if the stream ends before a \n is seen, if the underlying stream fails, or
   *     if the line is longer than 500000 bytes
   */
  public String readLine() throws IOException {
    return readLine(true);
  }

  /**
   * Reads up to and including the next \n. A \r is only treated as part of the terminator when it
   * immediately precedes that \n; a \r on its own does not end the line.
   *
   * @param trim whether to drop the terminating \n (and a directly preceding \r) from the result
   */
  private String readLine(boolean trim) throws IOException {
    boolean done = false;
    boolean sawCarriage = false; // whether the byte scanned just before the current one was '\r'
    // bytes to trim from strBuffer (0, or -1 when a '\r' was copied with the previous chunk)
    int removalBytes = 0;
    while (!done) {
      if (isReadBufferEmpty()) {
        fillReadBuffer();
      }

      int start = offset;
      while (!done && offset < end) {
        byte current = buffer[offset++];
        if (current == LF) {
          done = true;
        } else {
          sawCarriage = current == CR;
        }
      }

      // offset now points just past the '\n' (when done) or at end (chunk exhausted)
      int cpLength = offset - start;
      if (done && trim) {
        cpLength -= sawCarriage ? 2 : 1;
      }

      if (cpLength > 0) {
        copyToStrBuffer(buffer, start, cpLength);
      } else {
        // negative length means we need to trim a \r from strBuffer
        removalBytes = cpLength;
      }
    }
    return takeString(strBufferIndex + removalBytes);
  }

  /**
   * Refills the read-ahead buffer from the stream, reading at most DEFAULT_READ_COUNT bytes.
   *
   * @throws IOException if the stream has ended
   */
  private void fillReadBuffer() throws IOException {
    offset = 0;
    end = 0;
    int bytesRead = inputStream.read(buffer, 0, Math.min(DEFAULT_READ_COUNT, buffer.length));
    if (bytesRead < 0) {
      // we failed to read anything more...
      throw new IOException("Reached the end of the stream");
    }
    end = bytesRead;
  }

  /**
   * Decodes the first length bytes of strBuffer and resets the accumulator for the next call.
   *
   * @throws IOException if length exceeds MAX_ALLOWABLE_BUFFER_SIZE
   */
  private String takeString(int length) throws IOException {
    strBufferIndex = 0;
    if (length > MAX_ALLOWABLE_BUFFER_SIZE) {
      throw new IOException("Line or message exceeds " + MAX_ALLOWABLE_BUFFER_SIZE + " bytes");
    }
    return new String(strBuffer, 0, length, charset);
  }

  /**
   * Copies from buffer to our internal strBuffer, expanding the internal buffer if necessary
   * @param offset offset in the buffer to start copying from
   * @param length length to copy
   * @throws IOException if the accumulated data would exceed the allowed maximum
   */
  private void copyToStrBuffer(byte[] buffer, int offset, int length) throws IOException {
    if (length < 0) {
      throw new IllegalArgumentException("length must be non-negative: " + length);
    }
    ensureStrBufferCapacity(length);
    System.arraycopy(buffer, offset, strBuffer, strBufferIndex, length);
    strBufferIndex += length;
  }

  /**
   * Makes sure strBuffer can take additional more bytes after strBufferIndex.
   *
   * @throws IOException if that would exceed the cap; the partially assembled data is discarded so
   *     that later calls start from a clean accumulator at the current stream position
   */
  private void ensureStrBufferCapacity(int additional) throws IOException {
    long required = (long) strBufferIndex + additional;
    if (required > MAX_PENDING_BYTES) {
      strBufferIndex = 0;
      throw new IOException("Line or message exceeds " + MAX_ALLOWABLE_BUFFER_SIZE + " bytes");
    }
    if (required > strBuffer.length) {
      expandStrBuffer((int) required);
    }
  }

  /**
   * Grows strBuffer to at least minLength bytes in total (doubling when that is larger), never
   * beyond the cap, keeping the bytes accumulated so far.
   */
  private void expandStrBuffer(int minLength) {
    byte[] oldBuffer = strBuffer;
    int newLength = (int) Math.min(Math.max(2L * oldBuffer.length, minLength), MAX_PENDING_BYTES);

    if (newLength > oldBuffer.length) {
      strBuffer = new byte[newLength];
      System.arraycopy(oldBuffer, 0, strBuffer, 0, strBufferIndex);
    }
  }

  /**
   * Reads numBytes bytes, and returns the corresponding string. If the last of those bytes is not
   * a \n, reading continues through the next \n, which is included in the result.
   *
   * @param numBytes number of bytes to read; between 0 and 500000 inclusive
   * @throws IllegalArgumentException if numBytes is outside that range
   * @throws IOException if the stream ends first, if the underlying stream fails, or if the result
   *     would be longer than 500000 bytes
   */
  public String read(int numBytes) throws IOException {
    if (numBytes < 0 || numBytes > MAX_ALLOWABLE_BUFFER_SIZE) {
      throw new IllegalArgumentException(
        "numBytes must be between 0 and " + MAX_ALLOWABLE_BUFFER_SIZE + ": " + numBytes);
    }
    int numBytesRemaining = numBytes;
    // first read whatever we need from our buffer
    if (!isReadBufferEmpty()) {
      int length = Math.min(end - offset, numBytesRemaining);
      copyToStrBuffer(buffer, offset, length);
      offset += length;
      numBytesRemaining -= length;
    }

    // next read the remaining bytes directly into our strBuffer
    if (numBytesRemaining > 0) {
      readAmountToStrBuffer(numBytesRemaining);
    }

    if (strBufferIndex > 0 && strBuffer[strBufferIndex - 1] != LF) {
      // the last byte doesn't correspond to lf
      return readLine(false);
    }

    return takeString(strBufferIndex);
  }

  /** Reads exactly length bytes from the stream into strBuffer, looping over short reads. */
  private void readAmountToStrBuffer(int length) throws IOException {
    int remainingBytes = length;
    while (remainingBytes > 0) {
      remainingBytes -= readStreamToStrBuffer(remainingBytes);
    }
  }

  /**
   * Performs one stream read of at most length bytes straight into strBuffer.
   *
   * @return the number of bytes actually read
   * @throws IOException if the stream has ended
   */
  private int readStreamToStrBuffer(int length) throws IOException {
    ensureStrBufferCapacity(length);
    int bytesRead = inputStream.read(strBuffer, strBufferIndex, length);
    if (bytesRead < 0) {
      throw new IOException("Reached end of stream.");
    }
    strBufferIndex += bytesRead;
    return bytesRead;
  }

  /** Whether every byte fetched into the read-ahead buffer has been consumed. */
  private boolean isReadBufferEmpty() {
    // end never exceeds buffer.length, so this also covers offset >= buffer.length
    return offset >= end;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy