com.twitter.hbc.common.DelimitedStreamReader Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2013 Twitter, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package com.twitter.hbc.common;
import com.google.common.base.Preconditions;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
/**
* Only for charsets whose byte representations of \n and \r are 10 and 13 (Ascii compatible encodings)
*/
public class DelimitedStreamReader {
private final InputStream inputStream;
private final byte[] buffer;
private byte[] strBuffer;
private int strBufferIndex;
private final Charset charset;
private int offset;
private int end; // first invalid byte
private static final int DEFAULT_READ_COUNT = 64;
private static final int MAX_ALLOWABLE_BUFFER_SIZE = 500000;
private static final byte CR = 13;
private static final byte LF = 10;
public DelimitedStreamReader(InputStream stream, Charset charset, int bufferSize) {
Preconditions.checkArgument(bufferSize > 0);
this.inputStream = Preconditions.checkNotNull(stream);
this.charset = Preconditions.checkNotNull(charset);
this.strBuffer = new byte[bufferSize * 2];
buffer = new byte[bufferSize];
offset = 0;
end = 0;
}
public String readLine() throws IOException {
return readLine(true);
}
/**
* Reads a line from the input stream, where a line is terminated by \r, \n, or \r\n
* @param trim whether to trim trailing \r and \ns
*/
private String readLine(boolean trim) throws IOException {
boolean done = false;
boolean sawCarriage = false;
// bytes to trim (the \r and the \n)
int removalBytes = 0;
while (!done) {
if (isReadBufferEmpty()) {
offset = 0;
end = 0;
int bytesRead = inputStream.read(buffer, end, Math.min(DEFAULT_READ_COUNT, buffer.length - end));
if (bytesRead < 0) {
// we failed to read anything more...
throw new IOException("Reached the end of the stream");
} else {
end += bytesRead;
}
}
int originalOffset = offset;
for (; !done && offset < end; offset++) {
if (buffer[offset] == LF) {
int cpLength = offset - originalOffset + 1;
if (trim) {
int length = 0;
if (buffer[offset] == LF) {
length ++;
if (sawCarriage) {
length++;
}
}
cpLength -= length;
}
if (cpLength > 0) {
copyToStrBuffer(buffer, originalOffset, cpLength);
} else {
// negative length means we need to trim a \r from strBuffer
removalBytes = cpLength;
}
done = true;
} else {
// did not see newline:
sawCarriage = buffer[offset] == CR;
}
}
if (!done) {
copyToStrBuffer(buffer, originalOffset, end - originalOffset);
offset = end;
}
}
int strLength = strBufferIndex + removalBytes;
strBufferIndex = 0;
return new String(strBuffer, 0, strLength, charset);
}
/**
* Copies from buffer to our internal strBufferIndex, expanding the internal buffer if necessary
* @param offset offset in the buffer to start copying from
* @param length length to copy
*/
private void copyToStrBuffer(byte[] buffer, int offset, int length) {
Preconditions.checkArgument(length >= 0);
if (strBuffer.length - strBufferIndex < length) {
// cannot fit, expanding buffer
expandStrBuffer(length);
}
System.arraycopy(
buffer, offset, strBuffer, strBufferIndex, Math.min(length, MAX_ALLOWABLE_BUFFER_SIZE - strBufferIndex));
strBufferIndex += length;
}
private void expandStrBuffer(int minLength) {
byte[] oldBuffer = strBuffer;
int newLength = Math.min(
Math.max(oldBuffer.length * 2, minLength),
MAX_ALLOWABLE_BUFFER_SIZE
);
if (newLength > oldBuffer.length) {
strBuffer = new byte[newLength];
System.arraycopy(oldBuffer, 0, strBuffer, 0, strBufferIndex);
}
}
/**
* Reads numBytes bytes, and returns the corresponding string
*/
public String read(int numBytes) throws IOException {
Preconditions.checkArgument(numBytes >= 0);
Preconditions.checkArgument(numBytes <= MAX_ALLOWABLE_BUFFER_SIZE);
int numBytesRemaining = numBytes;
// first read whatever we need from our buffer
if (!isReadBufferEmpty()) {
int length = Math.min(end - offset, numBytesRemaining);
copyToStrBuffer(buffer, offset, length);
offset += length;
numBytesRemaining -= length;
}
// next read the remaining chars directly into our strBuffer
if (numBytesRemaining > 0) {
readAmountToStrBuffer(numBytesRemaining);
}
if (strBufferIndex > 0 && strBuffer[strBufferIndex - 1] != LF) {
// the last byte doesn't correspond to lf
return readLine(false);
}
int strBufferLength = strBufferIndex;
strBufferIndex = 0;
return new String(strBuffer, 0, strBufferLength, charset);
}
private void readAmountToStrBuffer(int length) throws IOException {
int remainingBytes = length;
while (remainingBytes > 0) {
int bytesRead = readStreamToStrBuffer(remainingBytes);
remainingBytes -= bytesRead;
}
}
private int readStreamToStrBuffer(int length) throws IOException {
if (length > strBuffer.length - strBufferIndex) {
expandStrBuffer(length);
}
int bytesRead = inputStream.read(strBuffer, strBufferIndex, Math.min(length, strBuffer.length - strBufferIndex));
if (bytesRead < 0) {
throw new IOException("Reached end of stream.");
}
strBufferIndex += bytesRead;
return bytesRead;
}
private boolean isReadBufferEmpty() {
return offset >= buffer.length || end - offset <= 0;
}
}