org.apache.avro.io.BinaryDecoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-core Show documentation
Show all versions of spark-core Show documentation
Shaded version of Apache Spark 2.x.x for Presto
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.io;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.util.Utf8;
/** An {@link Decoder} for binary-format data.
*
* Instances are created using {@link DecoderFactory}.
*
* This class may read-ahead and buffer bytes from the source beyond what is
* required to serve its read methods.
* The number of unused bytes in the buffer can be accessed by
* inputStream().remaining(), if the BinaryDecoder is not 'direct'.
*
* @see Encoder
*/
public class BinaryDecoder extends Decoder {
private ByteSource source = null;
// we keep the buffer and its state variables in this class and not in a
// container class for performance reasons. This improves performance
// over a container object by about 5% to 15%
// for example, we could have a FastBuffer class with these state variables
// and keep a private FastBuffer member here. This simplifies the
// "detach source" code and source access to the buffer, but
// hurts performance.
private byte[] buf = null;
private int minPos = 0;
private int pos = 0;
private int limit = 0;
byte[] getBuf() { return buf; }
int getPos() { return pos; }
int getLimit() { return limit; }
void setBuf(byte[] buf, int pos, int len) {
this.buf = buf;
this.pos = pos;
this.limit = pos+len;
}
void clearBuf() { this.buf = null; }
/** protected constructor for child classes */
protected BinaryDecoder() {
}
BinaryDecoder(InputStream in, int bufferSize) {
super();
configure(in, bufferSize);
}
BinaryDecoder(byte[] data, int offset, int length) {
super();
configure(data, offset, length);
}
BinaryDecoder configure(InputStream in, int bufferSize) {
configureSource(bufferSize, new InputStreamByteSource(in));
return this;
}
BinaryDecoder configure(byte[] data, int offset, int length) {
configureSource(DecoderFactory.DEFAULT_BUFFER_SIZE, new ByteArrayByteSource(
data, offset, length));
return this;
}
/**
* Initializes this decoder with a new ByteSource. Detaches the old source (if
* it exists) from this Decoder. The old source's state no longer depends on
* this Decoder and its InputStream interface will continue to drain the
* remaining buffer and source data.
*
* The decoder will read from the new source. The source will generally
* replace the buffer with its own. If the source allocates a new buffer, it
* will create it with size bufferSize.
*/
private void configureSource(int bufferSize, ByteSource source) {
if (null != this.source) {
this.source.detach();
}
source.attach(bufferSize, this);
this.source = source;
}
@Override
public void readNull() throws IOException {
}
@Override
public boolean readBoolean() throws IOException {
// inlined, shorter version of ensureBounds
if (limit == pos) {
limit = source.tryReadRaw(buf, 0, buf.length);
pos = 0;
if (limit == 0) {
throw new EOFException();
}
}
int n = buf[pos++] & 0xff;
return n == 1;
}
@Override
public int readInt() throws IOException {
ensureBounds(5); // won't throw index out of bounds
int len = 1;
int b = buf[pos] & 0xff;
int n = b & 0x7f;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
n ^= (b & 0x7f) << 7;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
n ^= (b & 0x7f) << 14;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
n ^= (b & 0x7f) << 21;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
n ^= (b & 0x7f) << 28;
if (b > 0x7f) {
throw new IOException("Invalid int encoding");
}
}
}
}
}
pos += len;
if (pos > limit) {
throw new EOFException();
}
return (n >>> 1) ^ -(n & 1); // back to two's-complement
}
@Override
public long readLong() throws IOException {
ensureBounds(10);
int b = buf[pos++] & 0xff;
int n = b & 0x7f;
long l;
if (b > 0x7f) {
b = buf[pos++] & 0xff;
n ^= (b & 0x7f) << 7;
if (b > 0x7f) {
b = buf[pos++] & 0xff;
n ^= (b & 0x7f) << 14;
if (b > 0x7f) {
b = buf[pos++] & 0xff;
n ^= (b & 0x7f) << 21;
if (b > 0x7f) {
// only the low 28 bits can be set, so this won't carry
// the sign bit to the long
l = innerLongDecode((long)n);
} else {
l = n;
}
} else {
l = n;
}
} else {
l = n;
}
} else {
l = n;
}
if (pos > limit) {
throw new EOFException();
}
return (l >>> 1) ^ -(l & 1); // back to two's-complement
}
// splitting readLong up makes it faster because of the JVM does more
// optimizations on small methods
private long innerLongDecode(long l) throws IOException {
int len = 1;
int b = buf[pos] & 0xff;
l ^= (b & 0x7fL) << 28;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
l ^= (b & 0x7fL) << 35;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
l ^= (b & 0x7fL) << 42;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
l ^= (b & 0x7fL) << 49;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
l ^= (b & 0x7fL) << 56;
if (b > 0x7f) {
b = buf[pos + len++] & 0xff;
l ^= (b & 0x7fL) << 63;
if (b > 0x7f) {
throw new IOException("Invalid long encoding");
}
}
}
}
}
}
pos += len;
return l;
}
@Override
public float readFloat() throws IOException {
ensureBounds(4);
int len = 1;
int n = (buf[pos] & 0xff) | ((buf[pos + len++] & 0xff) << 8)
| ((buf[pos + len++] & 0xff) << 16) | ((buf[pos + len++] & 0xff) << 24);
if ((pos + 4) > limit) {
throw new EOFException();
}
pos += 4;
return Float.intBitsToFloat(n);
}
@Override
public double readDouble() throws IOException {
ensureBounds(8);
int len = 1;
int n1 = (buf[pos] & 0xff) | ((buf[pos + len++] & 0xff) << 8)
| ((buf[pos + len++] & 0xff) << 16) | ((buf[pos + len++] & 0xff) << 24);
int n2 = (buf[pos + len++] & 0xff) | ((buf[pos + len++] & 0xff) << 8)
| ((buf[pos + len++] & 0xff) << 16) | ((buf[pos + len++] & 0xff) << 24);
if ((pos + 8) > limit) {
throw new EOFException();
}
pos += 8;
return Double.longBitsToDouble((((long) n1) & 0xffffffffL)
| (((long) n2) << 32));
}
@Override
public Utf8 readString(Utf8 old) throws IOException {
int length = readInt();
Utf8 result = (old != null ? old : new Utf8());
result.setByteLength(length);
if (0 != length) {
doReadBytes(result.getBytes(), 0, length);
}
return result;
}
private final Utf8 scratchUtf8 = new Utf8();
@Override
public String readString() throws IOException {
return readString(scratchUtf8).toString();
}
@Override
public void skipString() throws IOException {
doSkipBytes(readInt());
}
@Override
public ByteBuffer readBytes(ByteBuffer old) throws IOException {
int length = readInt();
ByteBuffer result;
if (old != null && length <= old.capacity()) {
result = old;
result.clear();
} else {
result = ByteBuffer.allocate(length);
}
doReadBytes(result.array(), result.position(), length);
result.limit(length);
return result;
}
@Override
public void skipBytes() throws IOException {
doSkipBytes(readInt());
}
@Override
public void readFixed(byte[] bytes, int start, int length) throws IOException {
doReadBytes(bytes, start, length);
}
@Override
public void skipFixed(int length) throws IOException {
doSkipBytes(length);
}
@Override
public int readEnum() throws IOException {
return readInt();
}
protected void doSkipBytes(long length) throws IOException {
int remaining = limit - pos;
if (length <= remaining) {
pos += length;
} else {
limit = pos = 0;
length -= remaining;
source.skipSourceBytes(length);
}
}
/**
* Reads length bytes into bytes starting at start.
*
* @throws EOFException
* If there are not enough number of bytes in the source.
* @throws IOException
*/
protected void doReadBytes(byte[] bytes, int start, int length)
throws IOException {
if (length < 0)
throw new AvroRuntimeException("Malformed data. Length is negative: "
+ length);
int remaining = limit - pos;
if (length <= remaining) {
System.arraycopy(buf, pos, bytes, start, length);
pos += length;
} else {
// read the rest of the buffer
System.arraycopy(buf, pos, bytes, start, remaining);
start += remaining;
length -= remaining;
pos = limit;
// finish from the byte source
source.readRaw(bytes, start, length);
}
}
/**
* Returns the number of items to follow in the current array or map. Returns
* 0 if there are no more items in the current array and the array/map has
* ended.
*
* @throws IOException
*/
protected long doReadItemCount() throws IOException {
long result = readLong();
if (result < 0) {
readLong(); // Consume byte-count if present
result = -result;
}
return result;
}
/**
* Reads the count of items in the current array or map and skip those items,
* if possible. If it could skip the items, keep repeating until there are no
* more items left in the array or map. If items cannot be skipped (because
* byte count to skip is not found in the stream) return the count of the
* items found. The client needs to skip the items individually.
*
* @return Zero if there are no more items to skip and end of array/map is
* reached. Positive number if some items are found that cannot be
* skipped and the client needs to skip them individually.
* @throws IOException
*/
private long doSkipItems() throws IOException {
long result = readInt();
while (result < 0) {
long bytecount = readLong();
doSkipBytes(bytecount);
result = readInt();
}
return result;
}
@Override
public long readArrayStart() throws IOException {
return doReadItemCount();
}
@Override
public long arrayNext() throws IOException {
return doReadItemCount();
}
@Override
public long skipArray() throws IOException {
return doSkipItems();
}
@Override
public long readMapStart() throws IOException {
return doReadItemCount();
}
@Override
public long mapNext() throws IOException {
return doReadItemCount();
}
@Override
public long skipMap() throws IOException {
return doSkipItems();
}
@Override
public int readIndex() throws IOException {
return readInt();
}
/**
* Returns true if the current BinaryDecoder is at the end of its source data and
* cannot read any further without throwing an EOFException or other
* IOException.
*
* Not all implementations of BinaryDecoder support isEnd(). Implementations that do
* not support isEnd() will throw a
* {@link java.lang.UnsupportedOperationException}.
*/
public boolean isEnd() throws IOException {
if (limit - pos > 0) {
// buffer not empty, not at end.
return false;
} else {
if (source.isEof()) {
return true;
}
// read from source.
int read = source.tryReadRaw(buf, 0, buf.length);
pos = 0;
limit = read;
if (0 == read) {
// nothing left
return true;
}
return false;
}
}
/**
* Ensures that buf[pos + num - 1] is not out of the buffer array bounds.
* However, buf[pos + num -1] may be >= limit if there is not enough data left
* in the source to fill the array with num bytes.
*
* This method allows readers to read ahead by num bytes safely without
* checking for EOF at each byte. However, readers must ensure that their
* reads are valid by checking that their read did not advance past the limit
* before adjusting pos.
*
* num must be less than the buffer size and greater than 0
*/
private void ensureBounds(int num) throws IOException {
int remaining = limit - pos;
if (remaining < num) {
// move remaining to front
source.compactAndFill(buf, pos, minPos, remaining);
if (pos >= limit)
throw new EOFException();
}
}
/**
* Returns an {@link java.io.InputStream} that is aware of any buffering that
* may occur in this BinaryDecoder. Readers that need to interleave decoding
* Avro data with other reads must access this InputStream to do so unless
* the implementation is 'direct' and does not read beyond the minimum bytes
* necessary from the source.
*/
public InputStream inputStream() {
return source;
}
/**
* BufferAccessor is used by BinaryEncoder to enable {@link ByteSource}s and
* the InputStream returned by {@link BinaryDecoder.inputStream} to access the
* BinaryEncoder's buffer. When a BufferAccessor is created, it is attached to
* a BinaryDecoder and its buffer. Its accessors directly reference the
* BinaryDecoder's buffer. When detach() is called, it stores references to
* the BinaryDecoder's buffer directly. The BinaryDecoder only detaches a
* BufferAccessor when it is initializing to a new ByteSource. Therefore, a
* client that is using the InputStream returned by BinaryDecoder.inputStream
* can continue to use that stream after a BinaryDecoder has been
* reinitialized to read from new data.
*/
static class BufferAccessor {
private final BinaryDecoder decoder;
private byte[] buf;
private int pos;
private int limit;
boolean detached = false;
private BufferAccessor(BinaryDecoder decoder) {
this.decoder = decoder;
}
void detach() {
this.buf = decoder.buf;
this.pos = decoder.pos;
this.limit = decoder.limit;
detached = true;
}
int getPos() {
if (detached)
return this.pos;
else
return decoder.pos;
}
int getLim() {
if (detached)
return this.limit;
else
return decoder.limit;
}
byte[] getBuf() {
if (detached)
return this.buf;
else
return decoder.buf;
}
void setPos(int pos) {
if (detached)
this.pos = pos;
else
decoder.pos = pos;
}
void setLimit(int limit) {
if (detached)
this.limit = limit;
else
decoder.limit = limit;
}
void setBuf(byte[] buf, int offset, int length) {
if (detached) {
this.buf = buf;
this.limit = offset + length;
this.pos = offset;
} else {
decoder.buf = buf;
decoder.limit = offset + length;
decoder.pos = offset;
decoder.minPos = offset;
}
}
}
/**
* ByteSource abstracts the source of data from the core workings of
* BinaryDecoder. This is very important for performance reasons because
* InputStream's API is a barrier to performance due to several quirks:
* InputStream does not in general require that as many bytes as possible have
* been read when filling a buffer.
*
* InputStream's terminating conditions for a read are two-fold: EOFException
* and '-1' on the return from read(). Implementations are supposed to return
* '-1' on EOF but often do not. The extra terminating conditions cause extra
* conditionals on both sides of the API, and slow performance significantly.
*
* ByteSource implementations provide read() and skip() variants that have
* stronger guarantees than InputStream, freeing client code to be simplified
* and faster.
*
* {@link skipSourceBytes} and {@link readRaw} are guaranteed to have read or
* skipped as many bytes as possible, or throw EOFException.
* {@link trySkipBytes} and {@link tryRead} are guaranteed to attempt to read
* or skip as many bytes as possible and never throw EOFException, while
* returning the exact number of bytes skipped or read. {@link isEof} returns
* true if all the source bytes have been read or skipped. This condition can
* also be detected by a client if an EOFException is thrown from
* {@link skipSourceBytes} or {@link readRaw}, or if {@link trySkipBytes} or
* {@link tryRead} return 0;
*
* A ByteSource also implements the InputStream contract for use by APIs that
* require it. The InputStream interface must take into account buffering in
* any decoder that this ByteSource is attached to. The other methods do not
* account for buffering.
*/
abstract static class ByteSource extends InputStream {
// maintain a reference to the buffer, so that if this
// source is detached from the Decoder, and a client still
// has a reference to it via inputStream(), bytes are not
// lost
protected BufferAccessor ba;
protected ByteSource() {
}
abstract boolean isEof();
protected void attach(int bufferSize, BinaryDecoder decoder) {
decoder.buf = new byte[bufferSize];
decoder.pos = 0;
decoder.minPos = 0;
decoder.limit = 0;
this.ba = new BufferAccessor(decoder);
return;
}
protected void detach() {
ba.detach();
}
/**
* Skips length bytes from the source. If length bytes cannot be skipped due
* to end of file/stream/channel/etc an EOFException is thrown
*
* @param length
* the number of bytes to attempt to skip
* @throws IOException
* if an error occurs
* @throws EOFException
* if length bytes cannot be skipped
*/
protected abstract void skipSourceBytes(long length) throws IOException;
/**
* Attempts to skip skipLength bytes from the source. Returns the
* actual number of bytes skipped. This method must attempt to skip as many
* bytes as possible up to skipLength bytes. Skipping 0 bytes signals
* end of stream/channel/file/etc
*
* @param skipLength
* the number of bytes to attempt to skip
* @return the count of actual bytes skipped.
*/
protected abstract long trySkipBytes(long skipLength) throws IOException;
/**
* Reads raw from the source, into a byte[]. Used for reads that are larger
* than the buffer, or otherwise unbuffered. This is a mandatory read -- if
* there is not enough bytes in the source, EOFException is thrown.
*
* @throws IOException
* if an error occurs
* @throws EOFException
* if len bytes cannot be read
* */
protected abstract void readRaw(byte[] data, int off, int len)
throws IOException;
/**
* Attempts to copy up to len bytes from the source into data,
* starting at index off. Returns the actual number of bytes copied
* which may be between 0 and len.
*
* This method must attempt to read as much as possible from the source.
* Returns 0 when at the end of stream/channel/file/etc.
*
* @throws IOException
* if an error occurs reading
**/
protected abstract int tryReadRaw(byte[] data, int off, int len)
throws IOException;
/**
* If this source buffers, compacts the buffer by placing the
* remaining bytes starting at pos at minPos. This may
* be done in the current buffer, or may replace the buffer with a new one.
*
* The end result must be a buffer with at least 16 bytes of remaining space.
*
* @param pos
* @param minPos
* @param remaining
* @throws IOException
*/
protected void compactAndFill(byte[] buf, int pos, int minPos, int remaining)
throws IOException {
System.arraycopy(buf, pos, buf, minPos, remaining);
ba.setPos(minPos);
int newLimit = remaining
+ tryReadRaw(buf, minPos + remaining, buf.length - remaining);
ba.setLimit(newLimit);
}
@Override
public int read(byte[] b, int off, int len) throws IOException {
int lim = ba.getLim();
int pos = ba.getPos();
byte[] buf = ba.getBuf();
int remaining = (lim - pos);
if (remaining >= len) {
System.arraycopy(buf, pos, b, off, len);
pos = pos + len;
ba.setPos(pos);
return len;
} else {
// flush buffer to array
System.arraycopy(buf, pos, b, off, remaining);
pos = pos + remaining;
ba.setPos(pos);
// get the rest from the stream (skip array)
int inputRead = remaining
+ tryReadRaw(b, off + remaining, len - remaining);
if (inputRead == 0) {
return -1;
} else {
return inputRead;
}
}
}
@Override
public long skip(long n) throws IOException {
int lim = ba.getLim();
int pos = ba.getPos();
int remaining = lim - pos;
if (remaining > n) {
pos += n;
ba.setPos(pos);
return n;
} else {
pos = lim;
ba.setPos(pos);
long isSkipCount = trySkipBytes(n - remaining);
return isSkipCount + remaining;
}
}
/**
* returns the number of bytes remaining that this BinaryDecoder has
* buffered from its source
*/
@Override
public int available() throws IOException {
return (ba.getLim() - ba.getPos());
}
}
private static class InputStreamByteSource extends ByteSource {
private InputStream in;
protected boolean isEof = false;
private InputStreamByteSource(InputStream in) {
super();
this.in = in;
}
@Override
protected void skipSourceBytes(long length) throws IOException {
boolean readZero = false;
while (length > 0) {
long n = in.skip(length);
if (n > 0) {
length -= n;
continue;
}
// The inputStream contract is evil.
// zero "might" mean EOF. So check for 2 in a row, we will
// infinite loop waiting for -1 with some classes others
// spuriously will return 0 on occasion without EOF
if (n == 0) {
if (readZero) {
isEof = true;
throw new EOFException();
}
readZero = true;
continue;
}
// read negative
isEof = true;
throw new EOFException();
}
}
@Override
protected long trySkipBytes(long length) throws IOException {
long leftToSkip = length;
try {
boolean readZero = false;
while (leftToSkip > 0) {
long n = in.skip(length);
if (n > 0) {
leftToSkip -= n;
continue;
}
// The inputStream contract is evil.
// zero "might" mean EOF. So check for 2 in a row, we will
// infinite loop waiting for -1 with some classes others
// spuriously will return 0 on occasion without EOF
if (n == 0) {
if (readZero) {
isEof = true;
break;
}
readZero = true;
continue;
}
// read negative
isEof = true;
break;
}
} catch (EOFException eof) {
isEof = true;
}
return length - leftToSkip;
}
@Override
protected void readRaw(byte[] data, int off, int len) throws IOException {
while (len > 0) {
int read = in.read(data, off, len);
if (read < 0) {
isEof = true;
throw new EOFException();
}
len -= read;
off += read;
}
}
@Override
protected int tryReadRaw(byte[] data, int off, int len) throws IOException {
int leftToCopy = len;
try {
while (leftToCopy > 0) {
int read = in.read(data, off, leftToCopy);
if (read < 0) {
isEof = true;
break;
}
leftToCopy -= read;
off += read;
}
} catch (EOFException eof) {
isEof = true;
}
return len - leftToCopy;
}
@Override
public int read() throws IOException {
if (ba.getLim() - ba.getPos() == 0) {
return in.read();
} else {
int position = ba.getPos();
int result = ba.getBuf()[position] & 0xff;
ba.setPos(position + 1);
return result;
}
}
@Override
public boolean isEof() {
return isEof;
}
@Override
public void close() throws IOException {
in.close();
}
}
/**
* This byte source is special. It will avoid copying data by using the
* source's byte[] as a buffer in the decoder.
*
*/
private static class ByteArrayByteSource extends ByteSource {
private byte[] data;
private int position;
private int max;
private boolean compacted = false;
private ByteArrayByteSource(byte[] data, int start, int len) {
super();
// make sure data is not too small, otherwise getLong may try and
// read 10 bytes and get index out of bounds.
if (data.length < 16 || len < 16) {
this.data = new byte[16];
System.arraycopy(data, start, this.data, 0, len);
this.position = 0;
this.max = len;
} else {
// use the array passed in
this.data = data;
this.position = start;
this.max = start + len;
}
}
@Override
protected void attach(int bufferSize, BinaryDecoder decoder) {
// buffer size is not used here, the byte[] source is the buffer.
decoder.buf = this.data;
decoder.pos = this.position;
decoder.minPos = this.position;
decoder.limit = this.max;
this.ba = new BufferAccessor(decoder);
return;
}
@Override
protected void skipSourceBytes(long length) throws IOException {
long skipped = trySkipBytes(length);
if (skipped < length) {
throw new EOFException();
}
}
@Override
protected long trySkipBytes(long length) throws IOException {
// the buffer is shared, so this should return 0
max = ba.getLim();
position = ba.getPos();
long remaining = max - position;
if (remaining >= length) {
position += length;
ba.setPos(position);
return length;
} else {
position += remaining;
ba.setPos(position);
return remaining;
}
}
@Override
protected void readRaw(byte[] data, int off, int len) throws IOException {
int read = tryReadRaw(data, off, len);
if (read < len) {
throw new EOFException();
}
}
@Override
protected int tryReadRaw(byte[] data, int off, int len) throws IOException {
// the buffer is shared, nothing to read
return 0;
}
@Override
protected void compactAndFill(byte[] buf, int pos, int minPos, int remaining)
throws IOException {
// this implementation does not want to mutate the array passed in,
// so it makes a new tiny buffer unless it has been compacted once before
if (!compacted) {
// assumes ensureCapacity is never called with a size more than 16
byte[] tinybuf = new byte[remaining + 16];
System.arraycopy(buf, pos, tinybuf, 0, remaining);
ba.setBuf(tinybuf, 0, remaining);
compacted = true;
}
}
@Override
public int read() throws IOException {
max = ba.getLim();
position = ba.getPos();
if (position >= max) {
return -1;
} else {
int result = ba.getBuf()[position++] & 0xff;
ba.setPos(position);
return result;
}
}
@Override
public void close() throws IOException {
ba.setPos(ba.getLim()); // effectively set isEof to false
}
@Override
public boolean isEof() {
int remaining = ba.getLim() - ba.getPos();
return (remaining == 0);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy