// org.apache.lucene.store.DataInput (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.store;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.util.BitUtil;
/**
* Abstract base class for performing read operations of Lucene's low-level
* data types.
*
* {@code DataInput} may only be used from one thread, because it is not
* thread safe (it keeps internal state like file position). To allow
* multithreaded use, every {@code DataInput} instance must be cloned before
* used in another thread. Subclasses must therefore implement {@link #clone()},
* returning a new {@code DataInput} which operates on the same underlying
* resource, but positioned independently.
*/
public abstract class DataInput implements Cloneable {
private static final int SKIP_BUFFER_SIZE = 1024;
/* This buffer is used to skip over bytes with the default implementation of
* skipBytes. The reason why we need to use an instance member instead of
* sharing a single instance across threads is that some delegating
* implementations of DataInput might want to reuse the provided buffer in
* order to eg. update the checksum. If we shared the same buffer across
* threads, then another thread might update the buffer while the checksum is
* being computed, making it invalid. See LUCENE-5583 for more information.
*/
private byte[] skipBuffer;
/** Reads and returns a single byte.
* @see DataOutput#writeByte(byte)
*/
public abstract byte readByte() throws IOException;
/** Reads a specified number of bytes into an array at the specified offset.
* @param b the array to read bytes into
* @param offset the offset in the array to start storing bytes
* @param len the number of bytes to read
* @see DataOutput#writeBytes(byte[],int)
*/
public abstract void readBytes(byte[] b, int offset, int len)
throws IOException;
/** Reads a specified number of bytes into an array at the
* specified offset with control over whether the read
* should be buffered (callers who have their own buffer
* should pass in "false" for useBuffer). Currently only
* {@link BufferedIndexInput} respects this parameter.
* @param b the array to read bytes into
* @param offset the offset in the array to start storing bytes
* @param len the number of bytes to read
* @param useBuffer set to false if the caller will handle
* buffering.
* @see DataOutput#writeBytes(byte[],int)
*/
public void readBytes(byte[] b, int offset, int len, boolean useBuffer)
throws IOException
{
// Default to ignoring useBuffer entirely
readBytes(b, offset, len);
}
/** Reads two bytes and returns a short.
* @see DataOutput#writeByte(byte)
*/
public short readShort() throws IOException {
return (short) (((readByte() & 0xFF) << 8) | (readByte() & 0xFF));
}
/** Reads four bytes and returns an int.
* @see DataOutput#writeInt(int)
*/
public int readInt() throws IOException {
return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16)
| ((readByte() & 0xFF) << 8) | (readByte() & 0xFF);
}
/** Reads an int stored in variable-length format. Reads between one and
* five bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
*
* The format is described further in {@link DataOutput#writeVInt(int)}.
*
* @see DataOutput#writeVInt(int)
*/
public int readVInt() throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unwinded!
byte b = readByte();
int i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
b = readByte();
i |= (b & 0x7F) << shift;
}
return i;
*/
byte b = readByte();
if (b >= 0) return b;
int i = b & 0x7F;
b = readByte();
i |= (b & 0x7F) << 7;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7F) << 14;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7F) << 21;
if (b >= 0) return i;
b = readByte();
// Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
i |= (b & 0x0F) << 28;
if ((b & 0xF0) == 0) return i;
throw new IOException("Invalid vInt detected (too many bits)");
}
/**
* Read a {@link BitUtil#zigZagDecode(int) zig-zag}-encoded
* {@link #readVInt() variable-length} integer.
* @see DataOutput#writeZInt(int)
*/
public int readZInt() throws IOException {
return BitUtil.zigZagDecode(readVInt());
}
/** Reads eight bytes and returns a long.
* @see DataOutput#writeLong(long)
*/
public long readLong() throws IOException {
return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL);
}
/** Reads a long stored in variable-length format. Reads between one and
* nine bytes. Smaller values take fewer bytes. Negative numbers are not
* supported.
*
* The format is described further in {@link DataOutput#writeVInt(int)}.
*
* @see DataOutput#writeVLong(long)
*/
public long readVLong() throws IOException {
return readVLong(false);
}
private long readVLong(boolean allowNegative) throws IOException {
/* This is the original code of this method,
* but a Hotspot bug (see LUCENE-2975) corrupts the for-loop if
* readByte() is inlined. So the loop was unwinded!
byte b = readByte();
long i = b & 0x7F;
for (int shift = 7; (b & 0x80) != 0; shift += 7) {
b = readByte();
i |= (b & 0x7FL) << shift;
}
return i;
*/
byte b = readByte();
if (b >= 0) return b;
long i = b & 0x7FL;
b = readByte();
i |= (b & 0x7FL) << 7;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 14;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 21;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 28;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 35;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 42;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 49;
if (b >= 0) return i;
b = readByte();
i |= (b & 0x7FL) << 56;
if (b >= 0) return i;
if (allowNegative) {
b = readByte();
i |= (b & 0x7FL) << 63;
if (b == 0 || b == 1) return i;
throw new IOException("Invalid vLong detected (more than 64 bits)");
} else {
throw new IOException("Invalid vLong detected (negative values disallowed)");
}
}
/**
* Read a {@link BitUtil#zigZagDecode(long) zig-zag}-encoded
* {@link #readVLong() variable-length} integer. Reads between one and ten
* bytes.
* @see DataOutput#writeZLong(long)
*/
public long readZLong() throws IOException {
return BitUtil.zigZagDecode(readVLong(true));
}
/** Reads a string.
* @see DataOutput#writeString(String)
*/
public String readString() throws IOException {
int length = readVInt();
final byte[] bytes = new byte[length];
readBytes(bytes, 0, length);
return new String(bytes, 0, length, StandardCharsets.UTF_8);
}
/** Returns a clone of this stream.
*
*
Clones of a stream access the same data, and are positioned at the same
* point as the stream they were cloned from.
*
*
Expert: Subclasses must ensure that clones may be positioned at
* different points in the input from each other and from the stream they
* were cloned from.
*/
@Override
public DataInput clone() {
try {
return (DataInput) super.clone();
} catch (CloneNotSupportedException e) {
throw new Error("This cannot happen: Failing to clone DataInput");
}
}
/** Reads a Map<String,String> previously written
* with {@link DataOutput#writeStringStringMap(Map)}.
* @deprecated Only for reading existing formats. Encode maps with
* {@link DataOutput#writeMapOfStrings(Map)} instead.
*/
@Deprecated
public Map readStringStringMap() throws IOException {
final Map map = new HashMap<>();
final int count = readInt();
for(int i=0;i readMapOfStrings() throws IOException {
int count = readVInt();
if (count == 0) {
return Collections.emptyMap();
} else if (count == 1) {
return Collections.singletonMap(readString(), readString());
} else {
Map map = count > 10 ? new HashMap() : new TreeMap();
for (int i = 0; i < count; i++) {
final String key = readString();
final String val = readString();
map.put(key, val);
}
return Collections.unmodifiableMap(map);
}
}
/** Reads a Set<String> previously written
* with {@link DataOutput#writeStringSet(Set)}.
* @deprecated Only for reading existing formats. Encode maps with
* {@link DataOutput#writeSetOfStrings(Set)} instead. */
@Deprecated
public Set readStringSet() throws IOException {
final Set set = new HashSet<>();
final int count = readInt();
for(int i=0;i readSetOfStrings() throws IOException {
int count = readVInt();
if (count == 0) {
return Collections.emptySet();
} else if (count == 1) {
return Collections.singleton(readString());
} else {
Set set = count > 10 ? new HashSet() : new TreeSet();
for (int i = 0; i < count; i++) {
set.add(readString());
}
return Collections.unmodifiableSet(set);
}
}
/**
* Skip over numBytes
bytes. The contract on this method is that it
* should have the same behavior as reading the same number of bytes into a
* buffer and discarding its content. Negative values of numBytes
* are not supported.
*/
public void skipBytes(final long numBytes) throws IOException {
if (numBytes < 0) {
throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
}
if (skipBuffer == null) {
skipBuffer = new byte[SKIP_BUFFER_SIZE];
}
assert skipBuffer.length == SKIP_BUFFER_SIZE;
for (long skipped = 0; skipped < numBytes; ) {
final int step = (int) Math.min(SKIP_BUFFER_SIZE, numBytes - skipped);
readBytes(skipBuffer, 0, step, false);
skipped += step;
}
}
}