io.druid.segment.data.GenericIndexed Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
Show all versions of druid-processing Show documentation
A module that contains everything required to understand Druid Segments
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.segment.data;
import com.google.common.collect.Ordering;
import com.google.common.primitives.Ints;
import com.metamx.common.IAE;
import com.metamx.common.guava.CloseQuietly;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;
import java.util.Arrays;
import java.util.Iterator;
/**
* A generic, flat storage mechanism. Use static methods fromArray() or fromIterable() to construct. If input
* is sorted, supports binary search index lookups. If input is not sorted, only supports array-like index lookups.
*
* V1 Storage Format:
*
* byte 1: version (0x1)
* byte 2 == 0x1 => allowReverseLookup
* bytes 3-6 => numBytesUsed
* bytes 7-10 => numElements
* bytes 10-((numElements * 4) + 10): integers representing *end* offsets of byte serialized values
* bytes ((numElements * 4) + 10)-(numBytesUsed + 2): 4-byte integer representing length of value, followed by bytes for value
*/
public class GenericIndexed implements Indexed
{
private static final byte version = 0x1;
private int indexOffset;
public static GenericIndexed fromArray(T[] objects, ObjectStrategy strategy)
{
return fromIterable(Arrays.asList(objects), strategy);
}
public static GenericIndexed fromIterable(Iterable objectsIterable, ObjectStrategy strategy)
{
Iterator objects = objectsIterable.iterator();
if (!objects.hasNext()) {
final ByteBuffer buffer = ByteBuffer.allocate(4).putInt(0);
buffer.flip();
return new GenericIndexed(buffer, strategy, true);
}
boolean allowReverseLookup = true;
int count = 0;
ByteArrayOutputStream headerBytes = new ByteArrayOutputStream();
ByteArrayOutputStream valueBytes = new ByteArrayOutputStream();
try {
int offset = 0;
T prevVal = null;
do {
count++;
T next = objects.next();
if (allowReverseLookup && prevVal != null && !(strategy.compare(prevVal, next) < 0)) {
allowReverseLookup = false;
}
final byte[] bytes = strategy.toBytes(next);
offset += 4 + bytes.length;
headerBytes.write(Ints.toByteArray(offset));
valueBytes.write(Ints.toByteArray(bytes.length));
valueBytes.write(bytes);
if (prevVal instanceof Closeable) {
CloseQuietly.close((Closeable) prevVal);
}
prevVal = next;
} while (objects.hasNext());
if (prevVal instanceof Closeable) {
CloseQuietly.close((Closeable) prevVal);
}
}
catch (IOException e) {
throw new RuntimeException(e);
}
ByteBuffer theBuffer = ByteBuffer.allocate(Ints.BYTES + headerBytes.size() + valueBytes.size());
theBuffer.put(Ints.toByteArray(count));
theBuffer.put(headerBytes.toByteArray());
theBuffer.put(valueBytes.toByteArray());
theBuffer.flip();
return new GenericIndexed(theBuffer.asReadOnlyBuffer(), strategy, allowReverseLookup);
}
@Override
public Class extends T> getClazz()
{
return bufferIndexed.getClazz();
}
@Override
public int size()
{
return bufferIndexed.size();
}
@Override
public T get(int index)
{
return bufferIndexed.get(index);
}
@Override
public int indexOf(T value)
{
return bufferIndexed.indexOf(value);
}
@Override
public Iterator iterator()
{
return bufferIndexed.iterator();
}
private final ByteBuffer theBuffer;
private final ObjectStrategy strategy;
private final boolean allowReverseLookup;
private final int size;
private final int valuesOffset;
private final BufferIndexed bufferIndexed;
GenericIndexed(
ByteBuffer buffer,
ObjectStrategy strategy,
boolean allowReverseLookup
)
{
this.theBuffer = buffer;
this.strategy = strategy;
this.allowReverseLookup = allowReverseLookup;
size = theBuffer.getInt();
indexOffset = theBuffer.position();
valuesOffset = theBuffer.position() + (size << 2);
bufferIndexed = new BufferIndexed();
}
class BufferIndexed implements Indexed
{
int lastReadSize;
@Override
public Class extends T> getClazz()
{
return strategy.getClazz();
}
@Override
public int size()
{
return size;
}
@Override
public T get(final int index)
{
return _get(theBuffer.asReadOnlyBuffer(), index);
}
protected T _get(final ByteBuffer copyBuffer, final int index)
{
if (index < 0) {
throw new IAE("Index[%s] < 0", index);
}
if (index >= size) {
throw new IAE(String.format("Index[%s] >= size[%s]", index, size));
}
final int startOffset;
final int endOffset;
if (index == 0) {
startOffset = 4;
endOffset = copyBuffer.getInt(indexOffset);
} else {
copyBuffer.position(indexOffset + ((index - 1) * 4));
startOffset = copyBuffer.getInt() + 4;
endOffset = copyBuffer.getInt();
}
if (startOffset == endOffset) {
return null;
}
copyBuffer.position(valuesOffset + startOffset);
final int size = endOffset - startOffset;
lastReadSize = size;
// fromByteBuffer must not modify the buffer limit
final T value = strategy.fromByteBuffer(copyBuffer, size);
return value;
}
/**
* This method makes no guarantees with respect to thread safety
* @return the size in bytes of the last value read
*/
public int getLastValueSize() {
return lastReadSize;
}
@Override
public int indexOf(T value)
{
if (!allowReverseLookup) {
throw new UnsupportedOperationException("Reverse lookup not allowed.");
}
value = (value != null && value.equals("")) ? null : value;
int minIndex = 0;
int maxIndex = size - 1;
while (minIndex <= maxIndex) {
int currIndex = (minIndex + maxIndex) >>> 1;
T currValue = get(currIndex);
int comparison = strategy.compare(currValue, value);
if (comparison == 0) {
return currIndex;
}
if (comparison < 0) {
minIndex = currIndex + 1;
} else {
maxIndex = currIndex - 1;
}
}
return -(minIndex + 1);
}
@Override
public Iterator iterator()
{
return IndexedIterable.create(this).iterator();
}
}
public long getSerializedSize()
{
return theBuffer.remaining() + 2 + 4 + 4;
}
public void writeToChannel(WritableByteChannel channel) throws IOException
{
channel.write(ByteBuffer.wrap(new byte[]{version, allowReverseLookup ? (byte) 0x1 : (byte) 0x0}));
channel.write(ByteBuffer.wrap(Ints.toByteArray(theBuffer.remaining() + 4)));
channel.write(ByteBuffer.wrap(Ints.toByteArray(size)));
channel.write(theBuffer.asReadOnlyBuffer());
}
/**
* Create a non-thread-safe Indexed, which may perform better than the underlying Indexed.
*
* @return a non-thread-safe Indexed
*/
public GenericIndexed.BufferIndexed singleThreaded()
{
final ByteBuffer copyBuffer = theBuffer.asReadOnlyBuffer();
return new BufferIndexed() {
@Override
public T get(int index)
{
return _get(copyBuffer, index);
}
};
}
public static GenericIndexed read(ByteBuffer buffer, ObjectStrategy strategy)
{
byte versionFromBuffer = buffer.get();
if (version == versionFromBuffer) {
boolean allowReverseLookup = buffer.get() == 0x1;
int size = buffer.getInt();
ByteBuffer bufferToUse = buffer.asReadOnlyBuffer();
bufferToUse.limit(bufferToUse.position() + size);
buffer.position(bufferToUse.limit());
return new GenericIndexed(
bufferToUse,
strategy,
allowReverseLookup
);
}
throw new IAE("Unknown version[%s]", versionFromBuffer);
}
public static final ObjectStrategy STRING_STRATEGY = new CacheableObjectStrategy()
{
@Override
public Class extends String> getClazz()
{
return String.class;
}
@Override
public String fromByteBuffer(final ByteBuffer buffer, final int numBytes)
{
return com.metamx.common.StringUtils.fromUtf8(buffer, numBytes);
}
@Override
public byte[] toBytes(String val)
{
if (val == null) {
return new byte[]{};
}
return com.metamx.common.StringUtils.toUtf8(val);
}
@Override
public int compare(String o1, String o2)
{
return Ordering.natural().nullsFirst().compare(o1, o2);
}
};
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy