io.druid.segment.serde.DictionaryEncodedColumnPartSerde Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
Show all versions of druid-processing Show documentation
A module that contains everything required to understand Druid Segments
/*
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Metamarkets licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package io.druid.segment.serde;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Ints;
import com.metamx.collections.bitmap.ImmutableBitmap;
import com.metamx.collections.spatial.ImmutableRTree;
import com.metamx.common.IAE;
import io.druid.segment.CompressedVSizeIndexedSupplier;
import io.druid.segment.CompressedVSizeIndexedV3Supplier;
import io.druid.segment.column.ColumnBuilder;
import io.druid.segment.column.ColumnConfig;
import io.druid.segment.column.ValueType;
import io.druid.segment.data.BitmapSerde;
import io.druid.segment.data.BitmapSerdeFactory;
import io.druid.segment.data.ByteBufferSerializer;
import io.druid.segment.data.ByteBufferWriter;
import io.druid.segment.data.CompressedVSizeIntsIndexedSupplier;
import io.druid.segment.data.GenericIndexed;
import io.druid.segment.data.GenericIndexedWriter;
import io.druid.segment.data.IndexedInts;
import io.druid.segment.data.IndexedIntsWriter;
import io.druid.segment.data.IndexedMultivalue;
import io.druid.segment.data.IndexedRTree;
import io.druid.segment.data.VSizeIndexed;
import io.druid.segment.data.VSizeIndexedInts;
import io.druid.segment.data.WritableSupplier;
import javax.annotation.Nullable;
import javax.validation.constraints.NotNull;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.WritableByteChannel;
/**
 * {@link ColumnPartSerde} for dictionary-encoded columns; the deserializer registers the column
 * as {@link ValueType#STRING}.
 *
 * Serialized layout, in the order the serializers write it and the deserializer reads it:
 * <ol>
 * <li>one version byte (the ordinal of a {@link VERSION} constant)</li>
 * <li>a 4-byte flag word of {@link Feature} bits, present only when the version is
 * {@link VERSION#COMPRESSED} or later</li>
 * <li>the value dictionary</li>
 * <li>the single-valued or multi-valued column of encoded values</li>
 * <li>the bitmap index</li>
 * <li>an optional spatial index (read only if bytes remain in the buffer)</li>
 * </ol>
 */
public class DictionaryEncodedColumnPartSerde implements ColumnPartSerde
{
/** Flag word with no {@link Feature} bit set. */
private static final int NO_FLAGS = 0;
/**
 * Feature bits carried in the serialized flag word. Each constant owns the bit whose position
 * equals its ordinal, so the declaration order is part of the on-disk format.
 */
enum Feature
{
  MULTI_VALUE,
  MULTI_VALUE_V3;

  /**
   * @param flags flag word to inspect
   * @return true if this feature's bit is present in {@code flags}
   */
  public boolean isSet(int flags)
  {
    final int mask = getMask();
    return (flags & mask) != 0;
  }

  /**
   * @return the single-bit mask of this feature, i.e. bit number {@code ordinal()}
   */
  public int getMask()
  {
    return 1 << ordinal();
  }
}
/**
 * Version tag written as the first byte of the serialized column. The byte value is the
 * constant's ordinal, so the declaration order is part of the on-disk format.
 */
enum VERSION
{
  UNCOMPRESSED_SINGLE_VALUE,  // 0x0
  UNCOMPRESSED_MULTI_VALUE,   // 0x1
  COMPRESSED;                 // 0x2

  /**
   * Maps a serialized version byte back to its constant.
   *
   * @param b version byte read from the column
   * @return the constant whose ordinal equals {@code b}
   * @throws IllegalArgumentException if {@code b} does not correspond to any known version
   */
  public static VERSION fromByte(byte b)
  {
    final VERSION[] values = VERSION.values();
    // byte is signed: without the lower bound a negative value would pass the check and then
    // fail in values[b] with ArrayIndexOutOfBoundsException instead of the intended IAE.
    Preconditions.checkArgument(
        b >= 0 && b < values.length,
        "Unsupported dictionary column version[%s]",
        b
    );
    return values[b];
  }

  /**
   * @return the byte written to the column for this version (its ordinal)
   */
  public byte asByte()
  {
    return (byte) this.ordinal();
  }
}
/**
 * JSON factory: creates an instance that can only deserialize (its serializer is null).
 *
 * @param bitmapSerdeFactory bitmap serde to use; when null a
 *                           {@link BitmapSerde.LegacyBitmapSerdeFactory} is substituted
 * @param byteOrder          byte order handed to the compressed column readers
 * @return a serde without a serializer
 */
@JsonCreator
public static DictionaryEncodedColumnPartSerde createDeserializer(
    @Nullable @JsonProperty("bitmapSerdeFactory") BitmapSerdeFactory bitmapSerdeFactory,
    @NotNull @JsonProperty("byteOrder") ByteOrder byteOrder
)
{
  final BitmapSerdeFactory effectiveFactory;
  if (bitmapSerdeFactory == null) {
    effectiveFactory = new BitmapSerde.LegacyBitmapSerdeFactory();
  } else {
    effectiveFactory = bitmapSerdeFactory;
  }
  return new DictionaryEncodedColumnPartSerde(byteOrder, effectiveFactory, null);
}
// Byte order passed to the compressed column suppliers when reading.
private final ByteOrder byteOrder;
// Supplies the bitmap factory and bitmap object strategy used for the indexes.
private final BitmapSerdeFactory bitmapSerdeFactory;
// Null when the instance was created through createDeserializer().
private final Serializer serializer;

/**
 * Instances are obtained through {@link #createDeserializer}, {@link #serializerBuilder()} or
 * {@link #legacySerializerBuilder()}.
 *
 * @param byteOrder          byte order of the compressed column data
 * @param bitmapSerdeFactory bitmap serde used for the bitmap and spatial indexes
 * @param serializer         serializer returned by {@link #getSerializer()}; may be null
 */
private DictionaryEncodedColumnPartSerde(
    ByteOrder byteOrder,
    BitmapSerdeFactory bitmapSerdeFactory,
    Serializer serializer
)
{
  this.serializer = serializer;
  this.bitmapSerdeFactory = bitmapSerdeFactory;
  this.byteOrder = byteOrder;
}
/**
 * @return the bitmap serde factory this serde was configured with (exposed as a JSON property)
 */
@JsonProperty
public BitmapSerdeFactory getBitmapSerdeFactory()
{
  return this.bitmapSerdeFactory;
}
/**
 * @return the byte order this serde was configured with (exposed as a JSON property)
 */
@JsonProperty
public ByteOrder getByteOrder()
{
  return this.byteOrder;
}
/**
 * @return a fresh, empty {@link SerializerBuilder}
 */
public static SerializerBuilder serializerBuilder()
{
  final SerializerBuilder builder = new SerializerBuilder();
  return builder;
}
/**
 * Builds a {@link DictionaryEncodedColumnPartSerde} whose serializer copies each column part
 * from a writer into the output channel. Parts whose writer was never set are skipped.
 *
 * {@link #withValue} must be called before the built serializer is used, because it is the only
 * method that sets the version the serializer writes.
 */
public static class SerializerBuilder
{
  private VERSION version = null;
  private int flags = NO_FLAGS;
  private GenericIndexedWriter<String> dictionaryWriter = null;
  private IndexedIntsWriter valueWriter = null;
  private BitmapSerdeFactory bitmapSerdeFactory = null;
  private GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter = null;
  private ByteBufferWriter<ImmutableRTree> spatialIndexWriter = null;
  private ByteOrder byteOrder = null;

  /**
   * @param dictionaryWriter writer holding the value dictionary
   * @return this builder
   */
  public SerializerBuilder withDictionary(GenericIndexedWriter<String> dictionaryWriter)
  {
    this.dictionaryWriter = dictionaryWriter;
    return this;
  }

  /**
   * @param bitmapSerdeFactory bitmap serde recorded on the built serde
   * @return this builder
   */
  public SerializerBuilder withBitmapSerdeFactory(BitmapSerdeFactory bitmapSerdeFactory)
  {
    this.bitmapSerdeFactory = bitmapSerdeFactory;
    return this;
  }

  /**
   * @param bitmapIndexWriter writer holding the bitmap index
   * @return this builder
   */
  public SerializerBuilder withBitmapIndex(GenericIndexedWriter<ImmutableBitmap> bitmapIndexWriter)
  {
    this.bitmapIndexWriter = bitmapIndexWriter;
    return this;
  }

  /**
   * @param spatialIndexWriter writer holding the spatial index
   * @return this builder
   */
  public SerializerBuilder withSpatialIndex(ByteBufferWriter<ImmutableRTree> spatialIndexWriter)
  {
    this.spatialIndexWriter = spatialIndexWriter;
    return this;
  }

  /**
   * @param byteOrder byte order recorded on the built serde
   * @return this builder
   */
  public SerializerBuilder withByteOrder(ByteOrder byteOrder)
  {
    this.byteOrder = byteOrder;
    return this;
  }

  /**
   * Sets the value column writer and derives the version and flag word from its shape.
   *
   * @param valueWriter   writer holding the encoded values
   * @param hasMultiValue whether the column is multi-valued
   * @param compressed    whether the values are written in compressed form
   * @return this builder
   */
  public SerializerBuilder withValue(IndexedIntsWriter valueWriter, boolean hasMultiValue, boolean compressed)
  {
    this.valueWriter = valueWriter;
    // Assign the flag word instead of OR-ing into it: version and flags must describe the same
    // column, and a repeated call with different arguments must not keep a stale multi-value bit.
    if (compressed) {
      this.version = VERSION.COMPRESSED;
      this.flags = hasMultiValue ? Feature.MULTI_VALUE_V3.getMask() : NO_FLAGS;
    } else if (hasMultiValue) {
      this.version = VERSION.UNCOMPRESSED_MULTI_VALUE;
      this.flags = Feature.MULTI_VALUE.getMask();
    } else {
      this.version = VERSION.UNCOMPRESSED_SINGLE_VALUE;
      this.flags = NO_FLAGS;
    }
    return this;
  }

  /**
   * @return a serde whose serializer reads this builder's fields at the time it is invoked
   */
  public DictionaryEncodedColumnPartSerde build()
  {
    return new DictionaryEncodedColumnPartSerde(
        byteOrder,
        bitmapSerdeFactory,
        new Serializer()
        {
          @Override
          public long numBytes()
          {
            long size = 1 + // version
                        (version.compareTo(VERSION.COMPRESSED) >= 0
                         ? Ints.BYTES
                         : 0); // flag if version >= compressed
            if (dictionaryWriter != null) {
              size += dictionaryWriter.getSerializedSize();
            }
            if (valueWriter != null) {
              size += valueWriter.getSerializedSize();
            }
            if (bitmapIndexWriter != null) {
              size += bitmapIndexWriter.getSerializedSize();
            }
            if (spatialIndexWriter != null) {
              size += spatialIndexWriter.getSerializedSize();
            }
            return size;
          }

          @Override
          public void write(WritableByteChannel channel) throws IOException
          {
            writeFully(channel, ByteBuffer.wrap(new byte[]{version.asByte()}));
            if (version.compareTo(VERSION.COMPRESSED) >= 0) {
              writeFully(channel, ByteBuffer.wrap(Ints.toByteArray(flags)));
            }
            if (dictionaryWriter != null) {
              dictionaryWriter.writeToChannel(channel);
            }
            if (valueWriter != null) {
              valueWriter.writeToChannel(channel);
            }
            if (bitmapIndexWriter != null) {
              bitmapIndexWriter.writeToChannel(channel);
            }
            if (spatialIndexWriter != null) {
              spatialIndexWriter.writeToChannel(channel);
            }
          }
        }
    );
  }

  /**
   * Writes every remaining byte of {@code buffer}. A single {@code channel.write} call is
   * allowed to write only part of the buffer, which would silently truncate the header.
   */
  private static void writeFully(WritableByteChannel channel, ByteBuffer buffer) throws IOException
  {
    while (buffer.hasRemaining()) {
      channel.write(buffer);
    }
  }
}
/**
 * @return a fresh, empty {@link LegacySerializerBuilder}
 */
public static LegacySerializerBuilder legacySerializerBuilder()
{
  final LegacySerializerBuilder builder = new LegacySerializerBuilder();
  return builder;
}
public static class LegacySerializerBuilder
{
private VERSION version = null;
private int flags = NO_FLAGS;
private GenericIndexed dictionary = null;
private WritableSupplier singleValuedColumn = null;
private WritableSupplier> multiValuedColumn = null;
private BitmapSerdeFactory bitmapSerdeFactory = null;
private GenericIndexed bitmaps = null;
private ImmutableRTree spatialIndex = null;
private ByteOrder byteOrder = null;
private LegacySerializerBuilder()
{
}
public LegacySerializerBuilder withDictionary(GenericIndexed dictionary)
{
this.dictionary = dictionary;
return this;
}
public LegacySerializerBuilder withBitmapSerdeFactory(BitmapSerdeFactory bitmapSerdeFactory)
{
this.bitmapSerdeFactory = bitmapSerdeFactory;
return this;
}
public LegacySerializerBuilder withBitmaps(GenericIndexed bitmaps)
{
this.bitmaps = bitmaps;
return this;
}
public LegacySerializerBuilder withSpatialIndex(ImmutableRTree spatialIndex)
{
this.spatialIndex = spatialIndex;
return this;
}
public LegacySerializerBuilder withByteOrder(ByteOrder byteOrder)
{
this.byteOrder = byteOrder;
return this;
}
public LegacySerializerBuilder withSingleValuedColumn(VSizeIndexedInts singleValuedColumn)
{
Preconditions.checkState(multiValuedColumn == null, "Cannot set both singleValuedColumn and multiValuedColumn");
this.version = VERSION.UNCOMPRESSED_SINGLE_VALUE;
this.singleValuedColumn = singleValuedColumn.asWritableSupplier();
return this;
}
public LegacySerializerBuilder withSingleValuedColumn(CompressedVSizeIntsIndexedSupplier singleValuedColumn)
{
Preconditions.checkState(multiValuedColumn == null, "Cannot set both singleValuedColumn and multiValuedColumn");
this.version = VERSION.COMPRESSED;
this.singleValuedColumn = singleValuedColumn;
return this;
}
public LegacySerializerBuilder withMultiValuedColumn(VSizeIndexed multiValuedColumn)
{
Preconditions.checkState(singleValuedColumn == null, "Cannot set both multiValuedColumn and singleValuedColumn");
this.version = VERSION.UNCOMPRESSED_MULTI_VALUE;
this.flags |= Feature.MULTI_VALUE.getMask();
this.multiValuedColumn = multiValuedColumn.asWritableSupplier();
return this;
}
public LegacySerializerBuilder withMultiValuedColumn(CompressedVSizeIndexedSupplier multiValuedColumn)
{
Preconditions.checkState(singleValuedColumn == null, "Cannot set both singleValuedColumn and multiValuedColumn");
this.version = VERSION.COMPRESSED;
this.flags |= Feature.MULTI_VALUE.getMask();
this.multiValuedColumn = multiValuedColumn;
return this;
}
public DictionaryEncodedColumnPartSerde build()
{
Preconditions.checkArgument(
singleValuedColumn != null ^ multiValuedColumn != null,
"Exactly one of singleValCol[%s] or multiValCol[%s] must be set",
singleValuedColumn, multiValuedColumn
);
return new DictionaryEncodedColumnPartSerde(
byteOrder,
bitmapSerdeFactory,
new Serializer()
{
@Override
public long numBytes()
{
long size = 1 + // version
(version.compareTo(VERSION.COMPRESSED) >= 0 ? Ints.BYTES : 0);// flag if version >= compressed
size += dictionary.getSerializedSize();
if (Feature.MULTI_VALUE.isSet(flags)) {
size += multiValuedColumn.getSerializedSize();
} else {
size += singleValuedColumn.getSerializedSize();
}
size += bitmaps.getSerializedSize();
if (spatialIndex != null) {
size += spatialIndex.size() + Ints.BYTES;
}
return size;
}
@Override
public void write(WritableByteChannel channel) throws IOException
{
channel.write(ByteBuffer.wrap(new byte[]{version.asByte()}));
if (version.compareTo(VERSION.COMPRESSED) >= 0) {
channel.write(ByteBuffer.wrap(Ints.toByteArray(flags)));
}
if (dictionary != null) {
dictionary.writeToChannel(channel);
}
if (Feature.MULTI_VALUE.isSet(flags)) {
if (multiValuedColumn != null) {
multiValuedColumn.writeToChannel(channel);
}
} else {
if (singleValuedColumn != null) {
singleValuedColumn.writeToChannel(channel);
}
}
if (bitmaps != null) {
bitmaps.writeToChannel(channel);
}
if (spatialIndex != null) {
ByteBufferSerializer.writeToChannel(
spatialIndex,
new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory()),
channel
);
}
}
}
);
}
}
/**
 * @return the serializer supplied at construction; null for instances created through
 * {@link #createDeserializer}
 */
@Override
public Serializer getSerializer()
{
  return this.serializer;
}
@Override
public Deserializer getDeserializer()
{
return new Deserializer()
{
@Override
public void read(ByteBuffer buffer, ColumnBuilder builder, ColumnConfig columnConfig)
{
final VERSION rVersion = VERSION.fromByte(buffer.get());
final int rFlags;
if (rVersion.compareTo(VERSION.COMPRESSED) >= 0) {
rFlags = buffer.getInt();
} else {
rFlags = rVersion.equals(VERSION.UNCOMPRESSED_MULTI_VALUE)
? Feature.MULTI_VALUE.getMask()
: NO_FLAGS;
}
final boolean hasMultipleValues = Feature.MULTI_VALUE.isSet(rFlags) || Feature.MULTI_VALUE_V3.isSet(rFlags);
final GenericIndexed rDictionary = GenericIndexed.read(buffer, GenericIndexed.STRING_STRATEGY);
builder.setType(ValueType.STRING);
final WritableSupplier rSingleValuedColumn;
final WritableSupplier> rMultiValuedColumn;
if (hasMultipleValues) {
rMultiValuedColumn = readMultiValuedColum(rVersion, buffer, rFlags);
rSingleValuedColumn = null;
} else {
rSingleValuedColumn = readSingleValuedColumn(rVersion, buffer);
rMultiValuedColumn = null;
}
builder.setHasMultipleValues(hasMultipleValues)
.setDictionaryEncodedColumn(
new DictionaryEncodedColumnSupplier(
rDictionary,
rSingleValuedColumn,
rMultiValuedColumn,
columnConfig.columnCacheSizeBytes()
)
);
GenericIndexed rBitmaps = GenericIndexed.read(
buffer, bitmapSerdeFactory.getObjectStrategy()
);
builder.setBitmapIndex(
new BitmapIndexColumnPartSupplier(
bitmapSerdeFactory.getBitmapFactory(),
rBitmaps,
rDictionary
)
);
ImmutableRTree rSpatialIndex = null;
if (buffer.hasRemaining()) {
rSpatialIndex = ByteBufferSerializer.read(
buffer, new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapSerdeFactory.getBitmapFactory())
);
builder.setSpatialIndex(new SpatialIndexColumnPartSupplier(rSpatialIndex));
}
}
private WritableSupplier readSingleValuedColumn(VERSION version, ByteBuffer buffer)
{
switch (version) {
case UNCOMPRESSED_SINGLE_VALUE:
return VSizeIndexedInts.readFromByteBuffer(buffer).asWritableSupplier();
case COMPRESSED:
return CompressedVSizeIntsIndexedSupplier.fromByteBuffer(buffer, byteOrder);
}
throw new IAE("Unsupported single-value version[%s]", version);
}
private WritableSupplier> readMultiValuedColum(
VERSION version, ByteBuffer buffer, int flags
)
{
switch (version) {
case UNCOMPRESSED_MULTI_VALUE:
return VSizeIndexed.readFromByteBuffer(buffer).asWritableSupplier();
case COMPRESSED:
if (Feature.MULTI_VALUE.isSet(flags)) {
return CompressedVSizeIndexedSupplier.fromByteBuffer(buffer, byteOrder);
} else if (Feature.MULTI_VALUE_V3.isSet(flags)) {
return CompressedVSizeIndexedV3Supplier.fromByteBuffer(buffer, byteOrder);
} else {
throw new IAE("Unrecognized multi-value flag[%d]", flags);
}
}
throw new IAE("Unsupported multi-value version[%s]", version);
}
};
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy