Please wait. This can take a few minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.clickhouse.data.format.ClickHouseRowBinaryProcessor Maven / Gradle / Ivy
package com.clickhouse.data.format;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import com.clickhouse.config.ClickHouseRenameMethod;
import com.clickhouse.data.ClickHouseAggregateFunction;
import com.clickhouse.data.ClickHouseChecker;
import com.clickhouse.data.ClickHouseColumn;
import com.clickhouse.data.ClickHouseDataConfig;
import com.clickhouse.data.ClickHouseDataProcessor;
import com.clickhouse.data.ClickHouseDataType;
import com.clickhouse.data.ClickHouseDeserializer;
import com.clickhouse.data.ClickHouseFormat;
import com.clickhouse.data.ClickHouseInputStream;
import com.clickhouse.data.ClickHouseOutputStream;
import com.clickhouse.data.ClickHouseRecord;
import com.clickhouse.data.ClickHouseSerializer;
import com.clickhouse.data.ClickHouseUtils;
import com.clickhouse.data.ClickHouseValue;
import com.clickhouse.data.value.ClickHouseBitmap;
import com.clickhouse.data.value.ClickHouseBitmapValue;
/**
* Data processor for handling the {@link ClickHouseFormat#RowBinary} and
* {@link ClickHouseFormat#RowBinaryWithNamesAndTypes} formats.
*/
public class ClickHouseRowBinaryProcessor extends ClickHouseDataProcessor {
/**
 * Serializer and deserializer for bitmap values. The data type of the
 * column's first nested column is captured at construction time and passed to
 * {@link ClickHouseBitmap#deserialize} when reading.
 */
public static class BitmapSerDe implements ClickHouseDeserializer, ClickHouseSerializer {
    private final ClickHouseDataType innerType;

    /**
     * Creates the serde for the given column.
     *
     * @param config configuration (not used by this implementation)
     * @param column column whose first nested column defines the bitmap's inner type
     */
    public BitmapSerDe(ClickHouseDataConfig config, ClickHouseColumn column) {
        ClickHouseColumn firstNested = column.getNestedColumns().get(0);
        this.innerType = firstNested.getDataType();
    }

    /**
     * Reads a bitmap from the stream and stores it in the given value wrapper.
     */
    @Override
    public ClickHouseValue deserialize(ClickHouseValue ref, ClickHouseInputStream input) throws IOException {
        return ref.update(ClickHouseBitmap.deserialize(input, innerType));
    }

    /**
     * Writes the bytes of the wrapped bitmap to the stream. The value must be a
     * {@link ClickHouseBitmapValue}; anything else fails with a
     * {@link ClassCastException}.
     */
    @Override
    public void serialize(ClickHouseValue value, ClickHouseOutputStream output) throws IOException {
        output.write(((ClickHouseBitmapValue) value).getValue().toBytes());
    }
}
/**
 * Deserializer for map columns. The wire layout read here is a var-int entry
 * count followed by that many key/value pairs, each decoded with the key and
 * value deserializers respectively.
 */
public static class MapDeserializer extends ClickHouseDeserializer.CompositeDeserializer {
    // scratch wrappers reused for every entry; only the extracted objects are kept
    private final ClickHouseValue keyValue;
    private final ClickHouseValue valValue;

    /**
     * Creates a map deserializer.
     *
     * @param config        configuration used to create the key/value wrappers
     * @param column        map column providing key and value type information
     * @param deserializers exactly two deserializers: key first, then value
     * @throws IllegalArgumentException when the number of deserializers is not 2
     */
    public MapDeserializer(ClickHouseDataConfig config, ClickHouseColumn column,
            ClickHouseDeserializer... deserializers) {
        super(deserializers);
        if (deserializers.length != 2) {
            throw new IllegalArgumentException("Expect 2 deserializers but got " + deserializers.length);
        }
        this.keyValue = column.getKeyInfo().newValue(config);
        this.valValue = column.getValueInfo().newValue(config);
    }

    /**
     * Reads one map from the stream into {@code ref}. An entry count of zero
     * resets {@code ref} to null/empty; otherwise entries are kept in the order
     * they were read.
     */
    @Override
    public ClickHouseValue deserialize(ClickHouseValue ref, ClickHouseInputStream input) throws IOException {
        int len = input.readVarInt();
        if (len == 0) {
            return ref.resetToNullOrEmpty();
        }

        // Size the map for the default 0.75 load factor. The arithmetic is done in
        // long so that a large entry count cannot overflow into a negative capacity.
        int capacity = (int) Math.min((long) len * 4 / 3 + 1, Integer.MAX_VALUE);
        Map<Object, Object> map = new LinkedHashMap<>(capacity);
        ClickHouseDeserializer kd = deserializers[0];
        ClickHouseDeserializer vd = deserializers[1];
        for (int i = 0; i < len; i++) {
            map.put(kd.deserialize(keyValue, input).asObject(),
                    vd.deserialize(valValue, input).asObject());
        }
        return ref.update(map);
    }
}
/**
 * Serializer for map columns. Writes the entry count as a var-int followed by
 * each key/value pair, encoded with the key and value serializers respectively.
 */
public static class MapSerializer extends ClickHouseSerializer.CompositeSerializer {
    // scratch wrappers reused for every entry
    private final ClickHouseValue keyValue;
    private final ClickHouseValue valValue;

    /**
     * Creates a map serializer.
     *
     * @param config      configuration used to create the key/value wrappers
     * @param column      map column providing key and value type information
     * @param serializers exactly two serializers: key first, then value
     * @throws IllegalArgumentException when the number of serializers is not 2
     */
    public MapSerializer(ClickHouseDataConfig config, ClickHouseColumn column,
            ClickHouseSerializer... serializers) {
        super(serializers);
        if (serializers.length != 2) {
            throw new IllegalArgumentException("Expect 2 serializers but got " + serializers.length);
        }
        this.keyValue = column.getKeyInfo().newValue(config);
        this.valValue = column.getValueInfo().newValue(config);
    }

    /**
     * Writes the map held by {@code value} to the stream, in the map's own
     * iteration order.
     */
    @Override
    public void serialize(ClickHouseValue value, ClickHouseOutputStream output) throws IOException {
        Map<?, ?> map = value.asMap();
        output.writeVarInt(map.size());

        ClickHouseSerializer ks = serializers[0];
        ClickHouseSerializer vs = serializers[1];
        // an empty map simply skips the loop, leaving only the zero count written
        for (Entry<?, ?> e : map.entrySet()) {
            ks.serialize(keyValue.update(e.getKey()), output);
            vs.serialize(valValue.update(e.getValue()), output);
        }
    }
}
/**
 * Deserializer for nested columns. Reads a var-int row count and then, for
 * each row, one value per nested column in declaration order.
 */
public static class NestedDeserializer extends ClickHouseDeserializer.CompositeDeserializer {
    // one reusable wrapper per nested column
    protected final ClickHouseValue[] values;

    /**
     * Creates a nested deserializer.
     *
     * @param config        configuration used to create the value wrappers
     * @param column        nested column whose nested columns define the row layout
     * @param deserializers one deserializer per nested column, in the same order
     * @throws IllegalArgumentException when the number of deserializers differs
     *                                  from the number of nested columns
     */
    public NestedDeserializer(ClickHouseDataConfig config, ClickHouseColumn column,
            ClickHouseDeserializer... deserializers) {
        super(deserializers);
        List<ClickHouseColumn> nestedCols = column.getNestedColumns();
        int len = nestedCols.size();
        if (deserializers.length != len) {
            throw new IllegalArgumentException(
                    ClickHouseUtils.format("Expect %d deserializers but got %d", len, deserializers.length));
        }
        values = new ClickHouseValue[len];
        for (int i = 0; i < len; i++) {
            values[i] = nestedCols.get(i).newValue(config);
        }
    }

    /**
     * Reads all rows from the stream and updates {@code ref} with an
     * {@code Object[rows][columns]} array.
     */
    @Override
    public ClickHouseValue deserialize(ClickHouseValue ref, ClickHouseInputStream input) throws IOException {
        int size = input.readVarInt();
        int len = values.length;
        Object[][] vals = new Object[size][];
        for (int i = 0; i < size; i++) {
            Object[] objs = new Object[len];
            for (int j = 0; j < len; j++) {
                objs[j] = deserializers[j].deserialize(values[j], input).asObject();
            }
            vals[i] = objs;
        }
        // The raw row array is handed to ref as-is; no dedicated nested value
        // object is created here.
        return ref.update(vals);
    }
}
/**
 * Serializer for nested columns. Writes the row count as a var-int and then,
 * for each row, one value per nested column in declaration order.
 */
public static class NestedSerializer extends ClickHouseSerializer.CompositeSerializer {
    // one reusable wrapper per nested column
    private final ClickHouseValue[] values;

    /**
     * Creates a nested serializer.
     *
     * @param config      configuration used to create the value wrappers
     * @param column      nested column whose nested columns define the row layout
     * @param serializers one serializer per nested column, in the same order
     * @throws IllegalArgumentException when the number of serializers differs
     *                                  from the number of nested columns
     */
    public NestedSerializer(ClickHouseDataConfig config, ClickHouseColumn column,
            ClickHouseSerializer... serializers) {
        super(serializers);
        List<ClickHouseColumn> nestedCols = column.getNestedColumns();
        int len = nestedCols.size();
        if (serializers.length != len) {
            throw new IllegalArgumentException(
                    ClickHouseUtils.format("Expect %d serializers but got %d", len, serializers.length));
        }
        values = new ClickHouseValue[len];
        for (int i = 0; i < len; i++) {
            values[i] = nestedCols.get(i).newValue(config);
        }
    }

    /**
     * Writes the rows held by {@code value}, which must expose an
     * {@code Object[][]} (rows of column values) through {@code asObject()}.
     */
    @Override
    public void serialize(ClickHouseValue value, ClickHouseOutputStream output) throws IOException {
        Object[][] vals = (Object[][]) value.asObject();
        int len = values.length;
        output.writeVarInt(vals.length);
        for (Object[] objs : vals) {
            for (int j = 0; j < len; j++) {
                serializers[j].serialize(values[j].update(objs[j]), output);
            }
        }
    }
}
/**
 * Deserializer for tuple columns. Reads one value per nested column, in
 * declaration order, with no length prefix.
 */
public static class TupleDeserializer extends ClickHouseDeserializer.CompositeDeserializer {
    // one reusable wrapper per tuple element
    private final ClickHouseValue[] values;

    /**
     * Creates a tuple deserializer.
     *
     * @param config        configuration used to create the value wrappers
     * @param column        tuple column whose nested columns define the elements
     * @param deserializers one deserializer per nested column, in the same order
     * @throws IllegalArgumentException when the number of deserializers differs
     *                                  from the number of nested columns
     */
    public TupleDeserializer(ClickHouseDataConfig config, ClickHouseColumn column,
            ClickHouseDeserializer... deserializers) {
        super(deserializers);
        List<ClickHouseColumn> nestedCols = column.getNestedColumns();
        int len = nestedCols.size();
        if (deserializers.length != len) {
            throw new IllegalArgumentException(
                    ClickHouseUtils.format("Expect %d deserializers but got %d", len, deserializers.length));
        }
        values = new ClickHouseValue[len];
        for (int i = 0; i < len; i++) {
            values[i] = nestedCols.get(i).newValue(config);
        }
    }

    /**
     * Reads every tuple element from the stream and updates {@code ref} with
     * the resulting {@code Object[]}.
     */
    @Override
    public ClickHouseValue deserialize(ClickHouseValue ref, ClickHouseInputStream input) throws IOException {
        int len = values.length;
        Object[] tupleValues = new Object[len];
        for (int i = 0; i < len; i++) {
            tupleValues[i] = deserializers[i].deserialize(values[i], input).asObject();
        }
        return ref.update(tupleValues);
    }
}
/**
 * Serializer for tuple columns. Writes one value per nested column, in
 * declaration order, with no length prefix.
 */
public static class TupleSerializer extends ClickHouseSerializer.CompositeSerializer {
    // one reusable wrapper per tuple element
    private final ClickHouseValue[] values;

    /**
     * Creates a tuple serializer.
     *
     * @param config      configuration used to create the value wrappers
     * @param column      tuple column whose nested columns define the elements
     * @param serializers one serializer per nested column, in the same order
     * @throws IllegalArgumentException when the number of serializers differs
     *                                  from the number of nested columns
     */
    public TupleSerializer(ClickHouseDataConfig config, ClickHouseColumn column,
            ClickHouseSerializer... serializers) {
        super(serializers);
        List<ClickHouseColumn> nestedCols = column.getNestedColumns();
        int len = nestedCols.size();
        if (serializers.length != len) {
            throw new IllegalArgumentException(
                    ClickHouseUtils.format("Expect %d serializers but got %d", len, serializers.length));
        }
        values = new ClickHouseValue[len];
        for (int i = 0; i < len; i++) {
            values[i] = nestedCols.get(i).newValue(config);
        }
    }

    /**
     * Writes the elements of the tuple held by {@code value}. The tuple must
     * contain at least as many elements as there are serializers.
     */
    @Override
    public void serialize(ClickHouseValue value, ClickHouseOutputStream output) throws IOException {
        List<?> tupleValues = value.asTuple();
        for (int i = 0, len = serializers.length; i < len; i++) {
            serializers[i].serialize(values[i].update(tupleValues.get(i)), output);
        }
    }
}
/**
 * Fills the given record from the input stream. Starting at the current
 * {@code readPosition}, each column's deserializer reads into the record's
 * value at the same index. {@code readPosition} is set to the index of each
 * column after it has been read, and reset to zero once the loop completes.
 *
 * @param r record to fill
 * @throws IOException when reading from the input stream fails
 */
@Override
protected void readAndFill(ClickHouseRecord r) throws IOException {
    final ClickHouseInputStream stream = input;
    final ClickHouseDeserializer[] columnReaders = serde.deserializers;
    final int columnCount = serde.columns.length;

    int index = readPosition;
    while (index < columnCount) {
        columnReaders[index].deserialize(r.getValue(index), stream);
        // updated only after the column was read successfully
        readPosition = index;
        index++;
    }

    // the whole record has been consumed
    readPosition = 0;
}
/**
 * Reads the column definitions from the header of the input stream.
 * <p>
 * Nothing is read when the stream has no data available (the stream is then
 * closed) or when the configured format has no header. Otherwise the header is
 * a var-int column count, followed by all column names, followed by all column
 * types. Names are passed through the configured rename method.
 *
 * @return columns described by the header, or an empty list when there is none
 * @throws IOException when reading from the input stream fails
 */
@Override
protected List<ClickHouseColumn> readColumns() throws IOException {
    if (input.available() < 1) {
        input.close();
        return Collections.emptyList();
    }
    if (!config.getFormat().hasHeader()) {
        return Collections.emptyList();
    }

    int size = ClickHouseChecker.between(input.readVarInt(), "size", 0, Integer.MAX_VALUE);
    String[] names = new String[size];
    for (int i = 0; i < size; i++) {
        names[i] = input.readUnicodeString();
    }

    // the types follow the complete list of names, in the same order
    ClickHouseRenameMethod m = config.getColumnRenameMethod();
    List<ClickHouseColumn> columns = new ArrayList<>(size);
    for (int i = 0; i < size; i++) {
        columns.add(ClickHouseColumn.of(m.rename(names[i]), input.readUnicodeString()));
    }
    return columns;
}
/**
 * Default constructor.
 *
 * @param config   non-null configuration containing information like format
 * @param input    input stream for deserialization, can be null when
 *                 {@code output} is available
 * @param output   output stream for serialization, can be null when
 *                 {@code input} is available
 * @param columns  nullable columns
 * @param settings nullable settings
 * @throws IOException when failed to read columns from input stream
 */
public ClickHouseRowBinaryProcessor(ClickHouseDataConfig config, ClickHouseInputStream input,
        ClickHouseOutputStream output, List<ClickHouseColumn> columns, Map<String, Serializable> settings)
        throws IOException {
    super(config, input, output, columns, settings);
}
/**
 * Creates, for each given column, a deserializer that reads an array of that
 * column's type. Each column is wrapped in a non-nullable {@code Array} column
 * of the same name, and the deserializers are created with a
 * {@code UseObjectConfig} wrapping the given configuration.
 *
 * @param config  base configuration
 * @param columns element columns
 * @return one array deserializer per column, in the same order
 */
protected ClickHouseDeserializer[] getArrayDeserializers(ClickHouseDataConfig config,
        List<ClickHouseColumn> columns) {
    ClickHouseDeserializer[] array = new ClickHouseDeserializer[columns.size()];
    ClickHouseDataConfig modifiedConfig = new UseObjectConfig(config);
    int i = 0;
    for (ClickHouseColumn column : columns) {
        ClickHouseColumn arrayColumn = ClickHouseColumn.of(column.getColumnName(), ClickHouseDataType.Array,
                false, column);
        array[i++] = getDeserializer(modifiedConfig, arrayColumn);
    }
    return array;
}
/**
 * Creates, for each given column, a serializer that writes an array of that
 * column's type. Each column is wrapped in a non-nullable {@code Array} column
 * of the same name, and the serializers are created with a
 * {@code UseObjectConfig} wrapping the given configuration.
 *
 * @param config  base configuration
 * @param columns element columns
 * @return one array serializer per column, in the same order
 */
protected ClickHouseSerializer[] getArraySerializers(ClickHouseDataConfig config,
        List<ClickHouseColumn> columns) {
    ClickHouseSerializer[] array = new ClickHouseSerializer[columns.size()];
    ClickHouseDataConfig modifiedConfig = new UseObjectConfig(config);
    int i = 0;
    for (ClickHouseColumn column : columns) {
        ClickHouseColumn arrayColumn = ClickHouseColumn.of(column.getColumnName(), ClickHouseDataType.Array,
                false, column);
        array[i++] = getSerializer(modifiedConfig, arrayColumn);
    }
    return array;
}
/**
 * Creates the deserializer that reads values of the given column from a
 * binary input stream.
 * <p>
 * The column's data type selects the decoder. Container types (Array, Map,
 * Nested, Tuple) are built recursively from the deserializers of their nested
 * columns. When the column is nullable, the selected deserializer is wrapped
 * in a {@link BinaryDataProcessor.NullableDeserializer}.
 *
 * @param config configuration consulted for options such as
 *               {@code isUseBinaryString()} and {@code isWidenUnsignedTypes()}
 * @param column column to create the deserializer for
 * @return deserializer for the column
 * @throws IllegalArgumentException when the data type, the aggregate function
 *                                  or the primitive array element type is not
 *                                  supported
 */
@Override
public ClickHouseDeserializer getDeserializer(ClickHouseDataConfig config, ClickHouseColumn column) {
    final ClickHouseDeserializer deserializer;
    switch (column.getDataType()) {
        case Bool:
            deserializer = BinaryDataProcessor::readBool;
            break;
        case Date:
            deserializer = BinaryDataProcessor.DateSerDe.of(config);
            break;
        case Date32:
            deserializer = BinaryDataProcessor.Date32SerDe.of(config);
            break;
        case DateTime:
            // a DateTime with a positive scale is read as DateTime64
            deserializer = column.getScale() > 0 ? BinaryDataProcessor.DateTime64SerDe.of(config, column)
                    : BinaryDataProcessor.DateTime32SerDe.of(config, column);
            break;
        case DateTime32:
            deserializer = BinaryDataProcessor.DateTime32SerDe.of(config, column);
            break;
        case DateTime64:
            deserializer = BinaryDataProcessor.DateTime64SerDe.of(config, column);
            break;
        case Enum8:
            deserializer = BinaryDataProcessor::readEnum8;
            break;
        case Enum16:
            deserializer = BinaryDataProcessor::readEnum16;
            break;
        case FixedString:
            deserializer = config.isUseBinaryString() ? new BinaryDataProcessor.FixedBytesSerDe(column)
                    : new BinaryDataProcessor.FixedStringSerDe(column);
            break;
        case Int8:
            deserializer = BinaryDataProcessor::readByte;
            break;
        case UInt8:
            deserializer = config.isWidenUnsignedTypes() ? BinaryDataProcessor::readUInt8AsShort
                    : BinaryDataProcessor::readByte;
            break;
        case Int16:
            deserializer = BinaryDataProcessor::readShort;
            break;
        case UInt16:
            deserializer = config.isWidenUnsignedTypes() ? BinaryDataProcessor::readUInt16AsInt
                    : BinaryDataProcessor::readShort;
            break;
        case Int32:
            deserializer = BinaryDataProcessor::readInteger;
            break;
        case UInt32:
            deserializer = config.isWidenUnsignedTypes() ? BinaryDataProcessor::readUInt32AsLong
                    : BinaryDataProcessor::readInteger;
            break;
        // 64-bit integers and all interval types share the same reader
        case Int64:
        case IntervalYear:
        case IntervalQuarter:
        case IntervalMonth:
        case IntervalWeek:
        case IntervalDay:
        case IntervalHour:
        case IntervalMinute:
        case IntervalSecond:
        case IntervalMicrosecond:
        case IntervalMillisecond:
        case IntervalNanosecond:
        case UInt64:
            deserializer = BinaryDataProcessor::readLong;
            break;
        case Int128:
            deserializer = BinaryDataProcessor::readInt128;
            break;
        case UInt128:
            deserializer = BinaryDataProcessor::readUInt128;
            break;
        case Int256:
            deserializer = BinaryDataProcessor::readInt256;
            break;
        case UInt256:
            deserializer = BinaryDataProcessor::readUInt256;
            break;
        case Decimal:
            deserializer = BinaryDataProcessor.DecimalSerDe.of(column);
            break;
        case Decimal32:
            deserializer = BinaryDataProcessor.Decimal32SerDe.of(column);
            break;
        case Decimal64:
            deserializer = BinaryDataProcessor.Decimal64SerDe.of(column);
            break;
        case Decimal128:
            deserializer = BinaryDataProcessor.Decimal128SerDe.of(column);
            break;
        case Decimal256:
            deserializer = BinaryDataProcessor.Decimal256SerDe.of(column);
            break;
        case Float32:
            deserializer = BinaryDataProcessor::readFloat;
            break;
        case Float64:
            deserializer = BinaryDataProcessor::readDouble;
            break;
        case IPv4:
            deserializer = BinaryDataProcessor::readIpv4;
            break;
        case IPv6:
            deserializer = BinaryDataProcessor::readIpv6;
            break;
        case UUID:
            deserializer = BinaryDataProcessor::readUuid;
            break;
        // Geo types
        case Point:
            deserializer = BinaryDataProcessor::readGeoPoint;
            break;
        case Ring:
            deserializer = BinaryDataProcessor::readGeoRing;
            break;
        case Polygon:
            deserializer = BinaryDataProcessor::readGeoPolygon;
            break;
        case MultiPolygon:
            deserializer = BinaryDataProcessor::readGeoMultiPolygon;
            break;
        // String
        case JSON:
        case Object:
        case String:
            deserializer = config.isUseBinaryString() ? BinaryDataProcessor::readBinaryString
                    : BinaryDataProcessor::readTextString;
            break;
        // nested
        case Array: {
            // Single-level arrays of non-nullable primitives use dedicated bulk
            // readers; everything else goes through the generic array deserializer.
            ClickHouseDeserializer primitiveArray = null;
            if (column.getArrayNestedLevel() == 1) {
                ClickHouseColumn baseColumn = column.getArrayBaseColumn();
                Class<?> javaClass = baseColumn.getObjectClassForArray(config);
                if (!baseColumn.isNullable() && javaClass.isPrimitive()) {
                    primitiveArray = newPrimitiveArrayDeserializer(baseColumn, javaClass);
                }
            }
            deserializer = primitiveArray != null ? primitiveArray
                    : new BinaryDataProcessor.ArrayDeserializer(config, column, true,
                            getDeserializer(config, column.getNestedColumns().get(0)));
            break;
        }
        case Map:
            deserializer = new MapDeserializer(config, column,
                    getDeserializers(config, column.getNestedColumns()));
            break;
        case Nested:
            deserializer = new NestedDeserializer(config, column,
                    getDeserializers(config, column.getNestedColumns()));
            break;
        case Tuple:
            deserializer = new TupleDeserializer(config, column,
                    getDeserializers(config, column.getNestedColumns()));
            break;
        // special
        case Nothing:
            deserializer = ClickHouseDeserializer.EMPTY_VALUE;
            break;
        case SimpleAggregateFunction:
            // read exactly like the first nested column
            deserializer = getDeserializer(config, column.getNestedColumns().get(0));
            break;
        case AggregateFunction:
            if (column.getAggregateFunction() != ClickHouseAggregateFunction.groupBitmap) {
                throw new IllegalArgumentException("Only groupBitmap is supported at this point");
            }
            deserializer = new BitmapSerDe(config, column)::deserialize;
            break;
        default:
            throw new IllegalArgumentException("Unsupported column:" + column.toString());
    }

    return column.isNullable() ? new BinaryDataProcessor.NullableDeserializer(deserializer) : deserializer;
}

/**
 * Picks the bulk array reader for a non-nullable base column that maps to a
 * Java primitive. The byte length of the column's data type is checked first,
 * then the Java class used for array elements.
 *
 * @param baseColumn base (element) column of the array
 * @param javaClass  primitive class used for the array elements
 * @return deserializer reading the whole array as a primitive array
 * @throws IllegalArgumentException when no reader exists for the primitive type
 */
private static ClickHouseDeserializer newPrimitiveArrayDeserializer(ClickHouseColumn baseColumn,
        Class<?> javaClass) {
    int byteLength = baseColumn.getDataType().getByteLength();
    if (byteLength == Byte.BYTES) { // Bool, *Int8
        return BinaryDataProcessor::readByteArray;
    }
    if (byteLength == Short.BYTES) { // *Int16
        return BinaryDataProcessor::readShortArray;
    }
    if (int.class == javaClass) { // Int32
        return BinaryDataProcessor::readIntegerArray;
    }
    if (long.class == javaClass) { // UInt32, *Int64
        return byteLength == Long.BYTES ? BinaryDataProcessor::readLongArray
                : BinaryDataProcessor::readIntegerArray;
    }
    if (float.class == javaClass) { // Float32
        return BinaryDataProcessor::readFloatArray;
    }
    if (double.class == javaClass) { // Float64
        return BinaryDataProcessor::readDoubleArray;
    }
    throw new IllegalArgumentException("Unsupported primitive type: " + javaClass);
}
/**
 * Creates the serializer that writes values of the given column to a binary
 * output stream.
 * <p>
 * The column's data type selects the encoder. Container types (Array, Map,
 * Nested, Tuple) are built recursively from the serializers of their nested
 * columns. When the column is nullable, the selected serializer is wrapped in
 * a {@link BinaryDataProcessor.NullableSerializer}.
 *
 * @param config configuration consulted for options such as
 *               {@code isUseBinaryString()}
 * @param column column to create the serializer for
 * @return serializer for the column
 * @throws IllegalArgumentException when the data type or the aggregate
 *                                  function is not supported
 */
@Override
public ClickHouseSerializer getSerializer(ClickHouseDataConfig config, ClickHouseColumn column) {
    final ClickHouseSerializer serializer;
    switch (column.getDataType()) {
        case Bool:
            serializer = BinaryDataProcessor::writeBool;
            break;
        case Date:
            serializer = BinaryDataProcessor.DateSerDe.of(config);
            break;
        case Date32:
            serializer = BinaryDataProcessor.Date32SerDe.of(config);
            break;
        case DateTime:
            // a DateTime with a positive scale is written as DateTime64
            serializer = column.getScale() > 0 ? BinaryDataProcessor.DateTime64SerDe.of(config, column)
                    : BinaryDataProcessor.DateTime32SerDe.of(config, column);
            break;
        case DateTime32:
            serializer = BinaryDataProcessor.DateTime32SerDe.of(config, column);
            break;
        case DateTime64:
            serializer = BinaryDataProcessor.DateTime64SerDe.of(config, column);
            break;
        case Enum8:
            serializer = BinaryDataProcessor::writeEnum8;
            break;
        case Enum16:
            serializer = BinaryDataProcessor::writeEnum16;
            break;
        case FixedString:
            serializer = config.isUseBinaryString() ? new BinaryDataProcessor.FixedBytesSerDe(column)
                    : new BinaryDataProcessor.FixedStringSerDe(column);
            break;
        // signed and unsigned integers of the same width share a writer
        case Int8:
        case UInt8:
            serializer = BinaryDataProcessor::writeByte;
            break;
        case Int16:
        case UInt16:
            serializer = BinaryDataProcessor::writeShort;
            break;
        case Int32:
        case UInt32:
            serializer = BinaryDataProcessor::writeInteger;
            break;
        case Int64:
        case IntervalYear:
        case IntervalQuarter:
        case IntervalMonth:
        case IntervalWeek:
        case IntervalDay:
        case IntervalHour:
        case IntervalMinute:
        case IntervalSecond:
        case IntervalMicrosecond:
        case IntervalMillisecond:
        case IntervalNanosecond:
        case UInt64:
            serializer = BinaryDataProcessor::writeLong;
            break;
        case Int128:
            serializer = BinaryDataProcessor::writeInt128;
            break;
        case UInt128:
            serializer = BinaryDataProcessor::writeUInt128;
            break;
        case Int256:
            serializer = BinaryDataProcessor::writeInt256;
            break;
        case UInt256:
            serializer = BinaryDataProcessor::writeUInt256;
            break;
        case Decimal:
            serializer = BinaryDataProcessor.DecimalSerDe.of(column);
            break;
        case Decimal32:
            serializer = BinaryDataProcessor.Decimal32SerDe.of(column);
            break;
        // NOTE(review): Decimal64/128/256 are constructed directly here while
        // getDeserializer uses the of(column) factories - confirm whether the
        // factories should be used for serialization as well.
        case Decimal64:
            serializer = new BinaryDataProcessor.Decimal64SerDe(column);
            break;
        case Decimal128:
            serializer = new BinaryDataProcessor.Decimal128SerDe(column);
            break;
        case Decimal256:
            serializer = new BinaryDataProcessor.Decimal256SerDe(column);
            break;
        case Float32:
            serializer = BinaryDataProcessor::writeFloat;
            break;
        case Float64:
            serializer = BinaryDataProcessor::writeDouble;
            break;
        case IPv4:
            serializer = BinaryDataProcessor::writeIpv4;
            break;
        case IPv6:
            serializer = BinaryDataProcessor::writeIpv6;
            break;
        case UUID:
            serializer = BinaryDataProcessor::writeUuid;
            break;
        // Geo types
        case Point:
            serializer = BinaryDataProcessor::writeGeoPoint;
            break;
        case Ring:
            serializer = BinaryDataProcessor::writeGeoRing;
            break;
        case Polygon:
            serializer = BinaryDataProcessor::writeGeoPolygon;
            break;
        case MultiPolygon:
            serializer = BinaryDataProcessor::writeGeoMultiPolygon;
            break;
        // String
        case JSON:
        case Object:
        case String:
            serializer = config.isUseBinaryString() ? BinaryDataProcessor::writeBinaryString
                    : BinaryDataProcessor::writeTextString;
            break;
        // nested
        case Array:
            serializer = new BinaryDataProcessor.ArraySerializer(config, column, true,
                    getSerializer(config, column.getNestedColumns().get(0)));
            break;
        case Map:
            serializer = new MapSerializer(config, column, getSerializers(config, column.getNestedColumns()));
            break;
        case Nested:
            serializer = new NestedSerializer(config, column, getSerializers(config, column.getNestedColumns()));
            break;
        case Tuple:
            serializer = new TupleSerializer(config, column, getSerializers(config, column.getNestedColumns()));
            break;
        // special
        case Nothing:
            serializer = ClickHouseSerializer.DO_NOTHING;
            break;
        case SimpleAggregateFunction:
            // written exactly like the first nested column
            serializer = getSerializer(config, column.getNestedColumns().get(0));
            break;
        case AggregateFunction:
            if (column.getAggregateFunction() != ClickHouseAggregateFunction.groupBitmap) {
                throw new IllegalArgumentException("Only groupBitmap is supported at this point");
            }
            serializer = new BitmapSerDe(config, column)::serialize;
            break;
        default:
            throw new IllegalArgumentException("Unsupported column:" + column.toString());
    }

    return column.isNullable() ? new BinaryDataProcessor.NullableSerializer(serializer) : serializer;
}
}