com.clickhouse.client.api.data_formats.internal.BinaryStreamReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of client-v2 Show documentation
Show all versions of client-v2 Show documentation
New client api for ClickHouse
The newest version!
package com.clickhouse.client.api.data_formats.internal;
import com.clickhouse.client.api.ClientException;
import com.clickhouse.data.ClickHouseColumn;
import com.clickhouse.data.ClickHouseDataType;
import com.clickhouse.data.format.BinaryStreamUtils;
import com.clickhouse.data.value.ClickHouseBitmap;
import org.slf4j.Logger;
import org.slf4j.helpers.NOPLogger;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.Inet4Address;
import java.net.Inet6Address;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
import java.util.UUID;
/**
* This class is not thread safe and should not be shared between multiple threads.
* Internally it may use a shared buffer to read data from the input stream.
* It is done mainly to reduce extra memory allocations for reading numbers.
*/
public class BinaryStreamReader {
// Stream the binary-encoded values are read from.
private final InputStream input;
// Logger; the constructor substitutes a no-op logger when none is supplied.
private final Logger log;
// Fallback time zone used by readValue when a column carries no time zone of its own.
private final TimeZone timeZone;
// Supplies the byte buffers handed to the little-endian read helpers.
private final ByteBufferAllocator bufferAllocator;
/**
* Creates a BinaryStreamReader instance that will use {@link DefaultByteBufferAllocator} to allocate buffers.
*
* @param input - source of raw data in a suitable format
* @param timeZone - timezone to use for date and datetime values
* @param log - logger
*/
BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log) {
    // Delegate to the four-argument constructor with a freshly created default allocator.
    this(
            input,
            timeZone,
            log,
            new DefaultByteBufferAllocator());
}
/**
* Creates a BinaryStreamReader instance that will use the provided buffer allocator.
*
* @param input - source of raw data in a suitable format
* @param timeZone - timezone to use for date and datetime values
* @param log - logger
* @param bufferAllocator - byte buffer allocator
*/
BinaryStreamReader(InputStream input, TimeZone timeZone, Logger log, ByteBufferAllocator bufferAllocator) {
    this.input = input;
    this.timeZone = timeZone;
    this.bufferAllocator = bufferAllocator;
    // A missing logger is replaced by the no-op logger so the field is never null.
    if (log != null) {
        this.log = log;
    } else {
        this.log = NOPLogger.NOP_LOGGER;
    }
}
/**
* Reads a value from the internal input stream.
* @param column - column information
* @return value
* @param <T> - target type of the value
* @throws IOException when IO error occurs
*/
public <T> T readValue(ClickHouseColumn column) throws IOException {
    // No type hint: the value comes back in the default Java representation for its column type.
    return readValue(column, null);
}
/**
* Reads a value from the internal input stream. Method will use type hint to do smarter conversion if possible.
* For example, all datetime values are of {@link ZonedDateTime}; if the type hint is {@link LocalDateTime} then
* the value is converted with {@link ZonedDateTime#toLocalDateTime()}.
* @param column - column information
* @param typeHint - type hint
* @return value
* @param <T> - target type of the value
* @throws IOException when IO error occurs
*/
public T readValue(ClickHouseColumn column, Class> typeHint) throws IOException {
if (column.isNullable()) {
int isNull = readByteOrEOF(input);
if (isNull == 1) { // is Null?
return (T) null;
}
}
try {
switch (column.getDataType()) {
// Primitives
case FixedString: {
byte[] bytes = readNBytes(input, column.getEstimatedLength());
return (T) new String(bytes, 0, column.getEstimatedLength(), StandardCharsets.UTF_8);
}
case String: {
int len = readVarInt(input);
if (len == 0) {
return (T) "";
}
return (T) new String(readNBytes(input, len), StandardCharsets.UTF_8);
}
case Int8:
return (T) Byte.valueOf(readByte());
case UInt8:
return (T) Short.valueOf(readUnsignedByte());
case Int16:
return (T) Short.valueOf(readShortLE());
case UInt16:
return (T) Integer.valueOf(readUnsignedShortLE());
case Int32:
return (T) Integer.valueOf(readIntLE());
case UInt32:
return (T) Long.valueOf(readUnsignedIntLE());
case Int64:
return (T) Long.valueOf(readLongLE());
case UInt64:
return (T) readBigIntegerLE(INT64_SIZE, true);
case Int128:
return (T) readBigIntegerLE(INT128_SIZE, false);
case UInt128:
return (T) readBigIntegerLE(INT128_SIZE, true);
case Int256:
return (T) readBigIntegerLE(INT256_SIZE, false);
case UInt256:
return (T) readBigIntegerLE(INT256_SIZE, true);
case Decimal:
return (T) readDecimal(column.getPrecision(), column.getScale());
case Decimal32:
return (T) readDecimal(ClickHouseDataType.Decimal32.getMaxPrecision(), column.getScale());
case Decimal64:
return (T) readDecimal(ClickHouseDataType.Decimal64.getMaxPrecision(), column.getScale());
case Decimal128:
return (T) readDecimal(ClickHouseDataType.Decimal128.getMaxPrecision(), column.getScale());
case Decimal256:
return (T) readDecimal(ClickHouseDataType.Decimal256.getMaxPrecision(), column.getScale());
case Float32:
return (T) Float.valueOf(readFloatLE());
case Float64:
return (T) Double.valueOf(readDoubleLE());
case Bool:
return (T) Boolean.valueOf(readByteOrEOF(input) == 1);
case Enum8:
return (T) Byte.valueOf((byte) readUnsignedByte());
case Enum16:
return (T) Short.valueOf((short) readUnsignedShortLE());
case Date:
return convertDateTime(readDate(column.getTimeZone() == null ? timeZone :
column.getTimeZone()), typeHint);
case Date32:
return convertDateTime(readDate32(column.getTimeZone() == null ? timeZone :
column.getTimeZone()), typeHint);
case DateTime:
return convertDateTime(readDateTime32(column.getTimeZone() == null ? timeZone :
column.getTimeZone()), typeHint);
case DateTime32:
return convertDateTime(readDateTime32(column.getTimeZone() == null ? timeZone :
column.getTimeZone()), typeHint);
case DateTime64:
return convertDateTime(readDateTime64(3, column.getTimeZone() == null ? timeZone :
column.getTimeZone()), typeHint);
case IntervalYear:
case IntervalQuarter:
case IntervalMonth:
case IntervalWeek:
case IntervalDay:
case IntervalHour:
case IntervalMinute:
case IntervalSecond:
case IntervalMicrosecond:
case IntervalMillisecond:
case IntervalNanosecond:
return (T) readBigIntegerLE(8, true);
case IPv4:
// https://clickhouse.com/docs/en/sql-reference/data-types/ipv4
return (T) Inet4Address.getByAddress(readNBytesLE(input, 4));
case IPv6:
// https://clickhouse.com/docs/en/sql-reference/data-types/ipv6
return (T) Inet6Address.getByAddress(readNBytes(input, 16));
case UUID:
return (T) new UUID(readLongLE(), readLongLE());
case Point:
return (T) readGeoPoint();
case Polygon:
return (T) readGeoPolygon();
case MultiPolygon:
return (T) readGeoMultiPolygon();
case Ring:
return (T) readGeoRing();
// case JSON: // obsolete https://clickhouse.com/docs/en/sql-reference/data-types/json#displaying-json-column
// case Object:
case Array:
return convertArray(readArray(column), typeHint);
case Map:
return (T) readMap(column);
// case Nested:
case Tuple:
return (T) readTuple(column);
case Nothing:
return null;
// case SimpleAggregateFunction:
case AggregateFunction:
return (T) ClickHouseBitmap.deserialize(input, column.getNestedColumns().get(0).getDataType());
default:
throw new IllegalArgumentException("Unsupported data type: " + column.getDataType());
}
} catch (EOFException e) {
throw e;
} catch (Exception e) {
throw new ClientException("Failed to read value for column " + column.getColumnName(), e);
}
}
private static T convertDateTime(ZonedDateTime value, Class> typeHint) {
if (typeHint == null) {
return (T) value;
}
if (typeHint.isAssignableFrom(LocalDateTime.class)) {
return (T) value.toLocalDateTime();
} else if (typeHint.isAssignableFrom(LocalDate.class)) {
return (T) value.toLocalDate();
}
return (T) value;
}
private static T convertArray(ArrayValue value, Class> typeHint) {
if (typeHint == null) {
return (T) value;
}
if (typeHint.isAssignableFrom(List.class)) {
return (T) value.asList();
}
return (T) value;
}
/**
* Read a short value in little-endian from the internal input stream.
*
* @return short value
* @throws IOException when IO error occurs
*/
public short readShortLE() throws IOException {
    // Take a two-byte scratch buffer from the allocator and decode from the internal stream.
    final byte[] scratch = bufferAllocator.allocate(INT16_SIZE);
    return readShortLE(input, scratch);
}
/**
* Reads a little-endian short from input stream. Uses buff to receive data from the input stream.
*
* @param input - source of bytes
* @param buff - buffer to store data
* @return short value
* @throws IOException when IO error occurs
*/
public static short readShortLE(InputStream input, byte[] buff) throws IOException {
    readNBytes(input, buff, 0, 2);
    // Little-endian: the first byte is the low-order half.
    final int low = buff[0] & 0xFF;
    final int high = buff[1] & 0xFF;
    return (short) ((high << 8) | low);
}
/**
* Reads an int value in little-endian from the internal input stream.
* @return int value
* @throws IOException when IO error occurs
*/
public int readIntLE() throws IOException {
    // Take a four-byte scratch buffer from the allocator and decode from the internal stream.
    final byte[] scratch = bufferAllocator.allocate(INT32_SIZE);
    return readIntLE(input, scratch);
}
/**
* Reads a little-endian int from input stream. Uses buff to receive data from the input stream.
*
* @param input - source of bytes
* @param buff - buffer to store data
* @return - int value
* @throws IOException when IO error occurs
*/
public static int readIntLE(InputStream input, byte[] buff) throws IOException {
    readNBytes(input, buff, 0, 4);
    // Fold from the most significant byte (index 3) down to the least significant (index 0).
    int value = 0;
    for (int i = 3; i >= 0; i--) {
        value = (value << 8) | (buff[i] & 0xFF);
    }
    return value;
}
/**
* Reads a long value in little-endian from the internal input stream.
*
* @return long value
* @throws IOException when IO error occurs
*/
public long readLongLE() throws IOException {
    // Take an eight-byte scratch buffer from the allocator and decode from the internal stream.
    final byte[] scratch = bufferAllocator.allocate(INT64_SIZE);
    return readLongLE(input, scratch);
}
/**
* Reads a little-endian long from input stream. Uses buff to receive data from the input stream.
*
* @param input - source of bytes
* @param buff - buffer to store data
* @return - long value
* @throws IOException when IO error occurs
*/
public static long readLongLE(InputStream input, byte[] buff) throws IOException {
    readNBytes(input, buff, 0, 8);
    // Fold from the most significant byte (index 7) down to the least significant (index 0).
    long value = 0L;
    for (int i = 7; i >= 0; i--) {
        value = (value << 8) | (buff[i] & 0xFFL);
    }
    return value;
}
/**
* Read byte from the internal input stream.
* @return byte value
* @throws IOException when IO error occurs
*/
public byte readByte() throws IOException {
    // Narrow whatever readByteOrEOF yields to a signed byte.
    final byte value = (byte) readByteOrEOF(input);
    return value;
}
/**
* Reads an unsigned byte value from the internal input stream.
* @return unsigned byte value
* @throws IOException when IO error occurs
*/
public short readUnsignedByte() throws IOException {
    // Keep only the low eight bits so the result is in 0..255.
    final int masked = readByteOrEOF(input) & 0xFF;
    return (short) masked;
}
/**
* Reads an unsigned short value from the internal input stream.
* @return unsigned short value
* @throws IOException when IO error occurs
*/
public int readUnsignedShortLE() throws IOException {
    // Take a two-byte scratch buffer from the allocator and decode from the internal stream.
    final byte[] scratch = bufferAllocator.allocate(INT16_SIZE);
    return readUnsignedShortLE(input, scratch);
}
/**
* Reads a little-endian unsigned short from input stream. Uses buff to receive data from the input stream.
*
* @param input - source of bytes
* @param buff - buffer to store data
* @return - unsigned short value
* @throws IOException when IO error occurs
*/
public static int readUnsignedShortLE(InputStream input, byte[] buff) throws IOException {
    // Reinterpret the signed 16-bit value as an unsigned quantity in 0..65535.
    final short signed = readShortLE(input, buff);
    return Short.toUnsignedInt(signed);
}
/**
* Reads an unsigned int value in little-endian from the internal input stream.
*
* @return unsigned int value
* @throws IOException when IO error occurs
*/
public long readUnsignedIntLE() throws IOException {
    // Reinterpret the signed 32-bit value as an unsigned quantity held in a long.
    final int signed = readIntLE();
    return Integer.toUnsignedLong(signed);
}
/**
* Reads a little-endian unsigned int from input stream. Uses buff to receive data from the input stream.
*
* @param input - source of bytes
* @param buff - buffer to store data
* @return - unsigned int value
* @throws IOException when IO error occurs
*/
public static long readUnsignedIntLE(InputStream input, byte[] buff) throws IOException {
    // Reinterpret the signed 32-bit value as an unsigned quantity held in a long.
    final int signed = readIntLE(input, buff);
    return Integer.toUnsignedLong(signed);
}
/**
* Reads a big integer value in little-endian from the internal input stream.
* @param len - number of bytes to read
* @param unsigned - whether the value is unsigned
* @return big integer value
* @throws IOException when IO error occurs
*/
public BigInteger readBigIntegerLE(int len, boolean unsigned) throws IOException {
    // Take a buffer of the requested width from the allocator and decode from the internal stream.
    final byte[] scratch = bufferAllocator.allocate(len);
    return readBigIntegerLE(input, scratch, len, unsigned);
}
// Byte widths of the fixed-size integer encodings; used as buffer sizes by the read helpers.
public static final int INT16_SIZE = 2;
public static final int INT32_SIZE = 4;
public static final int INT64_SIZE = 8;
public static final int INT128_SIZE = 16;
public static final int INT256_SIZE = 32;
/**
* Reads a little-endian big integer from input stream. Uses buff to receive data from the input stream.
*
* @param input - source of bytes
* @param buff - buffer to store data
* @param len - number of bytes to read
* @param unsigned - whether the value is unsigned
* @return - big integer value
* @throws IOException when IO error occurs
*/
public static BigInteger readBigIntegerLE(InputStream input, byte[] buff, int len, boolean unsigned) throws IOException {
    // readNBytesLE reverses the bytes it read, so the first len bytes are most significant first.
    byte[] bytes = readNBytesLE(input, buff, 0, len);
    if (bytes.length != len) {
        // BigInteger consumes the whole array; cut off any tail of a larger caller-supplied buffer
        // so only the len bytes just read contribute to the value.
        byte[] exact = new byte[len];
        System.arraycopy(bytes, 0, exact, 0, len);
        bytes = exact;
    }
    return unsigned ? new BigInteger(1, bytes) : new BigInteger(bytes);
}
/**
* Reads a float value in little-endian from the internal input stream.
* @return float value
* @throws IOException when IO error occurs
*/
public float readFloatLE() throws IOException {
    // The four bytes are read as a little-endian int and reinterpreted as IEEE 754 bits.
    final int bits = readIntLE();
    return Float.intBitsToFloat(bits);
}
/**
* Reads a double value from the internal input stream.
* @return double value
* @throws IOException when IO error occurs
*/
public double readDoubleLE() throws IOException {
    // The eight bytes are read as a little-endian long and reinterpreted as IEEE 754 bits.
    final long bits = readLongLE();
    return Double.longBitsToDouble(bits);
}
/**
* Reads a decimal value from the internal input stream.
* @param precision - precision of the decimal value
* @param scale - scale of the decimal value
* @return decimal value
* @throws IOException when IO error occurs
*/
public BigDecimal readDecimal(int precision, int scale) throws IOException {
    // The precision selects the storage width: 4 bytes, 8 bytes, 16 bytes, otherwise 32 bytes.
    // NOTE(review): the thresholds use getMaxScale() rather than getMaxPrecision() - confirm intended.
    if (precision <= ClickHouseDataType.Decimal32.getMaxScale()) {
        return BigDecimal.valueOf(readIntLE(), scale);
    }
    if (precision <= ClickHouseDataType.Decimal64.getMaxScale()) {
        return BigDecimal.valueOf(readLongLE(), scale);
    }
    final int width = precision <= ClickHouseDataType.Decimal128.getMaxScale() ? INT128_SIZE : INT256_SIZE;
    final BigInteger unscaled = readBigIntegerLE(width, false);
    return new BigDecimal(unscaled, scale);
}
/**
 * Reads exactly {@code len} bytes from input stream into a newly allocated array.
 *
 * @param inputStream - source of bytes
 * @param len - number of bytes to read
 * @return new array of {@code len} bytes
 * @throws IOException when IO error occurs
 */
public static byte[] readNBytes(InputStream inputStream, int len) throws IOException {
    final byte[] target = new byte[len];
    readNBytes(inputStream, target, 0, len);
    return target;
}
/**
* Reads {@code len} bytes from input stream to buffer.
*
* @param inputStream - source of bytes
* @param buffer - target buffer
* @param offset - target buffer offset
* @param len - number of bytes to read
* @return target buffer
* @throws IOException when IO error occurs or the stream ends before {@code len} bytes are read
*/
public static byte[] readNBytes(InputStream inputStream, byte[] buffer, int offset, int len) throws IOException {
    // A single read() may deliver fewer bytes than requested, so keep asking for the remainder.
    int remaining = len;
    int position = offset;
    while (remaining > 0) {
        final int received = inputStream.read(buffer, position, remaining);
        if (received == -1) {
            throw new EOFException("End of stream reached before reading all data");
        }
        position += received;
        remaining -= received;
    }
    return buffer;
}
/**
 * Reads {@code len} bytes from input stream into a buffer obtained from the allocator and
 * reverses their order.
 *
 * @param input - source of bytes
 * @param len - number of bytes to read
 * @return buffer holding the bytes in reversed order
 * @throws IOException when IO error occurs
 */
private byte[] readNBytesLE(InputStream input, int len) throws IOException {
    final byte[] target = bufferAllocator.allocate(len);
    return readNBytesLE(input, target, 0, len);
}
/**
* Reads {@code len} bytes from input stream to buffer in little-endian order.
*
* @param input - source of bytes
* @param buffer - target buffer
* @param offset - target buffer offset
* @param len - number of bytes to read
* @return - target buffer
* @throws IOException when IO error occurs
*/
public static byte[] readNBytesLE(InputStream input, byte[] buffer, int offset, int len) throws IOException {
    // Honor the target offset: fill buffer[offset .. offset+len) and reverse exactly that window.
    byte[] bytes = readNBytes(input, buffer, offset, len);
    int low = offset;
    int high = offset + len - 1;
    // Swap from both ends towards the middle to flip the byte order in place.
    while (low < high) {
        byte swapped = bytes[low];
        bytes[low] = bytes[high];
        bytes[high] = swapped;
        low++;
        high--;
    }
    return bytes;
}
/**
* Reads an array into an ArrayValue object.
* @param column - column information
* @return array value
* @throws IOException when IO error occurs
*/
public ArrayValue readArray(ClickHouseColumn column) throws IOException {
    Class<?> itemType = column.getArrayBaseColumn().getDataType().getWiderPrimitiveClass();
    // The element count precedes the elements as a variable-length integer.
    int len = readVarInt(input);
    // Arrays nested deeper than one level hold ArrayValue elements rather than base-type items.
    ArrayValue array = new ArrayValue(column.getArrayNestedLevel() > 1 ? ArrayValue.class : itemType, len);
    if (len == 0) {
        return array;
    }
    // The element column is the same for every item, so look it up once.
    ClickHouseColumn elementColumn = column.getNestedColumns().get(0);
    for (int i = 0; i < len; i++) {
        array.set(i, readValue(elementColumn));
    }
    return array;
}
/**
 * Skips a value of the given column by reading it from the internal input stream and
 * discarding the result.
 *
 * @param column - column information
 * @throws IOException when IO error occurs
 */
public void skipValue(ClickHouseColumn column) throws IOException {
    // There is no dedicated skip path: the value is decoded without a type hint and dropped.
    final Class<?> noHint = null;
    readValue(column, noHint);
}
public static class ArrayValue {
final int length;
final Class> itemType;
final Object array;
ArrayValue(Class> itemType, int length) {
this.itemType = itemType;
this.length = length;
try {
if (itemType.isArray()) {
array = Array.newInstance(ArrayValue.class, length);
} else {
array = Array.newInstance(itemType, length);
}
} catch (Exception e) {
throw new IllegalArgumentException("Failed to create array of type: " + itemType, e);
}
}
public int length() {
return length;
}
public Object get(int index) {
return Array.get(array, index);
}
public void set(int index, Object value) {
try {
Array.set(array, index, value);
} catch (IllegalArgumentException e) {
throw new IllegalArgumentException("Failed to set value at index: " + index +
" value " + value + " of class " + value.getClass().getName(), e);
}
}
private List> list = null;
public synchronized List asList() {
if (list == null) {
ArrayList list = new ArrayList<>(length);
for (int i = 0; i < length; i++) {
Object item = get(i);
if (item instanceof ArrayValue) {
list.add((T) ((ArrayValue) item).asList());
} else {
list.add((T) item);
}
}
this.list = list;
}
return (List) list;
}
}
/**
* Reads a map.
* @param column - column information
* @return a map
* @throws IOException when IO error occurs
*/
public Map, ?> readMap(ClickHouseColumn column) throws IOException {
int len = readVarInt(input);
if (len == 0) {
return Collections.emptyMap();
}
ClickHouseColumn keyType = column.getKeyInfo();
ClickHouseColumn valueType = column.getValueInfo();
LinkedHashMap