package com.aliyun.datahub.client.impl.batch.header;
import com.aliyun.datahub.client.exception.DatahubClientException;
import com.aliyun.datahub.client.impl.batch.BatchType;
import com.aliyun.datahub.client.impl.batch.binary.BinaryUtil;
import com.aliyun.datahub.client.model.CompressType;
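/**
 * Base class for the header that precedes a serialized DataHub record batch.
 *
 * <p>Every header starts with a fixed 8-byte prefix: the 4-byte magic number
 * {'D', 'H', 'U', 'B'} followed by a 4-byte version. The remaining fields
 * (length, rawDataSize, crc32, attributes, recordCount and, for version > 0,
 * dataType, schemaVersion, dataOffset) are serialized by the concrete
 * {@code BatchHeaderV0} / {@code BatchHeaderV1} subclasses.
 */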
public abstract class BatchHeader {
private static final int FIXED_HEADER_PREFIX = 8;
protected static final byte[] MAGIC_NUMBER = new byte[] { 'D', 'H', 'U', 'B' };
protected int version = 0;
protected int length = 0;
protected int rawDataSize = 0;
protected int crc32 = 0;
protected short attributes = 0;
protected int recordCount = 0;
// The following fields are only used for header version > 0.
protected short dataType = 0;
protected int schemaVersion = 0;
protected int dataOffset = 0;
public BatchHeader() {
}
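/**
 * Validates the magic number, reads the version from the fixed prefix, and
 * dispatches to the matching concrete header implementation.
 *
 * <p>A minimal usage sketch; {@code receiveBatch()} is a hypothetical source
 * of serialized batch bytes, not part of this SDK:
 * <pre>{@code
 * byte[] buf = receiveBatch();
 * BatchHeader header = BatchHeader.parseHeader(buf);
 * int records = header.getRecordCount();
 * CompressType compress = header.getCompressType();
 * }</pre>
 *
 * @throws DatahubClientException if the buffer is shorter than the fixed
 *         prefix, the magic number does not match, or the version is unknown
 */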
public static BatchHeader parseHeader(byte[] bytes) {
int version = getVersion(bytes);
BatchHeader header;
switch (version) {
case 0:
header = new BatchHeaderV0();
break;
case 1:
header = new BatchHeaderV1();
break;
default:
throw new DatahubClientException("Unsupported batch version: " + version);
}
header.deserialize(bytes);
return header;
}
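// Reads the 4-byte version stored immediately after the 4-byte magic number.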
private static int getVersion(byte[] bytes) {
if (bytes.length < FIXED_HEADER_PREFIX) {
throw new DatahubClientException("Failed to parse batch header: buffer too small");
}
if (!checkMagic(bytes)) {
throw new DatahubClientException("Batch header magic number mismatch");
}
return BinaryUtil.readInt(bytes, MAGIC_NUMBER.length);
}
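// Compares the first four bytes of the buffer against the 'DHUB' magic number.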
private static boolean checkMagic(byte[] bytes) {
for (int idx = 0; idx < MAGIC_NUMBER.length; ++idx) {
if (bytes[idx] != MAGIC_NUMBER[idx]) {
return false;
}
}
return true;
}
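// Version-specific wire layout: each subclass writes and reads its full
// header and reports its own fixed header size.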
public abstract byte[] serialize();
public abstract void deserialize(byte[] bytes);
public abstract int getHeaderSize();
public int getSchemaVersion() {
return schemaVersion;
}
public void setSchemaVersion(int schemaVersion) {
this.schemaVersion = schemaVersion;
}
public short getAttributes() {
return attributes;
}
public void setAttributes(short attributes) {
this.attributes = attributes;
}
public int getVersion() {
return version;
}
public int getLength() {
return length;
}
public void setLength(int length) {
this.length = length;
}
public int getRawDataSize() {
return rawDataSize;
}
public void setRawDataSize(int rawDataSize) {
this.rawDataSize = rawDataSize;
}
public int getCrc32() {
return crc32;
}
public void setCrc32(int crc32) {
this.crc32 = crc32;
}
public int getDataOffset() {
return dataOffset;
}
public void setDataOffset(int dataOffset) {
this.dataOffset = dataOffset;
}
public int getRecordCount() {
return recordCount;
}
public void setRecordCount(int recordCount) {
this.recordCount = recordCount;
}
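// Wire encoding of the batch payload type (meaningful for version > 0):
// 0 = BINARY, 1 = ARROW, 2 = AVRO.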
public void setDataType(BatchType type) {
switch (type) {
case BINARY:
dataType = 0;
break;
case ARROW:
dataType = 1;
break;
case AVRO:
dataType = 2;
break;
default:
throw new DatahubClientException("Unsupported data type: " + type);
}
}
public BatchType getDataType() {
switch (dataType) {
case 0: return BatchType.BINARY;
case 1: return BatchType.ARROW;
case 2: return BatchType.AVRO;
default: throw new DatahubClientException("Unsupported data type for v1: " + dataType);
}
}
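// The compress type is packed into the low two bits of 'attributes';
// setCompressType clears those bits (mask 0xfffc) and ORs in the new value,
// preserving the upper 14 bits. Worked example with illustrative values:
//   attributes = 0x0004, compress type value = 2
//   (0x0004 & 0xfffc) | 2 == 0x0006, and 0x0006 & 0x03 == 2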
public CompressType getCompressType() {
return CompressType.fromValue(getAttributes() & 0x03);
}
public void setCompressType(CompressType type) {
attributes = (short) ((attributes & 0xfffc) | type.getValue());
}
@Override
public String toString() {
return "BatchHeader{" +
"version=" + version +
", length=" + length +
", rawDataSize=" + rawDataSize +
", crc32=" + crc32 +
", attributes=" + attributes +
", recordCount=" + recordCount +
", dataType=" + dataType +
", schemaVersion=" + schemaVersion +
", dataOffset=" + dataOffset +
'}';
}
}