// com.aliyun.datahub.client.impl.batch.BatchSerializer — Maven / Gradle / Ivy
// The newest version!
package com.aliyun.datahub.client.impl.batch;
import com.aliyun.datahub.client.exception.DatahubClientException;
import com.aliyun.datahub.client.exception.InvalidParameterException;
import com.aliyun.datahub.client.impl.batch.header.BatchHeader;
import com.aliyun.datahub.client.impl.compress.Compressor;
import com.aliyun.datahub.client.impl.compress.CompressorFactory;
import com.aliyun.datahub.client.impl.schemaregistry.SchemaRegistryClient;
import com.aliyun.datahub.client.model.*;
import com.aliyun.datahub.client.util.CrcUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;
/**
 * Base class for serializing a batch of {@link RecordEntry} into the DataHub
 * binary batch wire format: a {@link BatchHeader} followed by the (optionally
 * compressed) record payload. Subclasses provide the concrete record encoding
 * via {@link #serializeRecord(List)} and the matching header via {@link #getHeader()}.
 *
 * <p>Not thread-safe: configuration setters mutate shared state. Configure once,
 * then call {@link #serialize(List)}.
 */
public abstract class BatchSerializer {
    private static final Logger LOGGER = LoggerFactory.getLogger(BatchSerializer.class);

    private String projectName;
    private String topicName;
    // Default to no compression; compress() falls back to NONE if no compressor exists.
    private CompressType compressType = CompressType.NONE;
    private SchemaRegistryClient schemaRegistry;

    public BatchSerializer() {
    }

    public String getProjectName() {
        return projectName;
    }

    /** Fluent setter; project name is used for schema lookup and error logging. */
    public BatchSerializer setProjectName(String projectName) {
        this.projectName = projectName;
        return this;
    }

    public String getTopicName() {
        return topicName;
    }

    /** Fluent setter; topic name is used for schema lookup and error logging. */
    public BatchSerializer setTopicName(String topicName) {
        this.topicName = topicName;
        return this;
    }

    public CompressType getCompressType() {
        return compressType;
    }

    /** Fluent setter for the payload compression algorithm. */
    public BatchSerializer setCompressType(CompressType compressType) {
        this.compressType = compressType;
        return this;
    }

    public SchemaRegistryClient getSchemaRegistry() {
        return schemaRegistry;
    }

    /** Fluent setter; required only when the header version uses schema registry lookups. */
    public BatchSerializer setSchemaRegistry(SchemaRegistryClient schemaRegistry) {
        this.schemaRegistry = schemaRegistry;
        return this;
    }

    /**
     * Serializes the given records into a single batch: validates them, encodes
     * them via the subclass, compresses the payload, and prepends the header
     * (length, raw size, CRC32 of the compressed payload, record count, and —
     * for header versions &gt; 0 — the schema version of the first record).
     *
     * @param recordList records to serialize; must be non-empty and homogeneous
     *                   (all TUPLE with the same schema, or all BLOB)
     * @return header bytes followed by the (possibly compressed) payload
     * @throws DatahubClientException if the list is invalid or serialization fails
     */
    public byte[] serialize(List<RecordEntry> recordList) {
        try {
            if (recordList == null || recordList.isEmpty()) {
                // Fail fast with a clear message; previously this surfaced as an
                // opaque IndexOutOfBoundsException from checkRecord().
                throw new InvalidParameterException("Record list is null or empty");
            }
            checkRecord(recordList);
            byte[] rawBytes = serializeRecord(recordList);
            BatchHeader header = getHeader();
            byte[] bytes = compress(header, rawBytes);
            header.setLength(header.getHeaderSize() + bytes.length);
            header.setRawDataSize(rawBytes.length);
            header.setCrc32(CrcUtils.getCrc32(bytes));
            header.setRecordCount(recordList.size());
            if (header.getVersion() > 0) {
                RecordSchema recordSchema = getRecordSchema(recordList.get(0));
                header.setSchemaVersion(getSchemaVersion(recordSchema)); // only useful for version > 0
            }
            // Exact output size is known, so presize the buffer to avoid regrowth.
            ByteArrayOutputStream output = new ByteArrayOutputStream(header.getHeaderSize() + bytes.length);
            output.write(header.serialize());
            output.write(bytes);
            return output.toByteArray();
        } catch (DatahubClientException e) {
            // Preserve the original exception subtype (e.g. InvalidParameterException)
            // instead of re-wrapping it into a plain DatahubClientException.
            LOGGER.error("Serialize record failed, {}/{}", projectName, topicName, e);
            throw e;
        } catch (Exception e) {
            LOGGER.error("Serialize record failed, {}/{}", projectName, topicName, e);
            throw new DatahubClientException(e.getMessage());
        }
    }

    /**
     * Validates that all records are of the same type (all TUPLE or all BLOB),
     * that TUPLE records share one schema, and that each record's data satisfies
     * its nullability constraints.
     */
    private void checkRecord(List<RecordEntry> recordList) {
        boolean firstIsTuple = recordList.get(0).getRecordData() instanceof TupleRecordData;
        RecordSchema firstRecordSchema = firstIsTuple
                ? ((TupleRecordData) recordList.get(0).getRecordData()).getRecordSchema()
                : null;
        for (RecordEntry entry : recordList) {
            boolean isTuple = entry.getRecordData() instanceof TupleRecordData;
            if (isTuple != firstIsTuple) {
                throw new DatahubClientException("Record type is not same in record list");
            }
            if (isTuple) {
                TupleRecordData data = (TupleRecordData) entry.getRecordData();
                if (!data.getRecordSchema().equals(firstRecordSchema)) {
                    throw new DatahubClientException("Record schema is not same in record list");
                }
                checkData(data);
            } else {
                checkData((BlobRecordData) entry.getRecordData());
            }
        }
    }

    /** Returns the schema of a TUPLE record, or {@code null} for BLOB records. */
    protected RecordSchema getRecordSchema(RecordEntry recordEntry) {
        if (recordEntry.getRecordData() instanceof TupleRecordData) {
            return ((TupleRecordData) recordEntry.getRecordData()).getRecordSchema();
        }
        return null;
    }

    /**
     * Resolves the registry version id of the given schema.
     *
     * @param recordSchema schema to look up; {@code null} for BLOB records
     * @return the registered version id, or -1 when there is no schema
     * @throws DatahubClientException if the schema registry client is not configured
     */
    protected int getSchemaVersion(RecordSchema recordSchema) {
        if (recordSchema == null) {
            return -1; // BLOB records carry no schema version
        }
        if (schemaRegistry == null) {
            // Previously this was a bare NullPointerException with no message.
            throw new DatahubClientException("SchemaRegistryClient is not set, cannot resolve schema version");
        }
        return schemaRegistry.getVersionId(projectName, topicName, recordSchema);
    }

    /** Rejects TUPLE records that leave a NOT NULL field unset. */
    private void checkData(TupleRecordData data) {
        for (Field field : data.getRecordSchema().getFields()) {
            if (!field.isAllowNull() && data.getField(field.getName()) == null) {
                throw new InvalidParameterException("Field [" + field.getName() + "] not allow null");
            }
        }
    }

    /** Rejects BLOB records with no payload. */
    private void checkData(BlobRecordData data) {
        if (data.getData() == null) {
            throw new InvalidParameterException("Blob data not allow null");
        }
    }

    /**
     * Compresses the raw payload with the configured compressor and records the
     * effective compression type in the header. Falls back to no compression
     * (and marks the header accordingly) when no compressor is available.
     */
    private byte[] compress(BatchHeader header, byte[] rawBytes) {
        Compressor compressor = CompressorFactory.getCompressor(compressType);
        if (compressor == null) {
            header.setCompressType(CompressType.NONE);
            return rawBytes;
        }
        try {
            byte[] buf = compressor.compress(rawBytes);
            header.setCompressType(compressType);
            return buf;
        } catch (IOException e) {
            LOGGER.error("Compress data fail", e);
            throw new DatahubClientException(e.getMessage());
        }
    }

    /** Encodes the validated records into the subclass-specific raw payload. */
    protected abstract byte[] serializeRecord(List<RecordEntry> recordList);

    /** Returns a fresh header instance matching the subclass's batch format version. */
    protected abstract BatchHeader getHeader();
}