package com.aliyun.datahub.client.impl.batch;

import com.aliyun.datahub.client.exception.DatahubClientException;
import com.aliyun.datahub.client.exception.InvalidParameterException;
import com.aliyun.datahub.client.impl.batch.header.BatchHeader;
import com.aliyun.datahub.client.impl.compress.Compressor;
import com.aliyun.datahub.client.impl.compress.CompressorFactory;
import com.aliyun.datahub.client.impl.schemaregistry.SchemaRegistryClient;
import com.aliyun.datahub.client.model.*;
import com.aliyun.datahub.client.util.CrcUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;

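/**
 * Serializes a batch of records into a single binary frame: a {@link BatchHeader}
 * followed by the (optionally compressed) record payload. Subclasses supply the
 * payload encoding ({@link #serializeRecord}) and the header implementation
 * ({@link #getHeader}).
 */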
public abstract class BatchSerializer {
    private static final Logger LOGGER = LoggerFactory.getLogger(BatchSerializer.class);

    private String projectName;
    private String topicName;
    private CompressType compressType = CompressType.NONE;
    private SchemaRegistryClient schemaRegistry;

    public BatchSerializer() {
    }

    public String getProjectName() {
        return projectName;
    }

    public BatchSerializer setProjectName(String projectName) {
        this.projectName = projectName;
        return this;
    }

    public String getTopicName() {
        return topicName;
    }

    public BatchSerializer setTopicName(String topicName) {
        this.topicName = topicName;
        return this;
    }

    public CompressType getCompressType() {
        return compressType;
    }

    public BatchSerializer setCompressType(CompressType compressType) {
        this.compressType = compressType;
        return this;
    }

    public SchemaRegistryClient getSchemaRegistry() {
        return schemaRegistry;
    }

    public BatchSerializer setSchemaRegistry(SchemaRegistryClient schemaRegistry) {
        this.schemaRegistry = schemaRegistry;
        return this;
    }

    public byte[] serialize(List<RecordEntry> recordList) {
        try {
            checkRecord(recordList);
            byte[] rawBytes = serializeRecord(recordList);
            BatchHeader header = getHeader();
            byte[] bytes = compress(header, rawBytes);

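            // Fill in the frame header: total length (header + payload bytes),
            // uncompressed payload size, payload CRC32, and record count.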
            header.setLength(header.getHeaderSize() + bytes.length);
            header.setRawDataSize(rawBytes.length);
            header.setCrc32(CrcUtils.getCrc32(bytes));
            header.setRecordCount(recordList.size());
            if (header.getVersion() > 0) {
                RecordSchema recordSchema = getRecordSchema(recordList.get(0));
                header.setSchemaVersion(getSchemaVersion(recordSchema)); // only useful for version > 0
            }

            ByteArrayOutputStream output = new ByteArrayOutputStream();
            output.write(header.serialize());
            output.write(bytes);
            return output.toByteArray();
        } catch (Exception e) {
            LOGGER.error("Serialize record failed, {}/{}", projectName, topicName, e);
            throw new DatahubClientException(e.getMessage());
        }
    }

    private void checkRecord(List<RecordEntry> recordList) {
        if (recordList == null || recordList.isEmpty()) {
            throw new InvalidParameterException("Record list is null or empty");
        }

        // The first record's schema (null for blob records) is the reference for the batch.
        RecordSchema firstRecordSchema = null;
        if (recordList.get(0).getRecordData() instanceof TupleRecordData) {
            firstRecordSchema = ((TupleRecordData) recordList.get(0).getRecordData()).getRecordSchema();
        }

        for (RecordEntry entry : recordList) {
            boolean isTuple = entry.getRecordData() instanceof TupleRecordData;
            // XOR: fails when this entry's type (tuple vs. blob) differs from the first record's type.
            if ((firstRecordSchema == null) ^ !isTuple) {
                throw new DatahubClientException("Record types are not the same in record list");
            }

            if (isTuple) {
                TupleRecordData data = (TupleRecordData) entry.getRecordData();
                if (!data.getRecordSchema().equals(firstRecordSchema)) {
                    throw new DatahubClientException("Record schemas are not the same in record list");
                }
                checkData(data);
            } else {
                checkData((BlobRecordData) entry.getRecordData());
            }
        }
        }
    }

    protected RecordSchema getRecordSchema(RecordEntry recordEntry) {
        if (recordEntry.getRecordData() instanceof TupleRecordData) {
            return ((TupleRecordData) recordEntry.getRecordData()).getRecordSchema();
        }
        return null;
    }

    protected int getSchemaVersion(RecordSchema recordSchema) {
        if (recordSchema == null) {
            return -1;
        } else {
            return schemaRegistry.getVersionId(projectName, topicName, recordSchema);
        }
    }

    private void checkData(TupleRecordData data) {
        for (Field field : data.getRecordSchema().getFields()) {
            if (!field.isAllowNull() && data.getField(field.getName()) == null) {
                throw new InvalidParameterException("Field [" + field.getName() + "] does not allow null");
            }
        }
    }

    private void checkData(BlobRecordData data) {
        if (data.getData() == null) {
            throw new InvalidParameterException("Blob data does not allow null");
        }
    }

    private byte[] compress(BatchHeader header, byte[] rawBytes) {
        Compressor compressor = CompressorFactory.getCompressor(compressType);
        if (compressor == null) {
            // No compressor for the requested type: record NONE in the header
            // and send the payload uncompressed.
            header.setCompressType(CompressType.NONE);
            return rawBytes;
        }

        try {
            byte[] buf = compressor.compress(rawBytes);
            header.setCompressType(compressType);
            return buf;
        } catch (IOException e) {
            LOGGER.error("Compress data failed", e);
            throw new DatahubClientException(e.getMessage());
        }
    }

    protected abstract byte[] serializeRecord(List<RecordEntry> recordList);

    protected abstract BatchHeader getHeader();
}
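
A minimal usage sketch (hedged: SimpleBatchSerializer is a hypothetical stand-in for one of the client's concrete subclasses, and the compression and schema-registry settings are illustrative):

// SimpleBatchSerializer is assumed, not part of the listing above.
BatchSerializer serializer = new SimpleBatchSerializer()
        .setProjectName("my_project")
        .setTopicName("my_topic")
        .setCompressType(CompressType.LZ4)         // assumes LZ4 is defined in CompressType
        .setSchemaRegistry(schemaRegistryClient);  // used only when the header version > 0

byte[] frame = serializer.serialize(recordEntries); // recordEntries: List<RecordEntry>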