package com.aliyun.datahub.client.impl.batch.avro;

import com.aliyun.datahub.client.exception.DatahubClientException;
import com.aliyun.datahub.client.exception.InvalidParameterException;
import com.aliyun.datahub.client.impl.batch.BatchConstants;
import com.aliyun.datahub.client.impl.batch.BatchSerializer;
import com.aliyun.datahub.client.impl.batch.BatchType;
import com.aliyun.datahub.client.impl.batch.BatchUtils;
import com.aliyun.datahub.client.impl.batch.header.BatchHeader;
import com.aliyun.datahub.client.impl.batch.header.BatchHeaderV1;
import com.aliyun.datahub.client.model.*;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.util.Utf8;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

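/**
 * Serializes a batch of DataHub {@link RecordEntry} objects into the Avro
 * binary payload used by the batch protocol: tuple records are mapped field
 * by field onto a cached Avro schema, blob records are written into a single
 * bytes column, and per-record attributes go into a reserved map column.
 */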
public class AvroSerializer extends BatchSerializer {
    private static final Logger LOGGER = LoggerFactory.getLogger(AvroSerializer.class);

    @Override
    public byte[] serializeRecord(List<RecordEntry> recordEntryList) {
        try {
            List<GenericRecord> genericRecordList = convertRecord(recordEntryList);
            return serializeAvroRecord(genericRecordList);
        } catch (Exception e) {
            LOGGER.error("Serialize avro record failed", e);
            throw new DatahubClientException(e.getMessage());
        }
    }

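    // Batch header declaring AVRO as the payload encoding (header V1 layout).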
    @Override
    protected BatchHeader getHeader() {
        BatchHeaderV1 header = new BatchHeaderV1();
        header.setDataType(BatchType.AVRO);
        return header;
    }

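    // Converts each RecordEntry into an Avro GenericRecord. All entries in a
    // batch share one schema, so the Avro schema is resolved once from the
    // first entry via the schema cache.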
    public List<GenericRecord> convertRecord(List<RecordEntry> recordEntryList) {
        RecordSchema recordSchema = getRecordSchema(recordEntryList.get(0));
        Schema avroSchema = AvroSchemaCache.getSchema(recordSchema);
        List<GenericRecord> genericRecordList = new ArrayList<>(recordEntryList.size());
        for (RecordEntry recordEntry : recordEntryList) {
            GenericRecord record = new GenericData.Record(avroSchema);

            if (recordEntry.getRecordData() instanceof TupleRecordData) {
                setColumnValue((TupleRecordData) recordEntry.getRecordData(), record);
            } else {
                BlobRecordData data = (BlobRecordData) recordEntry.getRecordData();
                ByteBuffer buffer = ByteBuffer.wrap(data.getData());
                record.put(BatchConstants.BLOB_COLUMN_NAME, buffer);
            }

            setAttribute(recordEntry.getAttributes(), record);
            genericRecordList.add(record);
        }
        return genericRecordList;
    }

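    // Maps each non-null tuple field onto the Avro record by positional index;
    // null fields are skipped so the corresponding Avro fields stay null.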
    private void setColumnValue(TupleRecordData data, GenericRecord record) {
        for (int idx = 0; idx < data.getRecordSchema().getFields().size(); ++idx) {
            FieldType type = data.getRecordSchema().getField(idx).getType();
            Object obj = data.getField(idx);
            if (obj == null) {
                continue;
            }

            switch (type) {
                case BOOLEAN:
                    record.put(idx, obj);
                    break;
                case TINYINT:
                case SMALLINT:
                case INTEGER:
                    record.put(idx, BatchUtils.getIntValue(obj));
                    break;
                case BIGINT:
                case TIMESTAMP:
                    record.put(idx, BatchUtils.getLongValue(obj));
                    break;
                case FLOAT:
                    record.put(idx, BatchUtils.getFloatValue(obj));
                    break;
                case DOUBLE:
                    record.put(idx, BatchUtils.getDoubleValue(obj));
                    break;
                case STRING:
                case JSON:
                    record.put(idx, new Utf8(obj.toString()));
                    break;
                case DECIMAL:
                    record.put(idx, new Utf8(((BigDecimal) obj).toPlainString()));
                    break;
                default:
                    throw new InvalidParameterException("Unknown value type: " + type);
            }
        }
    }

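    // Copies record attributes into the reserved attribute column as a
    // Utf8-keyed map; absent or empty attributes leave the column unset.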
    private void setAttribute(Map<String, String> attrs, GenericRecord record) {
        if (attrs != null && !attrs.isEmpty()) {
            Map<Utf8, Utf8> avroAttrs = new HashMap<>(attrs.size());
            for (Map.Entry<String, String> entry : attrs.entrySet()) {
                avroAttrs.put(new Utf8(entry.getKey()), new Utf8(entry.getValue()));
            }
            record.put(BatchConstants.ATTRIBUTE_COLUMN_NAME, avroAttrs);
        }
    }

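    // Writes the records back-to-back with a single binary encoder. The output
    // carries no Avro container-file framing, so a reader must already know
    // the schema.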
    private byte[] serializeAvroRecord(List<GenericRecord> recordList) throws IOException {
        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(recordList.get(0).getSchema());
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);

        for (GenericRecord record : recordList) {
            datumWriter.write(record, encoder);
        }

        encoder.flush();
        return outputStream.toByteArray();
    }
}
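
For reference, a minimal usage sketch, not part of the original source: it builds one tuple RecordEntry with the DataHub SDK model classes referenced above and passes it to the serializer. The field names and values are placeholders, and a no-argument AvroSerializer constructor is assumed.

import java.util.Collections;

RecordSchema schema = new RecordSchema();
schema.addField(new Field("user_id", FieldType.BIGINT));
schema.addField(new Field("comment", FieldType.STRING));

TupleRecordData data = new TupleRecordData(schema);
data.setField("user_id", 42L);            // placeholder values
data.setField("comment", "hello");

RecordEntry entry = new RecordEntry();
entry.setRecordData(data);
entry.addAttribute("source", "example");  // optional per-record attribute

byte[] payload = new AvroSerializer()     // assumes a no-arg constructor
        .serializeRecord(Collections.singletonList(entry));

The resulting bytes are a bare concatenation of Avro datums; Avro's GenericDatumReader together with DecoderFactory.get().binaryDecoder can decode them, given the same schema.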