package com.aliyun.datahub.client.impl.batch.avro;

import com.aliyun.datahub.client.exception.DatahubClientException;
import com.aliyun.datahub.client.exception.InvalidParameterException;
import com.aliyun.datahub.client.impl.batch.BatchConstants;
import com.aliyun.datahub.client.impl.batch.BatchSerializer;
import com.aliyun.datahub.client.impl.batch.BatchType;
import com.aliyun.datahub.client.impl.batch.BatchUtils;
import com.aliyun.datahub.client.impl.batch.header.BatchHeader;
import com.aliyun.datahub.client.impl.batch.header.BatchHeaderV1;
import com.aliyun.datahub.client.model.*;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.util.Utf8;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
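
/**
 * Serializes a batch of DataHub {@link RecordEntry} objects into an Avro binary
 * payload for the batch transport. Tuple records are mapped column by column onto
 * an Avro schema derived from the DataHub {@link RecordSchema}; blob records are
 * written into a single bytes column; per-record attributes, when present, travel
 * in a reserved map column.
 */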
public class AvroSerializer extends BatchSerializer {
    private static final Logger LOGGER = LoggerFactory.getLogger(AvroSerializer.class);

    @Override
    public byte[] serializeRecord(List<RecordEntry> recordEntryList) {
        try {
            List<GenericRecord> genericRecordList = convertRecord(recordEntryList);
            return serializeAvroRecord(genericRecordList);
        } catch (Exception e) {
            LOGGER.error("Serialize avro record failed", e);
            throw new DatahubClientException(e.getMessage());
        }
    }
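
    /**
     * Builds the header prepended to the serialized payload; the V1 header tags
     * the body as Avro so the receiving side can select a matching deserializer.
     */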
    @Override
    protected BatchHeader getHeader() {
        BatchHeaderV1 header = new BatchHeaderV1();
        header.setDataType(BatchType.AVRO);
        return header;
    }
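
    /**
     * Converts record entries into Avro {@link GenericRecord}s. The whole batch is
     * encoded with one Avro schema, resolved (and cached) from the first entry's
     * record schema.
     */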
    public List<GenericRecord> convertRecord(List<RecordEntry> recordEntryList) {
        RecordSchema recordSchema = getRecordSchema(recordEntryList.get(0));
        Schema avroSchema = AvroSchemaCache.getSchema(recordSchema);

        List<GenericRecord> genericRecordList = new ArrayList<>(recordEntryList.size());
        for (RecordEntry recordEntry : recordEntryList) {
            GenericRecord record = new GenericData.Record(avroSchema);
            if (recordEntry.getRecordData() instanceof TupleRecordData) {
                setColumnValue((TupleRecordData) recordEntry.getRecordData(), record);
            } else {
                BlobRecordData data = (BlobRecordData) recordEntry.getRecordData();
                ByteBuffer buffer = ByteBuffer.wrap(data.getData());
                record.put(BatchConstants.BLOB_COLUMN_NAME, buffer);
            }
            setAttribute(recordEntry.getAttributes(), record);
            genericRecordList.add(record);
        }
        return genericRecordList;
    }
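
    /**
     * Copies tuple fields into the Avro record by position. TINYINT/SMALLINT widen
     * to int, TIMESTAMP is carried as a long, and STRING, JSON and DECIMAL are all
     * encoded as Avro strings (DECIMAL via its plain, non-scientific form). Null
     * fields are skipped and stay unset.
     */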
    private void setColumnValue(TupleRecordData data, GenericRecord record) {
        for (int idx = 0; idx < data.getRecordSchema().getFields().size(); ++idx) {
            FieldType type = data.getRecordSchema().getField(idx).getType();
            Object obj = data.getField(idx);
            if (obj == null) {
                continue; // null fields are left unset in the Avro record
            }

            switch (type) {
                case BOOLEAN:
                    record.put(idx, obj);
                    break;
                case TINYINT:
                case SMALLINT:
                case INTEGER:
                    record.put(idx, BatchUtils.getIntValue(obj));
                    break;
                case BIGINT:
                case TIMESTAMP:
                    record.put(idx, BatchUtils.getLongValue(obj));
                    break;
                case FLOAT:
                    record.put(idx, BatchUtils.getFloatValue(obj));
                    break;
                case DOUBLE:
                    record.put(idx, BatchUtils.getDoubleValue(obj));
                    break;
                case STRING:
                case JSON:
                    record.put(idx, new Utf8(obj.toString()));
                    break;
                case DECIMAL:
                    record.put(idx, new Utf8(((BigDecimal) obj).toPlainString()));
                    break;
                default:
                    throw new InvalidParameterException("Unknown value type: " + type);
            }
        }
    }
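
    /**
     * Stores user-defined attributes, when present, in the reserved attribute
     * column as an Avro map of {@link Utf8} keys and values.
     */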
    private void setAttribute(Map<String, String> attrs, GenericRecord record) {
        if (attrs != null && !attrs.isEmpty()) {
            Map<Utf8, Utf8> avroAttrs = new HashMap<>(attrs.size());
            for (Map.Entry<String, String> entry : attrs.entrySet()) {
                avroAttrs.put(new Utf8(entry.getKey()), new Utf8(entry.getValue()));
            }
            record.put(BatchConstants.ATTRIBUTE_COLUMN_NAME, avroAttrs);
        }
    }
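
    /**
     * Writes the records back-to-back with a raw binary encoder. The output is not
     * an Avro object-container file: no schema is embedded, so the reader must
     * recover the schema out of band (here, via the batch header and schema cache).
     */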
    private byte[] serializeAvroRecord(List<GenericRecord> recordList) throws IOException {
        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(recordList.get(0).getSchema());
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
        Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
        for (GenericRecord record : recordList) {
            datumWriter.write(record, encoder);
        }
        encoder.flush();
        return outputStream.toByteArray();
    }
}
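
// Usage sketch (illustrative, not part of the original source). It assumes the
// DataHub SDK's usual model API (RecordSchema#addField, TupleRecordData#setField,
// RecordEntry#setRecordData) and that AvroSerializer can be instantiated directly;
// check the SDK for the supported entry point before relying on this.
//
//     RecordSchema schema = new RecordSchema();
//     schema.addField(new Field("name", FieldType.STRING));
//     schema.addField(new Field("ts", FieldType.TIMESTAMP));
//
//     TupleRecordData data = new TupleRecordData(schema);
//     data.setField("name", "alice");
//     data.setField("ts", System.currentTimeMillis() * 1000); // assumed microseconds
//
//     RecordEntry entry = new RecordEntry();
//     entry.setRecordData(data);
//
//     byte[] payload = new AvroSerializer().serializeRecord(Collections.singletonList(entry));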