/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.pulsar.internal;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.streaming.util.serialization.PulsarDeserializationSchema;
import org.apache.flink.table.api.DataTypes;
import org.apache.flink.table.types.CollectionDataType;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.FieldsDataType;
import org.apache.flink.table.types.KeyValueDataType;
import org.apache.flink.table.types.logical.DecimalType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.apache.flink.util.ExceptionUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.pulsar.client.api.Message;
import org.apache.pulsar.client.impl.schema.generic.GenericAvroRecord;
import org.apache.pulsar.common.schema.SchemaInfo;
import org.apache.pulsar.shade.com.fasterxml.jackson.core.JsonFactory;
import org.apache.pulsar.shade.com.fasterxml.jackson.core.JsonParser;
import org.apache.pulsar.shade.com.google.common.collect.ImmutableSet;
import org.apache.pulsar.shade.org.apache.avro.Conversions;
import org.apache.pulsar.shade.org.apache.avro.LogicalType;
import org.apache.pulsar.shade.org.apache.avro.LogicalTypes;
import org.apache.pulsar.shade.org.apache.avro.Schema;
import org.apache.pulsar.shade.org.apache.avro.SchemaBuilder;
import org.apache.pulsar.shade.org.apache.avro.generic.GenericData;
import org.apache.pulsar.shade.org.apache.avro.generic.GenericFixed;
import org.apache.pulsar.shade.org.apache.avro.generic.GenericRecord;
import org.apache.pulsar.shade.org.apache.avro.util.Utf8;
import java.io.IOException;
import java.io.Serializable;
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.flink.streaming.connectors.pulsar.internal.PulsarOptions.META_FIELD_NAMES;
import static org.apache.pulsar.common.schema.SchemaType.JSON;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.ARRAY;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.DOUBLE;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.FLOAT;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.INT;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.LONG;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.MAP;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.NULL;
import static org.apache.pulsar.shade.org.apache.avro.Schema.Type.UNION;
/**
* Deserialize Pulsar message into Flink row.
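*
* <p>A minimal usage sketch. Here {@code schemaInfo} and {@code parsedOptions} are
* assumed to come from the surrounding connector setup (schema registry lookup and
* source configuration):
* <pre>{@code
* SchemaInfo schemaInfo = ...;      // schema of the source topic
* JSONOptions parsedOptions = ...;  // JSON parsing options
* PulsarDeserializer deserializer = new PulsarDeserializer(schemaInfo, parsedOptions, false);
* Row row = deserializer.deserialize(message); // message: a consumed Pulsar message
* }</pre>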
*/
@Slf4j
public class PulsarDeserializer implements PulsarDeserializationSchema<Row> {
private final Function<Message<?>, Row> converter;
private final DataType rootDataType;
private final FieldsDataType fieldsDataType;
private final SchemaTranslator schemaTranslator;
private final NewDecimalConversion decimalConversions = new NewDecimalConversion();
public PulsarDeserializer(SchemaInfo schemaInfo, JSONOptions parsedOptions, boolean useExtendField) {
try {
schemaTranslator = new SimpleSchemaTranslator(useExtendField);
this.fieldsDataType = schemaTranslator.pulsarSchemaToFieldsDataType(schemaInfo);
this.rootDataType = schemaTranslator.schemaInfo2SqlType(schemaInfo);
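// Choose a message-to-row converter based on the schema type: AVRO and JSON
// payloads get structured record parsers, while any other (atomic) schema is
// written into a single-column row; metadata columns are appended when
// useExtendField is set.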
switch (schemaInfo.getType()) {
case AVRO:
FieldsDataType st = (FieldsDataType) rootDataType;
int fieldsNum;
if (useExtendField) {
fieldsNum = st.getChildren().size() + META_FIELD_NAMES.size();
} else {
fieldsNum = st.getChildren().size();
}
Schema avroSchema =
new Schema.Parser().parse(new String(schemaInfo.getSchema(), StandardCharsets.UTF_8));
BinFunction<RowUpdater, GenericRecord> writer = getRecordWriter(avroSchema, st, new ArrayList<>());
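// Per message: unwrap the Pulsar GenericAvroRecord and let the pre-built
// record writer copy its fields into a fresh Row.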
this.converter = msg -> {
RowUpdater fieldUpdater = new RowUpdater();
Row resultRow = new Row(fieldsNum);
fieldUpdater.setRow(resultRow);
Object value = msg.getValue();
writer.apply(fieldUpdater, ((GenericAvroRecord) value).getAvroRecord());
if (useExtendField) {
writeMetadataFields(msg, resultRow);
}
return resultRow;
};
break;
case JSON:
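// JSON payloads go through a failure-safe Jackson-based parser; handling of
// malformed records is governed by parsedOptions.getParseMode().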
FieldsDataType fdt = (FieldsDataType) rootDataType;
BiFunction<JsonFactory, String, JsonParser> createParser =
(jsonFactory, s) -> {
try {
return jsonFactory.createParser(s);
} catch (IOException e) {
throw new RuntimeException(e);
}
};
JacksonRecordParser rawParser = new JacksonRecordParser(rootDataType, parsedOptions);
JacksonRecordParser.FailureSafeRecordParser parser = new JacksonRecordParser.FailureSafeRecordParser(
(s, row) -> rawParser.parse(s, createParser, row),
parsedOptions.getParseMode(),
fdt);
this.converter = msg -> {
int rowSize = useExtendField ? fdt.getChildren().size() + META_FIELD_NAMES.size() : fdt.getChildren().size();
Row resultRow = new Row(rowSize);
byte[] value = msg.getData();
parser.parse(new String(value, StandardCharsets.UTF_8), resultRow);
if (useExtendField) {
writeMetadataFields(msg, resultRow);
}
return resultRow;
};
break;
default:
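// Atomic (non-record) schemas: the decoded value occupies the single data column.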
TriFunction<RowUpdater, Integer, Object> writer2 = newAtomicWriter(rootDataType);
this.converter = msg -> {
RowUpdater fUpdater = new RowUpdater();
int rowSize = useExtendField ? 1 + META_FIELD_NAMES.size() : 1;
Row tmpRow = new Row(rowSize);
fUpdater.setRow(tmpRow);
Object value = msg.getValue();
writer2.apply(fUpdater, 0, value);
if (useExtendField) {
writeMetadataFields(msg, tmpRow);
}
return tmpRow;
};
}
} catch (IncompatibleSchemaException e) {
log.error("Failed to convert pulsar schema to flink data type {}",
ExceptionUtils.stringifyException(e));
throw new RuntimeException(e);
}
}
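/**
* Fills the trailing metadata columns of the row: key bytes, topic name,
* message id, publish time, and event time (null when the message has none).
*/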
private void writeMetadataFields(Message<?> message, Row row) {
int metaStartIdx = row.getArity() - 5;
if (message.hasKey()) {
row.setField(metaStartIdx, message.getKeyBytes());
} else {
row.setField(metaStartIdx, null);
}
row.setField(metaStartIdx + 1, message.getTopicName());
row.setField(metaStartIdx + 2, message.getMessageId().toByteArray());
row.setField(metaStartIdx + 3, LocalDateTime.ofInstant(Instant.ofEpochMilli(message.getPublishTime()), ZoneId.systemDefault()));
if (message.getEventTime() > 0L) {
row.setField(metaStartIdx + 4, LocalDateTime.ofInstant(Instant.ofEpochMilli(message.getEventTime()), ZoneId.systemDefault()));
} else {
row.setField(metaStartIdx + 4, null);
}
}
private TriFunction<RowUpdater, Integer, Object> newAtomicWriter(DataType dataType) {
LogicalTypeRoot tpe = dataType.getLogicalType().getTypeRoot();
switch (tpe) {
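// All atomic values are currently passed through unchanged; DATE and
// TIMESTAMP are listed separately only to make their handling explicit.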
case DATE:
case TIMESTAMP_WITHOUT_TIME_ZONE:
return (rowUpdater, ordinal, value) -> {
rowUpdater.set(ordinal, value);
};
default:
return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, value);
}
}
private TriFunction<FlinkDataUpdater, Integer, Object> newWriter(Schema avroType, DataType flinkType, List<String> path) throws IncompatibleSchemaException {
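// Dispatch on the (Avro type, Flink logical type) pair and build a writer that
// converts a decoded Avro value into its Flink representation at the given ordinal.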
LogicalTypeRoot tpe = flinkType.getLogicalType().getTypeRoot();
Schema.Type atpe = avroType.getType();
if (atpe == Schema.Type.NULL && tpe == LogicalTypeRoot.NULL) {
return (rowUpdater, ordinal, value) -> rowUpdater.setNullAt(ordinal);
} else if (atpe == Schema.Type.BOOLEAN && tpe == LogicalTypeRoot.BOOLEAN ||
atpe == Schema.Type.INT && tpe == LogicalTypeRoot.INTEGER ||
atpe == Schema.Type.LONG && tpe == LogicalTypeRoot.BIGINT ||
atpe == Schema.Type.FLOAT && tpe == LogicalTypeRoot.FLOAT ||
atpe == Schema.Type.DOUBLE && tpe == LogicalTypeRoot.DOUBLE) {
return (rowUpdater, ordinal, value) -> rowUpdater.set(ordinal, value);
} else if (atpe == INT && tpe == LogicalTypeRoot.DATE) {
return (rowUpdater, ordinal, value) ->
// Avro encodes DATE as an int (days since the epoch), so the runtime value is an Integer
rowUpdater.set(ordinal, LocalDate.ofEpochDay((Integer) value));
} else if (atpe == Schema.Type.LONG && tpe == LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE) {
LogicalType altpe = avroType.getLogicalType();
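// DateTimeUtils.toJavaTimestamp works in microseconds here, so millisecond
// timestamps are scaled by 1000 before conversion.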
if (altpe instanceof LogicalTypes.TimestampMillis) {
return (rowUpdater, ordinal, value) ->
rowUpdater.set(ordinal,
DateTimeUtils.toJavaTimestamp(((Long) value) * 1000).toLocalDateTime());
} else if (altpe instanceof LogicalTypes.TimestampMicros) {
return (rowUpdater, ordinal, value) ->
rowUpdater.set(ordinal,
DateTimeUtils.toJavaTimestamp((Long) value).toLocalDateTime());
} else {
throw new IncompatibleSchemaException(String.format(
"Cannot convert Avro logical type %s to flink timestamp type", altpe.toString()));
}
} else if (atpe == Schema.Type.STRING && tpe == LogicalTypeRoot.VARCHAR) {
return (rowUpdater, ordinal, value) -> {
String s = null;
if (value instanceof String) {
s = (String) value;
} else if (value instanceof Utf8) {
Utf8 u8 = (Utf8) value;
byte[] bytes = new byte[u8.getByteLength()];
System.arraycopy(u8.getBytes(), 0, bytes, 0, u8.getByteLength());
s = new String(bytes, StandardCharsets.UTF_8);
}
rowUpdater.set(ordinal, s);
};
} else if (atpe == Schema.Type.ENUM && tpe == LogicalTypeRoot.VARCHAR) {
return (rowUpdater, ordinal, value) ->
rowUpdater.set(ordinal, value.toString());
} else if (atpe == Schema.Type.FIXED && tpe == LogicalTypeRoot.BINARY) {
return (rowUpdater, ordinal, value) ->
rowUpdater.set(ordinal, ((GenericFixed) value).bytes().clone());
} else if (atpe == Schema.Type.BYTES && tpe == LogicalTypeRoot.VARBINARY) {
return (rowUpdater, ordinal, value) -> {
byte[] bytes = null;
if (value instanceof ByteBuffer) {
ByteBuffer bb = (ByteBuffer) value;
bytes = new byte[bb.remaining()];
bb.get(bytes);
} else if (value instanceof byte[]) {
bytes = (byte[]) value;
} else {
throw new RuntimeException(value.toString() + " is not a valid avro binary");
}
rowUpdater.set(ordinal, bytes);
};
} else if (atpe == Schema.Type.FIXED && tpe == LogicalTypeRoot.DECIMAL) {
DecimalType d = (DecimalType) flinkType.getLogicalType();
return (rowUpdater, ordinal, value) -> {
BigDecimal bigDecimal = decimalConversions.fromFixed(
(GenericFixed) value,
avroType,
LogicalTypes.decimal(d.getPrecision(), d.getScale()));
rowUpdater.set(ordinal, bigDecimal);
};
} else if (atpe == Schema.Type.BYTES && tpe == LogicalTypeRoot.DECIMAL) {
DecimalType d = (DecimalType) flinkType.getLogicalType();
return (rowUpdater, ordinal, value) -> {
BigDecimal bigDecimal = decimalConversions.fromBytes(
(ByteBuffer) value,
avroType,
LogicalTypes.decimal(d.getPrecision(), d.getScale()));
rowUpdater.set(ordinal, bigDecimal);
};
} else if (atpe == Schema.Type.RECORD && tpe == LogicalTypeRoot.ROW) {
FieldsDataType fieldsDataType = (FieldsDataType) flinkType;
BinFunction writeRecord = getRecordWriter(avroType, fieldsDataType, path);
return (rowUpdater, ordinal, value) -> {
Row row = new Row(fieldsDataType.getChildren().size());
RowUpdater ru = new RowUpdater();
ru.setRow(row);
writeRecord.apply(ru, (GenericRecord) value);
rowUpdater.set(ordinal, row);
};
} else if (tpe == LogicalTypeRoot.ARRAY && atpe == ARRAY && flinkType instanceof CollectionDataType) {
DataType et = ((CollectionDataType) flinkType).getElementDataType();
boolean containsNull = et.getLogicalType().isNullable();
TriFunction<FlinkDataUpdater, Integer, Object> elementWriter = newWriter(avroType.getElementType(), et, path);
return (rowUpdater, ordinal, value) -> {
List<Object> array = (List<Object>) value;
int len = array.size();
Object[] result = new Object[len];
ArrayDataUpdater elementUpdater = new ArrayDataUpdater(result);
for (int i = 0; i < len; i++) {
Object element = array.get(i);
if (element == null) {
if (!containsNull) {
throw new RuntimeException(String.format(
"Array value at path %s is not allowed to be null", path.toString()));
} else {
elementUpdater.setNullAt(i);
}
} else {
elementWriter.apply(elementUpdater, i, element);
}
}
rowUpdater.set(ordinal, result);
};
} else if (tpe == LogicalTypeRoot.MAP && atpe == MAP &&
((KeyValueDataType) flinkType).getKeyDataType().getLogicalType().getTypeRoot() == LogicalTypeRoot.VARCHAR) {
KeyValueDataType kvt = (KeyValueDataType) flinkType;
DataType kt = kvt.getKeyDataType();
TriFunction<FlinkDataUpdater, Integer, Object> keyWriter = newWriter(SchemaBuilder.builder().stringType(), kt, path);
DataType vt = kvt.getValueDataType();
TriFunction<FlinkDataUpdater, Integer, Object> valueWriter = newWriter(avroType.getValueType(), vt, path);
boolean valueContainsNull = vt.getLogicalType().isNullable();
return (rowUpdater, ordinal, value) -> {
Map<Object, Object> map = (Map<Object, Object>) value;
String[] keys = new String[map.size()];
Object[] values = new Object[map.size()];
ArrayDataUpdater keyUpdater = new ArrayDataUpdater(keys);
ArrayDataUpdater valueUpdater = new ArrayDataUpdater(values);
Iterator<Map.Entry<Object, Object>> iterator = map.entrySet().iterator();
int i = 0;
while (iterator.hasNext()) {
Map.Entry<Object, Object> entry = iterator.next();
assert entry.getKey() != null;
keyWriter.apply(keyUpdater, i, entry.getKey());
if (entry.getValue() == null) {
if (!valueContainsNull) {
throw new RuntimeException(String.format(
"Map value at path %s is not allowed to be null", path.toString()));
} else {
valueUpdater.setNullAt(i);
}
} else {
valueWriter.apply(valueUpdater, i, entry.getValue());
}
i += 1;
}
Map<String, Object> result = new HashMap<>(map.size());
for (int j = 0; j < map.size(); j++) {
result.put(keys[j], values[j]);
}
rowUpdater.set(ordinal, result);
};
} else if (atpe == UNION) {
List<Schema> allTypes = avroType.getTypes();
List<Schema> nonNullTypes = allTypes.stream().filter(t -> t.getType() != NULL).collect(Collectors.toList());
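// Union handling: a single non-null branch unwraps to that branch's writer;
// the numeric pairs (INT|LONG, FLOAT|DOUBLE) promote to BIGINT/DOUBLE; any
// other union maps to a ROW with one field per branch, chosen at runtime via
// GenericData.resolveUnion.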
if (!nonNullTypes.isEmpty()) {
if (nonNullTypes.size() == 1) {
return newWriter(nonNullTypes.get(0), flinkType, path);
} else {
if (nonNullTypes.size() == 2) {
Schema.Type tp1 = nonNullTypes.get(0).getType();
Schema.Type tp2 = nonNullTypes.get(1).getType();
// use equals(): DataTypes.BIGINT() returns a fresh instance, so == would never match
if (ImmutableSet.of(tp1, tp2).equals(ImmutableSet.of(INT, LONG)) && flinkType.equals(DataTypes.BIGINT())) {
return (updater, ordinal, value) -> {
if (value == null) {
updater.setNullAt(ordinal);
} else if (value instanceof Long) {
updater.set(ordinal, value);
} else if (value instanceof Integer) {
updater.set(ordinal, ((Integer) value).longValue());
}
};
} else if (ImmutableSet.of(tp1, tp2).equals(ImmutableSet.of(FLOAT, DOUBLE)) && flinkType.equals(DataTypes.DOUBLE())) {
return (updater, ordinal, value) -> {
if (value == null) {
updater.setNullAt(ordinal);
} else if (value instanceof Double) {
updater.set(ordinal, value);
} else if (value instanceof Float) {
updater.set(ordinal, ((Float) value).doubleValue());
}
};
} else {
throw new IncompatibleSchemaException(String.format(
"Cannot convert %s %s together to %s", tp1.toString(), tp2.toString(), flinkType.toString()));
}
} else if (tpe == LogicalTypeRoot.ROW && ((RowType) flinkType.getLogicalType()).getFieldCount() == nonNullTypes.size()) {
RowType rt = (RowType) flinkType.getLogicalType();
List<TriFunction<FlinkDataUpdater, Integer, Object>> fieldWriters = new ArrayList<>();
for (int i = 0; i < nonNullTypes.size(); i++) {
Schema schema = nonNullTypes.get(i);
String field = rt.getFieldNames().get(i);
org.apache.flink.table.types.logical.LogicalType logicalType = rt.getTypeAt(i);
fieldWriters.add(newWriter(schema, TypeConversions.fromLogicalToDataType(logicalType),
Stream.concat(path.stream(), Stream.of(field)).collect(Collectors.toList())));
}
return (updater, ordinal, value) -> {
Row row = new Row(rt.getFieldCount());
RowUpdater fieldUpdater = new RowUpdater();
fieldUpdater.setRow(row);
int i = GenericData.get().resolveUnion(avroType, value);
fieldWriters.get(i).apply(fieldUpdater, i, value);
updater.set(ordinal, row);
};
} else {
throw new IncompatibleSchemaException(String.format(
"Cannot convert avro to flink because schema at %s is not compatible (avroType = %s, sqlType = %s)",
path.toString(), avroType.toString(), flinkType.toString()));
}
}
} else {
return (updater, ordinal, value) -> updater.setNullAt(ordinal);
}
} else {
throw new IncompatibleSchemaException(String.format(
"Cannot convert avro to flink because schema at path %s is not compatible (avroType = %s, sqlType = %s)",
path.toString(), avroType.toString(), flinkType.toString()));
}
}
private BinFunction<RowUpdater, GenericRecord> getRecordWriter(Schema avroType, FieldsDataType sqlType, List<String> path) throws IncompatibleSchemaException {
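// Match each SQL row field to the Avro field with the same name; a missing
// Avro field is only tolerated when the SQL field is nullable.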
List<Integer> validFieldIndexes = new ArrayList<>();
List<BinFunction<RowUpdater, Object>> fieldWriters = new ArrayList<>();
int length = sqlType.getChildren().size();
RowType rowType = (RowType) sqlType.getLogicalType();
List<RowType.RowField> fields = rowType.getFields();
//Map fieldsType = sqlType.getFieldDataTypes();
for (int i = 0; i < length; i++) {
RowType.RowField sqlField = fields.get(i);
org.apache.flink.table.types.logical.LogicalType logicalType = rowType.getTypeAt(i);
Schema.Field avroField = avroType.getField(sqlField.getName());
if (avroField != null) {
validFieldIndexes.add(avroField.pos());
TriFunction<FlinkDataUpdater, Integer, Object> baseWriter = newWriter(
avroField.schema(), TypeConversions.fromLogicalToDataType(logicalType),
Stream.concat(path.stream(), Stream.of(sqlField.getName())).collect(Collectors.toList()));
int ordinal = i;
BinFunction<RowUpdater, Object> fieldWriter = (updater, value) -> {
if (value == null) {
updater.setNullAt(ordinal);
} else {
baseWriter.apply(updater, ordinal, value);
}
};
fieldWriters.add(fieldWriter);
} else if (!sqlField.getType().isNullable()) {
throw new IncompatibleSchemaException(String.format(
"Cannot find non-nullable field in avro schema %s", avroType));
}
}
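// The record writer copies each matched Avro field, by its position in the
// Avro record, into the corresponding row ordinal.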
return (rowUpdater, record) -> {
for (int i = 0; i < validFieldIndexes.size(); i++) {
fieldWriters.get(i).apply(rowUpdater, record.get(validFieldIndexes.get(i)));
}
};
}
public void open(DeserializationSchema.InitializationContext context) throws Exception {
}
@Override
public boolean isEndOfStream(Row nextElement) {
return false;
}
@Override
public Row deserialize(Message<Row> message) throws IOException {
return converter.apply(message);
}
@Override
public TypeInformation<Row> getProducedType() {
return (TypeInformation<Row>) TypeConversions.fromDataTypeToLegacyInfo(fieldsDataType);
}
@Override
public org.apache.pulsar.client.api.Schema<Row> getSchema() {
return null;
}
/**
* Update flink data object.
*/
interface FlinkDataUpdater extends Serializable {
void set(int ordinal, Object value);
void setNullAt(int ordinal);
}
/**
* Flink Row field updater.
*/
public static final class RowUpdater implements FlinkDataUpdater {
private Row row;
public void setRow(Row currentRow) {
this.row = currentRow;
}
@Override
public void set(int ordinal, Object value) {
row.setField(ordinal, value);
}
@Override
public void setNullAt(int ordinal) {
row.setField(ordinal, null);
}
}
/**
* Flink array field updater.
*/
public static final class ArrayDataUpdater implements FlinkDataUpdater {
private final Object[] array;
public ArrayDataUpdater(Object[] array) {
this.array = array;
}
@Override
public void set(int ordinal, Object value) {
array[ordinal] = value;
}
@Override
public void setNullAt(int ordinal) {
array[ordinal] = null;
}
}
/**
* Ternary function interface that takes three arguments and returns nothing.
*
* @param <A> type of the first argument.
* @param <B> type of the second argument.
* @param <C> type of the third argument.
*/
public interface TriFunction<A, B, C> extends Serializable {
/**
* Applies this function to the given arguments.
*/
void apply(A a, B b, C c);
}
/**
* Binary function interface that takes two arguments and returns nothing.
*
* @param <A> type of the first argument.
* @param <B> type of the second argument.
*/
public interface BinFunction<A, B> extends Serializable {
/**
* Applies this function to the given arguments.
*/
void apply(A a, B b);
}
/**
* Represents a function that accepts one argument and produces a result.
*
* @param <T> the type of the input to the function
* @param <R> the type of the result of the function
*/
public interface Function<T, R> extends Serializable {
/**
* Applies this function to the given argument.
*
* @param t the function argument
* @return the function result
*/
R apply(T t);
}
/**
* DecimalConversion.
*/
public static class NewDecimalConversion extends Conversions.DecimalConversion implements Serializable {
}
}