Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.apache.flink.streaming.connectors.pulsar.internal.JacksonRecordParser Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.connectors.pulsar.internal;
import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.FieldsDataType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.utils.TypeConversions;
import org.apache.flink.types.Row;
import org.apache.flink.util.ExceptionUtils;
import lombok.extern.slf4j.Slf4j;
import org.apache.pulsar.shade.com.fasterxml.jackson.core.JsonEncoding;
import org.apache.pulsar.shade.com.fasterxml.jackson.core.JsonFactory;
import org.apache.pulsar.shade.com.fasterxml.jackson.core.JsonGenerator;
import org.apache.pulsar.shade.com.fasterxml.jackson.core.JsonParser;
import org.apache.pulsar.shade.com.fasterxml.jackson.core.JsonToken;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.ParseException;
import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.function.BiFunction;
import java.util.function.Function;
/**
* JSON record parser.
*/
@Slf4j
public class JacksonRecordParser {
private final DataType schema;
private final JSONOptions options;
private final JsonFactory factory;
private final BiFunction rootConverter;
public JacksonRecordParser(DataType schema, JSONOptions options) {
assert schema instanceof FieldsDataType;
this.schema = schema;
this.options = options;
this.rootConverter = makeStructRootConverter((FieldsDataType) schema);
this.factory = new JsonFactory();
options.setJacksonOptions(factory);
}
private BiFunction makeStructRootConverter(FieldsDataType st) {
List fieldNames = ((RowType) st.getLogicalType()).getFieldNames();
List> fieldConverters = new ArrayList>();
RowType rowType = (RowType) st.getLogicalType();
for (int i = 0; i < fieldNames.size(); i++) {
//String fieldName = fieldNames.get(i);
LogicalType logicalType = rowType.getTypeAt(i);
DataType type = TypeConversions.fromLogicalToDataType(logicalType);
fieldConverters.add(makeConverter(type));
}
return (parser, row) -> {
try {
parseJsonToken(parser, st, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.START_OBJECT || token == JsonToken.START_ARRAY;
}
@Override
public Object apply(JsonToken token) {
if (token == JsonToken.START_OBJECT) {
try {
return convertObject(parser, st, fieldConverters, row);
} catch (IOException e) {
suroundWithRuntimeE(e);
}
} else {
throw new IllegalStateException("Message should be a single JSON object");
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
}
public Row parse(String record, BiFunction createParser, Row row) throws BadRecordException {
try (JsonParser parser = createParser.apply(factory, record)) {
JsonToken token = parser.nextToken();
if (token == null) {
return new Row(0);
} else {
Row result = rootConverter.apply(parser, row);
if (result == null) {
throw new RuntimeException("Root converter returned null");
} else {
return result;
}
}
} catch (Exception e) {
throw new BadRecordException(record, e);
}
}
private Row convertObject(JsonParser parser, FieldsDataType fdt, List> fieldConverters, Row row) throws IOException {
RowType rowType = (RowType) fdt.getLogicalType();
List fieldNames = rowType.getFieldNames();
while (nextUntil(parser, JsonToken.END_OBJECT)) {
int index = fieldNames.indexOf(parser.getCurrentName());
if (index == -1) {
parser.skipChildren();
} else {
row.setField(index, fieldConverters.get(index).apply(parser));
}
}
return row;
}
private void suroundWithRuntimeE(Exception e) {
log.error("Failed to parse json due to {}", ExceptionUtils.stringifyException(e));
throw new RuntimeException(e);
}
private boolean nextUntil(JsonParser parser, JsonToken stopOn) throws IOException {
JsonToken token = parser.nextToken();
if (token == null) {
return false;
} else {
return token != stopOn;
}
}
private Function makeConverter(DataType dataType) {
LogicalTypeRoot tpe = dataType.getLogicalType().getTypeRoot();
switch (tpe) {
case BOOLEAN:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_TRUE || token == JsonToken.VALUE_FALSE;
}
@Override
public Object apply(JsonToken token) {
return token == JsonToken.VALUE_TRUE;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case TINYINT:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT;
}
@Override
public Object apply(JsonToken token) {
try {
return parser.getByteValue();
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case SMALLINT:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT;
}
@Override
public Object apply(JsonToken token) {
try {
return parser.getShortValue();
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case INTEGER:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT;
}
@Override
public Object apply(JsonToken token) {
try {
return parser.getIntValue();
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case BIGINT:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT;
}
@Override
public Object apply(JsonToken token) {
try {
return parser.getLongValue();
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case FLOAT:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT
|| token == JsonToken.VALUE_NUMBER_FLOAT
|| token == JsonToken.VALUE_STRING;
}
@Override
public Object apply(JsonToken token) {
try {
if (token == JsonToken.VALUE_NUMBER_INT
|| token == JsonToken.VALUE_NUMBER_FLOAT) {
return parser.getFloatValue();
} else {
String txt = parser.getText();
if (txt.equals("NaN")) {
return Float.NaN;
} else if (txt.equals("Infinity")) {
return Float.POSITIVE_INFINITY;
} else if (txt.equals("-Infinity")) {
return Float.NEGATIVE_INFINITY;
} else {
throw new RuntimeException("Cannot parse " + txt + " as Float");
}
}
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case DOUBLE:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT
|| token == JsonToken.VALUE_NUMBER_FLOAT
|| token == JsonToken.VALUE_STRING;
}
@Override
public Object apply(JsonToken token) {
try {
if (token == JsonToken.VALUE_NUMBER_INT
|| token == JsonToken.VALUE_NUMBER_FLOAT) {
return parser.getDoubleValue();
} else {
String txt = parser.getText();
if (txt.equals("NaN")) {
return Float.NaN;
} else if (txt.equals("Infinity")) {
return Float.POSITIVE_INFINITY;
} else if (txt.equals("-Infinity")) {
return Float.NEGATIVE_INFINITY;
} else {
throw new RuntimeException("Cannot parse " + txt + " as Float");
}
}
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case VARCHAR:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return true;
}
@Override
public Object apply(JsonToken token) {
try {
if (token == JsonToken.VALUE_STRING) {
return parser.getText();
} else {
ByteArrayOutputStream writer = new ByteArrayOutputStream();
try (JsonGenerator generator = factory.createGenerator(writer, JsonEncoding.UTF8)) {
generator.copyCurrentStructure(parser);
}
return new String(writer.toByteArray(), StandardCharsets.UTF_8);
}
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case TIMESTAMP_WITHOUT_TIME_ZONE:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT || token == JsonToken.VALUE_STRING;
}
@Override
public Object apply(JsonToken token) {
try {
if (token == JsonToken.VALUE_STRING) {
String v = parser.getText();
long t = options.getTimestampFormat().parse(v).getTime() * 1000L;
return DateTimeUtils.toJavaTimestamp(t).toLocalDateTime();
} else {
return DateTimeUtils.toJavaTimestamp(parser.getLongValue() * 1000000L).toLocalDateTime();
}
} catch (IOException | ParseException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case DATE:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_STRING;
}
@Override
public Object apply(JsonToken token) {
try {
String v = parser.getText();
int t = DateTimeUtils.millisToDays(options.getDateFormat().parse(v).getTime());
return DateTimeUtils.toJavaDate(t).toInstant().atZone(ZoneId.systemDefault()).toLocalDate();
} catch (IOException | ParseException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case VARBINARY:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_STRING;
}
@Override
public Object apply(JsonToken token) {
try {
return parser.getBinaryValue();
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case DECIMAL:
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.VALUE_NUMBER_INT || token == JsonToken.VALUE_NUMBER_FLOAT;
}
@Override
public Object apply(JsonToken token) {
try {
return parser.getDecimalValue();
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
case ROW:
RowType rowType = (RowType) dataType.getLogicalType();
List fieldNames = rowType.getFieldNames();
List> fieldConverters = new ArrayList>();
for (int i = 0; i < fieldNames.size(); i++) {
LogicalType logicalType = rowType.getTypeAt(i);
fieldConverters.add(makeConverter(TypeConversions.fromLogicalToDataType(logicalType)));
}
return parser -> {
try {
return parseJsonToken(parser, dataType, new PartialFunc() {
@Override
public boolean isDefinedAt(JsonToken token) {
return token == JsonToken.START_OBJECT;
}
@Override
public Object apply(JsonToken token) {
try {
Row record = new Row(rowType.getFieldCount());
return convertObject(parser, (FieldsDataType) dataType, fieldConverters, record);
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
}
});
} catch (IOException e) {
suroundWithRuntimeE(e);
}
return null;
};
default:
throw new RuntimeException(String.format(
"Failed to parse a value for data type %s (current: %s).", dataType.toString(), tpe.toString()));
}
}
/**
* This method skips `FIELD_NAME`s at the beginning, and handles nulls ahead before trying
* to parse the JSON token using given function `f`. If the `f` failed to parse and convert the
* token, call `failedConversion` to handle the token.
*/
public Object parseJsonToken(JsonParser parser, DataType dataType, PartialFunc f) throws IOException {
while (true) {
JsonToken currentToken = parser.getCurrentToken();
if (!JsonToken.FIELD_NAME.equals(currentToken)) {
Object result;
if (currentToken == null || JsonToken.VALUE_NULL.equals(currentToken)) {
result = null;
} else {
result = f.applyOrElse(currentToken, parser, dataType);
}
return result;
}
parser.nextToken();
}
}
interface PartialFunc {
boolean isDefinedAt(JsonToken token);
Object apply(JsonToken token);
default Object applyOrElse(JsonToken token, JsonParser parser, DataType dataType) throws IOException {
if (isDefinedAt(token)) {
return apply(token);
} else {
if (token == JsonToken.VALUE_STRING && parser.getTextLength() < 1) {
// If conversion is failed, this produces `null` rather than throwing exception.
// This will protect the mismatch of types.
return null;
} else {
throw new RuntimeException(String.format(
"Failed to parse a value for data type %s (current token: %s).", dataType.toString(), token.toString()));
}
}
}
}
interface BiFunctionWithException {
R apply(T t, U u) throws BadRecordException;
}
static class FailureSafeRecordParser {
private final BiFunctionWithException rawParser;
private final ParseMode mode;
private final FieldsDataType schema;
FailureSafeRecordParser(BiFunctionWithException rawParser, ParseMode mode, FieldsDataType schema) {
this.rawParser = rawParser;
this.mode = mode;
this.schema = schema;
}
Row parse(String input, Row row) {
try {
return rawParser.apply(input, row);
} catch (BadRecordException e) {
switch (mode) {
case PERMISSIVE:
return row;
case DROPMALFORMED:
return null;
case FAILFAST:
throw new RuntimeException("Malformed records are detected in record parsing", e);
}
}
return null;
}
}
static class BadRecordException extends Exception {
public String record;
public BadRecordException(String record, Exception e) {
super(e);
this.record = record;
}
}
}