org.apache.flink.formats.json.JsonParserToRowDataConverters Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.formats.json;
import org.apache.flink.annotation.Internal;
import org.apache.flink.formats.common.TimestampFormat;
import org.apache.flink.table.api.TableException;
import org.apache.flink.table.data.DecimalData;
import org.apache.flink.table.data.GenericArrayData;
import org.apache.flink.table.data.GenericMapData;
import org.apache.flink.table.data.GenericRowData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.StringData;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.table.types.logical.ArrayType;
import org.apache.flink.table.types.logical.DecimalType;
import org.apache.flink.table.types.logical.IntType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.LogicalTypeFamily;
import org.apache.flink.table.types.logical.MapType;
import org.apache.flink.table.types.logical.MultisetType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.utils.LogicalTypeUtils;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParser;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonToken;
import org.apache.commons.lang3.ArrayUtils;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneOffset;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQueries;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
import static org.apache.flink.formats.common.TimeFormats.ISO8601_TIMESTAMP_FORMAT;
import static org.apache.flink.formats.common.TimeFormats.ISO8601_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT;
import static org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_FORMAT;
import static org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT;
import static org.apache.flink.formats.common.TimeFormats.SQL_TIME_FORMAT;
/** Tool class used to convert fields from {@link JsonParser} to {@link RowData}. */
@Internal
public class JsonParserToRowDataConverters implements Serializable {
/** Serialization version identifier of this {@link Serializable} class. */
private static final long serialVersionUID = 1L;
/** Flag indicating whether to fail if a field is missing. */
private final boolean failOnMissingField;
/** Flag indicating whether to ignore invalid fields/rows (default: throw an exception). */
private final boolean ignoreParseErrors;
/**
 * Timestamp format specification which is used to parse timestamps. It selects between the SQL
 * and the ISO-8601 formatters when converting TIMESTAMP and TIMESTAMP WITH LOCAL TIME ZONE
 * values.
 */
private final TimestampFormat timestampFormat;
/**
 * Creates a converter factory configured with the given parsing options.
 *
 * @param failOnMissingField whether a missing field should lead to a failure
 * @param ignoreParseErrors whether invalid fields/rows are ignored instead of raising an
 *     exception
 * @param timestampFormat timestamp format specification used when parsing timestamps
 */
public JsonParserToRowDataConverters(
        boolean failOnMissingField,
        boolean ignoreParseErrors,
        TimestampFormat timestampFormat) {
    this.timestampFormat = timestampFormat;
    this.ignoreParseErrors = ignoreParseErrors;
    this.failOnMissingField = failOnMissingField;
}
/**
 * Runtime converter that converts {@link JsonParser}s into objects of Flink Table &amp; SQL
 * internal data structures. Unlike {@link JsonToRowDataConverters.JsonToRowDataConverter}, this
 * interface also supports projection pushdown of nested fields.
 *
 * <p>The interface extends {@link Serializable}, so implementations have to be serializable as
 * well.
 */
@FunctionalInterface
public interface JsonParserToRowDataConverter extends Serializable {
/**
 * Converts the value the parser currently points at.
 *
 * @param jsonParser parser whose current token is the value to convert
 * @return the converted value; may be {@code null} (the converter for the NULL type always
 *     returns {@code null})
 * @throws IOException if reading from the parser fails
 */
Object convert(JsonParser jsonParser) throws IOException;
}
/**
 * Creates a runtime converter for the given projection.
 *
 * <p>When {@code projectedFields} is {@code null} or every projection path has exactly one
 * element, the regular null-safe converter for {@code rowType} is returned. Otherwise a {@code
 * RowNestedConverter} is populated with one field converter per projection path, and the
 * returned converter fills a fresh {@link GenericRowData} on every call.
 *
 * @param projectedFields projection paths, one per field of {@code rowType}; may be {@code
 *     null}
 * @param rowType row type describing the produced fields
 * @return converter turning the parser content into a row
 */
public JsonParserToRowDataConverter createConverter(
        String[][] projectedFields, RowType rowType) {
    // Look for at least one projection path that is not a plain top-level field.
    boolean hasNestedPath = false;
    if (projectedFields != null) {
        for (String[] path : projectedFields) {
            if (path.length != 1) {
                hasNestedPath = true;
                break;
            }
        }
    }
    if (!hasNestedPath) {
        // Nothing nested is projected: fall back to the original conversion.
        return createConverter(rowType);
    }

    final RowNestedConverter nestedConverter = new RowNestedConverter();
    int position = 0;
    for (String[] path : projectedFields) {
        addFieldConverter(
                nestedConverter.fieldConverters,
                path,
                0,
                position,
                rowType.getTypeAt(position));
        position++;
    }

    // Deliberately an anonymous class and not a lambda: a lambda has a shading problem here.
    return new JsonParserToRowDataConverter() {
        @Override
        public Object convert(JsonParser parser) throws IOException {
            final GenericRowData result = new GenericRowData(rowType.getFieldCount());
            nestedConverter.convert(parser, result);
            return result;
        }
    };
}
/**
 * Creates a null-safe runtime converter for the given type by wrapping the not-null converter
 * of that type into a nullable one.
 *
 * @param type logical type to create the converter for
 * @return the wrapped converter
 */
private JsonParserToRowDataConverter createConverter(LogicalType type) {
    final JsonParserToRowDataConverter notNullConverter = createNotNullConverter(type);
    return wrapIntoNullableConverter(notNullConverter);
}
/**
 * Creates a runtime converter which assumes that the input value is not null.
 *
 * <p>The converter is selected by the type root of {@code type}; scalar types map to the
 * {@code convertToXxx} methods of this class, composite types to dedicated factory methods.
 *
 * @param type logical type to create the converter for
 * @return converter for values of the given type
 * @throws UnsupportedOperationException for RAW and every type root not listed in the switch
 */
private JsonParserToRowDataConverter createNotNullConverter(LogicalType type) {
switch (type.getTypeRoot()) {
case NULL:
// The NULL type yields null no matter what the parser points at.
return jsonNode -> null;
case BOOLEAN:
return this::convertToBoolean;
case TINYINT:
return this::convertToByte;
case SMALLINT:
return this::convertToShort;
// Year-month intervals are converted with the same converter as INTEGER.
case INTEGER:
case INTERVAL_YEAR_MONTH:
return this::convertToInt;
// Day-time intervals are converted with the same converter as BIGINT.
case BIGINT:
case INTERVAL_DAY_TIME:
return this::convertToLong;
case DATE:
return this::convertToDate;
case TIME_WITHOUT_TIME_ZONE:
return this::convertToTime;
case TIMESTAMP_WITHOUT_TIME_ZONE:
return this::convertToTimestamp;
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
return this::convertToTimestampWithLocalZone;
case FLOAT:
return this::convertToFloat;
case DOUBLE:
return this::convertToDouble;
case CHAR:
case VARCHAR:
return this::convertToString;
// Binary types are read directly through the parser's own binary accessor.
case BINARY:
case VARBINARY:
return JsonParser::getBinaryValue;
case DECIMAL:
return createDecimalConverter((DecimalType) type);
case ARRAY:
return createArrayConverter((ArrayType) type);
case MAP:
MapType mapType = (MapType) type;
return createMapConverter(
mapType.asSummaryString(), mapType.getKeyType(), mapType.getValueType());
case MULTISET:
// A multiset is converted like a map from its element type to an INT value.
MultisetType multisetType = (MultisetType) type;
return createMapConverter(
multisetType.asSummaryString(),
multisetType.getElementType(),
new IntType());
case ROW:
return createRowConverter((RowType) type);
// RAW is explicitly unsupported and shares the failure path with unknown type roots.
case RAW:
default:
throw new UnsupportedOperationException("Unsupported type: " + type);
}
}
/**
 * Converts the current token to a boolean. JSON {@code true}/{@code false} literals are mapped
 * directly; for any other token the trimmed text is passed to {@link
 * Boolean#parseBoolean(String)}.
 *
 * @param jp parser positioned on the value
 * @return the boolean value
 * @throws IOException if reading the token text fails
 */
private boolean convertToBoolean(JsonParser jp) throws IOException {
    final JsonToken token = jp.currentToken();
    final boolean isBooleanLiteral =
            token == JsonToken.VALUE_TRUE || token == JsonToken.VALUE_FALSE;
    if (isBooleanLiteral) {
        return token == JsonToken.VALUE_TRUE;
    }
    final String text = jp.getText().trim();
    return Boolean.parseBoolean(text);
}
/**
 * Converts the current token to a byte. Integer tokens are range-checked against the signed
 * Java byte range; any other token is parsed from its trimmed text.
 *
 * @param jp parser positioned on the value
 * @return the byte value
 * @throws IOException if reading from the parser fails
 */
private byte convertToByte(JsonParser jp) throws IOException {
    if (jp.currentToken() != JsonToken.VALUE_NUMBER_INT) {
        return Byte.parseByte(jp.getText().trim());
    }
    // jp.getByteValue() is avoided on purpose: it accepts -128 to 255 because it also allows
    // the unsigned interpretation, so the signed range is enforced here instead.
    final int parsed = jp.getIntValue();
    final boolean fitsIntoByte = parsed >= Byte.MIN_VALUE && parsed <= Byte.MAX_VALUE;
    if (!fitsIntoByte) {
        throw new JsonParseException(
                String.format("Numeric value (%s) out of range of Java byte.", parsed));
    }
    return (byte) parsed;
}
/**
 * Converts the current token to a short. Integer tokens are read through the parser's short
 * accessor; any other token is parsed from its trimmed text.
 *
 * @param jp parser positioned on the value
 * @return the short value
 * @throws IOException if reading from the parser fails
 */
private short convertToShort(JsonParser jp) throws IOException {
    if (jp.currentToken() != JsonToken.VALUE_NUMBER_INT) {
        final String text = jp.getText().trim();
        return Short.parseShort(text);
    }
    return jp.getShortValue();
}
/**
 * Converts the current token to an int. Both integer and floating-point number tokens are read
 * through the parser's int accessor; any other token is parsed from its trimmed text.
 *
 * @param jp parser positioned on the value
 * @return the int value
 * @throws IOException if reading from the parser fails
 */
private int convertToInt(JsonParser jp) throws IOException {
    final JsonToken token = jp.currentToken();
    final boolean isNumberToken =
            token == JsonToken.VALUE_NUMBER_INT || token == JsonToken.VALUE_NUMBER_FLOAT;
    return isNumberToken ? jp.getIntValue() : Integer.parseInt(jp.getText().trim());
}
/**
 * Converts the current token to a long. Both integer and floating-point number tokens are read
 * through the parser's long accessor; any other token is parsed from its trimmed text.
 *
 * @param jp parser positioned on the value
 * @return the long value
 * @throws IOException if reading from the parser fails
 */
private long convertToLong(JsonParser jp) throws IOException {
    final JsonToken token = jp.currentToken();
    if (token != JsonToken.VALUE_NUMBER_INT && token != JsonToken.VALUE_NUMBER_FLOAT) {
        final String text = jp.getText().trim();
        return Long.parseLong(text);
    }
    return jp.getLongValue();
}
/**
 * Converts the current token to a double. Floating-point tokens are read through the parser's
 * double accessor; every other token (including integer tokens) is parsed from its trimmed
 * text.
 *
 * @param jp parser positioned on the value
 * @return the double value
 * @throws IOException if reading from the parser fails
 */
private double convertToDouble(JsonParser jp) throws IOException {
    final boolean isFloatToken = jp.currentToken() == JsonToken.VALUE_NUMBER_FLOAT;
    return isFloatToken ? jp.getDoubleValue() : Double.parseDouble(jp.getText().trim());
}
/**
 * Converts the current token to a float. Floating-point tokens are read through the parser's
 * float accessor; every other token (including integer tokens) is parsed from its trimmed
 * text.
 *
 * @param jp parser positioned on the value
 * @return the float value
 * @throws IOException if reading from the parser fails
 */
private float convertToFloat(JsonParser jp) throws IOException {
    if (jp.currentToken() != JsonToken.VALUE_NUMBER_FLOAT) {
        final String text = jp.getText().trim();
        return Float.parseFloat(text);
    }
    return jp.getFloatValue();
}
/**
 * Parses the current token text as an ISO local date and returns it as the number of days
 * since the epoch.
 *
 * @param jp parser positioned on the value
 * @return epoch day of the parsed date, narrowed to int
 * @throws IOException if reading the token text fails
 */
private int convertToDate(JsonParser jp) throws IOException {
    final TemporalAccessor parsedDate = ISO_LOCAL_DATE.parse(jp.getText());
    final LocalDate localDate = parsedDate.query(TemporalQueries.localDate());
    final long epochDay = localDate.toEpochDay();
    return (int) epochDay;
}
/**
 * Parses the current token text with the SQL time format and returns the time as milliseconds
 * of the day.
 *
 * <p>The result is derived from the whole second of the day multiplied by 1000, so a
 * fractional-second part of the parsed time does not contribute to it.
 *
 * @param jsonNode parser positioned on the value
 * @return milliseconds of the day, always a multiple of 1000
 * @throws IOException if reading the token text fails
 */
private int convertToTime(JsonParser jsonNode) throws IOException {
    final LocalTime timeOfDay =
            SQL_TIME_FORMAT.parse(jsonNode.getText()).query(TemporalQueries.localTime());
    final int secondOfDay = timeOfDay.toSecondOfDay();
    // scale seconds of the day to milliseconds of the day
    return secondOfDay * 1000;
}
/**
 * Parses the current token text as a timestamp without time zone, using the formatter that
 * matches the configured {@code timestampFormat}.
 *
 * @param jp parser positioned on the value
 * @return the timestamp built from the parsed local date and local time
 * @throws IOException if reading the token text fails
 * @throws TableException if the configured timestamp format is neither SQL nor ISO-8601
 */
private TimestampData convertToTimestamp(JsonParser jp) throws IOException {
    final TemporalAccessor parsed;
    switch (timestampFormat) {
        case ISO_8601:
            parsed = ISO8601_TIMESTAMP_FORMAT.parse(jp.getText());
            break;
        case SQL:
            parsed = SQL_TIMESTAMP_FORMAT.parse(jp.getText());
            break;
        default:
            throw new TableException(
                    String.format(
                            "Unsupported timestamp format '%s'. Validator should have checked that.",
                            timestampFormat));
    }
    final LocalDate datePart = parsed.query(TemporalQueries.localDate());
    final LocalTime timePart = parsed.query(TemporalQueries.localTime());
    final LocalDateTime dateTime = LocalDateTime.of(datePart, timePart);
    return TimestampData.fromLocalDateTime(dateTime);
}
/**
 * Parses the current token text as a timestamp with local time zone, using the formatter that
 * matches the configured {@code timestampFormat}. The parsed local date and time are turned
 * into an instant at the UTC offset.
 *
 * @param jp parser positioned on the value
 * @return the timestamp built from the resulting instant
 * @throws IOException if reading the token text fails
 * @throws TableException if the configured timestamp format is neither SQL nor ISO-8601
 */
private TimestampData convertToTimestampWithLocalZone(JsonParser jp) throws IOException {
    final TemporalAccessor parsed;
    switch (timestampFormat) {
        case ISO_8601:
            parsed = ISO8601_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT.parse(jp.getText());
            break;
        case SQL:
            parsed = SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT.parse(jp.getText());
            break;
        default:
            throw new TableException(
                    String.format(
                            "Unsupported timestamp format '%s'. Validator should have checked that.",
                            timestampFormat));
    }
    final LocalDate datePart = parsed.query(TemporalQueries.localDate());
    final LocalTime timePart = parsed.query(TemporalQueries.localTime());
    final LocalDateTime dateTime = LocalDateTime.of(datePart, timePart);
    return TimestampData.fromInstant(dateTime.toInstant(ZoneOffset.UTC));
}
/**
 * Converts the current token to string data. When the parser is at the start of an object or
 * an array, the whole value is read as a tree and its string form is used; otherwise the text
 * of the current token is used.
 *
 * @param jp parser positioned on the value
 * @return the string data
 * @throws IOException if reading from the parser fails
 */
private StringData convertToString(JsonParser jp) throws IOException {
    final JsonToken token = jp.currentToken();
    final boolean isContainerStart =
            token == JsonToken.START_OBJECT || token == JsonToken.START_ARRAY;
    final String text = isContainerStart ? jp.readValueAsTree().toString() : jp.getText();
    return StringData.fromString(text);
}
/**
 * Creates a converter for decimal values of the given type. String tokens are parsed from
 * their trimmed text; every other token is read through the parser's decimal accessor. The
 * result is built with the precision and scale of {@code decimalType}.
 *
 * @param decimalType decimal type providing precision and scale
 * @return converter producing decimal data
 */
private JsonParserToRowDataConverter createDecimalConverter(DecimalType decimalType) {
    final int scale = decimalType.getScale();
    final int precision = decimalType.getPrecision();
    return jp -> {
        final BigDecimal parsed =
                jp.currentToken() == JsonToken.VALUE_STRING
                        ? new BigDecimal(jp.getText().trim())
                        : jp.getDecimalValue();
        return DecimalData.fromBigDecimal(parsed, precision, scale);
    };
}
private JsonParserToRowDataConverter createArrayConverter(ArrayType arrayType) {
JsonParserToRowDataConverter elementConverter = createConverter(arrayType.getElementType());
final Class> elementClass =
LogicalTypeUtils.toInternalConversionClass(arrayType.getElementType());
return jp -> {
if (jp.currentToken() != JsonToken.START_ARRAY) {
throw new IllegalStateException("Illegal JSON array data...");
}
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy