
// org.apache.flink.formats.json.JsonRowDeserializationSchema Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.formats.json;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.serialization.DeserializationSchema;
import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.MapTypeInfo;
import org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.types.Row;
import org.apache.flink.util.WrappingRuntimeException;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.TextNode;
import java.io.IOException;
import java.io.Serializable;
import java.lang.reflect.Array;
import java.sql.Date;
import java.sql.Time;
import java.sql.Timestamp;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;
import java.time.ZoneOffset;
import java.time.temporal.TemporalAccessor;
import java.time.temporal.TemporalQueries;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.stream.Collectors;
import static java.lang.String.format;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
import static org.apache.flink.formats.json.TimeFormats.RFC3339_TIMESTAMP_FORMAT;
import static org.apache.flink.formats.json.TimeFormats.RFC3339_TIME_FORMAT;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* Deserialization schema from JSON to Flink types.
*
* <p>Deserializes a <code>byte[]</code> message as a JSON object and reads
* the specified fields.
*
* <p>Failures during deserialization are forwarded as wrapped IOExceptions.
*/
@PublicEvolving
public class JsonRowDeserializationSchema implements DeserializationSchema {
private static final long serialVersionUID = -228294330688809195L;
/** Type information describing the result type. */
private final RowTypeInfo typeInfo;
/**
* Flag configured through the constructor, {@link Builder#failOnMissingField()} or the
* deprecated {@link #setFailOnMissingField(boolean)}; non-final only because of that setter.
*/
private boolean failOnMissingField;
/** Object mapper for parsing the JSON. */
private final ObjectMapper objectMapper = new ObjectMapper();
/**
* Converter applied to the parsed JSON tree in {@link #deserialize(byte[])}. It is created
* in the constructor and re-created in {@link #setFailOnMissingField(boolean)}.
*/
private DeserializationRuntimeConverter runtimeConverter;
private JsonRowDeserializationSchema(
TypeInformation typeInfo,
boolean failOnMissingField) {
checkNotNull(typeInfo, "Type information");
checkArgument(typeInfo instanceof RowTypeInfo, "Only RowTypeInfo is supported");
this.typeInfo = (RowTypeInfo) typeInfo;
this.failOnMissingField = failOnMissingField;
this.runtimeConverter = createConverter(this.typeInfo);
}
/**
* Creates a schema for the given type information with {@code failOnMissingField}
* set to {@code false}.
*
* @param typeInfo type information of the produced rows; only {@link RowTypeInfo} is accepted
* @deprecated Use the provided {@link Builder} instead.
*/
@Deprecated
public JsonRowDeserializationSchema(TypeInformation typeInfo) {
this(typeInfo, false);
}
/**
* Creates a schema from a JSON schema string with {@code failOnMissingField} set to
* {@code false}. The string is turned into type information by
* {@code JsonRowSchemaConverter.convert}.
*
* @param jsonSchema JSON schema describing the result type; must not be null
* @deprecated Use the provided {@link Builder} instead.
*/
@Deprecated
public JsonRowDeserializationSchema(String jsonSchema) {
this(JsonRowSchemaConverter.convert(checkNotNull(jsonSchema)), false);
}
/**
* Updates the {@code failOnMissingField} flag and re-creates the runtime converter for the
* current type information.
*
* @param failOnMissingField new value of the flag
* @deprecated Use the provided {@link Builder} instead.
*/
@Deprecated
public void setFailOnMissingField(boolean failOnMissingField) {
// TODO make this class immutable once we drop this method
this.failOnMissingField = failOnMissingField;
// NOTE(review): presumably the converters capture the flag when they are created, which
// would be why the converter is rebuilt here - confirm against the row converter.
this.runtimeConverter = createConverter(this.typeInfo);
}
/**
 * Parses the message as a JSON tree and converts it into a {@link Row}.
 *
 * @param message serialized JSON
 * @return the row produced by the runtime converter
 * @throws IOException wrapping any {@link Throwable} raised while parsing or converting
 */
@Override
public Row deserialize(byte[] message) throws IOException {
    try {
        final JsonNode parsedTree = objectMapper.readTree(message);
        final Object converted = runtimeConverter.convert(objectMapper, parsedTree);
        // The cast stays inside the try block so a mismatch is wrapped like any other failure.
        return (Row) converted;
    } catch (Throwable cause) {
        throw new IOException("Failed to deserialize JSON object.", cause);
    }
}
/**
* Always returns {@code false}; no row is treated as the end of the stream.
*
* @param nextElement the deserialized row (not inspected)
* @return {@code false}
*/
@Override
public boolean isEndOfStream(Row nextElement) {
return false;
}
/**
* Returns the row type information this schema was created with.
*
* @return the {@link RowTypeInfo} describing the produced rows
*/
@Override
public TypeInformation getProducedType() {
return typeInfo;
}
/**
 * Builder for {@link JsonRowDeserializationSchema}.
 */
public static class Builder {

    /** Row type the built schema will produce. */
    private final RowTypeInfo typeInfo;

    /** Starts out {@code false}; switched on by {@link #failOnMissingField()}. */
    private boolean failOnMissingField;

    /**
     * Creates a JSON deserialization schema for the given type information.
     *
     * @param typeInfo Type information describing the result type. The field names of {@link Row}
     *                 are used to parse the JSON properties.
     */
    public Builder(TypeInformation typeInfo) {
        checkArgument(typeInfo instanceof RowTypeInfo, "Only RowTypeInfo is supported");
        final RowTypeInfo rowTypeInfo = (RowTypeInfo) typeInfo;
        this.typeInfo = rowTypeInfo;
    }

    /**
     * Creates a JSON deserialization schema for the given JSON schema.
     *
     * @param jsonSchema JSON schema describing the result type
     *
     * @see <a href="http://json-schema.org/">http://json-schema.org/</a>
     */
    public Builder(String jsonSchema) {
        this(JsonRowSchemaConverter.convert(checkNotNull(jsonSchema)));
    }

    /**
     * Configures schema to fail if a JSON field is missing.
     *
     * <p>By default, a missing field is ignored and the field is set to null.
     *
     * @return this builder, to allow chaining
     */
    public Builder failOnMissingField() {
        this.failOnMissingField = true;
        return this;
    }

    /**
     * Creates the schema from the type information and flag collected so far.
     *
     * @return a new {@link JsonRowDeserializationSchema}
     */
    public JsonRowDeserializationSchema build() {
        return new JsonRowDeserializationSchema(typeInfo, failOnMissingField);
    }
}
/**
 * Two schemas are equal when they are of the same class and agree on both the row type
 * information and the {@code failOnMissingField} flag.
 */
@Override
public boolean equals(Object o) {
    if (o == this) {
        return true;
    }
    if (o == null) {
        return false;
    }
    if (getClass() != o.getClass()) {
        return false;
    }
    final JsonRowDeserializationSchema other = (JsonRowDeserializationSchema) o;
    if (!Objects.equals(typeInfo, other.typeInfo)) {
        return false;
    }
    return failOnMissingField == other.failOnMissingField;
}
/**
 * Hash over the row type information and the {@code failOnMissingField} flag, combined
 * with the usual 31-multiplier fold starting from 1.
 */
@Override
public int hashCode() {
    int result = 1;
    result = 31 * result + Objects.hashCode(typeInfo);
    result = 31 * result + Boolean.hashCode(failOnMissingField);
    return result;
}
/*
Runtime converter
*/
/**
* Runtime converter that maps between {@link JsonNode}s and Java objects.
*
* <p>The interface is {@link Serializable}; the enclosing schema keeps a converter in a
* non-transient field.
*/
@FunctionalInterface
private interface DeserializationRuntimeConverter extends Serializable {
/**
* Converts a JSON node into a Java object.
*
* @param mapper object mapper made available to the conversion
* @param jsonNode node to convert
* @return the converted value
*/
Object convert(ObjectMapper mapper, JsonNode jsonNode);
}
private DeserializationRuntimeConverter createConverter(TypeInformation> typeInfo) {
DeserializationRuntimeConverter baseConverter = createConverterForSimpleType(typeInfo)
.orElseGet(() ->
createContainerConverter(typeInfo)
.orElseGet(() -> createFallbackConverter(typeInfo.getTypeClass())));
return wrapIntoNullableConverter(baseConverter);
}
/**
 * Decorates a converter so that a JSON null node yields {@code null} instead of being
 * passed on to the wrapped converter.
 *
 * @param converter converter to delegate to for non-null nodes
 * @return the null-tolerant converter
 */
private DeserializationRuntimeConverter wrapIntoNullableConverter(DeserializationRuntimeConverter converter) {
    return (mapper, jsonNode) ->
        jsonNode.isNull() ? null : converter.convert(mapper, jsonNode);
}
private Optional createContainerConverter(TypeInformation> typeInfo) {
if (typeInfo instanceof RowTypeInfo) {
return Optional.of(createRowConverter((RowTypeInfo) typeInfo));
} else if (typeInfo instanceof ObjectArrayTypeInfo) {
return Optional.of(createObjectArrayConverter(((ObjectArrayTypeInfo) typeInfo).getComponentInfo()));
} else if (typeInfo instanceof BasicArrayTypeInfo) {
return Optional.of(createObjectArrayConverter(((BasicArrayTypeInfo) typeInfo).getComponentInfo()));
} else if (isPrimitiveByteArray(typeInfo)) {
return Optional.of(createByteArrayConverter());
} else if (typeInfo instanceof MapTypeInfo) {
MapTypeInfo, ?> mapTypeInfo = (MapTypeInfo, ?>) typeInfo;
return Optional.of(createMapConverter(mapTypeInfo.getKeyTypeInfo(), mapTypeInfo.getValueTypeInfo()));
} else {
return Optional.empty();
}
}
private DeserializationRuntimeConverter createMapConverter(TypeInformation keyType, TypeInformation valueType) {
DeserializationRuntimeConverter valueConverter = createConverter(valueType);
DeserializationRuntimeConverter keyConverter = createConverter(keyType);
return (mapper, jsonNode) -> {
Iterator> fields = jsonNode.fields();
Map