org.apache.avro.SchemaCompatibility Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-core Show documentation
Show all versions of spark-core Show documentation
Shaded version of Apache Spark 2.x.x for Presto
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import com.facebook.presto.spark.$internal.org.slf4j.Logger;
import com.facebook.presto.spark.$internal.org.slf4j.LoggerFactory;
/**
* Evaluate the compatibility between a reader schema and a writer schema.
* A reader and a writer schema are declared compatible if all datum instances of the writer
* schema can be successfully decoded using the specified reader schema.
*/
public class SchemaCompatibility {
/** Description attached to results whose reader schema can decode everything the writer wrote. */
public static final String READER_WRITER_COMPATIBLE_MESSAGE =
    "Reader schema can always successfully decode data written using the writer schema.";

/** Logger used by the compatibility checker to trace each reader/writer pair it examines. */
private static final Logger LOG = LoggerFactory.getLogger(SchemaCompatibility.class);

/** Private constructor: this class only exposes static members and is never instantiated. */
private SchemaCompatibility() {
}
/**
 * Checks whether avro data written with the given writer schema can be decoded with the given
 * reader schema.
 *
 * The verdict is computed by a fresh checker instance and returned together with both schemas
 * and a human readable message describing the outcome.
 *
 * @param reader schema to check.
 * @param writer schema to check.
 * @return a result object carrying the verdict, both schemas and a description.
 * @throws AvroRuntimeException if the checker reports a verdict other than COMPATIBLE or
 *     INCOMPATIBLE.
 */
public static SchemaPairCompatibility checkReaderWriterCompatibility(
    final Schema reader,
    final Schema writer
) {
  final SchemaCompatibilityType verdict =
      new ReaderWriterCompatiblityChecker().getCompatibility(reader, writer);

  switch (verdict) {
    case COMPATIBLE:
      return new SchemaPairCompatibility(
          verdict, reader, writer, READER_WRITER_COMPATIBLE_MESSAGE);
    case INCOMPATIBLE: {
      // Pretty-print both schemas so the failure can be diagnosed from the message alone.
      final String explanation = String.format(
          "Data encoded using writer schema:%n%s%n"
          + "will or may fail to decode using reader schema:%n%s%n",
          writer.toString(true),
          reader.toString(true));
      return new SchemaPairCompatibility(verdict, reader, writer, explanation);
    }
    default:
      // RECURSION_IN_PROGRESS is an internal marker and must never escape the checker.
      throw new AvroRuntimeException("Unknown compatibility: " + verdict);
  }
}
// -----------------------------------------------------------------------------------------------
/**
 * Tells whether two named Avro schemas refer to the same name.
 *
 * The writer's full name matches either when it equals the reader's full name or when it is
 * listed among the reader's aliases.
 *
 * @param reader Named reader schema.
 * @param writer Named writer schema.
 * @return whether the names of the named schemas match or not.
 */
public static boolean schemaNameEquals(final Schema reader, final Schema writer) {
  final String writerName = writer.getFullName();
  final boolean sameFullName = objectsEqual(reader.getFullName(), writerName);
  // The reader's aliases are only consulted when the full names differ.
  return sameFullName || reader.getAliases().contains(writerName);
}
/**
 * Identifies the writer field that corresponds to the specified reader field.
 *
 * The writer record is searched by the reader field's own name and by each of the reader
 * field's aliases.
 *
 * @param writerSchema Schema of the record where to look for the writer field.
 * @param readerField Reader field to identify the corresponding writer field of.
 * @return the matching writer field, or {@code null} if no writer field corresponds.
 * @throws AvroRuntimeException if the lookups yield more than one match in the writer record.
 */
public static Field lookupWriterField(final Schema writerSchema, final Field readerField) {
  assert (writerSchema.getType() == Type.RECORD);

  // Typed list: a raw List would make get(0) return Object, which is not assignable to Field.
  final List<Field> writerFields = new ArrayList<Field>();

  // Match on the reader field's own name first.
  final Field direct = writerSchema.getField(readerField.name());
  if (direct != null) {
    writerFields.add(direct);
  }

  // Then match on every alias declared by the reader field.
  for (final String readerFieldAliasName : readerField.aliases()) {
    final Field writerField = writerSchema.getField(readerFieldAliasName);
    if (writerField != null) {
      writerFields.add(writerField);
    }
  }

  switch (writerFields.size()) {
    case 0: return null;
    case 1: return writerFields.get(0);
    default: {
      // More than one candidate: the mapping is ambiguous and cannot be resolved here.
      throw new AvroRuntimeException(String.format(
          "Reader record field %s matches multiple fields in writer record schema %s",
          readerField, writerSchema));
    }
  }
}
/**
 * Hash-map key made of one reader schema and one writer schema.
 *
 * Two keys are equal only when they hold the very same Schema instances: both hashing and
 * equality rely on object identity rather than on Schema.equals().
 */
private static final class ReaderWriter {
  private final Schema mReader;
  private final Schema mWriter;

  /**
   * Builds a key for the given schema pair.
   *
   * @param reader Reader schema.
   * @param writer Writer schema.
   */
  public ReaderWriter(final Schema reader, final Schema writer) {
    mReader = reader;
    mWriter = writer;
  }

  /**
   * Returns the reader half of this pair.
   * @return the reader schema in this pair.
   */
  public Schema getReader() {
    return mReader;
  }

  /**
   * Returns the writer half of this pair.
   * @return the writer schema in this pair.
   */
  public Schema getWriter() {
    return mWriter;
  }

  /** {@inheritDoc} */
  @Override
  public int hashCode() {
    final int readerIdentity = System.identityHashCode(mReader);
    final int writerIdentity = System.identityHashCode(mWriter);
    return readerIdentity ^ writerIdentity;
  }

  /** {@inheritDoc} */
  @Override
  public boolean equals(Object obj) {
    if (obj instanceof ReaderWriter) {
      final ReaderWriter other = (ReaderWriter) obj;
      // Reference comparison on purpose: identity, not structural equality.
      return (mReader == other.mReader) && (mWriter == other.mWriter);
    }
    return false;
  }

  /** {@inheritDoc} */
  @Override
  public String toString() {
    return String.format("ReaderWriter{reader:%s, writer:%s}", mReader, mWriter);
  }
}
/**
 * Determines the compatibility of a reader/writer schema pair.
 *
 * Verdicts are memoized per reader/writer pair. A pair whose evaluation is still running is
 * tagged RECURSION_IN_PROGRESS; meeting such a pair again is reported as compatible, which is
 * what lets recursive schemas terminate.
 */
private static final class ReaderWriterCompatiblityChecker {
  /**
   * Verdict (or in-progress marker) recorded for every pair visited so far.
   * The map is typed so that lookups yield a SchemaCompatibilityType rather than Object.
   */
  private final Map<ReaderWriter, SchemaCompatibilityType> mMemoizeMap =
      new HashMap<ReaderWriter, SchemaCompatibilityType>();

  /**
   * Reports the compatibility of a reader/writer schema pair.
   *
   * Memoizes the compatibility results.
   *
   * @param reader Reader schema to test.
   * @param writer Writer schema to test.
   * @return the compatibility of the reader/writer schema pair.
   */
  public SchemaCompatibilityType getCompatibility(
      final Schema reader,
      final Schema writer
  ) {
    LOG.debug("Checking compatibility of reader {} with writer {}", reader, writer);
    final ReaderWriter pair = new ReaderWriter(reader, writer);
    final SchemaCompatibilityType existing = mMemoizeMap.get(pair);
    if (existing != null) {
      if (existing == SchemaCompatibilityType.RECURSION_IN_PROGRESS) {
        // Break the recursion here.
        // schemas are compatible unless proven incompatible:
        return SchemaCompatibilityType.COMPATIBLE;
      }
      return existing;
    }
    // Mark this reader/writer pair as "in progress":
    mMemoizeMap.put(pair, SchemaCompatibilityType.RECURSION_IN_PROGRESS);
    final SchemaCompatibilityType calculated = calculateCompatibility(reader, writer);
    // Replace the in-progress marker with the final verdict.
    mMemoizeMap.put(pair, calculated);
    return calculated;
  }

  /**
   * Calculates the compatibility of a reader/writer schema pair.
   *
   * Relies on external memoization performed by {@link #getCompatibility(Schema, Schema)}.
   *
   * @param reader Reader schema to test.
   * @param writer Writer schema to test.
   * @return the compatibility of the reader/writer schema pair.
   * @throws AvroRuntimeException if the reader schema type is not one of the handled types.
   */
  private SchemaCompatibilityType calculateCompatibility(
      final Schema reader,
      final Schema writer
  ) {
    assert (reader != null);
    assert (writer != null);

    if (reader.getType() == writer.getType()) {
      switch (reader.getType()) {
        case NULL:
        case BOOLEAN:
        case INT:
        case LONG:
        case FLOAT:
        case DOUBLE:
        case BYTES:
        case STRING: {
          // Identical primitive types are always compatible.
          return SchemaCompatibilityType.COMPATIBLE;
        }
        case ARRAY: {
          return getCompatibility(reader.getElementType(), writer.getElementType());
        }
        case MAP: {
          return getCompatibility(reader.getValueType(), writer.getValueType());
        }
        case FIXED: {
          // fixed size and name must match:
          if (!schemaNameEquals(reader, writer)) {
            return SchemaCompatibilityType.INCOMPATIBLE;
          }
          if (reader.getFixedSize() != writer.getFixedSize()) {
            return SchemaCompatibilityType.INCOMPATIBLE;
          }
          return SchemaCompatibilityType.COMPATIBLE;
        }
        case ENUM: {
          // enum names must match:
          if (!schemaNameEquals(reader, writer)) {
            return SchemaCompatibilityType.INCOMPATIBLE;
          }
          // reader symbols must contain all writer symbols:
          final Set<String> symbols = new HashSet<String>(writer.getEnumSymbols());
          symbols.removeAll(reader.getEnumSymbols());
          // Whatever is left is a writer symbol the reader does not know about.
          // TODO: Report a human-readable error.
          return symbols.isEmpty()
              ? SchemaCompatibilityType.COMPATIBLE
              : SchemaCompatibilityType.INCOMPATIBLE;
        }
        case RECORD: {
          // record names must match:
          if (!schemaNameEquals(reader, writer)) {
            return SchemaCompatibilityType.INCOMPATIBLE;
          }
          // Check that each field in the reader record can be populated from the writer record:
          for (final Field readerField : reader.getFields()) {
            final Field writerField = lookupWriterField(writer, readerField);
            if (writerField == null) {
              // Reader field does not correspond to any field in the writer record schema,
              // reader field must have a default value.
              if (readerField.defaultValue() == null) {
                // reader field has no default value
                return SchemaCompatibilityType.INCOMPATIBLE;
              }
            } else {
              if (getCompatibility(readerField.schema(), writerField.schema())
                  == SchemaCompatibilityType.INCOMPATIBLE) {
                return SchemaCompatibilityType.INCOMPATIBLE;
              }
            }
          }
          // All fields in the reader record can be populated from the writer record:
          return SchemaCompatibilityType.COMPATIBLE;
        }
        case UNION: {
          // Check that each individual branch of the writer union can be decoded:
          for (final Schema writerBranch : writer.getTypes()) {
            if (getCompatibility(reader, writerBranch) == SchemaCompatibilityType.INCOMPATIBLE) {
              return SchemaCompatibilityType.INCOMPATIBLE;
            }
          }
          // Each schema in the writer union can be decoded with the reader:
          return SchemaCompatibilityType.COMPATIBLE;
        }
        default: {
          throw new AvroRuntimeException("Unknown schema type: " + reader.getType());
        }
      }
    } else {
      // Reader and writer have different schema types:

      // Handle the corner case where writer is a union of a singleton branch: { X } === X
      if ((writer.getType() == Schema.Type.UNION)
          && writer.getTypes().size() == 1) {
        return getCompatibility(reader, writer.getTypes().get(0));
      }

      switch (reader.getType()) {
        case NULL: return SchemaCompatibilityType.INCOMPATIBLE;
        case BOOLEAN: return SchemaCompatibilityType.INCOMPATIBLE;
        case INT: return SchemaCompatibilityType.INCOMPATIBLE;
        case LONG: {
          // A long reader accepts int data.
          return (writer.getType() == Type.INT)
              ? SchemaCompatibilityType.COMPATIBLE
              : SchemaCompatibilityType.INCOMPATIBLE;
        }
        case FLOAT: {
          // A float reader accepts int and long data.
          return ((writer.getType() == Type.INT)
              || (writer.getType() == Type.LONG))
              ? SchemaCompatibilityType.COMPATIBLE
              : SchemaCompatibilityType.INCOMPATIBLE;
        }
        case DOUBLE: {
          // A double reader accepts int, long and float data.
          return ((writer.getType() == Type.INT)
              || (writer.getType() == Type.LONG)
              || (writer.getType() == Type.FLOAT))
              ? SchemaCompatibilityType.COMPATIBLE
              : SchemaCompatibilityType.INCOMPATIBLE;
        }
        case BYTES: {
          // A bytes reader accepts string data.
          return (writer.getType() == Type.STRING)
              ? SchemaCompatibilityType.COMPATIBLE
              : SchemaCompatibilityType.INCOMPATIBLE;
        }
        case STRING: {
          // A string reader accepts bytes data.
          return (writer.getType() == Type.BYTES)
              ? SchemaCompatibilityType.COMPATIBLE
              : SchemaCompatibilityType.INCOMPATIBLE;
        }
        case ARRAY: return SchemaCompatibilityType.INCOMPATIBLE;
        case MAP: return SchemaCompatibilityType.INCOMPATIBLE;
        case FIXED: return SchemaCompatibilityType.INCOMPATIBLE;
        case ENUM: return SchemaCompatibilityType.INCOMPATIBLE;
        case RECORD: return SchemaCompatibilityType.INCOMPATIBLE;
        case UNION: {
          // A reader union is compatible as soon as one of its branches can decode the writer.
          for (final Schema readerBranch : reader.getTypes()) {
            if (getCompatibility(readerBranch, writer) == SchemaCompatibilityType.COMPATIBLE) {
              return SchemaCompatibilityType.COMPATIBLE;
            }
          }
          // No branch in the reader union has been found compatible with the writer schema:
          return SchemaCompatibilityType.INCOMPATIBLE;
        }
        default: {
          throw new AvroRuntimeException("Unknown schema type: " + reader.getType());
        }
      }
    }
  }
}
/**
 * Identifies the type of a schema compatibility result.
 */
public enum SchemaCompatibilityType {
  /** The reader schema can decode data written with the writer schema. */
  COMPATIBLE,

  /** Decoding data written with the writer schema will or may fail with the reader schema. */
  INCOMPATIBLE,

  /** Used internally to tag a reader/writer schema pair and prevent recursion. */
  RECURSION_IN_PROGRESS
}
// -----------------------------------------------------------------------------------------------
/**
 * Immutable outcome of checking one reader schema against one writer schema.
 *
 * Note: This class represents a one-way relationship from the reader to the writer schema.
 */
public static final class SchemaPairCompatibility {
  /** The type of this result. */
  private final SchemaCompatibilityType mType;

  /** Validated reader schema. */
  private final Schema mReader;

  /** Validated writer schema. */
  private final Schema mWriter;

  /** Human readable description of this result. */
  private final String mDescription;

  /**
   * Constructs a new instance.
   *
   * @param type of the schema compatibility.
   * @param reader schema that was validated.
   * @param writer schema that was validated.
   * @param description of this compatibility result.
   */
  public SchemaPairCompatibility(
      SchemaCompatibilityType type,
      Schema reader,
      Schema writer,
      String description) {
    mType = type;
    mReader = reader;
    mWriter = writer;
    mDescription = description;
  }

  /**
   * Gets the type of this result.
   *
   * @return the type of this result.
   */
  public SchemaCompatibilityType getType() {
    return mType;
  }

  /**
   * Gets the reader schema that was validated.
   *
   * @return reader schema that was validated.
   */
  public Schema getReader() {
    return mReader;
  }

  /**
   * Gets the writer schema that was validated.
   *
   * @return writer schema that was validated.
   */
  public Schema getWriter() {
    return mWriter;
  }

  /**
   * Gets a human readable description of this validation result.
   *
   * @return a human readable description of this validation result.
   */
  public String getDescription() {
    return mDescription;
  }

  /** {@inheritDoc} */
  @Override
  public String toString() {
    return String.format(
        "SchemaPairCompatibility{type:%s, readerSchema:%s, writerSchema:%s, description:%s}",
        mType, mReader, mWriter, mDescription);
  }

  /** {@inheritDoc} */
  @Override
  public boolean equals(Object other) {
    // instanceof is false for null, so no separate null check is needed.
    if (!(other instanceof SchemaPairCompatibility)) {
      return false;
    }
    final SchemaPairCompatibility that = (SchemaPairCompatibility) other;
    return objectsEqual(that.mType, mType)
        && objectsEqual(that.mReader, mReader)
        && objectsEqual(that.mWriter, mWriter)
        && objectsEqual(that.mDescription, mDescription);
  }

  /** {@inheritDoc} */
  @Override
  public int hashCode() {
    final Object[] components = {mType, mReader, mWriter, mDescription};
    return Arrays.hashCode(components);
  }
}
/**
 * Null-safe equality, borrowed from Guava's Objects.equal(a, b).
 *
 * @param obj1 first object, may be null.
 * @param obj2 second object, may be null.
 * @return true when both references are the same (including both null), or when obj1 is
 *     non-null and obj1.equals(obj2) holds.
 */
private static boolean objectsEqual(Object obj1, Object obj2) {
  if (obj1 == obj2) {
    return true;
  }
  if (obj1 == null) {
    return false;
  }
  return obj1.equals(obj2);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy