All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.avro.SchemaCompatibility Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import com.facebook.presto.spark.$internal.org.slf4j.Logger;
import com.facebook.presto.spark.$internal.org.slf4j.LoggerFactory;

/**
 * Evaluate the compatibility between a reader schema and a writer schema.
 * A reader and a writer schema are declared compatible if all datum instances of the writer
 * schema can be successfully decoded using the specified reader schema.
 */
public class SchemaCompatibility {
  private static final Logger LOG = LoggerFactory.getLogger(SchemaCompatibility.class);

  /** Utility class cannot be instantiated. */
  private SchemaCompatibility() {
  }

  /** Message to annotate reader/writer schema pairs that are compatible. */
  public static final String READER_WRITER_COMPATIBLE_MESSAGE =
      "Reader schema can always successfully decode data written using the writer schema.";

  /**
   * Validates that the provided reader schema can be used to decode avro data written with the
   * provided writer schema.
   *
   * @param reader schema to check.
   * @param writer schema to check.
   * @return a result object identifying any compatibility errors.
   */
  public static SchemaPairCompatibility checkReaderWriterCompatibility(
      final Schema reader,
      final Schema writer
  ) {
    final SchemaCompatibilityType compatibility =
        new ReaderWriterCompatiblityChecker()
            .getCompatibility(reader, writer);

    final String message;
    switch (compatibility) {
      case INCOMPATIBLE: {
        message = String.format(
            "Data encoded using writer schema:%n%s%n"
            + "will or may fail to decode using reader schema:%n%s%n",
            writer.toString(true),
            reader.toString(true));
        break;
      }
      case COMPATIBLE: {
        message = READER_WRITER_COMPATIBLE_MESSAGE;
        break;
      }
      default: throw new AvroRuntimeException("Unknown compatibility: " + compatibility);
    }

    return new SchemaPairCompatibility(
        compatibility,
        reader,
        writer,
        message);
  }

  // -----------------------------------------------------------------------------------------------

  /**
   * Tests the equality of two Avro named schemas.
   *
   * 

Matching includes reader name aliases.

* * @param reader Named reader schema. * @param writer Named writer schema. * @return whether the names of the named schemas match or not. */ public static boolean schemaNameEquals(final Schema reader, final Schema writer) { final String writerFullName = writer.getFullName(); if (objectsEqual(reader.getFullName(), writerFullName)) { return true; } // Apply reader aliases: if (reader.getAliases().contains(writerFullName)) { return true; } return false; } /** * Identifies the writer field that corresponds to the specified reader field. * *

Matching includes reader name aliases.

* * @param writerSchema Schema of the record where to look for the writer field. * @param readerField Reader field to identify the corresponding writer field of. * @return the writer field, if any does correspond, or None. */ public static Field lookupWriterField(final Schema writerSchema, final Field readerField) { assert (writerSchema.getType() == Type.RECORD); final List writerFields = new ArrayList(); final Field direct = writerSchema.getField(readerField.name()); if (direct != null) { writerFields.add(direct); } for (final String readerFieldAliasName : readerField.aliases()) { final Field writerField = writerSchema.getField(readerFieldAliasName); if (writerField != null) { writerFields.add(writerField); } } switch (writerFields.size()) { case 0: return null; case 1: return writerFields.get(0); default: { throw new AvroRuntimeException(String.format( "Reader record field %s matches multiple fields in writer record schema %s", readerField, writerSchema)); } } } /** * Reader/writer schema pair that can be used as a key in a hash map. * * This reader/writer pair differentiates Schema objects based on their system hash code. */ private static final class ReaderWriter { private final Schema mReader; private final Schema mWriter; /** * Initializes a new reader/writer pair. * * @param reader Reader schema. * @param writer Writer schema. */ public ReaderWriter(final Schema reader, final Schema writer) { mReader = reader; mWriter = writer; } /** * Returns the reader schema in this pair. * @return the reader schema in this pair. */ public Schema getReader() { return mReader; } /** * Returns the writer schema in this pair. * @return the writer schema in this pair. */ public Schema getWriter() { return mWriter; } /** {@inheritDoc} */ @Override public int hashCode() { return System.identityHashCode(mReader) ^ System.identityHashCode(mWriter); } /** {@inheritDoc} */ @Override public boolean equals(Object obj) { if (!(obj instanceof ReaderWriter)) { return false; } final ReaderWriter that = (ReaderWriter) obj; // Use pointer comparison here: return (this.mReader == that.mReader) && (this.mWriter == that.mWriter); } /** {@inheritDoc} */ @Override public String toString() { return String.format("ReaderWriter{reader:%s, writer:%s}", mReader, mWriter); } } /** * Determines the compatibility of a reader/writer schema pair. * *

Provides memoization to handle recursive schemas.

*/ private static final class ReaderWriterCompatiblityChecker { private final Map mMemoizeMap = new HashMap(); /** * Reports the compatibility of a reader/writer schema pair. * *

Memoizes the compatibility results.

* * @param reader Reader schema to test. * @param writer Writer schema to test. * @return the compatibility of the reader/writer schema pair. */ public SchemaCompatibilityType getCompatibility( final Schema reader, final Schema writer ) { LOG.debug("Checking compatibility of reader {} with writer {}", reader, writer); final ReaderWriter pair = new ReaderWriter(reader, writer); final SchemaCompatibilityType existing = mMemoizeMap.get(pair); if (existing != null) { if (existing == SchemaCompatibilityType.RECURSION_IN_PROGRESS) { // Break the recursion here. // schemas are compatible unless proven incompatible: return SchemaCompatibilityType.COMPATIBLE; } return existing; } // Mark this reader/writer pair as "in progress": mMemoizeMap.put(pair, SchemaCompatibilityType.RECURSION_IN_PROGRESS); final SchemaCompatibilityType calculated = calculateCompatibility(reader, writer); mMemoizeMap.put(pair, calculated); return calculated; } /** * Calculates the compatibility of a reader/writer schema pair. * *

* Relies on external memoization performed by {@link #getCompatibility(Schema, Schema)}. *

* * @param reader Reader schema to test. * @param writer Writer schema to test. * @return the compatibility of the reader/writer schema pair. */ private SchemaCompatibilityType calculateCompatibility( final Schema reader, final Schema writer ) { assert (reader != null); assert (writer != null); if (reader.getType() == writer.getType()) { switch (reader.getType()) { case NULL: case BOOLEAN: case INT: case LONG: case FLOAT: case DOUBLE: case BYTES: case STRING: { return SchemaCompatibilityType.COMPATIBLE; } case ARRAY: { return getCompatibility(reader.getElementType(), writer.getElementType()); } case MAP: { return getCompatibility(reader.getValueType(), writer.getValueType()); } case FIXED: { // fixed size and name must match: if (!schemaNameEquals(reader, writer)) { return SchemaCompatibilityType.INCOMPATIBLE; } if (reader.getFixedSize() != writer.getFixedSize()) { return SchemaCompatibilityType.INCOMPATIBLE; } return SchemaCompatibilityType.COMPATIBLE; } case ENUM: { // enum names must match: if (!schemaNameEquals(reader, writer)) { return SchemaCompatibilityType.INCOMPATIBLE; } // reader symbols must contain all writer symbols: final Set symbols = new HashSet(writer.getEnumSymbols()); symbols.removeAll(reader.getEnumSymbols()); // TODO: Report a human-readable error. // if (!symbols.isEmpty()) { // } return symbols.isEmpty() ? SchemaCompatibilityType.COMPATIBLE : SchemaCompatibilityType.INCOMPATIBLE; } case RECORD: { // record names must match: if (!schemaNameEquals(reader, writer)) { return SchemaCompatibilityType.INCOMPATIBLE; } // Check that each field in the reader record can be populated from the writer record: for (final Field readerField : reader.getFields()) { final Field writerField = lookupWriterField(writer, readerField); if (writerField == null) { // Reader field does not correspond to any field in the writer record schema, // reader field must have a default value. if (readerField.defaultValue() == null) { // reader field has no default value return SchemaCompatibilityType.INCOMPATIBLE; } } else { if (getCompatibility(readerField.schema(), writerField.schema()) == SchemaCompatibilityType.INCOMPATIBLE) { return SchemaCompatibilityType.INCOMPATIBLE; } } } // All fields in the reader record can be populated from the writer record: return SchemaCompatibilityType.COMPATIBLE; } case UNION: { // Check that each individual branch of the writer union can be decoded: for (final Schema writerBranch : writer.getTypes()) { if (getCompatibility(reader, writerBranch) == SchemaCompatibilityType.INCOMPATIBLE) { return SchemaCompatibilityType.INCOMPATIBLE; } } // Each schema in the writer union can be decoded with the reader: return SchemaCompatibilityType.COMPATIBLE; } default: { throw new AvroRuntimeException("Unknown schema type: " + reader.getType()); } } } else { // Reader and writer have different schema types: // Handle the corner case where writer is a union of a singleton branch: { X } === X if ((writer.getType() == Schema.Type.UNION) && writer.getTypes().size() == 1) { return getCompatibility(reader, writer.getTypes().get(0)); } switch (reader.getType()) { case NULL: return SchemaCompatibilityType.INCOMPATIBLE; case BOOLEAN: return SchemaCompatibilityType.INCOMPATIBLE; case INT: return SchemaCompatibilityType.INCOMPATIBLE; case LONG: { return (writer.getType() == Type.INT) ? SchemaCompatibilityType.COMPATIBLE : SchemaCompatibilityType.INCOMPATIBLE; } case FLOAT: { return ((writer.getType() == Type.INT) || (writer.getType() == Type.LONG)) ? SchemaCompatibilityType.COMPATIBLE : SchemaCompatibilityType.INCOMPATIBLE; } case DOUBLE: { return ((writer.getType() == Type.INT) || (writer.getType() == Type.LONG) || (writer.getType() == Type.FLOAT)) ? SchemaCompatibilityType.COMPATIBLE : SchemaCompatibilityType.INCOMPATIBLE; } case BYTES: { return (writer.getType() == Type.STRING) ? SchemaCompatibilityType.COMPATIBLE : SchemaCompatibilityType.INCOMPATIBLE; } case STRING: { return (writer.getType() == Type.BYTES) ? SchemaCompatibilityType.COMPATIBLE : SchemaCompatibilityType.INCOMPATIBLE; } case ARRAY: return SchemaCompatibilityType.INCOMPATIBLE; case MAP: return SchemaCompatibilityType.INCOMPATIBLE; case FIXED: return SchemaCompatibilityType.INCOMPATIBLE; case ENUM: return SchemaCompatibilityType.INCOMPATIBLE; case RECORD: return SchemaCompatibilityType.INCOMPATIBLE; case UNION: { for (final Schema readerBranch : reader.getTypes()) { if (getCompatibility(readerBranch, writer) == SchemaCompatibilityType.COMPATIBLE) { return SchemaCompatibilityType.COMPATIBLE; } } // No branch in the reader union has been found compatible with the writer schema: return SchemaCompatibilityType.INCOMPATIBLE; } default: { throw new AvroRuntimeException("Unknown schema type: " + reader.getType()); } } } } } /** * Identifies the type of a schema compatibility result. */ public static enum SchemaCompatibilityType { COMPATIBLE, INCOMPATIBLE, /** Used internally to tag a reader/writer schema pair and prevent recursion. */ RECURSION_IN_PROGRESS; } // ----------------------------------------------------------------------------------------------- /** * Provides information about the compatibility of a single reader and writer schema pair. * * Note: This class represents a one-way relationship from the reader to the writer schema. */ public static final class SchemaPairCompatibility { /** The type of this result. */ private final SchemaCompatibilityType mType; /** Validated reader schema. */ private final Schema mReader; /** Validated writer schema. */ private final Schema mWriter; /** Human readable description of this result. */ private final String mDescription; /** * Constructs a new instance. * * @param type of the schema compatibility. * @param reader schema that was validated. * @param writer schema that was validated. * @param description of this compatibility result. */ public SchemaPairCompatibility( SchemaCompatibilityType type, Schema reader, Schema writer, String description) { mType = type; mReader = reader; mWriter = writer; mDescription = description; } /** * Gets the type of this result. * * @return the type of this result. */ public SchemaCompatibilityType getType() { return mType; } /** * Gets the reader schema that was validated. * * @return reader schema that was validated. */ public Schema getReader() { return mReader; } /** * Gets the writer schema that was validated. * * @return writer schema that was validated. */ public Schema getWriter() { return mWriter; } /** * Gets a human readable description of this validation result. * * @return a human readable description of this validation result. */ public String getDescription() { return mDescription; } /** {@inheritDoc} */ @Override public String toString() { return String.format( "SchemaPairCompatibility{type:%s, readerSchema:%s, writerSchema:%s, description:%s}", mType, mReader, mWriter, mDescription); } /** {@inheritDoc} */ @Override public boolean equals(Object other) { if ((null != other) && (other instanceof SchemaPairCompatibility)) { final SchemaPairCompatibility result = (SchemaPairCompatibility) other; return objectsEqual(result.mType, mType) && objectsEqual(result.mReader, mReader) && objectsEqual(result.mWriter, mWriter) && objectsEqual(result.mDescription, mDescription); } else { return false; } } /** {@inheritDoc} */ @Override public int hashCode() { return Arrays.hashCode(new Object[]{mType, mReader, mWriter, mDescription}); } } /** Borrowed from Guava's Objects.equal(a, b) */ private static boolean objectsEqual(Object obj1, Object obj2) { return (obj1 == obj2) || ((obj1 != null) && obj1.equals(obj2)); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy