All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.avro.generic.GenericData Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro.generic;

import java.nio.ByteBuffer;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.AbstractList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.LinkedHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Conversion;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalType;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Type;
import org.apache.avro.UnresolvedUnionException;
import org.apache.avro.io.BinaryData;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.parsing.ResolvingGrammarGenerator;
import org.apache.avro.util.Utf8;

import org.codehaus.jackson.JsonNode;

import com.facebook.presto.spark.$internal.com.google.common.collect.MapMaker;

/** Utilities for generic Java data. See {@link GenericRecordBuilder} for a convenient
 * way to build {@link GenericRecord} instances.
 * @see GenericRecordBuilder
 */
public class GenericData {

  private static final GenericData INSTANCE = new GenericData();

  /** Used to specify the Java type for a string schema. */
  public enum StringType { CharSequence, String, Utf8 };

  public static final String STRING_PROP = "avro.java.string";
  protected static final String STRING_TYPE_STRING = "String";

  private final ClassLoader classLoader;

  /** Set the Java type to be used when reading this schema.  Meaningful only
   * only string schemas and map schemas (for the keys). */
  public static void setStringType(Schema s, StringType stringType) {
    // Utf8 is the default and implements CharSequence, so we only need to add
    // a property when the type is String
    if (stringType == StringType.String)
      s.addProp(GenericData.STRING_PROP, GenericData.STRING_TYPE_STRING);
  }

  /** Return the singleton instance. */
  public static GenericData get() { return INSTANCE; }

  /** For subclasses.  Applications normally use {@link GenericData#get()}. */
  public GenericData() {
    this(null);
  }

  /** For subclasses.  GenericData does not use a ClassLoader. */
  public GenericData(ClassLoader classLoader) {
    this.classLoader = (classLoader != null)
      ? classLoader
      : getClass().getClassLoader();
  }

  /** Return the class loader that's used (by subclasses). */
  public ClassLoader getClassLoader() { return classLoader; }

  private Map> conversions =
      new HashMap>();

  private Map, Map>> conversionsByClass =
      new IdentityHashMap, Map>>();

  /**
   * Registers the given conversion to be used when reading and writing with
   * this data model.
   *
   * @param conversion a logical type Conversion.
   */
  public void addLogicalTypeConversion(Conversion conversion) {
    conversions.put(conversion.getLogicalTypeName(), conversion);
    Class type = conversion.getConvertedType();
    if (conversionsByClass.containsKey(type)) {
      conversionsByClass.get(type).put(
          conversion.getLogicalTypeName(), conversion);
    } else {
      Map> conversions = new LinkedHashMap>();
      conversions.put(conversion.getLogicalTypeName(), conversion);
      conversionsByClass.put(type, conversions);
    }
  }

  /**
   * Returns the first conversion found for the given class.
   *
   * @param datumClass a Class
   * @return the first registered conversion for the class, or null
   */
  @SuppressWarnings("unchecked")
  public  Conversion getConversionByClass(Class datumClass) {
    Map> conversions = conversionsByClass.get(datumClass);
    if (conversions != null) {
      return (Conversion) conversions.values().iterator().next();
    }
    return null;
  }

  /**
   * Returns the conversion for the given class and logical type.
   *
   * @param datumClass a Class
   * @param logicalType a LogicalType
   * @return the conversion for the class and logical type, or null
   */
  @SuppressWarnings("unchecked")
  public  Conversion getConversionByClass(Class datumClass,
                                                LogicalType logicalType) {
    Map> conversions = conversionsByClass.get(datumClass);
    if (conversions != null) {
      return (Conversion) conversions.get(logicalType.getName());
    }
    return null;
  }

  /**
   * Returns the Conversion for the given logical type.
   *
   * @param logicalType a logical type
   * @return the conversion for the logical type, or null
   */
  @SuppressWarnings("unchecked")
  public Conversion getConversionFor(LogicalType logicalType) {
    if (logicalType == null) {
      return null;
    }
    return (Conversion) conversions.get(logicalType.getName());
  }

  /** Default implementation of {@link GenericRecord}. Note that this implementation
   * does not fill in default values for fields if they are not specified; use {@link
   * GenericRecordBuilder} in that case.
   * @see GenericRecordBuilder
   */
  public static class Record implements GenericRecord, Comparable {
    private final Schema schema;
    private final Object[] values;
    public Record(Schema schema) {
      if (schema == null || !Type.RECORD.equals(schema.getType()))
        throw new AvroRuntimeException("Not a record schema: "+schema);
      this.schema = schema;
      this.values = new Object[schema.getFields().size()];
    }
    public Record(Record other, boolean deepCopy) {
      schema = other.schema;
      values = new Object[schema.getFields().size()];
      if (deepCopy) {
        for (int ii = 0; ii < values.length; ii++) {
          values[ii] = INSTANCE.deepCopy(
              schema.getFields().get(ii).schema(), other.values[ii]);
        }
      }
      else {
        System.arraycopy(other.values, 0, values, 0, other.values.length);
      }
    }
    @Override public Schema getSchema() { return schema; }
    @Override public void put(String key, Object value) {
      Schema.Field field = schema.getField(key);
      if (field == null)
        throw new AvroRuntimeException("Not a valid schema field: "+key);

      values[field.pos()] = value;
    }
    @Override public void put(int i, Object v) { values[i] = v; }
    @Override public Object get(String key) {
      Field field = schema.getField(key);
      if (field == null) return null;
      return values[field.pos()];
    }
    @Override public Object get(int i) { return values[i]; }
    @Override public boolean equals(Object o) {
      if (o == this) return true;                 // identical object
      if (!(o instanceof Record)) return false;   // not a record
      Record that = (Record)o;
      if (!this.schema.equals(that.schema))
        return false;                             // not the same schema
      return GenericData.get().compare(this, that, schema, true) == 0;
    }
    @Override public int hashCode() {
      return GenericData.get().hashCode(this, schema);
    }
    @Override public int compareTo(Record that) {
      return GenericData.get().compare(this, that, schema);
    }
    @Override public String toString() {
      return GenericData.get().toString(this);
    }
  }

  /** Default implementation of an array. */
  @SuppressWarnings(value="unchecked")
  public static class Array extends AbstractList
    implements GenericArray, Comparable> {
    private static final Object[] EMPTY = new Object[0];
    private final Schema schema;
    private int size;
    private Object[] elements = EMPTY;
    public Array(int capacity, Schema schema) {
      if (schema == null || !Type.ARRAY.equals(schema.getType()))
        throw new AvroRuntimeException("Not an array schema: "+schema);
      this.schema = schema;
      if (capacity != 0)
        elements = new Object[capacity];
    }
    public Array(Schema schema, Collection c) {
      if (schema == null || !Type.ARRAY.equals(schema.getType()))
        throw new AvroRuntimeException("Not an array schema: "+schema);
      this.schema = schema;
      if (c != null) {
        elements = new Object[c.size()];
        addAll(c);
      }
    }
    @Override
    public Schema getSchema() { return schema; }
    @Override public int size() { return size; }
    @Override public void clear() { size = 0; }
    @Override public Iterator iterator() {
      return new Iterator() {
        private int position = 0;
        @Override
        public boolean hasNext() { return position < size; }
        @Override
        public T next() { return (T)elements[position++]; }
        @Override
        public void remove() { throw new UnsupportedOperationException(); }
      };
    }
    @Override public T get(int i) {
      if (i >= size)
        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
      return (T)elements[i];
    }
    @Override public boolean add(T o) {
      if (size == elements.length) {
        Object[] newElements = new Object[(size * 3)/2 + 1];
        System.arraycopy(elements, 0, newElements, 0, size);
        elements = newElements;
      }
      elements[size++] = o;
      return true;
    }
    @Override public void add(int location, T o) {
      if (location > size || location < 0) {
        throw new IndexOutOfBoundsException("Index " + location + " out of bounds.");
      }
      if (size == elements.length) {
        Object[] newElements = new Object[(size * 3)/2 + 1];
        System.arraycopy(elements, 0, newElements, 0, size);
        elements = newElements;
      }
      System.arraycopy(elements, location, elements, location + 1, size - location);
      elements[location] = o;
      size++;
    }
    @Override public T set(int i, T o) {
      if (i >= size)
        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
      T response = (T)elements[i];
      elements[i] = o;
      return response;
    }
    @Override public T remove(int i) {
      if (i >= size)
        throw new IndexOutOfBoundsException("Index " + i + " out of bounds.");
      T result = (T)elements[i];
      --size;
      System.arraycopy(elements, i+1, elements, i, (size-i));
      elements[size] = null;
      return result;
    }
    @Override
    public T peek() {
      return (size < elements.length) ? (T)elements[size] : null;
    }
    @Override
    public int compareTo(GenericArray that) {
      return GenericData.get().compare(this, that, this.getSchema());
    }
    @Override
    public void reverse() {
      int left = 0;
      int right = elements.length - 1;

      while (left < right) {
        Object tmp = elements[left];
        elements[left] = elements[right];
        elements[right] = tmp;

        left++;
        right--;
      }
    }
    @Override
    public String toString() {
      StringBuilder buffer = new StringBuilder();
      buffer.append("[");
      int count = 0;
      for (T e : this) {
        buffer.append(e==null ? "null" : e.toString());
        if (++count < size())
          buffer.append(", ");
      }
      buffer.append("]");
      return buffer.toString();
    }
  }

  /** Default implementation of {@link GenericFixed}. */
  public static class Fixed implements GenericFixed, Comparable {
    private Schema schema;
    private byte[] bytes;

    public Fixed(Schema schema) { setSchema(schema); }

    public Fixed(Schema schema, byte[] bytes) {
      this.schema = schema;
      this.bytes = bytes;
    }

    protected Fixed() {}

    protected void setSchema(Schema schema) {
      this.schema = schema;
      this.bytes = new byte[schema.getFixedSize()];
    }

    @Override public Schema getSchema() { return schema; }

    public void bytes(byte[] bytes) { this.bytes = bytes; }

    @Override
    public byte[] bytes() { return bytes; }

    @Override
    public boolean equals(Object o) {
      if (o == this) return true;
      return o instanceof GenericFixed
        && Arrays.equals(bytes, ((GenericFixed)o).bytes());
    }

    @Override
    public int hashCode() { return Arrays.hashCode(bytes); }

    @Override
    public String toString() { return Arrays.toString(bytes); }

    @Override
    public int compareTo(Fixed that) {
      return BinaryData.compareBytes(this.bytes, 0, this.bytes.length,
                                     that.bytes, 0, that.bytes.length);
    }
  }

  /** Default implementation of {@link GenericEnumSymbol}. */
  public static class EnumSymbol
      implements GenericEnumSymbol, Comparable  {
    private Schema schema;
    private String symbol;

    public EnumSymbol(Schema schema, String symbol) {
      this.schema = schema;
      this.symbol = symbol;
    }

    /**
     * Maps existing Objects into an Avro enum
     * by calling toString(), eg for Java Enums
     */
    public EnumSymbol(Schema schema, Object symbol) {
      this(schema, symbol.toString());
    }

    @Override public Schema getSchema() { return schema; }

    @Override
    public boolean equals(Object o) {
      if (o == this) return true;
      return o instanceof GenericEnumSymbol
        && symbol.equals(o.toString());
    }

    @Override
    public int hashCode() { return symbol.hashCode(); }

    @Override
    public String toString() { return symbol; }

    @Override
    public int compareTo(GenericEnumSymbol that) {
      return GenericData.get().compare(this, that, schema);
    }
  }

  /** Returns a {@link DatumReader} for this kind of data. */
  public DatumReader createDatumReader(Schema schema) {
    return new GenericDatumReader(schema, schema, this);
  }

  /** Returns a {@link DatumReader} for this kind of data. */
  public DatumReader createDatumReader(Schema writer, Schema reader) {
    return new GenericDatumReader(writer, reader, this);
  }

  /** Returns a {@link DatumWriter} for this kind of data. */
  public DatumWriter createDatumWriter(Schema schema) {
    return new GenericDatumWriter(schema, this);
  }

  /** Returns true if a Java datum matches a schema. */
  public boolean validate(Schema schema, Object datum) {
    switch (schema.getType()) {
    case RECORD:
      if (!isRecord(datum)) return false;
      for (Field f : schema.getFields()) {
        if (!validate(f.schema(), getField(datum, f.name(), f.pos())))
          return false;
      }
      return true;
    case ENUM:
      if (!isEnum(datum)) return false;
      return schema.getEnumSymbols().contains(datum.toString());
    case ARRAY:
      if (!(isArray(datum))) return false;
      for (Object element : getArrayAsCollection(datum))
        if (!validate(schema.getElementType(), element))
          return false;
      return true;
    case MAP:
      if (!(isMap(datum))) return false;
      @SuppressWarnings(value="unchecked")
      Map map = (Map)datum;
      for (Map.Entry entry : map.entrySet())
        if (!validate(schema.getValueType(), entry.getValue()))
          return false;
      return true;
    case UNION:
      try {
        int i = resolveUnion(schema, datum);
        return validate(schema.getTypes().get(i), datum);
      } catch (UnresolvedUnionException e) {
        return false;
      }
    case FIXED:
      return datum instanceof GenericFixed
        && ((GenericFixed)datum).bytes().length==schema.getFixedSize();
    case STRING:  return isString(datum);
    case BYTES:   return isBytes(datum);
    case INT:     return isInteger(datum);
    case LONG:    return isLong(datum);
    case FLOAT:   return isFloat(datum);
    case DOUBLE:  return isDouble(datum);
    case BOOLEAN: return isBoolean(datum);
    case NULL:    return datum == null;
    default: return false;
    }
  }

  /** Renders a Java datum as JSON. */
  public String toString(Object datum) {
    StringBuilder buffer = new StringBuilder();
    toString(datum, buffer, new IdentityHashMap(128) );
    return buffer.toString();
  }

  private static final String TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT =
    " \">>> CIRCULAR REFERENCE CANNOT BE PUT IN JSON STRING, ABORTING RECURSION <<<\" ";

  /** Renders a Java datum as JSON. */
  protected void toString(Object datum, StringBuilder buffer, IdentityHashMap seenObjects) {
    if (isRecord(datum)) {
      if (seenObjects.containsKey(datum)) {
        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
        return;
      }
      seenObjects.put(datum, datum);
      buffer.append("{");
      int count = 0;
      Schema schema = getRecordSchema(datum);
      for (Field f : schema.getFields()) {
        toString(f.name(), buffer, seenObjects);
        buffer.append(": ");
        toString(getField(datum, f.name(), f.pos()), buffer, seenObjects);
        if (++count < schema.getFields().size())
          buffer.append(", ");
      }
      buffer.append("}");
      seenObjects.remove(datum);
    } else if (isArray(datum)) {
      if (seenObjects.containsKey(datum)) {
        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
        return;
      }
      seenObjects.put(datum, datum);
      Collection array = getArrayAsCollection(datum);
      buffer.append("[");
      long last = array.size()-1;
      int i = 0;
      for (Object element : array) {
        toString(element, buffer, seenObjects);
        if (i++ < last)
          buffer.append(", ");
      }
      buffer.append("]");
      seenObjects.remove(datum);
    } else if (isMap(datum)) {
      if (seenObjects.containsKey(datum)) {
        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
        return;
      }
      seenObjects.put(datum, datum);
      buffer.append("{");
      int count = 0;
      @SuppressWarnings(value="unchecked")
      Map map = (Map)datum;
      for (Map.Entry entry : map.entrySet()) {
        toString(entry.getKey(), buffer, seenObjects);
        buffer.append(": ");
        toString(entry.getValue(), buffer, seenObjects);
        if (++count < map.size())
          buffer.append(", ");
      }
      buffer.append("}");
      seenObjects.remove(datum);
    } else if (isString(datum)|| isEnum(datum)) {
      buffer.append("\"");
      writeEscapedString(datum.toString(), buffer);
      buffer.append("\"");
    } else if (isBytes(datum)) {
      buffer.append("{\"bytes\": \"");
      ByteBuffer bytes = ((ByteBuffer) datum).duplicate();
      writeEscapedString(StandardCharsets.ISO_8859_1.decode(bytes), buffer);
      buffer.append("\"}");
    } else if (((datum instanceof Float) &&       // quote Nan & Infinity
                (((Float)datum).isInfinite() || ((Float)datum).isNaN()))
               || ((datum instanceof Double) &&
                   (((Double)datum).isInfinite() || ((Double)datum).isNaN()))) {
      buffer.append("\"");
      buffer.append(datum);
      buffer.append("\"");
    } else if (datum instanceof GenericData) {
      if (seenObjects.containsKey(datum)) {
        buffer.append(TOSTRING_CIRCULAR_REFERENCE_ERROR_TEXT);
        return;
      }
      seenObjects.put(datum, datum);
      toString(datum, buffer, seenObjects);
      seenObjects.remove(datum);
    } else {
      buffer.append(datum);
    }
  }

  /* Adapted from http://code.google.com/p/json-simple */
  private void writeEscapedString(CharSequence string, StringBuilder builder) {
    for(int i = 0; i < string.length(); i++){
      char ch = string.charAt(i);
      switch(ch){
        case '"':
          builder.append("\\\"");
          break;
        case '\\':
          builder.append("\\\\");
          break;
        case '\b':
          builder.append("\\b");
          break;
        case '\f':
          builder.append("\\f");
          break;
        case '\n':
          builder.append("\\n");
          break;
        case '\r':
          builder.append("\\r");
          break;
        case '\t':
          builder.append("\\t");
          break;
        default:
          // Reference: http://www.unicode.org/versions/Unicode5.1.0/
          if((ch>='\u0000' && ch<='\u001F') || (ch>='\u007F' && ch<='\u009F') || (ch>='\u2000' && ch<='\u20FF')){
            String hex = Integer.toHexString(ch);
            builder.append("\\u");
            for(int j = 0; j < 4 - hex.length(); j++)
              builder.append('0');
            builder.append(hex.toUpperCase());
          } else {
            builder.append(ch);
          }
        }
    }
  }

  /** Create a schema given an example datum. */
  public Schema induce(Object datum) {
    if (isRecord(datum)) {
      return getRecordSchema(datum);
    } else if (isArray(datum)) {
      Schema elementType = null;
      for (Object element : getArrayAsCollection(datum)) {
        if (elementType == null) {
          elementType = induce(element);
        } else if (!elementType.equals(induce(element))) {
          throw new AvroTypeException("No mixed type arrays.");
        }
      }
      if (elementType == null) {
        throw new AvroTypeException("Empty array: "+datum);
      }
      return Schema.createArray(elementType);

    } else if (isMap(datum)) {
      @SuppressWarnings(value="unchecked")
      Map map = (Map)datum;
      Schema value = null;
      for (Map.Entry entry : map.entrySet()) {
        if (value == null) {
          value = induce(entry.getValue());
        } else if (!value.equals(induce(entry.getValue()))) {
          throw new AvroTypeException("No mixed type map values.");
        }
      }
      if (value == null) {
        throw new AvroTypeException("Empty map: "+datum);
      }
      return Schema.createMap(value);
    } else if (datum instanceof GenericFixed) {
      return Schema.createFixed(null, null, null,
                                ((GenericFixed)datum).bytes().length);
    }
    else if (isString(datum)) return Schema.create(Type.STRING);
    else if (isBytes(datum)) return Schema.create(Type.BYTES);
    else if (isInteger(datum))    return Schema.create(Type.INT);
    else if (isLong(datum))       return Schema.create(Type.LONG);
    else if (isFloat(datum))      return Schema.create(Type.FLOAT);
    else if (isDouble(datum))     return Schema.create(Type.DOUBLE);
    else if (isBoolean(datum))    return Schema.create(Type.BOOLEAN);
    else if (datum == null)               return Schema.create(Type.NULL);

    else throw new AvroTypeException("Can't create schema for: "+datum);
  }

  /** Called by {@link GenericDatumReader#readRecord} to set a record fields
   * value to a record instance.  The default implementation is for {@link
   * IndexedRecord}.*/
  public void setField(Object record, String name, int position, Object o) {
    ((IndexedRecord)record).put(position, o);
  }

  /** Called by {@link GenericDatumReader#readRecord} to retrieve a record
   * field value from a reused instance.  The default implementation is for
   * {@link IndexedRecord}.*/
  public Object getField(Object record, String name, int position) {
    return ((IndexedRecord)record).get(position);
  }

  /** Produce state for repeated calls to {@link
   * #getField(Object,String,int,Object)} and {@link
   * #setField(Object,String,int,Object,Object)} on the same record.*/
  protected Object getRecordState(Object record, Schema schema) { return null; }

  /** Version of {@link #setField} that has state. */
  protected void setField(Object r, String n, int p, Object o, Object state) {
    setField(r, n, p, o);
  }

  /** Version of {@link #getField} that has state. */
  protected Object getField(Object record, String name, int pos, Object state) {
    return getField(record, name, pos);
  }

  /** Return the index for a datum within a union.  Implemented with {@link
   * Schema#getIndexNamed(String)} and {@link #getSchemaName(Object)}.*/
  public int resolveUnion(Schema union, Object datum) {
    // if there is a logical type that works, use it first
    // this allows logical type concrete classes to overlap with supported ones
    // for example, a conversion could return a map
    if (datum != null) {
      Map> conversions = conversionsByClass.get(datum.getClass());
      if (conversions != null) {
        List candidates = union.getTypes();
        for (int i = 0; i < candidates.size(); i += 1) {
          LogicalType candidateType = candidates.get(i).getLogicalType();
          if (candidateType != null) {
            Conversion conversion = conversions.get(candidateType.getName());
            if (conversion != null) {
              return i;
            }
          }
        }
      }
    }

    Integer i = union.getIndexNamed(getSchemaName(datum));
    if (i != null)
      return i;
    throw new UnresolvedUnionException(union, datum);
  }

  /** Return the schema full name for a datum.  Called by {@link
   * #resolveUnion(Schema,Object)}. */
  protected String getSchemaName(Object datum) {
    if (datum == null)
      return Type.NULL.getName();
    if (isRecord(datum))
      return getRecordSchema(datum).getFullName();
    if (isEnum(datum))
      return getEnumSchema(datum).getFullName();
    if (isArray(datum))
      return Type.ARRAY.getName();
    if (isMap(datum))
      return Type.MAP.getName();
    if (isFixed(datum))
      return getFixedSchema(datum).getFullName();
    if (isString(datum))
      return Type.STRING.getName();
    if (isBytes(datum))
      return Type.BYTES.getName();
    if (isInteger(datum))
      return Type.INT.getName();
    if (isLong(datum))
      return Type.LONG.getName();
    if (isFloat(datum))
      return Type.FLOAT.getName();
    if (isDouble(datum))
      return Type.DOUBLE.getName();
    if (isBoolean(datum))
      return Type.BOOLEAN.getName();
    throw new AvroRuntimeException
      (String.format("Unknown datum type %s: %s",
                     datum.getClass().getName(), datum));
 }

  /** Called by {@link #resolveUnion(Schema,Object)}.  May be overridden for
      alternate data representations.*/
  protected boolean instanceOf(Schema schema, Object datum) {
    switch (schema.getType()) {
    case RECORD:
      if (!isRecord(datum)) return false;
      return (schema.getFullName() == null)
        ? getRecordSchema(datum).getFullName() == null
        : schema.getFullName().equals(getRecordSchema(datum).getFullName());
    case ENUM:
      if (!isEnum(datum)) return false;
      return schema.getFullName().equals(getEnumSchema(datum).getFullName());
    case ARRAY:   return isArray(datum);
    case MAP:     return isMap(datum);
    case FIXED:
      if (!isFixed(datum)) return false;
      return schema.getFullName().equals(getFixedSchema(datum).getFullName());
    case STRING:  return isString(datum);
    case BYTES:   return isBytes(datum);
    case INT:     return isInteger(datum);
    case LONG:    return isLong(datum);
    case FLOAT:   return isFloat(datum);
    case DOUBLE:  return isDouble(datum);
    case BOOLEAN: return isBoolean(datum);
    case NULL:    return datum == null;
    default: throw new AvroRuntimeException("Unexpected type: " +schema);
    }
  }

  /** Called by the default implementation of {@link #instanceOf}.*/
  protected boolean isArray(Object datum) {
    return datum instanceof Collection;
  }

  /** Called to access an array as a collection. */
  protected Collection getArrayAsCollection(Object datum) {
    return (Collection)datum;
  }

  /** Called by the default implementation of {@link #instanceOf}.*/
  protected boolean isRecord(Object datum) {
    return datum instanceof IndexedRecord;
  }

  /** Called to obtain the schema of a record.  By default calls
   * {GenericContainer#getSchema().  May be overridden for alternate record
   * representations. */
  protected Schema getRecordSchema(Object record) {
    return ((GenericContainer)record).getSchema();
  }

  /** Called by the default implementation of {@link #instanceOf}.*/
  protected boolean isEnum(Object datum) {
    return datum instanceof GenericEnumSymbol;
  }

  /** Called to obtain the schema of a enum.  By default calls
   * {GenericContainer#getSchema().  May be overridden for alternate enum
   * representations. */
  protected Schema getEnumSchema(Object enu) {
    return ((GenericContainer)enu).getSchema();
  }

  /** Called by the default implementation of {@link #instanceOf}.*/
  protected boolean isMap(Object datum) {
    return datum instanceof Map;
  }

  /** Called by the default implementation of {@link #instanceOf}.*/
  protected boolean isFixed(Object datum) {
    return datum instanceof GenericFixed;
  }

  /** Called to obtain the schema of a fixed.  By default calls
   * {GenericContainer#getSchema().  May be overridden for alternate fixed
   * representations. */
  protected Schema getFixedSchema(Object fixed) {
    return ((GenericContainer)fixed).getSchema();
  }

  /** Called by the default implementation of {@link #instanceOf}.*/
  protected boolean isString(Object datum) {
    return datum instanceof CharSequence;
  }

  /** Called by the default implementation of {@link #instanceOf}.*/
  protected boolean isBytes(Object datum) {
    return datum instanceof ByteBuffer;
  }

   /**
   * Called by the default implementation of {@link #instanceOf}.
   */
  protected boolean isInteger(Object datum) {
    return datum instanceof Integer;
  }

  /**
   * Called by the default implementation of {@link #instanceOf}.
   */
  protected boolean isLong(Object datum) {
    return datum instanceof Long;
  }

  /**
   * Called by the default implementation of {@link #instanceOf}.
   */
  protected boolean isFloat(Object datum) {
    return datum instanceof Float;
  }

  /**
   * Called by the default implementation of {@link #instanceOf}.
   */
  protected boolean isDouble(Object datum) {
    return datum instanceof Double;
  }

  /**
   * Called by the default implementation of {@link #instanceOf}.
   */
  protected boolean isBoolean(Object datum) {
    return datum instanceof Boolean;
  }


  /** Compute a hash code according to a schema, consistent with {@link
   * #compare(Object,Object,Schema)}. */
  public int hashCode(Object o, Schema s) {
    if (o == null) return 0;                      // incomplete datum
    int hashCode = 1;
    switch (s.getType()) {
    case RECORD:
      for (Field f : s.getFields()) {
        if (f.order() == Field.Order.IGNORE)
          continue;
        hashCode = hashCodeAdd(hashCode,
                               getField(o, f.name(), f.pos()), f.schema());
      }
      return hashCode;
    case ARRAY:
      Collection a = (Collection)o;
      Schema elementType = s.getElementType();
      for (Object e : a)
        hashCode = hashCodeAdd(hashCode, e, elementType);
      return hashCode;
    case UNION:
      return hashCode(o, s.getTypes().get(resolveUnion(s, o)));
    case ENUM:
      return s.getEnumOrdinal(o.toString());
    case NULL:
      return 0;
    case STRING:
      return (o instanceof Utf8 ? o : new Utf8(o.toString())).hashCode();
    default:
      return o.hashCode();
    }
  }

  /** Add the hash code for an object into an accumulated hash code. */
  protected int hashCodeAdd(int hashCode, Object o, Schema s) {
    return 31*hashCode + hashCode(o, s);
  }

  /** Compare objects according to their schema.  If equal, return zero.  If
   * greater-than, return 1, if less than return -1.  Order is consistent with
   * that of {@link BinaryData#compare(byte[], int, byte[], int, Schema)}.
   */
  public int compare(Object o1, Object o2, Schema s) {
    return compare(o1, o2, s, false);
  }

  /** Comparison implementation.  When equals is true, only checks for equality,
   * not for order. */
  @SuppressWarnings(value="unchecked")
  protected int compare(Object o1, Object o2, Schema s, boolean equals) {
    if (o1 == o2) return 0;
    switch (s.getType()) {
    case RECORD:
      for (Field f : s.getFields()) {
        if (f.order() == Field.Order.IGNORE)
          continue;                               // ignore this field
        int pos = f.pos();
        String name = f.name();
        int compare =
          compare(getField(o1, name, pos), getField(o2, name, pos),
                  f.schema(), equals);
        if (compare != 0)                         // not equal
          return f.order() == Field.Order.DESCENDING ? -compare : compare;
      }
      return 0;
    case ENUM:
      return s.getEnumOrdinal(o1.toString()) - s.getEnumOrdinal(o2.toString());
    case ARRAY:
      Collection a1 = (Collection)o1;
      Collection a2 = (Collection)o2;
      Iterator e1 = a1.iterator();
      Iterator e2 = a2.iterator();
      Schema elementType = s.getElementType();
      while(e1.hasNext() && e2.hasNext()) {
        int compare = compare(e1.next(), e2.next(), elementType, equals);
        if (compare != 0) return compare;
      }
      return e1.hasNext() ? 1 : (e2.hasNext() ? -1 : 0);
    case MAP:
      if (equals)
        return ((Map)o1).equals(o2) ? 0 : 1;
      throw new AvroRuntimeException("Can't compare maps!");
    case UNION:
      int i1 = resolveUnion(s, o1);
      int i2 = resolveUnion(s, o2);
      return (i1 == i2)
        ? compare(o1, o2, s.getTypes().get(i1), equals)
        : i1 - i2;
    case NULL:
      return 0;
    case STRING:
      Utf8 u1 = o1 instanceof Utf8 ? (Utf8)o1 : new Utf8(o1.toString());
      Utf8 u2 = o2 instanceof Utf8 ? (Utf8)o2 : new Utf8(o2.toString());
      return u1.compareTo(u2);
    default:
      return ((Comparable)o1).compareTo(o2);
    }
  }

  private final Map defaultValueCache
    = new MapMaker().weakKeys().makeMap();

  /**
   * Gets the default value of the given field, if any.
   * @param field the field whose default value should be retrieved.
   * @return the default value associated with the given field,
   * or null if none is specified in the schema.
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  public Object getDefaultValue(Field field) {
    JsonNode json = field.defaultValue();
    if (json == null)
      throw new AvroRuntimeException("Field " + field
                                     + " not set and has no default value");
    if (json.isNull()
        && (field.schema().getType() == Type.NULL
            || (field.schema().getType() == Type.UNION
                && field.schema().getTypes().get(0).getType() == Type.NULL))) {
      return null;
    }

    // Check the cache
    Object defaultValue = defaultValueCache.get(field);

    // If not cached, get the default Java value by encoding the default JSON
    // value and then decoding it:
    if (defaultValue == null)
      try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(baos, null);
        ResolvingGrammarGenerator.encode(encoder, field.schema(), json);
        encoder.flush();
        BinaryDecoder decoder =
          DecoderFactory.get().binaryDecoder(baos.toByteArray(), null);
        defaultValue =
          createDatumReader(field.schema()).read(null, decoder);

        defaultValueCache.put(field, defaultValue);
      } catch (IOException e) {
        throw new AvroRuntimeException(e);
      }

    return defaultValue;
  }

  private static final Schema STRINGS = Schema.create(Type.STRING);

  /**
   * Makes a deep copy of a value given its schema.
   * 

Logical types are converted to raw types, copied, then converted back. * @param schema the schema of the value to deep copy. * @param value the value to deep copy. * @return a deep copy of the given value. */ @SuppressWarnings({ "rawtypes", "unchecked" }) public T deepCopy(Schema schema, T value) { if (value == null) return null; LogicalType logicalType = schema.getLogicalType(); if (logicalType == null) // not a logical type -- use raw copy return (T)deepCopyRaw(schema, value); Conversion conversion = getConversionByClass(value.getClass(), logicalType); if (conversion == null) // no conversion defined -- try raw copy return (T)deepCopyRaw(schema, value); // logical type with conversion: convert to raw, copy, then convert back to logical Object raw = Conversions.convertToRawType(value, schema, logicalType, conversion); Object copy = deepCopyRaw(schema, raw); // copy raw return (T)Conversions.convertToLogicalType(copy, schema, logicalType, conversion); } private Object deepCopyRaw(Schema schema, Object value) { if (value == null) { return null; } switch (schema.getType()) { case ARRAY: List arrayValue = (List) value; List arrayCopy = new GenericData.Array( arrayValue.size(), schema); for (Object obj : arrayValue) { arrayCopy.add(deepCopy(schema.getElementType(), obj)); } return arrayCopy; case BOOLEAN: return value; // immutable case BYTES: ByteBuffer byteBufferValue = (ByteBuffer) value; int start = byteBufferValue.position(); int length = byteBufferValue.limit() - start; byte[] bytesCopy = new byte[length]; byteBufferValue.get(bytesCopy, 0, length); byteBufferValue.position(start); return ByteBuffer.wrap(bytesCopy, 0, length); case DOUBLE: return value; // immutable case ENUM: return createEnum(value.toString(), schema); case FIXED: return createFixed(null, ((GenericFixed) value).bytes(), schema); case FLOAT: return value; // immutable case INT: return value; // immutable case LONG: return value; // immutable case MAP: Map mapValue = (Map) value; Map mapCopy = new HashMap(mapValue.size()); for (Map.Entry entry : mapValue.entrySet()) { mapCopy.put((CharSequence)(deepCopy(STRINGS, entry.getKey())), deepCopy(schema.getValueType(), entry.getValue())); } return mapCopy; case NULL: return null; case RECORD: Object oldState = getRecordState(value, schema); Object newRecord = newRecord(null, schema); Object newState = getRecordState(newRecord, schema); for (Field f : schema.getFields()) { int pos = f.pos(); String name = f.name(); Object newValue = deepCopy(f.schema(), getField(value, name, pos, oldState)); setField(newRecord, name, pos, newValue, newState); } return newRecord; case STRING: // Strings are immutable if (value instanceof String) { return value; } // Some CharSequence subclasses are mutable, so we still need to make // a copy else if (value instanceof Utf8) { // Utf8 copy constructor is more efficient than converting // to string and then back to Utf8 return new Utf8((Utf8)value); } return new Utf8(value.toString()); case UNION: return deepCopy( schema.getTypes().get(resolveUnion(schema, value)), value); default: throw new AvroRuntimeException( "Deep copy failed for schema \"" + schema + "\" and value \"" + value + "\""); } } /** Called to create an fixed value. May be overridden for alternate fixed * representations. By default, returns {@link GenericFixed}. */ public Object createFixed(Object old, Schema schema) { if ((old instanceof GenericFixed) && ((GenericFixed)old).bytes().length == schema.getFixedSize()) return old; return new GenericData.Fixed(schema); } /** Called to create an fixed value. May be overridden for alternate fixed * representations. By default, returns {@link GenericFixed}. */ public Object createFixed(Object old, byte[] bytes, Schema schema) { GenericFixed fixed = (GenericFixed)createFixed(old, schema); System.arraycopy(bytes, 0, fixed.bytes(), 0, schema.getFixedSize()); return fixed; } /** Called to create an enum value. May be overridden for alternate enum * representations. By default, returns a GenericEnumSymbol. */ public Object createEnum(String symbol, Schema schema) { return new EnumSymbol(schema, symbol); } /** * Called to create new record instances. Subclasses may override to use a * different record implementation. The returned instance must conform to the * schema provided. If the old object contains fields not present in the * schema, they should either be removed from the old object, or it should * create a new instance that conforms to the schema. By default, this returns * a {@link GenericData.Record}. */ public Object newRecord(Object old, Schema schema) { if (old instanceof IndexedRecord) { IndexedRecord record = (IndexedRecord)old; if (record.getSchema() == schema) return record; } return new GenericData.Record(schema); } }