oracle.kv.impl.api.avro.JsonBinding Maven / Gradle / Ivy
Show all versions of oracle-nosql-server Show documentation
/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.impl.api.avro;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.AbstractMap.SimpleEntry;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.UnresolvedUnionException;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.ResolvingDecoder;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.ArrayNode;
import org.codehaus.jackson.node.JsonNodeFactory;
import org.codehaus.jackson.node.ObjectNode;
import oracle.kv.Value;
import oracle.kv.avro.JsonAvroBinding;
import oracle.kv.avro.JsonRecord;
import oracle.kv.avro.RawAvroBinding;
import oracle.kv.avro.RawRecord;
import oracle.kv.avro.SchemaNotAllowedException;
import oracle.kv.avro.UndefinedSchemaException;
/**
* Implements our JSON binding API by subclassing the built-in Avro generic
* classes to translate JsonNode objects to Avro format and back again.
* Although it may appear that Avro includes built-in support for JSON and the
* Jackson API (JsonNode), in fact it only provides the following:
*
* -
* The org.apache.avro.data.Json class provides a serialization mechanism for
* JSON via the Jackson API (JsonNode), but it is not a standard Avro
* serialization. It is a self-describing format, isn't compatible with the
* standard format used for generic and specific bindings, and does not
* support schema evolution. For all three reasons, it is not appropriate
* for our API.
*
* -
* The org.apache.avro.io.JsonEncoder allows serializing Avro data as JSON
* text, and the JsonDecoder deserializes JSON text back to Avro data. The
* JSON text format represents Avro data types according to the rules in the
* Avro spec; for example, 'bytes' and 'fixed' types are represented as
* Strings with Unicode escape syntax and 'union' has a special format
* defined in the spec.
*
* -
* GenericRecord.toString returns JSON text for debugging purposes, as if it
* were written by a JsonEncoder.
*
*
* In our JsonBinding, we map the standard Avro serialization format to
* JsonNode, and we follow the same rules as defined above for the JSON text
* encoding. Schema evolution is also supported.
*
* Our JsonBinding.toValue method is the equivalent of serializing a JSON
* object as JSON text, then deserializing it using a generic binding and a
* JsonDecoder, then serializing it again as Avro binary data. Of course, this
* implementation was not used because it would be very slow compared to the
* approach we've taken.
*
* WARNING: To implement the mapping to JsonNode we subclass GenericData,
* GenericDatumReader and GenericDatumWriter. Although these Avro classes are
* intended to be subclassed in this way, the subclassing can be fragile if the
* overridden methods are changed in Avro from version to version, and we don't
* have control over that.
*
* NOTE: Because the Avro javadoc and comments are extremely sparse, it is
* important to document the implementation of this class in more detail than
* would otherwise be necessary.
*
* Avro Schema Validation
*
* One might be led to believe that because Avro has schemas, it has a full
* featured schema validation facility that would, for example, validate a JSON
* object and give meaningful error messages. The reality is different.
*
* Avro schemas do not have constraints that some might expect, for example,
* there are no numeric range constraints. The purpose of the schema is really
* for serialization, not validation, so any validation features are simply a
* side effect of the need for proper serialization.
*
* Avro does guarantee that serialized data conforms to its associated schema.
* However, it does not always output meaningful error messages and does not
* have the schema validation features that some might expect.
*
* On error messages, when Avro rejects an object during serialization it is
* possible to figure out from the exception messages what is wrong with the
* object. However, this is not as easy as some might like it to be. In at
* least some cases, the field name is not included in the error message.
*
* In addition, Avro sometimes simply coerces the data to conform to the schema
* rather than give an error message. A 'fixed' binary array that is longer
* than allowed by the schema is simply truncated to the required length; in
* this case we have subclassed the datum writer in order to report a
* meaningful error message. With Avro 1.6, when using the 'int' type, a
* number that has more information than can be contained in an int (a float,
* double or long) is simply truncated to an int using Number.intValue; in this
* case we have also subclassed the datum writer to report an error.
*
* This section applies to the generic binding as well as the JSON binding. It
* also applies to the specific binding; however, with a specific binding some
* constraints are enforced by Java itself, since the generated classes have
* setters with specific data types. For example, an 'int' field may not
* contain a float, double or long value, because the setter param is Java type
* 'int'.
*
* In our implementation of the JSON binding, for sake of consistency we do not
* perform any more validation than is performed by the built-in generic
* binding. If we were to add more validation in the future (by
* subclassing) we should do so for all types of bindings.
*
* Note that Avro does include some additional validation facilities that we are
* not currently using:
*
* -
* The ValidatingEncoder and ValidatingDecoder classes check for two things:
* they disallow byte arrays of the wrong length for the 'fixed' type, and
* they disallow illegal enum indices. For the 'fixed' type, we have
* implemented validation in our subclasses. For enum indices, this check
* seems redundant since it is also checked during serialization. Because
* these classes have additional overhead -- they traverse the schema in
* parallel with the data -- and have very little benefit, they're not
* currently used.
*
* -
* GenericData.validate does validation without doing serialization. This
* method returns a boolean rather than throwing an exception, so when it
* fails there is no information returned about what is wrong. Therefore,
* this method isn't currently used here and probably won't be useful in the
* future. Even if it were used, it would only work for the generic binding
* and would need to be rewritten for the JSON binding.
*
*
*/
@SuppressWarnings("deprecation")
class JsonBinding implements JsonAvroBinding {
/**
 * A raw binding is used for packaging and unpackaging the Avro raw data
 * bytes and their associated writer schema.
 */
private final RawAvroBinding rawBinding;

/**
 * The allowed schemas are used to throw SchemaNotAllowedException when an
 * attempt is made to use this binding with a different schema, and are
 * also used to determine the reader schema.
 */
private final Map<String, Schema> allowedSchemas;

/**
 * Creates a JSON binding restricted to the given schemas.
 *
 * @param catalog supplies the raw binding and is asked to check that the
 * given schemas are defined.
 *
 * @param allowedSchemas the schemas this binding may be used with; the
 * map is copied, so later changes made by the caller to its own map are
 * not seen by this binding.
 *
 * @throws UndefinedSchemaException may be thrown by
 * catalog.checkDefinedSchemas.
 */
JsonBinding(AvroCatalogImpl catalog, Map<String, Schema> allowedSchemas)
    throws UndefinedSchemaException {

    this.rawBinding = catalog.getRawBinding();
    this.allowedSchemas = new HashMap<String, Schema>(allowedSchemas);

    /* May throw UndefinedSchemaException. */
    catalog.checkDefinedSchemas(allowedSchemas);
}
/**
 * Straightforward deserialization to JsonRecord using RawBinding,
 * BinaryDecoder and JsonDatumReader (our subclass of GenericDatumReader).
 *
 * @param value the stored value to deserialize.
 *
 * @return a JsonRecord holding the decoded JsonNode and the reader schema.
 *
 * @throws SchemaNotAllowedException may be thrown when the writer schema
 * is checked against the allowed schemas.
 *
 * @throws IllegalArgumentException if decoding the Avro data fails.
 */
@Override
public JsonRecord toObject(Value value)
    throws SchemaNotAllowedException, IllegalArgumentException {

    return deserializeRawRecord(rawBinding.toObject(value));
}

/**
 * Straightforward deserialization to JsonRecord using RawBinding,
 * BinaryDecoder and JsonDatumReader, used by the import utility. The
 * schema needed for deserialization is provided by the export package.
 *
 * @param value the stored value to deserialize.
 *
 * @param schema the schema passed through to
 * RawBinding.toObjectForImport.
 *
 * @return a JsonRecord holding the decoded JsonNode and the reader schema.
 *
 * @throws SchemaNotAllowedException may be thrown when the writer schema
 * is checked against the allowed schemas.
 *
 * @throws IllegalArgumentException if decoding the Avro data fails.
 */
@Override
public JsonRecord toObjectForImport(Value value, Schema schema)
    throws SchemaNotAllowedException, IllegalArgumentException {

    /* The import entry point is declared on the RawBinding class. */
    final RawBinding importBinding = (RawBinding) rawBinding;
    return deserializeRawRecord
        (importBinding.toObjectForImport(value, schema));
}

/**
 * Shared tail of toObject and toObjectForImport: resolves the reader
 * schema for the raw record's writer schema and decodes the raw Avro
 * bytes into a JsonNode.
 *
 * @param raw the raw Avro bytes together with their writer schema.
 *
 * @return a JsonRecord holding the decoded JsonNode and the reader schema.
 *
 * @throws SchemaNotAllowedException may be thrown when the writer schema
 * is checked against the allowed schemas.
 *
 * @throws IllegalArgumentException wrapping any exception raised while
 * reading the data.
 */
private JsonRecord deserializeRawRecord(RawRecord raw)
    throws SchemaNotAllowedException, IllegalArgumentException {

    final Schema writerSchema = raw.getSchema();

    /* May throw SchemaNotAllowedException. */
    final Schema readerSchema =
        AvroCatalogImpl.checkToObjectSchema(writerSchema, allowedSchemas);

    final JsonDatumReader reader =
        new JsonDatumReader(writerSchema, readerSchema);
    final Decoder decoder =
        DecoderFactory.get().binaryDecoder(raw.getRawData(), null);

    final JsonNode node;
    try {
        /* No JsonNode is passed for reuse. */
        node = reader.read(null, decoder);
    } catch (Exception e) {
        throw new IllegalArgumentException
            ("Unable to deserialize JsonNode", e);
    }
    return new JsonRecord(node, readerSchema);
}
/**
 * Serializes a JsonRecord to a Value using RawBinding, BinaryEncoder and
 * JsonDatumWriter (our subclass of GenericDatumWriter).
 *
 * @param object the record whose JsonNode is written using the record's
 * own schema as the writer schema.
 *
 * @return the Value produced by the raw binding from the encoded bytes.
 *
 * @throws SchemaNotAllowedException may be thrown when the record's
 * schema is checked against the allowed schemas.
 *
 * @throws UndefinedSchemaException may be thrown by the raw binding.
 *
 * @throws IllegalArgumentException wrapping any exception raised while
 * writing or flushing the data.
 */
@Override
public Value toValue(JsonRecord object)
    throws SchemaNotAllowedException, UndefinedSchemaException,
           IllegalArgumentException {

    final Schema schema = object.getSchema();

    /* May throw SchemaNotAllowedException. */
    AvroCatalogImpl.checkToValueSchema(schema, allowedSchemas);

    final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
    final JsonDatumWriter datumWriter = new JsonDatumWriter(schema);
    final Encoder binaryEncoder =
        EncoderFactory.get().binaryEncoder(buffer, null);

    try {
        datumWriter.write(object.getJsonNode(), binaryEncoder);
        /* Push any bytes still held by the encoder into the buffer. */
        binaryEncoder.flush();
    } catch (Exception e) {
        throw new IllegalArgumentException
            ("Unable to serialize JsonNode", e);
    }

    /* May throw UndefinedSchemaException. */
    return rawBinding.toValue(new RawRecord(buffer.toByteArray(), schema));
}
/**
 * Subclass of GenericData, a singleton object which is used by
 * GenericDatumReader and GenericDatumWriter in certain cases to access the
 * deserialized representation. By default (in GenericData) the
 * deserialized representation is GenericRecord, of course. We override
 * such methods to use JsonNode instead.
 *
 * Note that some methods that access the deserialized representation are
 * in GenericData, but others are in GenericDatumReader and
 * GenericDatumWriter. There doesn't seem to be a rule about which access
 * methods are in GenericData versus the others.
 */
private static class JsonData extends GenericData {

    /** Singleton. */
    static final JsonData INSTANCE = new JsonData();

    /** Only the singleton is allowed. */
    private JsonData() {
    }

    /**
     * Not called by GenericDatumReader or GenericDatumWriter, but we've
     * implemented it anyway. Might be used by Avro tools.
     *
     * This method was added in Avro 1.6.x.
     *
     * @return a JsonDatumReader that uses the given schema as both writer
     * and reader schema.
     */
    @Override
    public JsonDatumReader createDatumReader(Schema schema) {
        return new JsonDatumReader(schema, schema);
    }

    /**
     * Called by GenericDatumReader to create deserialized form of Avro
     * 'record' type. We map the 'record' type to the Jackson ObjectNode.
     *
     * This method was added in Avro 1.6.x.
     *
     * @return a new, empty ObjectNode; the 'old' and 'schema' params are
     * not used.
     */
    @Override
    public Object newRecord(Object old, Schema schema) {
        return JsonNodeFactory.instance.objectNode();
    }

    /**
     * Called by GenericDatumReader during deserialization of Avro fields
     * for the 'record' type. Converts the value that was read to a
     * JsonNode (via genericToJson) and stores it in the parent ObjectNode
     * under the field name.
     *
     * There is another setField method with an extra state param. We
     * could use the state param if necessary to hold schema information,
     * but so far that hasn't been necessary. The default version of that
     * method simply calls this one. To use the state param in the future,
     * also override getRecordState.
     */
    @Override
    public void setField(Object r, String name, int pos, Object value) {
        final ObjectNode parent = (ObjectNode) r;
        final JsonNode child = genericToJson(value);
        parent.put(name, child);
    }

    /**
     * Called by GenericDatumWriter during serialization of Avro fields for
     * the 'record' type.
     *
     * Called by GenericDatumReader to get field values for reuse during
     * deserialization, but this never happens because we don't reuse
     * JsonNode objects, i.e., we pass null for the 'old' param of
     * JsonDatumReader.read. Called by GenericData in a couple other cases
     * (validate and hashCode) that don't apply here.
     *
     * Like setField this method has a signature with an extra state param.
     * See setField comments.
     *
     * @return null if the field is not present in the JSON object.
     */
    @Override
    public Object getField(Object r, String name, int pos) {
        return ((JsonNode) r).get(name);
    }

    /**
     * Never called, because our reader doesn't call it.
     *
     * This method was added in Avro 1.6.x.
     *
     * @throws UnsupportedOperationException always.
     */
    @Override
    public Object createFixed(Object old, Schema schema) {
        throw new UnsupportedOperationException();
    }

    /**
     * Called by our JsonDatumReader.readFixed during deserialization of
     * the Avro 'fixed' type. Uses the JSON encoding for this type defined
     * by the Avro spec.
     *
     * Called by GenericData in a couple other cases (induce and deepCopy)
     * that don't apply here.
     *
     * This method doesn't necessarily have to be overridden here (it could
     * be implemented by JsonDatumReader), but we have done it this way for
     * consistency with the Avro implementation.
     *
     * This method was added in Avro 1.6.x.
     *
     * @return the result of bytesToString for the first
     * schema.getFixedSize() bytes of the array.
     */
    @Override
    public Object createFixed(Object old, byte[] bytes, Schema schema) {
        return bytesToString(bytes, 0, schema.getFixedSize());
    }

    /**
     * Called by our JsonDatumWriter.write during serialization of the
     * Avro 'union' type. Uses the JSON encoding for this type defined by
     * the Avro spec.
     *
     * Called by GenericData in a few other cases that don't apply here
     * (hashCode, compare and deepCopy).
     *
     * This method doesn't necessarily have to be overridden here (it could
     * be implemented by JsonDatumWriter), but we have done it this way for
     * consistency with the Avro implementation.
     *
     * @param unionSchema the union schema to resolve against.
     *
     * @param datum the JsonNode holding the union value.
     *
     * @return the index within the union of the datum's type.
     *
     * @throws UnresolvedUnionException if the datum is neither a JSON null
     * nor a JSON object with exactly one property, or if the implied type
     * name is not a member of the union.
     */
    @Override
    public int resolveUnion(Schema unionSchema, Object datum) {
        final JsonNode node = (JsonNode) datum;

        /*
         * Compute the implied Avro schema name for the datum. According
         * to the Avro spec, the null type is represented simply as a JSON
         * null. And for all other types a JSON object with a single
         * property is used, where the property name is the Avro type and
         * the property value is the datum.
         */
        final String schemaName;
        if (node.isNull()) {
            schemaName = "null";
        } else {
            if (!(node.isObject())) {
                throw new UnresolvedUnionException(unionSchema, datum);
            }
            final Iterator<String> names = node.getFieldNames();
            /* An empty object names no type. */
            if (!names.hasNext()) {
                throw new UnresolvedUnionException(unionSchema, datum);
            }
            schemaName = names.next();
            /* More than one property is ambiguous. */
            if (names.hasNext()) {
                throw new UnresolvedUnionException(unionSchema, datum);
            }
        }

        /*
         * With the datum schema name we can use the union schema to find
         * the index of the datum's type.
         */
        final Integer index = unionSchema.getIndexNamed(schemaName);
        if (index == null) {
            throw new UnresolvedUnionException(unionSchema, datum);
        }
        return index;
    }
}
/**
* Subclass of GenericDatumWriter for overriding the deserialized
* representation: use JsonNode rather than GenericRecord. See JsonData
* for general information.
*/
static class JsonDatumWriter extends GenericDatumWriter {
/**
* Whether default values are applied during serialization; see the
* two-argument constructor.
*/
private final boolean applyDefaultValues;
/**
* Creates a writer for the given schema with applyDefaultValues set to
* false.
*/
JsonDatumWriter(Schema schema) {
this(schema, false);
}
/**
* @param schema the schema passed to the GenericDatumWriter constructor.
*
* @param applyDefaultValues if true, default field values are used for
* missing fields during serialization, and the first type in a union
* is used. This is used by SchemaChecker to validate default values
* in the schema. It is NOT used for serialization of user data, since
* that would violate the Avro spec.
*/
JsonDatumWriter(Schema schema, boolean applyDefaultValues) {
/* Always use the JsonData singleton. */
super(schema, JsonData.INSTANCE);
this.applyDefaultValues = applyDefaultValues;
}
/**
 * Serializes the Avro 'fixed' type. The datum is a String in the JSON
 * encoding defined for this type by the Avro spec; it is converted to
 * bytes with stringToBytes and then written via the
 * GenericBinding.writeFixed utility method, which performs validation.
 *
 * @throws IOException declared by the overridden method.
 */
@Override
protected void writeFixed(Schema schema, Object datum, Encoder out)
    throws IOException {

    final String text = (String) datum;
    final byte[] fixedBytes =
        stringToBytes(schema, text, true /*isFixedType*/);
    GenericBinding.writeFixed(schema, fixedBytes, out);
}
/**
 * Called as part of serializing the Avro 'array' type.
 *
 * @return the number of elements in the given ArrayNode.
 */
@Override
protected long getArraySize(Object array) {
    final ArrayNode arrayNode = (ArrayNode) array;
    return arrayNode.size();
}
/**
* Called as part of serializing the Avro 'array' type.
*/
@Override
protected Iterator extends Object> getArrayElements(Object array) {
return ((ArrayNode) array).iterator();
}
/**
 * Called as part of serializing the Avro 'map' type.
 *
 * @return the size reported by the ObjectNode that represents the map.
 */
@Override
protected int getMapSize(Object map) {
    final ObjectNode mapNode = (ObjectNode) map;
    return mapNode.size();
}
/**
* Called as part of serializing the Avro 'map' type.
*/
@Override
protected Iterable>
getMapEntries(Object map) {
final ObjectNode objectNode = (ObjectNode) map;
return new Iterable>() {
@Override
public Iterator> iterator() {
/*
* Although getFields returns Iterator> we must implement the Iterator ourselves
* because Java doesn't allow using a subtype of a generic
* type unless the required type is > or extends X>,
* and the Avro method isn't declared that way. This
* wouldn't be necessary if the Avro getMapEntries method
* was declared to return type Iterable>.
*/
final Iterator> iter =
objectNode.getFields();
return new Iterator>() {
@Override
public boolean hasNext() {
return iter.hasNext();
}
@Override
public Map.Entry