Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download or modify it as often as you want.
org.apache.avro.generic.GenericDatumReader Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.generic;
import java.io.IOException;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Map;
import java.util.Collection;
import java.nio.ByteBuffer;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Conversion;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalType;
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.ResolvingDecoder;
import org.apache.avro.util.Utf8;
import org.apache.avro.util.WeakIdentityHashMap;
/** {@link DatumReader} for generic Java objects. */
public class GenericDatumReader implements DatumReader {
private final GenericData data;
private Schema actual;
private Schema expected;
private ResolvingDecoder creatorResolver = null;
private final Thread creator;
public GenericDatumReader() {
this(null, null, GenericData.get());
}
/** Construct where the writer's and reader's schemas are the same. */
public GenericDatumReader(Schema schema) {
this(schema, schema, GenericData.get());
}
/** Construct given writer's and reader's schema. */
public GenericDatumReader(Schema writer, Schema reader) {
this(writer, reader, GenericData.get());
}
public GenericDatumReader(Schema writer, Schema reader, GenericData data) {
this(data);
this.actual = writer;
this.expected = reader;
}
protected GenericDatumReader(GenericData data) {
this.data = data;
this.creator = Thread.currentThread();
}
/** Return the {@link GenericData} implementation. */
public GenericData getData() { return data; }
/** Return the writer's schema. */
public Schema getSchema() { return actual; }
@Override
public void setSchema(Schema writer) {
this.actual = writer;
if (expected == null) {
expected = actual;
}
creatorResolver = null;
}
/** Get the reader's schema. */
public Schema getExpected() { return expected; }
/** Set the reader's schema. */
public void setExpected(Schema reader) {
this.expected = reader;
creatorResolver = null;
}
private static final ThreadLocal>>
RESOLVER_CACHE =
new ThreadLocal>>() {
protected Map> initialValue() {
return new WeakIdentityHashMap>();
}
};
/** Gets a resolving decoder for use by this GenericDatumReader.
* Unstable API.
* Currently uses a thread local cache to prevent constructing the
* resolvers too often, because that is very expensive.
*/
protected final ResolvingDecoder getResolver(Schema actual, Schema expected)
throws IOException {
Thread currThread = Thread.currentThread();
ResolvingDecoder resolver;
if (currThread == creator && creatorResolver != null) {
return creatorResolver;
}
Map cache = RESOLVER_CACHE.get().get(actual);
if (cache == null) {
cache = new WeakIdentityHashMap();
RESOLVER_CACHE.get().put(actual, cache);
}
resolver = cache.get(expected);
if (resolver == null) {
resolver = DecoderFactory.get().resolvingDecoder(
Schema.applyAliases(actual, expected), expected, null);
cache.put(expected, resolver);
}
if (currThread == creator){
creatorResolver = resolver;
}
return resolver;
}
@Override
@SuppressWarnings("unchecked")
public D read(D reuse, Decoder in) throws IOException {
ResolvingDecoder resolver = getResolver(actual, expected);
resolver.configure(in);
D result = (D) read(reuse, expected, resolver);
resolver.drain();
return result;
}
/** Called to read data.*/
protected Object read(Object old, Schema expected,
ResolvingDecoder in) throws IOException {
Object datum = readWithoutConversion(old, expected, in);
LogicalType logicalType = expected.getLogicalType();
if (logicalType != null) {
Conversion> conversion = getData().getConversionFor(logicalType);
if (conversion != null) {
return convert(datum, expected, logicalType, conversion);
}
}
return datum;
}
protected Object readWithConversion(Object old, Schema expected,
LogicalType logicalType,
Conversion> conversion,
ResolvingDecoder in) throws IOException {
return convert(readWithoutConversion(old, expected, in),
expected, logicalType, conversion);
}
protected Object readWithoutConversion(Object old, Schema expected,
ResolvingDecoder in) throws IOException {
switch (expected.getType()) {
case RECORD: return readRecord(old, expected, in);
case ENUM: return readEnum(expected, in);
case ARRAY: return readArray(old, expected, in);
case MAP: return readMap(old, expected, in);
case UNION: return read(old, expected.getTypes().get(in.readIndex()), in);
case FIXED: return readFixed(old, expected, in);
case STRING: return readString(old, expected, in);
case BYTES: return readBytes(old, expected, in);
case INT: return readInt(old, expected, in);
case LONG: return in.readLong();
case FLOAT: return in.readFloat();
case DOUBLE: return in.readDouble();
case BOOLEAN: return in.readBoolean();
case NULL: in.readNull(); return null;
default: throw new AvroRuntimeException("Unknown type: " + expected);
}
}
/**
* Convert a underlying representation of a logical type (such as a
* ByteBuffer) to a higher level object (such as a BigDecimal).
* @throws IllegalArgumentException if a null schema or logicalType is passed
* in while datum and conversion are not null. Please be noticed that
* the exception type has changed. With version 1.8.0 and earlier, in above
* circumstance, the exception thrown out depends on the implementation
* of conversion (most likely a NullPointerException). Now, an
* IllegalArgumentException will be thrown out instead.
*/
protected Object convert(Object datum, Schema schema, LogicalType type,
Conversion> conversion) {
return Conversions.convertToLogicalType(datum, schema, type, conversion);
}
/** Called to read a record instance. May be overridden for alternate record
* representations.*/
protected Object readRecord(Object old, Schema expected,
ResolvingDecoder in) throws IOException {
Object r = data.newRecord(old, expected);
Object state = data.getRecordState(r, expected);
for (Field f : in.readFieldOrder()) {
int pos = f.pos();
String name = f.name();
Object oldDatum = null;
if (old!=null) {
oldDatum = data.getField(r, name, pos, state);
}
readField(r, f, oldDatum, in, state);
}
return r;
}
/** Called to read a single field of a record. May be overridden for more
* efficient or alternate implementations.*/
protected void readField(Object r, Field f, Object oldDatum,
ResolvingDecoder in, Object state) throws IOException {
data.setField(r, f.name(), f.pos(), read(oldDatum, f.schema(), in), state);
}
/** Called to read an enum value. May be overridden for alternate enum
* representations. By default, returns a GenericEnumSymbol. */
protected Object readEnum(Schema expected, Decoder in) throws IOException {
return createEnum(expected.getEnumSymbols().get(in.readEnum()), expected);
}
/** Called to create an enum value. May be overridden for alternate enum
* representations. By default, returns a GenericEnumSymbol. */
protected Object createEnum(String symbol, Schema schema) {
return data.createEnum(symbol, schema);
}
/** Called to read an array instance. May be overridden for alternate array
* representations.*/
protected Object readArray(Object old, Schema expected,
ResolvingDecoder in) throws IOException {
Schema expectedType = expected.getElementType();
long l = in.readArrayStart();
long base = 0;
if (l > 0) {
LogicalType logicalType = expectedType.getLogicalType();
Conversion> conversion = getData().getConversionFor(logicalType);
Object array = newArray(old, (int) l, expected);
do {
if (logicalType != null && conversion != null) {
for (long i = 0; i < l; i++) {
addToArray(array, base + i, readWithConversion(
peekArray(array), expectedType, logicalType, conversion, in));
}
} else {
for (long i = 0; i < l; i++) {
addToArray(array, base + i, readWithoutConversion(
peekArray(array), expectedType, in));
}
}
base += l;
} while ((l = in.arrayNext()) > 0);
return array;
} else {
return newArray(old, 0, expected);
}
}
/** Called by the default implementation of {@link #readArray} to retrieve a
* value from a reused instance. The default implementation is for {@link
* GenericArray}.*/
@SuppressWarnings("unchecked")
protected Object peekArray(Object array) {
return (array instanceof GenericArray)
? ((GenericArray)array).peek()
: null;
}
/** Called by the default implementation of {@link #readArray} to add a
* value. The default implementation is for {@link Collection}.*/
@SuppressWarnings("unchecked")
protected void addToArray(Object array, long pos, Object e) {
((Collection) array).add(e);
}
/** Called to read a map instance. May be overridden for alternate map
* representations.*/
protected Object readMap(Object old, Schema expected,
ResolvingDecoder in) throws IOException {
Schema eValue = expected.getValueType();
long l = in.readMapStart();
LogicalType logicalType = eValue.getLogicalType();
Conversion> conversion = getData().getConversionFor(logicalType);
Object map = newMap(old, (int) l);
if (l > 0) {
do {
if (logicalType != null && conversion != null) {
for (int i = 0; i < l; i++) {
addToMap(map, readMapKey(null, expected, in),
readWithConversion(null, eValue, logicalType, conversion, in));
}
} else {
for (int i = 0; i < l; i++) {
addToMap(map, readMapKey(null, expected, in),
readWithoutConversion(null, eValue, in));
}
}
} while ((l = in.mapNext()) > 0);
}
return map;
}
/** Called by the default implementation of {@link #readMap} to read a
* key value. The default implementation returns delegates to
* {@link #readString(Object, org.apache.avro.io.Decoder)}.*/
protected Object readMapKey(Object old, Schema expected, Decoder in)
throws IOException{
return readString(old, expected, in);
}
/** Called by the default implementation of {@link #readMap} to add a
* key/value pair. The default implementation is for {@link Map}.*/
@SuppressWarnings("unchecked")
protected void addToMap(Object map, Object key, Object value) {
((Map) map).put(key, value);
}
/** Called to read a fixed value. May be overridden for alternate fixed
* representations. By default, returns {@link GenericFixed}. */
protected Object readFixed(Object old, Schema expected, Decoder in)
throws IOException {
GenericFixed fixed = (GenericFixed)data.createFixed(old, expected);
in.readFixed(fixed.bytes(), 0, expected.getFixedSize());
return fixed;
}
/**
* Called to create an fixed value. May be overridden for alternate fixed
* representations. By default, returns {@link GenericFixed}.
* @deprecated As of Avro 1.6.0 this method has been moved to
* {@link GenericData#createFixed(Object, Schema)}
*/
@Deprecated
protected Object createFixed(Object old, Schema schema) {
return data.createFixed(old, schema);
}
/**
* Called to create an fixed value. May be overridden for alternate fixed
* representations. By default, returns {@link GenericFixed}.
* @deprecated As of Avro 1.6.0 this method has been moved to
* {@link GenericData#createFixed(Object, byte[], Schema)}
*/
@Deprecated
protected Object createFixed(Object old, byte[] bytes, Schema schema) {
return data.createFixed(old, bytes, schema);
}
/**
* Called to create new record instances. Subclasses may override to use a
* different record implementation. The returned instance must conform to the
* schema provided. If the old object contains fields not present in the
* schema, they should either be removed from the old object, or it should
* create a new instance that conforms to the schema. By default, this returns
* a {@link GenericData.Record}.
* @deprecated As of Avro 1.6.0 this method has been moved to
* {@link GenericData#newRecord(Object, Schema)}
*/
@Deprecated
protected Object newRecord(Object old, Schema schema) {
return data.newRecord(old, schema);
}
/** Called to create new array instances. Subclasses may override to use a
* different array implementation. By default, this returns a {@link
* GenericData.Array}.*/
@SuppressWarnings("unchecked")
protected Object newArray(Object old, int size, Schema schema) {
if (old instanceof Collection) {
((Collection) old).clear();
return old;
} else return new GenericData.Array(size, schema);
}
/** Called to create new array instances. Subclasses may override to use a
* different map implementation. By default, this returns a {@link
* HashMap}.*/
@SuppressWarnings("unchecked")
protected Object newMap(Object old, int size) {
if (old instanceof Map) {
((Map) old).clear();
return old;
} else return new HashMap(size);
}
/** Called to read strings. Subclasses may override to use a different
* string representation. By default, this calls {@link
* #readString(Object,Decoder)}.*/
protected Object readString(Object old, Schema expected,
Decoder in) throws IOException {
Class stringClass = getStringClass(expected);
if (stringClass == String.class)
return in.readString();
if (stringClass == CharSequence.class)
return readString(old, in);
return newInstanceFromString(stringClass, in.readString());
}
/** Called to read strings. Subclasses may override to use a different
* string representation. By default, this calls {@link
* Decoder#readString(Utf8)}.*/
protected Object readString(Object old, Decoder in) throws IOException {
return in.readString(old instanceof Utf8 ? (Utf8)old : null);
}
/** Called to create a string from a default value. Subclasses may override
* to use a different string representation. By default, this calls {@link
* Utf8#Utf8(String)}.*/
protected Object createString(String value) { return new Utf8(value); }
/** Determines the class to used to represent a string Schema. By default
* uses {@link GenericData#STRING_PROP} to determine whether {@link Utf8} or
* {@link String} is used. Subclasses may override for alternate
* representations.
*/
protected Class findStringClass(Schema schema) {
String name = schema.getProp(GenericData.STRING_PROP);
if (name == null) return CharSequence.class;
switch (GenericData.StringType.valueOf(name)) {
case String:
return String.class;
default:
return CharSequence.class;
}
}
private Map stringClassCache =
new IdentityHashMap();
private Class getStringClass(Schema s) {
Class c = stringClassCache.get(s);
if (c == null) {
c = findStringClass(s);
stringClassCache.put(s, c);
}
return c;
}
private final Map stringCtorCache =
new HashMap();
@SuppressWarnings("unchecked")
protected Object newInstanceFromString(Class c, String s) {
try {
Constructor ctor = stringCtorCache.get(c);
if (ctor == null) {
ctor = c.getDeclaredConstructor(String.class);
ctor.setAccessible(true);
stringCtorCache.put(c, ctor);
}
return ctor.newInstance(s);
} catch (NoSuchMethodException e) {
throw new AvroRuntimeException(e);
} catch (InstantiationException e) {
throw new AvroRuntimeException(e);
} catch (IllegalAccessException e) {
throw new AvroRuntimeException(e);
} catch (InvocationTargetException e) {
throw new AvroRuntimeException(e);
}
}
/** Called to read byte arrays. Subclasses may override to use a different
* byte array representation. By default, this calls {@link
* Decoder#readBytes(ByteBuffer)}.*/
protected Object readBytes(Object old, Schema s, Decoder in)
throws IOException {
return readBytes(old, in);
}
/** Called to read byte arrays. Subclasses may override to use a different
* byte array representation. By default, this calls {@link
* Decoder#readBytes(ByteBuffer)}.*/
protected Object readBytes(Object old, Decoder in) throws IOException {
return in.readBytes(old instanceof ByteBuffer ? (ByteBuffer) old : null);
}
/** Called to read integers. Subclasses may override to use a different
* integer representation. By default, this calls {@link
* Decoder#readInt()}.*/
protected Object readInt(Object old, Schema expected, Decoder in)
throws IOException {
return in.readInt();
}
/** Called to create byte arrays from default values. Subclasses may
* override to use a different byte array representation. By default, this
* calls {@link ByteBuffer#wrap(byte[])}.*/
protected Object createBytes(byte[] value) { return ByteBuffer.wrap(value); }
/** Skip an instance of a schema. */
public static void skip(Schema schema, Decoder in) throws IOException {
switch (schema.getType()) {
case RECORD:
for (Field field : schema.getFields())
skip(field.schema(), in);
break;
case ENUM:
in.readInt();
break;
case ARRAY:
Schema elementType = schema.getElementType();
for (long l = in.skipArray(); l > 0; l = in.skipArray()) {
for (long i = 0; i < l; i++) {
skip(elementType, in);
}
}
break;
case MAP:
Schema value = schema.getValueType();
for (long l = in.skipMap(); l > 0; l = in.skipMap()) {
for (long i = 0; i < l; i++) {
in.skipString();
skip(value, in);
}
}
break;
case UNION:
skip(schema.getTypes().get((int)in.readIndex()), in);
break;
case FIXED:
in.skipFixed(schema.getFixedSize());
break;
case STRING:
in.skipString();
break;
case BYTES:
in.skipBytes();
break;
case INT: in.readInt(); break;
case LONG: in.readLong(); break;
case FLOAT: in.readFloat(); break;
case DOUBLE: in.readDouble(); break;
case BOOLEAN: in.readBoolean(); break;
case NULL: break;
default: throw new RuntimeException("Unknown type: "+schema);
}
}
}