// com.google.cloud.dataflow.sdk.util.CoderUtils (Maven / Gradle / Ivy)
// From artifact google-cloud-dataflow-java-sdk-all.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.util;
import static com.google.cloud.dataflow.sdk.util.Structs.addList;
import com.google.api.client.util.Base64;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.CoderException;
import com.google.cloud.dataflow.sdk.coders.IterableCoder;
import com.google.cloud.dataflow.sdk.coders.KvCoder;
import com.google.cloud.dataflow.sdk.coders.KvCoderBase;
import com.google.cloud.dataflow.sdk.coders.MapCoder;
import com.google.cloud.dataflow.sdk.coders.MapCoderBase;
import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
import com.google.common.base.Throwables;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.annotation.JsonTypeInfo.As;
import com.fasterxml.jackson.annotation.JsonTypeInfo.Id;
import com.fasterxml.jackson.databind.DatabindContext;
import com.fasterxml.jackson.databind.JavaType;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.annotation.JsonTypeIdResolver;
import com.fasterxml.jackson.databind.jsontype.impl.TypeIdResolverBase;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.databind.type.TypeFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.ref.SoftReference;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.TypeVariable;
/**
* Utilities for working with Coders.
*/
public final class CoderUtils {
private CoderUtils() {} // Static utility holder: the private constructor prevents instantiation.
/**
* Coder class-name alias for a key-value type.
*
* The coder type resolver in this file maps this ID to {@code KvCoder}.
*/
public static final String KIND_PAIR = "kind:pair";
/**
* Coder class-name alias for a stream type.
*
* The coder type resolver in this file maps this ID to {@code IterableCoder}.
*/
public static final String KIND_STREAM = "kind:stream";
private static ThreadLocal> threadLocalOutputStream
= new ThreadLocal<>();
/**
* If true, a call to {@code encodeToByteArray} is already on the call stack.
*/
private static ThreadLocal threadLocalOutputStreamInUse = new ThreadLocal() {
@Override
protected Boolean initialValue() {
return false;
}
};
/**
 * Encodes the given value using the specified Coder, and returns
 * the encoded bytes.
 *
 * <p>Equivalent to calling the three-argument overload with
 * {@code Coder.Context.OUTER}. That overload detects a nested call on the same
 * thread and encodes it into a freshly allocated buffer.
 *
 * @param coder the coder used to encode {@code value}
 * @param value the value to encode
 * @return the encoded bytes
 * @throws CoderException if the coder fails to encode the value
 */
public static <T> byte[] encodeToByteArray(Coder<T> coder, T value) throws CoderException {
  return encodeToByteArray(coder, value, Coder.Context.OUTER);
}
/**
 * Encodes the given value using the specified Coder in the given context, and
 * returns the encoded bytes.
 *
 * <p>The outermost call on a thread encodes into that thread's reusable buffer.
 * If a call is already in progress on this thread (for example because the coder
 * itself calls back into this method), the nested call encodes into a freshly
 * allocated buffer instead, so the two calls never share a stream.
 *
 * @param coder the coder used to encode {@code value}
 * @param value the value to encode
 * @param context the coding context passed to the coder
 * @return a copy of the encoded bytes
 * @throws CoderException if the coder fails to encode the value
 */
public static <T> byte[] encodeToByteArray(Coder<T> coder, T value, Coder.Context context)
    throws CoderException {
  if (threadLocalOutputStreamInUse.get()) {
    // encodeToByteArray() is called recursively and the thread local stream is in use,
    // allocating a new one.
    ByteArrayOutputStream stream = new ExposedByteArrayOutputStream();
    encodeToSafeStream(coder, value, stream, context);
    return stream.toByteArray();
  } else {
    threadLocalOutputStreamInUse.set(true);
    try {
      ByteArrayOutputStream stream = getThreadLocalOutputStream();
      encodeToSafeStream(coder, value, stream, context);
      return stream.toByteArray();
    } finally {
      // Always release the shared buffer, even when encoding throws.
      threadLocalOutputStreamInUse.set(false);
    }
  }
}
/**
 * Encodes {@code value} to the given {@code stream}, which should be a stream that never throws
 * {@code IOException}, such as {@code ByteArrayOutputStream} or
 * {@link ExposedByteArrayOutputStream}.
 *
 * <p>The stream is wrapped in an {@code UnownedOutputStream} before being handed to the coder.
 *
 * @throws CoderException if the coder reports an encoding failure
 * @throws IllegalArgumentException if any other {@code IOException} is raised, which a
 *     "safe" stream is expected never to do
 */
private static <T> void encodeToSafeStream(
    Coder<T> coder, T value, OutputStream stream, Coder.Context context) throws CoderException {
  try {
    coder.encode(value, new UnownedOutputStream(stream), context);
  } catch (IOException exn) {
    // Rethrows exn unchanged when it is a CoderException; otherwise falls through.
    Throwables.propagateIfPossible(exn, CoderException.class);
    throw new IllegalArgumentException(
        "Forbidden IOException when writing to OutputStream", exn);
  }
}
/**
 * Decodes the given bytes using the specified Coder, and returns
 * the resulting decoded value.
 *
 * <p>Equivalent to calling the three-argument overload with
 * {@code Coder.Context.OUTER}.
 *
 * @param coder the coder used to decode {@code encodedValue}
 * @param encodedValue the bytes to decode
 * @return the decoded value
 * @throws CoderException if decoding fails
 */
public static <T> T decodeFromByteArray(Coder<T> coder, byte[] encodedValue)
    throws CoderException {
  return decodeFromByteArray(coder, encodedValue, Coder.Context.OUTER);
}
/**
 * Decodes the given bytes using the specified Coder in the given context, and
 * returns the resulting decoded value.
 *
 * <p>The whole array must be consumed by the coder: bytes left unread after
 * decoding are treated as an error.
 *
 * @param coder the coder used to decode {@code encodedValue}
 * @param encodedValue the bytes to decode
 * @param context the coding context passed to the coder
 * @return the decoded value
 * @throws CoderException if the coder fails, or if unread bytes remain after decoding
 */
public static <T> T decodeFromByteArray(
    Coder<T> coder, byte[] encodedValue, Coder.Context context) throws CoderException {
  try (ExposedByteArrayInputStream stream = new ExposedByteArrayInputStream(encodedValue)) {
    T result = decodeFromSafeStream(coder, stream, context);
    if (stream.available() != 0) {
      throw new CoderException(
          stream.available() + " unexpected extra bytes after decoding " + result);
    }
    return result;
  }
}
/**
 * Decodes a value from the given {@code stream}, which should be a stream that never throws
 * {@code IOException}, such as {@code ByteArrayInputStream} or
 * {@link ExposedByteArrayInputStream}.
 *
 * <p>The stream is wrapped in an {@code UnownedInputStream} before being handed to the coder.
 *
 * @throws CoderException if the coder reports a decoding failure
 * @throws IllegalArgumentException if any other {@code IOException} is raised, which a
 *     "safe" stream is expected never to do
 */
private static <T> T decodeFromSafeStream(
    Coder<T> coder, InputStream stream, Coder.Context context) throws CoderException {
  try {
    return coder.decode(new UnownedInputStream(stream), context);
  } catch (IOException exn) {
    // Rethrows exn unchanged when it is a CoderException; otherwise falls through.
    Throwables.propagateIfPossible(exn, CoderException.class);
    throw new IllegalArgumentException(
        "Forbidden IOException when reading from InputStream", exn);
  }
}
/**
 * Returns the calling thread's reusable output buffer, reset so that it is empty.
 *
 * <p>The buffer is created on first use, and re-created if the soft reference
 * holding it has been cleared.
 */
private static ByteArrayOutputStream getThreadLocalOutputStream() {
  SoftReference<ExposedByteArrayOutputStream> refStream = threadLocalOutputStream.get();
  ExposedByteArrayOutputStream stream = refStream == null ? null : refStream.get();
  if (stream == null) {
    stream = new ExposedByteArrayOutputStream();
    threadLocalOutputStream.set(new SoftReference<>(stream));
  }
  // Discard whatever a previous encode left in the buffer.
  stream.reset();
  return stream;
}
/**
 * Clones the given value by encoding and then decoding it with the specified Coder.
 *
 * <p>Both steps use {@code Coder.Context.OUTER}. Encoding goes through
 * {@code encodeToByteArray}, which handles a nested call on the same thread by
 * using a separate buffer.
 *
 * @param coder the coder used for the encode/decode round trip
 * @param value the value to copy
 * @return the value produced by decoding the encoded form of {@code value}
 * @throws CoderException if encoding or decoding fails
 */
public static <T> T clone(Coder<T> coder, T value) throws CoderException {
  return decodeFromByteArray(coder, encodeToByteArray(coder, value, Coder.Context.OUTER));
}
/**
 * Encodes the given value using the specified Coder, and returns the Base64 encoding of the
 * encoded bytes.
 *
 * <p>The bytes are rendered with the URL-safe Base64 encoder.
 *
 * @param coder the coder used to encode {@code value}
 * @param value the value to encode
 * @return the URL-safe Base64 string for the encoded bytes
 * @throws CoderException if there are errors during encoding.
 */
public static <T> String encodeToBase64(Coder<T> coder, T value)
    throws CoderException {
  byte[] rawValue = encodeToByteArray(coder, value);
  return Base64.encodeBase64URLSafeString(rawValue);
}
/**
 * Parses a value from a base64-encoded String using the given coder.
 *
 * <p>The string is Base64-decoded and the resulting bytes are decoded with
 * {@code Coder.Context.OUTER}. Unlike {@code decodeFromByteArray}, this method does
 * not check whether bytes remain unread after the coder returns.
 *
 * @param coder the coder used to decode the bytes
 * @param encodedValue the Base64 text to parse
 * @return the decoded value
 * @throws CoderException if the coder fails to decode the bytes
 */
public static <T> T decodeFromBase64(Coder<T> coder, String encodedValue) throws CoderException {
  return decodeFromSafeStream(
      coder, new ByteArrayInputStream(Base64.decodeBase64(encodedValue)), Coder.Context.OUTER);
}
/**
 * Returns a descriptor for the element type {@code T} handled by a coder type.
 *
 * <p>The given descriptor is viewed as its {@code Coder} supertype, and the first
 * actual type argument of that parameterized type is returned.
 */
@SuppressWarnings({"rawtypes", "unchecked"})
public static TypeDescriptor getCodedType(TypeDescriptor coderDescriptor) {
  TypeDescriptor coderSupertype = coderDescriptor.getSupertype(Coder.class);
  ParameterizedType parameterizedCoder = (ParameterizedType) coderSupertype.getType();
  return TypeDescriptor.of(parameterizedCoder.getActualTypeArguments()[0]);
}
/**
 * Builds a {@code CloudObject} encoding for the given class name.
 *
 * <p>When at least one component spec is supplied, the specs are attached as a
 * list under {@code PropertyNames.COMPONENT_ENCODINGS}; otherwise the encoding
 * carries only the class name.
 */
public static CloudObject makeCloudEncoding(
    String type,
    CloudObject... componentSpecs) {
  CloudObject result = CloudObject.forClassName(type);
  boolean hasComponents = componentSpecs.length > 0;
  if (hasComponents) {
    addList(result, PropertyNames.COMPONENT_ENCODINGS, componentSpecs);
  }
  return result;
}
/**
 * A {@link com.fasterxml.jackson.databind.Module} that adds the type
 * resolver needed for Coder definitions created by the Dataflow service.
 */
static final class Jackson2Module extends SimpleModule {
  /**
   * The Coder custom type resolver.
   *
   * <p>This resolver resolves coders. If the Coder ID is a particular
   * well-known identifier supplied by the Dataflow service, it's replaced
   * with the corresponding class. All other Coder instances are resolved
   * by class name, using the package com.google.cloud.dataflow.sdk.coders
   * if there are no "."s in the ID.
   */
  private static final class Resolver extends TypeIdResolverBase {
    @SuppressWarnings("unused") // Used via @JsonTypeIdResolver annotation on Mixin
    public Resolver() {
      super(TypeFactory.defaultInstance().constructType(Coder.class),
          TypeFactory.defaultInstance());
    }

    /** Legacy entry point; delegates to the context-aware overload with a null context. */
    @Deprecated
    @Override
    public JavaType typeFromId(String id) {
      return typeFromId(null, id);
    }

    /**
     * Resolves a coder ID to the Jackson type to deserialize into.
     *
     * <p>{@code KvCoder} and {@code MapCoder} are swapped for their {@code *Base}
     * classes, and every type parameter of the resulting class is bound to
     * Jackson's "unknown" type.
     */
    @Override
    public JavaType typeFromId(DatabindContext context, String id) {
      Class<?> clazz = getClassForId(id);
      if (clazz == KvCoder.class) {
        clazz = KvCoderBase.class;
      }
      if (clazz == MapCoder.class) {
        clazz = MapCoderBase.class;
      }
      int typeParameterCount = clazz.getTypeParameters().length;
      JavaType[] types = new JavaType[typeParameterCount];
      for (int i = 0; i < typeParameterCount; i++) {
        types[i] = TypeFactory.unknownType();
      }
      return _typeFactory.constructSimpleType(clazz, types);
    }

    /**
     * Maps a coder ID to a class: a dotted ID is loaded as a fully-qualified class
     * name, the well-known aliases map to their coder classes, and any other ID is
     * looked up in the package of {@code Coder}.
     *
     * @throws RuntimeException if no class can be found for the ID
     */
    private Class<?> getClassForId(String id) {
      try {
        if (id.contains(".")) {
          // NOTE(review): this loads whatever class the ID names; presumably IDs
          // only ever come from the Dataflow service -- confirm they are trusted.
          return Class.forName(id);
        }
        if (id.equals(KIND_STREAM)) {
          return IterableCoder.class;
        } else if (id.equals(KIND_PAIR)) {
          return KvCoder.class;
        }
        // Otherwise, see if the ID is the name of a class in
        // com.google.cloud.dataflow.sdk.coders. We do this via creating
        // the class object so that class loaders have a chance to get
        // involved -- and since we need the class object anyway.
        return Class.forName(Coder.class.getPackage().getName() + "." + id);
      } catch (ClassNotFoundException e) {
        throw new RuntimeException("Unable to convert coder ID " + id + " to class", e);
      }
    }

    /** Uses the name of the suggested class as the type ID. */
    @Override
    public String idFromValueAndType(Object o, Class<?> clazz) {
      return clazz.getName();
    }

    /** Uses the name of the value's runtime class as the type ID. */
    @Override
    public String idFromValue(Object o) {
      return o.getClass().getName();
    }

    @Override
    public JsonTypeInfo.Id getMechanism() {
      return JsonTypeInfo.Id.CUSTOM;
    }
  }

  /**
   * The mixin class defining how Coders are handled by the deserialization
   * {@link ObjectMapper}.
   *
   * <p>This is done via a mixin so that this resolver is only used
   * during deserialization requested by the Dataflow SDK.
   */
  @JsonTypeIdResolver(Resolver.class)
  @JsonTypeInfo(use = Id.CUSTOM, include = As.PROPERTY, property = PropertyNames.OBJECT_TYPE_NAME)
  private static final class Mixin {}

  /** Registers {@code Mixin} as the mix-in annotation source for {@code Coder}. */
  public Jackson2Module() {
    super("DataflowCoders");
    setMixInAnnotation(Coder.class, Mixin.class);
  }
}
}