com.google.cloud.dataflow.sdk.coders.StringUtf8Coder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of google-cloud-dataflow-java-sdk-all Show documentation
Show all versions of google-cloud-dataflow-java-sdk-all Show documentation
The Google Cloud Dataflow Java SDK provides a simple, Java-based
interface for processing data of virtually any size using Google Cloud
resources. This artifact includes the entire Dataflow Java SDK.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.coders;
import com.google.cloud.dataflow.sdk.util.ExposedByteArrayOutputStream;
import com.google.cloud.dataflow.sdk.util.StreamUtils;
import com.google.cloud.dataflow.sdk.util.VarInt;
import com.google.common.base.Utf8;
import com.google.common.io.ByteStreams;
import com.google.common.io.CountingOutputStream;
import com.fasterxml.jackson.annotation.JsonCreator;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UTFDataFormatException;
import java.nio.charset.StandardCharsets;
/**
 * A {@link Coder} that encodes {@link String Strings} in UTF-8 encoding.
 *
 * <p>When the value occupies the whole stream, only the raw UTF-8 bytes are
 * written. In a nested context the bytes are prefixed with their length,
 * written as a variable-length integer via {@link VarInt}.
 *
 * <p>{@code null} strings are rejected with a {@link CoderException}.
 */
public class StringUtf8Coder extends AtomicCoder<String> {

  /**
   * Returns the shared {@code StringUtf8Coder} instance.
   *
   * @return the singleton coder
   */
  @JsonCreator
  public static StringUtf8Coder of() {
    return INSTANCE;
  }

  /////////////////////////////////////////////////////////////////////////////

  private static final StringUtf8Coder INSTANCE = new StringUtf8Coder();

  /**
   * Writes {@code value} as a varint byte count followed by its UTF-8 bytes.
   *
   * @param value the string to write
   * @param dos the stream to write to
   * @throws IOException if the underlying stream fails
   */
  private static void writeString(String value, DataOutputStream dos)
      throws IOException {
    byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
    VarInt.encode(bytes.length, dos);
    dos.write(bytes);
  }

  /**
   * Reads a string written by {@link #writeString}: a varint byte count
   * followed by that many UTF-8 bytes.
   *
   * @param dis the stream to read from
   * @return the decoded string
   * @throws CoderException if the decoded length is negative
   * @throws IOException if the stream ends early or otherwise fails
   */
  private static String readString(DataInputStream dis) throws IOException {
    int len = VarInt.decodeInt(dis);
    // Reject a corrupt prefix explicitly rather than letting the array
    // allocation fail with NegativeArraySizeException.
    if (len < 0) {
      throw new CoderException("Invalid encoded string length: " + len);
    }
    byte[] bytes = new byte[len];
    dis.readFully(bytes);
    return new String(bytes, StandardCharsets.UTF_8);
  }

  /** Not instantiable from outside; obtain the coder through {@link #of()}. */
  private StringUtf8Coder() {}

  /**
   * Encodes {@code value} onto {@code outStream}.
   *
   * @param value the string to encode; must not be {@code null}
   * @param outStream the destination stream
   * @param context whether the value occupies the whole stream or is nested
   * @throws CoderException if {@code value} is {@code null}
   * @throws IOException if writing to {@code outStream} fails
   */
  @Override
  public void encode(String value, OutputStream outStream, Context context)
      throws IOException {
    if (value == null) {
      throw new CoderException("cannot encode a null String");
    }
    if (context.isWholeStream) {
      byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
      if (outStream instanceof ExposedByteArrayOutputStream) {
        // The array is freshly allocated and not used again here, so it can be
        // handed over to the stream (presumably avoiding a copy).
        ((ExposedByteArrayOutputStream) outStream).writeAndOwn(bytes);
      } else {
        outStream.write(bytes);
      }
    } else {
      // The wrapper is deliberately not closed: closing it would close the
      // caller's stream.
      writeString(value, new DataOutputStream(outStream));
    }
  }

  /**
   * Decodes a string from {@code inStream}.
   *
   * @param inStream the source stream
   * @param context whether the value occupies the whole stream or is nested
   * @return the decoded string
   * @throws CoderException if, in a nested context, the length prefix is
   *     negative or the stream ends before the announced bytes are read
   * @throws IOException if reading from {@code inStream} fails
   */
  @Override
  public String decode(InputStream inStream, Context context)
      throws IOException {
    if (context.isWholeStream) {
      byte[] bytes = StreamUtils.getBytes(inStream);
      return new String(bytes, StandardCharsets.UTF_8);
    } else {
      try {
        return readString(new DataInputStream(inStream));
      } catch (EOFException | UTFDataFormatException exn) {
        // These exceptions correspond to decoding problems, so change
        // what kind of exception they're branded as.
        throw new CoderException(exn);
      }
    }
  }

  /**
   * {@inheritDoc}
   *
   * @return {@code true}. This coder is injective.
   */
  @Override
  public boolean consistentWithEquals() {
    return true;
  }

  /**
   * {@inheritDoc}
   *
   * @return the byte size of the UTF-8 encoding of a string or, in a nested context,
   *     the byte size of the encoding plus the encoded length prefix.
   * @throws CoderException if {@code value} is {@code null}
   */
  @Override
  protected long getEncodedElementByteSize(String value, Context context)
      throws Exception {
    if (value == null) {
      throw new CoderException("cannot encode a null String");
    }
    if (context.isWholeStream) {
      return Utf8.encodedLength(value);
    } else {
      // Run the real nested encoding into a counting sink that discards the
      // bytes, so the reported size always matches what encode() writes.
      CountingOutputStream countingStream =
          new CountingOutputStream(ByteStreams.nullOutputStream());
      DataOutputStream stream = new DataOutputStream(countingStream);
      writeString(value, stream);
      return countingStream.getCount();
    }
  }
}