Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
co.cask.wrangler.utils.StructuredRecordJsonConverter Maven / Gradle / Ivy
/*
* Copyright © 2017 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.wrangler.utils;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.JsonToken;
import com.google.gson.stream.JsonWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.lang.reflect.Array;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;
import javax.annotation.Nullable;
/**
* Utility class for converting {@link StructuredRecord} to and from JSON, per the JSON encoding
* specification: http://avro.apache.org/docs/current/spec.html#json_encoding
*/
public final class StructuredRecordJsonConverter {
// Known Java type to schema type mapping
// Doesn't have map and array as those need to use instanceof to check
private static final Map, Schema.Type> TYPE_TO_SCHEMA = new IdentityHashMap<>(
ImmutableMap., Schema.Type>builder()
.put(Boolean.class, Schema.Type.BOOLEAN)
.put(Byte.class, Schema.Type.INT)
.put(Short.class, Schema.Type.INT)
.put(Integer.class, Schema.Type.INT)
.put(Long.class, Schema.Type.LONG)
.put(Float.class, Schema.Type.FLOAT)
.put(Double.class, Schema.Type.DOUBLE)
.put(String.class, Schema.Type.STRING)
.put(ByteBuffer.class, Schema.Type.BYTES)
.put(byte[].class, Schema.Type.BYTES)
.put(StructuredRecord.class, Schema.Type.RECORD)
.build()
);
// Maps a schema type to the JSON token that starts a value of that type.
// NOTE(review): no method visible in this file reads this map - confirm it is still needed.
private static final EnumMap<Schema.Type, JsonToken> SCHEMA_TO_JSON_TYPE = new EnumMap<>(
  ImmutableMap.<Schema.Type, JsonToken>builder()
    .put(Schema.Type.NULL, JsonToken.NULL)
    .put(Schema.Type.BOOLEAN, JsonToken.BOOLEAN)
    .put(Schema.Type.INT, JsonToken.NUMBER)
    .put(Schema.Type.LONG, JsonToken.NUMBER)
    .put(Schema.Type.FLOAT, JsonToken.NUMBER)
    .put(Schema.Type.DOUBLE, JsonToken.NUMBER)
    .put(Schema.Type.STRING, JsonToken.STRING)
    .put(Schema.Type.BYTES, JsonToken.BEGIN_ARRAY)
    .put(Schema.Type.ARRAY, JsonToken.BEGIN_ARRAY)
    .put(Schema.Type.MAP, JsonToken.BEGIN_OBJECT)
    .put(Schema.Type.RECORD, JsonToken.BEGIN_OBJECT)
    .build()
);
/**
 * Converts a {@link StructuredRecord} to a json string.
 *
 * @param record the record to serialize; the schema returned by {@code record.getSchema()}
 *               drives the encoding
 * @return the JSON text of the record
 * @throws IOException if a value cannot be written according to its schema
 */
public static String toJsonString(StructuredRecord record) throws IOException {
  StringWriter strWriter = new StringWriter();
  // try-with-resources instead of try/finally: when writeJson fails half-way through, closing the
  // JsonWriter fails too because the document is incomplete. With a plain finally block that
  // secondary failure would replace the original exception; here it is attached as suppressed.
  try (JsonWriter writer = new JsonWriter(strWriter)) {
    writeJson(writer, record.getSchema(), record);
    return strWriter.toString();
  }
}
/**
 * Parses a json string into a {@link StructuredRecord}, using the given schema to decide how
 * each value is read.
 *
 * @param json the JSON text to parse
 * @param schema the schema of the record to produce
 * @return the parsed value, cast to {@link StructuredRecord}
 * @throws IOException if the JSON text cannot be read according to the schema
 */
public static StructuredRecord fromJsonString(String json, Schema schema) throws IOException {
  try (JsonReader jsonReader = new JsonReader(new StringReader(json))) {
    Object parsed = readJson(jsonReader, schema);
    return (StructuredRecord) parsed;
  }
}
/**
 * Converts a {@link StructuredRecord} to a delimited string.
 *
 * <p>Field values are rendered with {@code toString()} in schema field order and joined with the
 * delimiter. A field whose value is {@code null} is rendered as an empty string so that the
 * remaining fields keep their position.
 *
 * @param record the record to render
 * @param delimiter the separator placed between field values
 * @return the delimited representation of the record
 */
public static String toDelimitedString(final StructuredRecord record, String delimiter) {
  return Joiner.on(delimiter).join(
    Iterables.transform(record.getSchema().getFields(), new Function<Schema.Field, String>() {
      @Override
      public String apply(Schema.Field field) {
        Object value = record.get(field.getName());
        // Joiner rejects null elements, and calling toString() on a null value would throw.
        return value == null ? "" : value.toString();
      }
    }));
}
/**
 * Converts a delimited string to a {@link StructuredRecord} based on the schema.
 *
 * <p>The string is split on the delimiter and the parts are matched to the schema fields by
 * position. An empty part leaves its field unset but still consumes that field, so the values
 * after it stay aligned with their own fields. Empty parts beyond the last field (for example
 * from a trailing delimiter) are ignored.
 *
 * @param delimitedString the text to parse
 * @param delimiter the separator between field values
 * @param schema the schema of the record to build
 * @return the record built from the non-empty parts
 * @throws NoSuchElementException if a non-empty part has no corresponding schema field
 */
public static StructuredRecord fromDelimitedString(String delimitedString, String delimiter, Schema schema) {
  StructuredRecord.Builder builder = StructuredRecord.builder(schema);
  Iterator<Schema.Field> fields = schema.getFields().iterator();
  for (String part : Splitter.on(delimiter).split(delimitedString)) {
    if (!fields.hasNext()) {
      if (part.isEmpty()) {
        // Nothing to assign and no field left: tolerate trailing delimiters.
        continue;
      }
      throw new NoSuchElementException(
        "Delimited string has more values than the " + schema.getFields().size() + " fields in the schema");
    }
    // Always advance, even for an empty part; otherwise later values shift onto earlier fields.
    Schema.Field field = fields.next();
    if (!part.isEmpty()) {
      builder.convertAndSet(field.getName(), part);
    }
  }
  return builder.build();
}
/**
 * Reads the next JSON value from the reader as dictated by the schema type and returns it as a
 * Java object.
 *
 * @param reader the reader positioned at the value to read
 * @param schema the schema describing the value
 * @return the value read; {@code null} for the NULL type
 * @throws IOException if the schema type is not handled or the underlying read fails
 */
private static Object readJson(JsonReader reader, Schema schema) throws IOException {
  Schema.Type schemaType = schema.getType();
  switch (schemaType) {
    case NULL:
      reader.nextNull();
      return null;
    case BOOLEAN:
      return reader.nextBoolean();
    case INT:
      return reader.nextInt();
    case LONG:
      return reader.nextLong();
    case FLOAT: {
      // JsonReader is read as a double here; narrow explicitly so a Float is returned
      double wide = reader.nextDouble();
      return (float) wide;
    }
    case DOUBLE:
      return reader.nextDouble();
    case STRING:
    case ENUM:
      // Enum values are returned as plain strings, the same way as STRING
      return reader.nextString();
    case BYTES:
      return readBytes(reader);
    case ARRAY:
      return readArray(reader, schema.getComponentSchema());
    case MAP:
      return readMap(reader, schema.getMapSchema());
    case RECORD:
      return readRecord(reader, schema);
    case UNION:
      return readUnion(reader, schema);
    default:
      throw new IOException("Unsupported schema: " + schema);
  }
}
/**
 * Reads a JSON array of integers and returns them as a byte array, one byte per element.
 * Each element is passed to {@link ByteArrayOutputStream#write(int)}.
 *
 * @param reader the reader positioned at the start of the array
 * @return the bytes collected from the array elements
 * @throws IOException if the next value is not an array of integers
 */
private static byte[] readBytes(JsonReader reader) throws IOException {
  ByteArrayOutputStream collected = new ByteArrayOutputStream(128);
  reader.beginArray();
  while (reader.hasNext()) {
    int element = reader.nextInt();
    collected.write(element);
  }
  reader.endArray();
  return collected.toByteArray();
}
/**
 * Reads a JSON array, decoding every element with the given element schema.
 *
 * @param reader the reader positioned at the start of the array
 * @param elementSchema the schema applied to each element
 * @return a mutable list holding the decoded elements in document order
 * @throws IOException if the next value is not an array or an element cannot be read
 */
private static List<Object> readArray(JsonReader reader, Schema elementSchema) throws IOException {
  List<Object> result = new ArrayList<>();
  reader.beginArray();
  while (reader.hasNext()) {
    result.add(readJson(reader, elementSchema));
  }
  reader.endArray();
  return result;
}
/**
 * Reads a JSON object as a map. Each member name is converted to the key schema's type and each
 * member value is decoded with the value schema. If a name occurs more than once, the last
 * occurrence wins.
 *
 * @param reader the reader positioned at the start of the object
 * @param mapSchema the key schema and value schema of the map
 * @return a mutable map with the converted keys and decoded values
 * @throws IOException if the key schema is not compatible with STRING, a key cannot be converted,
 *                     or a value cannot be read
 */
private static Map<Object, Object> readMap(JsonReader reader,
                                           Map.Entry<Schema, Schema> mapSchema) throws IOException {
  Schema keySchema = mapSchema.getKey();
  if (!keySchema.isCompatible(Schema.of(Schema.Type.STRING))) {
    throw new IOException("Complex key type not supported: " + keySchema);
  }
  Schema valueSchema = mapSchema.getValue();
  Map<Object, Object> result = new HashMap<>();
  reader.beginObject();
  while (reader.peek() != JsonToken.END_OBJECT) {
    Object key = convertKey(reader.nextName(), keySchema.getType());
    result.put(key, readJson(reader, valueSchema));
  }
  reader.endObject();
  return result;
}
/**
 * Converts a JSON member name into a map key of the requested simple type.
 *
 * @param key the member name as read from the JSON object
 * @param type the schema type of the map key
 * @return the key itself for STRING, otherwise the boxed value parsed from the key
 * @throws IOException if the type is not one of STRING, BOOLEAN, INT, LONG, FLOAT or DOUBLE
 */
private static Object convertKey(String key, Schema.Type type) throws IOException {
  switch (type) {
    case BOOLEAN:
      return Boolean.valueOf(key);
    case INT:
      return Integer.valueOf(key);
    case LONG:
      return Long.valueOf(key);
    case FLOAT:
      return Float.valueOf(key);
    case DOUBLE:
      return Double.valueOf(key);
    case STRING:
      return key;
    default:
      throw new IOException("Unable to convert string to type " + type);
  }
}
/**
 * Reads a JSON object as a {@link StructuredRecord} of the given schema. Members whose name is
 * not a field of the schema are skipped.
 *
 * @param reader the reader positioned at the start of the object
 * @param schema the record schema
 * @return the record built from the recognized members
 * @throws IOException if the next value is not an object or a field value cannot be read
 */
private static StructuredRecord readRecord(JsonReader reader, Schema schema) throws IOException {
  StructuredRecord.Builder recordBuilder = StructuredRecord.builder(schema);
  reader.beginObject();
  while (reader.peek() != JsonToken.END_OBJECT) {
    String memberName = reader.nextName();
    Schema.Field knownField = schema.getField(memberName);
    if (knownField != null) {
      String fieldName = knownField.getName();
      Object fieldValue = readJson(reader, knownField.getSchema());
      recordBuilder.set(fieldName, fieldValue);
    } else {
      // Not part of the schema: consume the value and move on
      reader.skipValue();
    }
  }
  reader.endObject();
  return recordBuilder.build();
}
/**
 * Reads a union value. A JSON null is returned as {@code null}; any other value must be a JSON
 * object with a single member whose name selects the union branch: either the lower-case name of
 * the branch's schema type, or the record name for a RECORD branch.
 *
 * @param reader the reader positioned at the union value
 * @param unionSchema the union schema listing the possible branches
 * @return the decoded value of the selected branch, or {@code null}
 * @throws IOException if no branch of the union matches the member name or the value cannot be read
 */
private static Object readUnion(JsonReader reader, Schema unionSchema) throws IOException {
  JsonToken token = reader.peek();
  if (token == JsonToken.NULL) {
    reader.nextNull();
    return null;
  }
  reader.beginObject();
  String type = reader.nextName();
  Schema matchingSchema = null;
  for (Schema schema : unionSchema.getUnionSchemas()) {
    // Locale.ROOT keeps the lower-casing independent of the default locale; with a Turkish
    // default locale, for example, "INT" would otherwise not become "int".
    if (schema.getType().name().toLowerCase(Locale.ROOT).equals(type) ||
      (schema.getType() == Schema.Type.RECORD && schema.getRecordName().equals(type))) {
      matchingSchema = schema;
      break;
    }
  }
  if (matchingSchema == null) {
    throw new IOException("No matching schema found for type " + type + " in union types: " + unionSchema);
  }
  Object object = readJson(reader, matchingSchema);
  reader.endObject();
  return object;
}
/**
 * Writes a value as JSON according to the schema type.
 *
 * @param writer the writer to emit to
 * @param schema the schema describing the value
 * @param value the value to write; must be of the Java type the schema type expects
 * @throws IOException if a nested writer rejects the value or the underlying write fails
 */
private static void writeJson(JsonWriter writer, Schema schema, Object value) throws IOException {
  switch (schema.getType()) {
    case NULL:
      writer.nullValue();
      break;
    case BOOLEAN:
      writer.value((Boolean) value);
      break;
    case INT:
    case LONG:
    case FLOAT:
    case DOUBLE:
      writer.value((Number) value);
      break;
    case BYTES:
      writeBytes(writer, value);
      break;
    case STRING:
      writer.value((String) value);
      break;
    case ENUM:
      // readJson returns enum values as plain strings, so a record that was parsed from JSON
      // carries a String here. Accept both forms so that such a record can be written back.
      if (value instanceof Enum) {
        writer.value(((Enum<?>) value).name());
      } else {
        writer.value((String) value);
      }
      break;
    case ARRAY:
      writeArray(writer, schema.getComponentSchema(), value);
      break;
    case MAP:
      writeMap(writer, schema.getMapSchema(), value);
      break;
    case RECORD:
      writeRecord(writer, schema, value);
      break;
    case UNION:
      writeUnion(writer, schema, value);
      break;
  }
}
/**
 * Writes a union value. A value that resolves to the NULL branch is written as JSON null; any
 * other value is wrapped in an object with a single member named after the branch: the record
 * name for a RECORD branch, otherwise the lower-case name of the schema type.
 *
 * @param writer the writer to emit to
 * @param schema the union schema
 * @param value the value to write, possibly {@code null}
 * @throws IOException if no branch of the union matches the value or the write fails
 */
private static void writeUnion(JsonWriter writer, Schema schema, Object value) throws IOException {
  Schema actualSchema = findUnionSchema(schema, value);
  Schema.Type type = actualSchema.getType();
  if (type == Schema.Type.NULL) {
    writer.nullValue();
    return;
  }
  writer.beginObject();
  if (type == Schema.Type.RECORD) {
    writer.name(actualSchema.getRecordName());
  } else {
    // Locale.ROOT keeps the branch name independent of the default locale; with a Turkish
    // default locale, for example, "INT" would otherwise not become "int".
    writer.name(type.name().toLowerCase(Locale.ROOT));
  }
  writeJson(writer, actualSchema, value);
  writer.endObject();
}
/**
 * Writes a bytes value, which may be given either as a {@link ByteBuffer} or as a byte array,
 * by delegating to the matching overload.
 *
 * @param writer the writer to emit to
 * @param value the bytes value to write
 * @throws IOException if the value is neither a ByteBuffer nor a byte[]
 */
private static void writeBytes(JsonWriter writer, Object value) throws IOException {
  if (value instanceof ByteBuffer) {
    writeBytes(writer, (ByteBuffer) value);
    return;
  }
  if (value instanceof byte[]) {
    byte[] raw = (byte[]) value;
    writeBytes(writer, raw, 0, raw.length);
    return;
  }
  throw new IOException("Expects either ByteBuffer or byte[]. Got " + value.getClass());
}
/**
 * Writes the content of a {@link ByteBuffer} as a JSON array by delegating to the byte[] overload.
 *
 * @param writer the writer to emit to
 * @param buffer the buffer whose bytes are written
 * @throws IOException if the underlying write fails
 */
private static void writeBytes(JsonWriter writer, ByteBuffer buffer) throws IOException {
if (buffer.hasArray()) {
// Array-backed buffer: write the remaining bytes (position to limit) straight from the backing
// array, without touching the buffer's position.
writeBytes(writer, buffer.array(), buffer.arrayOffset() + buffer.position(), buffer.remaining());
} else {
// NOTE(review): Bytes.getBytes is defined outside this file. Presumably it returns an array
// sized to (or already holding) the buffer's remaining bytes - confirm, since the get() below
// then fills that array again.
byte[] buf = Bytes.getBytes(buffer);
// mark/reset brackets the relative get() so the position is put back afterwards; note that
// mark() replaces any mark previously set on the buffer.
buffer.mark();
buffer.get(buf);
buffer.reset();
writeBytes(writer, buf, 0, buf.length);
}
}
/**
 * Writes a slice of a byte array as a JSON array, one number per byte.
 *
 * @param writer the writer to emit to
 * @param bytes the source array
 * @param off index of the first byte to write
 * @param len number of bytes to write
 * @throws IOException if the underlying write fails
 */
private static void writeBytes(JsonWriter writer, byte[] bytes, int off, int len) throws IOException {
  int end = off + len;
  writer.beginArray();
  int index = off;
  while (index < end) {
    writer.value(bytes[index]);
    index++;
  }
  writer.endArray();
}
/**
 * Writes a {@link Collection} or a Java array as a JSON array, encoding every element with the
 * element schema.
 *
 * @param writer the writer to emit to
 * @param elementSchema the schema applied to each element
 * @param value the collection or array to write
 * @throws IOException if the value is neither a Collection nor an array, or an element cannot be written
 */
private static void writeArray(JsonWriter writer, Schema elementSchema, Object value) throws IOException {
  boolean isCollection = value instanceof Collection;
  if (!isCollection && !value.getClass().isArray()) {
    throw new IOException("Expects either Collection or array. Got: " + value.getClass());
  }
  writer.beginArray();
  if (isCollection) {
    for (Object item : (Collection<?>) value) {
      writeJson(writer, elementSchema, item);
    }
  } else {
    // Reflective access so that arrays of any component type are handled
    int length = Array.getLength(value);
    for (int index = 0; index < length; index++) {
      writeJson(writer, elementSchema, Array.get(value, index));
    }
  }
  writer.endArray();
}
private static void writeMap(JsonWriter writer,
Map.Entry entrySchema, Object value) throws IOException {
if (!(value instanceof Map)) {
throw new IOException("Expects a map, have " + value.getClass());
}
Schema keySchema = entrySchema.getKey();
if (!keySchema.isCompatible(Schema.of(Schema.Type.STRING))) {
throw new IOException("Complex key type not supported: " + keySchema);
}
Schema valueSchema = entrySchema.getValue();
writer.beginObject();
for (Map.Entry, ?> entry : ((Map, ?>) value).entrySet()) {
writer.name(entry.getKey().toString());
writeJson(writer, valueSchema, entry.getValue());
}
writer.endObject();
}
/**
 * Writes a {@link StructuredRecord} as a JSON object with one member per schema field, in schema
 * field order. Fields whose value is {@code null} are left out of the output.
 *
 * @param writer the writer to emit to
 * @param schema the record schema whose fields are written
 * @param value the record to write
 * @throws IOException if the value is not a StructuredRecord or a field value cannot be written
 */
private static void writeRecord(JsonWriter writer, Schema schema, Object value) throws IOException {
  if (!(value instanceof StructuredRecord)) {
    throw new IOException("Expects a record, but have " + value.getClass());
  }
  StructuredRecord source = (StructuredRecord) value;
  writer.beginObject();
  for (Schema.Field field : schema.getFields()) {
    String fieldName = field.getName();
    Object current = source.get(fieldName);
    if (current == null) {
      // Null fields are omitted rather than written as JSON null
      continue;
    }
    writer.name(fieldName);
    writeJson(writer, field.getSchema(), current);
  }
  writer.endObject();
}
/**
 * Picks the first branch of a union whose schema type equals the schema type derived from the
 * value's Java type.
 *
 * @param unionSchema the union schema to search
 * @param value the value to find a branch for, possibly {@code null}
 * @return the first branch schema with a matching type
 * @throws IOException if the value's type is unsupported or no branch has that type
 */
private static Schema findUnionSchema(Schema unionSchema, @Nullable Object value) throws IOException {
  Schema.Type valueType = getSchemaType(value);
  Schema match = null;
  for (Schema candidate : unionSchema.getUnionSchemas()) {
    // Only the type is compared; comparing the detailed schema would be too expensive.
    if (candidate.getType() == valueType) {
      match = candidate;
      break;
    }
  }
  if (match == null) {
    throw new IOException("Value type " + valueType + " not valid in union: " + unionSchema);
  }
  return match;
}
/**
 * Derives the schema type that corresponds to the Java type of a value.
 *
 * @param value the value to classify, possibly {@code null}
 * @return NULL for a null value, otherwise the schema type matching the value's Java type
 * @throws IOException if the value's class maps to no schema type
 */
private static Schema.Type getSchemaType(@Nullable Object value) throws IOException {
  if (value == null) {
    return Schema.Type.NULL;
  }
  Class<?> cls = value.getClass();
  Schema.Type type = TYPE_TO_SCHEMA.get(cls);
  if (type != null) {
    return type;
  }
  // The lookup above matches exact classes only. ByteBuffer is abstract, so real buffers are
  // always instances of a subclass and never hit the ByteBuffer.class entry; the same applies to
  // any subclass of StructuredRecord. Enum constants have their own classes as well.
  if (value instanceof ByteBuffer) {
    return Schema.Type.BYTES;
  }
  if (value instanceof StructuredRecord) {
    return Schema.Type.RECORD;
  }
  if (value instanceof Enum) {
    return Schema.Type.ENUM;
  }
  if (Collection.class.isAssignableFrom(cls) || cls.isArray()) {
    return Schema.Type.ARRAY;
  }
  if (Map.class.isAssignableFrom(cls)) {
    return Schema.Type.MAP;
  }
  throw new IOException("Unsupported type found in the record: " + cls);
}
/**
 * Private constructor: this class only exposes static methods and is not meant to be instantiated.
 */
private StructuredRecordJsonConverter() {
// Intentionally empty; inaccessible constructor for a static utility class
}
}