All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.avro.JsonEncoder Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.avro;

import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import org.apache.avro.AvroTypeException;
import org.apache.avro.Schema;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.ParsingEncoder;
import org.apache.avro.io.parsing.JsonGrammarGenerator;
import org.apache.avro.io.parsing.Parser;
import org.apache.avro.io.parsing.Symbol;
import org.apache.avro.util.Utf8;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import java.util.EnumSet;
import java.util.Objects;
import java.util.Set;

/**
 * An {@link Encoder} for Avro's JSON data encoding.
 *
 * 

NOTE: This class is a copy of Avro's JsonEncoder class, with the only difference being that * this class does not wrap union types in JSON by overriding {@link #writeIndex(int)}. * *

By default, Avro's JSON encoding for union types is to wrap the union value * in a JSON object with the type name as the key (e.g., {"string": "value"}). * This encoder overrides that behavior to write just the value, resulting in cleaner JSON output. * *

For instance, a union with schema ["null", "string"] would be encoded as just * "value" instead of {"string": "value"}. * *

This encoder is particularly useful when the standard Avro JSON format's verbosity * for union types is not desired. */ public class JsonEncoder extends ParsingEncoder implements Parser.ActionHandler { private static final String LINE_SEPARATOR = System.getProperty("line.separator"); final Parser parser; private JsonGenerator out; private boolean includeNamespace = true; /** * Has anything been written into the collections? */ protected BitSet isEmpty = new BitSet(); public JsonEncoder(Schema sc, OutputStream out) throws IOException { this(sc, getJsonGenerator(out, EnumSet.noneOf(JsonOptions.class))); } JsonEncoder(Schema sc, OutputStream out, boolean pretty) throws IOException { this(sc, getJsonGenerator(out, pretty ? EnumSet.of(JsonOptions.Pretty) : EnumSet.noneOf(JsonOptions.class))); } JsonEncoder(Schema sc, OutputStream out, Set options) throws IOException { this(sc, getJsonGenerator(out, options)); } JsonEncoder(Schema sc, JsonGenerator out) throws IOException { configure(out); this.parser = new Parser(new JsonGrammarGenerator().generate(sc), this); } @Override public void flush() throws IOException { parser.processImplicitActions(); if (out != null) { out.flush(); } } enum JsonOptions { Pretty, // Prevent underlying outputstream to be flush for optimisation purpose. NoFlushStream } // by default, one object per line. // with pretty option use default pretty printer with root line separator. 
private static JsonGenerator getJsonGenerator(OutputStream out, Set options) throws IOException { Objects.requireNonNull(out, "OutputStream cannot be null"); JsonGenerator g = new JsonFactory().createJsonGenerator(out, JsonEncoding.UTF8); if (options.contains(JsonOptions.NoFlushStream)) { g = g.configure(JsonGenerator.Feature.FLUSH_PASSED_TO_STREAM, false); } MinimalPrettyPrinter pp = new MinimalPrettyPrinter(); pp.setRootValueSeparator(LINE_SEPARATOR); g.setPrettyPrinter(pp); return g; } public boolean isIncludeNamespace() { return includeNamespace; } public void setIncludeNamespace(final boolean includeNamespace) { this.includeNamespace = includeNamespace; } /** * Reconfigures this JsonEncoder to use the output stream provided. *

* If the OutputStream provided is null, a NullPointerException is thrown. *

* Otherwise, this JsonEncoder will flush its current output and then * reconfigure its output to use a default UTF8 JsonGenerator that writes to the * provided OutputStream. * * @param out The OutputStream to direct output to. Cannot be null. * @return this JsonEncoder * @throws IOException * @throws NullPointerException if {@code out} is {@code null} */ public JsonEncoder configure(OutputStream out) throws IOException { return this.configure(out, true); } /** * Reconfigures this JsonEncoder to use the output stream provided. *

* If the OutputStream provided is null, a NullPointerException is thrown. *

* Otherwise, this JsonEncoder will flush its current output and then * reconfigure its output to use a default UTF8 JsonGenerator that writes to the * provided OutputStream. * * @param out The OutputStream to direct output to. Cannot be null. * @return this JsonEncoder * @throws IOException * @throws NullPointerException if {@code out} is {@code null} */ public JsonEncoder configure(OutputStream out, boolean autoflush) throws IOException { EnumSet jsonOptions = EnumSet.noneOf(JsonOptions.class); if (!autoflush) { jsonOptions.add(JsonOptions.NoFlushStream); } this.configure(getJsonGenerator(out, jsonOptions)); return this; } /** * Reconfigures this JsonEncoder to output to the JsonGenerator provided. *

* If the JsonGenerator provided is null, a NullPointerException is thrown. *

* Otherwise, this JsonEncoder will flush its current output and then * reconfigure its output to use the provided JsonGenerator. * * @param generator The JsonGenerator to direct output to. Cannot be null. * @return this JsonEncoder * @throws IOException * @throws NullPointerException if {@code generator} is {@code null} */ private JsonEncoder configure(JsonGenerator generator) throws IOException { Objects.requireNonNull(generator, "JsonGenerator cannot be null"); if (null != parser) { flush(); } this.out = generator; return this; } @Override public void writeNull() throws IOException { parser.advance(Symbol.NULL); out.writeNull(); } @Override public void writeBoolean(boolean b) throws IOException { parser.advance(Symbol.BOOLEAN); out.writeBoolean(b); } @Override public void writeInt(int n) throws IOException { parser.advance(Symbol.INT); out.writeNumber(n); } @Override public void writeLong(long n) throws IOException { parser.advance(Symbol.LONG); out.writeNumber(n); } @Override public void writeFloat(float f) throws IOException { parser.advance(Symbol.FLOAT); out.writeNumber(f); } @Override public void writeDouble(double d) throws IOException { parser.advance(Symbol.DOUBLE); out.writeNumber(d); } @Override public void writeString(Utf8 utf8) throws IOException { writeString(utf8.toString()); } @Override public void writeString(String str) throws IOException { parser.advance(Symbol.STRING); if (parser.topSymbol() == Symbol.MAP_KEY_MARKER) { parser.advance(Symbol.MAP_KEY_MARKER); out.writeFieldName(str); } else { out.writeString(str); } } @Override public void writeBytes(ByteBuffer bytes) throws IOException { if (bytes.hasArray()) { writeBytes(bytes.array(), bytes.position(), bytes.remaining()); } else { byte[] b = new byte[bytes.remaining()]; bytes.duplicate().get(b); writeBytes(b); } } @Override public void writeBytes(byte[] bytes, int start, int len) throws IOException { parser.advance(Symbol.BYTES); writeByteArray(bytes, start, len); } private void 
writeByteArray(byte[] bytes, int start, int len) throws IOException { out.writeString(new String(bytes, start, len, StandardCharsets.ISO_8859_1)); } @Override public void writeFixed(byte[] bytes, int start, int len) throws IOException { parser.advance(Symbol.FIXED); Symbol.IntCheckAction top = (Symbol.IntCheckAction) parser.popSymbol(); if (len != top.size) { throw new AvroTypeException( "Incorrect length for fixed binary: expected " + top.size + " but received " + len + " bytes."); } writeByteArray(bytes, start, len); } @Override public void writeEnum(int e) throws IOException { parser.advance(Symbol.ENUM); Symbol.EnumLabelsAction top = (Symbol.EnumLabelsAction) parser.popSymbol(); if (e < 0 || e >= top.size) { throw new AvroTypeException("Enumeration out of range: max is " + top.size + " but received " + e); } out.writeString(top.getLabel(e)); } @Override public void writeArrayStart() throws IOException { parser.advance(Symbol.ARRAY_START); out.writeStartArray(); push(); isEmpty.set(depth()); } @Override public void writeArrayEnd() throws IOException { if (!isEmpty.get(pos)) { parser.advance(Symbol.ITEM_END); } pop(); parser.advance(Symbol.ARRAY_END); out.writeEndArray(); } @Override public void writeMapStart() throws IOException { push(); isEmpty.set(depth()); parser.advance(Symbol.MAP_START); out.writeStartObject(); } @Override public void writeMapEnd() throws IOException { if (!isEmpty.get(pos)) { parser.advance(Symbol.ITEM_END); } pop(); parser.advance(Symbol.MAP_END); out.writeEndObject(); } @Override public void startItem() throws IOException { if (!isEmpty.get(pos)) { parser.advance(Symbol.ITEM_END); } super.startItem(); isEmpty.clear(depth()); } @Override public void writeIndex(int unionIndex) throws IOException { // Do not write index for union types. 
parser.advance(Symbol.UNION); Symbol.Alternative top = (Symbol.Alternative) parser.popSymbol(); Symbol symbol = top.getSymbol(unionIndex); parser.pushSymbol(symbol); } @Override public Symbol doAction(Symbol input, Symbol top) throws IOException { if (top instanceof Symbol.FieldAdjustAction) { Symbol.FieldAdjustAction fa = (Symbol.FieldAdjustAction) top; out.writeFieldName(fa.fname); } else if (top == Symbol.RECORD_START) { out.writeStartObject(); } else if (top == Symbol.RECORD_END || top == Symbol.UNION_END) { out.writeEndObject(); } else if (top != Symbol.FIELD_END) { throw new AvroTypeException("Unknown action symbol " + top); } return null; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy