All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.avro.io.EncoderFactory Maven / Gradle / Ivy

There is a newer version: 3.1.2-23
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro.io;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.avro.AvroRuntimeException;
import org.apache.avro.Schema;
import io.trino.hive.$internal.com.fasterxml.jackson.core.JsonGenerator;

/**
 * A factory for creating and configuring {@link Encoder} instances.
 * 

* Factory methods that create Encoder instances are thread-safe. Multiple * instances with different configurations can be cached by an application. * * @see Encoder * @see BinaryEncoder * @see JsonEncoder * @see ValidatingEncoder * @see BufferedBinaryEncoder * @see BlockingBinaryEncoder * @see DirectBinaryEncoder */ public class EncoderFactory { private static final int DEFAULT_BUFFER_SIZE = 2048; private static final int DEFAULT_BLOCK_BUFFER_SIZE = 64 * 1024; private static final int MIN_BLOCK_BUFFER_SIZE = 64; private static final int MAX_BLOCK_BUFFER_SIZE = 1024 * 1024 * 1024; private static final EncoderFactory DEFAULT_FACTORY = new DefaultEncoderFactory(); protected int binaryBufferSize = DEFAULT_BUFFER_SIZE; protected int binaryBlockSize = DEFAULT_BLOCK_BUFFER_SIZE; /** * Returns an immutable static DecoderFactory with default configuration. All * configuration methods throw AvroRuntimeExceptions if called. */ public static EncoderFactory get() { return DEFAULT_FACTORY; } /** * Configures this factory to use the specified buffer size when creating * Encoder instances that buffer their output. The default buffer size is 2048 * bytes. * * @param size The buffer size to configure new instances with. Valid values are * in the range [32, 16*1024*1024]. Values outside this range are * set to the nearest value in the range. Values less than 256 will * limit performance but will consume less memory if the * BinaryEncoder is short-lived, values greater than 8*1024 are not * likely to improve performance but may be useful for the * downstream OutputStream. * @return This factory, to enable method chaining: * *

   *         EncoderFactory factory = new EncoderFactory().configureBufferSize(4096);
   *         
* * @see #binaryEncoder(OutputStream, BinaryEncoder) */ public EncoderFactory configureBufferSize(int size) { if (size < 32) size = 32; if (size > 16 * 1024 * 1024) size = 16 * 1024 * 1024; this.binaryBufferSize = size; return this; } /** * Returns this factory's configured default buffer size. Used when creating * Encoder instances that buffer writes. * * @see #configureBufferSize(int) * @see #binaryEncoder(OutputStream, BinaryEncoder) * @return The preferred buffer size, in bytes. */ public int getBufferSize() { return this.binaryBufferSize; } /** * Configures this factory to construct blocking BinaryEncoders with the * specified block buffer size. The default buffer size is 64 * 1024 bytes. * * @param size The preferred block size for array blocking. Arrays larger than * this size will be segmented into blocks according to the Avro * spec. Valid values are in the range [64, 1024*1024*1024] Values * outside this range are set to the nearest value in the range. The * encoder will require at least this amount of memory. * @return This factory, to enable method chaining: * *
   *         EncoderFactory factory = new EncoderFactory().configureBlockSize(8000);
   *         
* * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder) */ public EncoderFactory configureBlockSize(int size) { if (size < MIN_BLOCK_BUFFER_SIZE) size = MIN_BLOCK_BUFFER_SIZE; if (size > MAX_BLOCK_BUFFER_SIZE) size = MAX_BLOCK_BUFFER_SIZE; this.binaryBlockSize = size; return this; } /** * Returns this factory's configured default block buffer size. * {@link BinaryEncoder} instances created with * #blockingBinaryEncoder(OutputStream, BinaryEncoder) will have block buffers * of this size. *

* * @see #configureBlockSize(int) * @see #blockingBinaryEncoder(OutputStream, BinaryEncoder) * @return The preferred block size, in bytes. */ public int getBlockSize() { return this.binaryBlockSize; } /** * Creates or reinitializes a {@link BinaryEncoder} with the OutputStream * provided as the destination for written data. If reuse is provided, an * attempt will be made to reconfigure reuse rather than construct a new * instance, but this is not guaranteed, a new instance may be returned. *

* The {@link BinaryEncoder} implementation returned may buffer its output. Data * may not appear on the underlying OutputStream until {@link Encoder#flush()} * is called. The buffer size is configured with * {@link #configureBufferSize(int)}. *

* If buffering is not desired, and lower performance is acceptable, use * {@link #directBinaryEncoder(OutputStream, BinaryEncoder)} *

* {@link BinaryEncoder} instances returned by this method are not thread-safe * * @param out The OutputStream to write to. Cannot be null. * @param reuse The BinaryEncoder to attempt to reuse given the factory * configuration. A BinaryEncoder implementation may not be * compatible with reuse, causing a new instance to be returned. If * null, a new instance is returned. * @return A BinaryEncoder that uses out as its data output. If * reuse is null, this will be a new instance. If reuse is * not null, then the returned instance may be a new instance or * reuse reconfigured to use out. * @throws IOException * @see BufferedBinaryEncoder * @see Encoder */ public BinaryEncoder binaryEncoder(OutputStream out, BinaryEncoder reuse) { if (null == reuse || !reuse.getClass().equals(BufferedBinaryEncoder.class)) { return new BufferedBinaryEncoder(out, this.binaryBufferSize); } else { return ((BufferedBinaryEncoder) reuse).configure(out, this.binaryBufferSize); } } /** * Creates or reinitializes a {@link BinaryEncoder} with the OutputStream * provided as the destination for written data. If reuse is provided, an * attempt will be made to reconfigure reuse rather than construct a new * instance, but this is not guaranteed, a new instance may be returned. *

* The {@link BinaryEncoder} implementation returned does not buffer its output, * calling {@link Encoder#flush()} will simply cause the wrapped OutputStream to * be flushed. *

* Performance of unbuffered writes can be significantly slower than buffered * writes. {@link #binaryEncoder(OutputStream, BinaryEncoder)} returns * BinaryEncoder instances that are tuned for performance but may buffer output. * The unbuffered, 'direct' encoder may be desired when buffering semantics are * problematic, or if the lifetime of the encoder is so short that the buffer * would not be useful. *

* {@link BinaryEncoder} instances returned by this method are not thread-safe. * * @param out The OutputStream to initialize to. Cannot be null. * @param reuse The BinaryEncoder to attempt to reuse given the factory * configuration. A BinaryEncoder implementation may not be * compatible with reuse, causing a new instance to be returned. If * null, a new instance is returned. * @return A BinaryEncoder that uses out as its data output. If * reuse is null, this will be a new instance. If reuse is * not null, then the returned instance may be a new instance or * reuse reconfigured to use out. * @see DirectBinaryEncoder * @see Encoder */ public BinaryEncoder directBinaryEncoder(OutputStream out, BinaryEncoder reuse) { if (null == reuse || !reuse.getClass().equals(DirectBinaryEncoder.class)) { return new DirectBinaryEncoder(out); } else { return ((DirectBinaryEncoder) reuse).configure(out); } } /** * Creates or reinitializes a {@link BinaryEncoder} with the OutputStream * provided as the destination for written data. If reuse is provided, an * attempt will be made to reconfigure reuse rather than construct a new * instance, but this is not guaranteed, a new instance may be returned. *

* The {@link BinaryEncoder} implementation returned buffers its output, calling * {@link Encoder#flush()} is required for output to appear on the underlying * OutputStream. *

* The returned BinaryEncoder implements the Avro binary encoding using blocks * delimited with byte sizes for Arrays and Maps. This allows for some decoders * to skip over large Arrays or Maps without decoding the contents, but adds * some overhead. The default block size is configured with * {@link #configureBlockSize(int)} *

* {@link BinaryEncoder} instances returned by this method are not thread-safe. * * @param out The OutputStream to initialize to. Cannot be null. * @param reuse The BinaryEncoder to attempt to reuse given the factory * configuration. A BinaryEncoder implementation may not be * compatible with reuse, causing a new instance to be returned. If * null, a new instance is returned. * @return A BinaryEncoder that uses out as its data output. If * reuse is null, this will be a new instance. If reuse is * not null, then the returned instance may be a new instance or * reuse reconfigured to use out. * @throws IOException * @see BlockingBinaryEncoder * @see Encoder */ public BinaryEncoder blockingBinaryEncoder(OutputStream out, BinaryEncoder reuse) { int blockSize = this.binaryBlockSize; int bufferSize = (blockSize * 2 >= this.binaryBufferSize) ? 32 : this.binaryBufferSize; if (null == reuse || !reuse.getClass().equals(BlockingBinaryEncoder.class)) { return new BlockingBinaryEncoder(out, blockSize, bufferSize); } else { return ((BlockingBinaryEncoder) reuse).configure(out, blockSize, bufferSize); } } /** * Creates a {@link JsonEncoder} using the OutputStream provided for writing * data conforming to the Schema provided. *

* {@link JsonEncoder} buffers its output. Data may not appear on the underlying * OutputStream until {@link Encoder#flush()} is called. *

* {@link JsonEncoder} is not thread-safe. * * @param schema The Schema for data written to this JsonEncoder. Cannot be * null. * @param out The OutputStream to write to. Cannot be null. * @return A JsonEncoder configured with out and schema * @throws IOException */ public JsonEncoder jsonEncoder(Schema schema, OutputStream out) throws IOException { return new JsonEncoder(schema, out); } /** * Creates a {@link JsonEncoder} using the OutputStream provided for writing * data conforming to the Schema provided with optional pretty printing. *

* {@link JsonEncoder} buffers its output. Data may not appear on the underlying * OutputStream until {@link Encoder#flush()} is called. *

* {@link JsonEncoder} is not thread-safe. * * @param schema The Schema for data written to this JsonEncoder. Cannot be * null. * @param out The OutputStream to write to. Cannot be null. * @param pretty Pretty print encoding. * @return A JsonEncoder configured with out, schema and * pretty * @throws IOException */ public JsonEncoder jsonEncoder(Schema schema, OutputStream out, boolean pretty) throws IOException { return new JsonEncoder(schema, out, pretty); } /** * Creates a {@link JsonEncoder} using the {@link JsonGenerator} provided for * output of data conforming to the Schema provided. *

* {@link JsonEncoder} buffers its output. Data may not appear on the underlying * output until {@link Encoder#flush()} is called. *

* {@link JsonEncoder} is not thread-safe. * * @param schema The Schema for data written to this JsonEncoder. Cannot be * null. * @param gen The JsonGenerator to write with. Cannot be null. * @return A JsonEncoder configured with gen and schema * @throws IOException */ JsonEncoder jsonEncoder(Schema schema, JsonGenerator gen) throws IOException { return new JsonEncoder(schema, gen); } /** * Creates a {@link ValidatingEncoder} that wraps the Encoder provided. This * ValidatingEncoder will ensure that operations against it conform to the * schema provided. *

* Many {@link Encoder}s buffer their output. Data may not appear on the * underlying output until {@link Encoder#flush()} is called. *

* {@link ValidatingEncoder} is not thread-safe. * * @param schema The Schema to validate operations against. Cannot be null. * @param encoder The Encoder to wrap. Cannot be be null. * @return A ValidatingEncoder configured to wrap encoder and validate * against schema * @throws IOException */ public ValidatingEncoder validatingEncoder(Schema schema, Encoder encoder) throws IOException { return new ValidatingEncoder(schema, encoder); } // default encoder is not mutable private static class DefaultEncoderFactory extends EncoderFactory { @Override public EncoderFactory configureBlockSize(int size) { throw new AvroRuntimeException("Default EncoderFactory cannot be configured"); } @Override public EncoderFactory configureBufferSize(int size) { throw new AvroRuntimeException("Default EncoderFactory cannot be configured"); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy