All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.avro.hadoop.io.AvroSerializer Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.apache.avro.hadoop.io;

import java.io.IOException;
import java.io.OutputStream;

import org.apache.avro.Schema;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.avro.reflect.ReflectDatumWriter;
import org.apache.hadoop.io.serializer.Serializer;

/**
 * Serializes AvroWrapper objects within Hadoop.
 *
 * 

* Keys and values containing Avro types are more efficiently serialized outside * of the WritableSerialization model, so they are wrapped in * {@link org.apache.avro.mapred.AvroWrapper} objects and serialization is * handled by this class. *

* *

* MapReduce jobs that use AvroWrapper objects as keys or values need to be * configured with {@link AvroSerialization}. Use * {@link org.apache.avro.mapreduce.AvroJob} to help with Job configuration. *

* * @param The Java type of the Avro data. */ public class AvroSerializer implements Serializer> { /** An factory for creating Avro datum encoders. */ private static final EncoderFactory ENCODER_FACTORY = new EncoderFactory(); /** The writer schema for the data to serialize. */ private final Schema mWriterSchema; /** The Avro datum writer for serializing. */ private final DatumWriter mAvroDatumWriter; /** The Avro encoder for serializing. */ private BinaryEncoder mAvroEncoder; /** The output stream for serializing. */ private OutputStream mOutputStream; /** * Constructor. * * @param writerSchema The writer schema for the Avro data being serialized. */ public AvroSerializer(Schema writerSchema) { if (null == writerSchema) { throw new IllegalArgumentException("Writer schema may not be null"); } mWriterSchema = writerSchema; mAvroDatumWriter = new ReflectDatumWriter<>(writerSchema); } /** * Constructor. * * @param writerSchema The writer schema for the Avro data being serialized. * @param datumWriter The datum writer to use for serialization. */ public AvroSerializer(Schema writerSchema, DatumWriter datumWriter) { if (null == writerSchema) { throw new IllegalArgumentException("Writer schema may not be null"); } mWriterSchema = writerSchema; mAvroDatumWriter = datumWriter; } /** * Gets the writer schema being used for serialization. * * @return The writer schema. */ public Schema getWriterSchema() { return mWriterSchema; } /** {@inheritDoc} */ @Override public void open(OutputStream outputStream) throws IOException { mOutputStream = outputStream; mAvroEncoder = ENCODER_FACTORY.binaryEncoder(outputStream, mAvroEncoder); } /** {@inheritDoc} */ @Override public void serialize(AvroWrapper avroWrapper) throws IOException { mAvroDatumWriter.write(avroWrapper.datum(), mAvroEncoder); // This would be a lot faster if the Serializer interface had a flush() method // and the // Hadoop framework called it when needed. For now, we'll have to flush on every // record. mAvroEncoder.flush(); } /** {@inheritDoc} */ @Override public void close() throws IOException { mOutputStream.close(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy