All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.avro.hadoop.io.AvroDatumConverterFactory Maven / Gradle / Ivy

Go to download

An org.apache.hadoop.mapred compatible API for using Avro Serialization in Hadoop

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.apache.avro.hadoop.io;

import java.nio.ByteBuffer;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericFixed;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapred.AvroWrapper;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

/**
 * Constructs converters that turn objects (usually from the output of a MR job)
 * into Avro data that can be serialized.
 *
 * 

* Currently, only the following types have implemented converters: *

    *
  • AvroKey
  • *
  • AvroValue
  • *
  • BooleanWritable
  • *
  • BytesWritable
  • *
  • ByteWritable
  • *
  • DoubleWritable
  • *
  • FloatWritable
  • *
  • IntWritable
  • *
  • LongWritable
  • *
  • NullWritable
  • *
  • Text
  • *
*

*/ public class AvroDatumConverterFactory extends Configured { /** * Creates a new AvroDatumConverterFactory instance. * * @param conf The job configuration. */ public AvroDatumConverterFactory(Configuration conf) { super(conf); } /** * Creates a converter that turns objects of type inputClass into * Avro data. * * @param inputClass The type of input data to convert. * @return A converter that turns objects of type inputClass into * Avro data. */ @SuppressWarnings("unchecked") public AvroDatumConverter create(Class inputClass) { boolean isMapOnly = ((JobConf) getConf()).getNumReduceTasks() == 0; if (AvroKey.class.isAssignableFrom(inputClass)) { Schema schema; if (isMapOnly) { schema = AvroJob.getMapOutputKeySchema(getConf()); if (null == schema) { schema = AvroJob.getOutputKeySchema(getConf()); } } else { schema = AvroJob.getOutputKeySchema(getConf()); } if (null == schema) { throw new IllegalStateException("Writer schema for output key was not set. Use AvroJob.setOutputKeySchema()."); } return (AvroDatumConverter) new AvroWrapperConverter(schema); } if (AvroValue.class.isAssignableFrom(inputClass)) { Schema schema; if (isMapOnly) { schema = AvroJob.getMapOutputValueSchema(getConf()); if (null == schema) { schema = AvroJob.getOutputValueSchema(getConf()); } } else { schema = AvroJob.getOutputValueSchema(getConf()); } if (null == schema) { throw new IllegalStateException( "Writer schema for output value was not set. 
Use AvroJob.setOutputValueSchema()."); } return (AvroDatumConverter) new AvroWrapperConverter(schema); } if (BooleanWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new BooleanWritableConverter(); } if (BytesWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new BytesWritableConverter(); } if (ByteWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new ByteWritableConverter(); } if (DoubleWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new DoubleWritableConverter(); } if (FloatWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new FloatWritableConverter(); } if (IntWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new IntWritableConverter(); } if (LongWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new LongWritableConverter(); } if (NullWritable.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new NullWritableConverter(); } if (Text.class.isAssignableFrom(inputClass)) { return (AvroDatumConverter) new TextConverter(); } throw new UnsupportedOperationException("Unsupported input type: " + inputClass.getName()); } /** Converts AvroWrappers into their wrapped Avro data. */ public static class AvroWrapperConverter extends AvroDatumConverter, Object> { private final Schema mSchema; public AvroWrapperConverter(Schema schema) { mSchema = schema; } /** {@inheritDoc} */ @Override public Object convert(AvroWrapper input) { return input.datum(); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts BooleanWritables into Booleans. */ public static class BooleanWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. 
*/ public BooleanWritableConverter() { mSchema = Schema.create(Schema.Type.BOOLEAN); } /** {@inheritDoc} */ @Override public Boolean convert(BooleanWritable input) { return input.get(); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts BytesWritables into ByteBuffers. */ public static class BytesWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. */ public BytesWritableConverter() { mSchema = Schema.create(Schema.Type.BYTES); } /** {@inheritDoc} */ @Override public ByteBuffer convert(BytesWritable input) { return ByteBuffer.wrap(input.getBytes()); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts ByteWritables into GenericFixed of size 1. */ public static class ByteWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. */ public ByteWritableConverter() { mSchema = Schema.createFixed("Byte", "A single byte", "org.apache.avro.mapreduce", 1); } /** {@inheritDoc} */ @Override public GenericFixed convert(ByteWritable input) { return new GenericData.Fixed(mSchema, new byte[] { input.get() }); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts DoubleWritables into Doubles. */ public static class DoubleWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. */ public DoubleWritableConverter() { mSchema = Schema.create(Schema.Type.DOUBLE); } /** {@inheritDoc} */ @Override public Double convert(DoubleWritable input) { return input.get(); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts FloatWritables into Floats. */ public static class FloatWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. 
*/ public FloatWritableConverter() { mSchema = Schema.create(Schema.Type.FLOAT); } /** {@inheritDoc} */ @Override public Float convert(FloatWritable input) { return input.get(); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts IntWritables into Ints. */ public static class IntWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. */ public IntWritableConverter() { mSchema = Schema.create(Schema.Type.INT); } /** {@inheritDoc} */ @Override public Integer convert(IntWritable input) { return input.get(); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts LongWritables into Longs. */ public static class LongWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. */ public LongWritableConverter() { mSchema = Schema.create(Schema.Type.LONG); } /** {@inheritDoc} */ @Override public Long convert(LongWritable input) { return input.get(); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts NullWritables into Nulls. */ public static class NullWritableConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. */ public NullWritableConverter() { mSchema = Schema.create(Schema.Type.NULL); } /** {@inheritDoc} */ @Override public Object convert(NullWritable input) { return null; } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } /** Converts Text into CharSequences. */ public static class TextConverter extends AvroDatumConverter { private final Schema mSchema; /** Constructor. */ public TextConverter() { mSchema = Schema.create(Schema.Type.STRING); } /** {@inheritDoc} */ @Override public CharSequence convert(Text input) { return input.toString(); } /** {@inheritDoc} */ @Override public Schema getWriterSchema() { return mSchema; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy