All Downloads are FREE. Search and download functionalities are using the official Maven repository.

datafu.hourglass.avro.CombinedAvroKeyInputFormat Maven / Gradle / Ivy

/**
* Copyright 2013 LinkedIn, Inc
* 
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
* 
* http://www.apache.org/licenses/LICENSE-2.0
* 
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package datafu.hourglass.avro;

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.avro.mapreduce.AvroKeyRecordReader;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.CombineFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.CombineFileRecordReader;
import org.apache.hadoop.mapreduce.lib.input.CombineFileSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.log4j.Logger;

import datafu.hourglass.jobs.AbstractPartitionPreservingIncrementalJob;

/**
 * A combined input format for reading Avro data.
 * 
 * @author "Matthew Hayes"
 *
 * @param  Type of data to be read
 */
public class CombinedAvroKeyInputFormat extends CombineFileInputFormat, NullWritable> 
{
  private final Logger LOG = Logger.getLogger(AbstractPartitionPreservingIncrementalJob.class);

  public static class CombinedAvroKeyRecordReader extends AvroKeyRecordReader
  {
    private CombineFileSplit inputSplit;
    private Integer idx;
    
    public CombinedAvroKeyRecordReader(CombineFileSplit inputSplit, TaskAttemptContext context, Integer idx)
    {
      super(AvroJob.getInputKeySchema(context.getConfiguration()));
      this.inputSplit = inputSplit;
      this.idx = idx;            
    }

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException,
        InterruptedException
    {
      this.inputSplit = (CombineFileSplit)inputSplit;
      
      FileSplit fileSplit = new FileSplit(this.inputSplit.getPath(idx), 
                                          this.inputSplit.getOffset(idx), 
                                          this.inputSplit.getLength(idx), 
                                          this.inputSplit.getLocations());
      
      super.initialize(fileSplit, context);
    }
  }
  
  @SuppressWarnings("unchecked")
  @Override
  public RecordReader, NullWritable> createRecordReader(InputSplit inputSplit,
                                                                   TaskAttemptContext context) throws IOException
  {
    Schema readerSchema = AvroJob.getInputKeySchema(context.getConfiguration());
    if (null == readerSchema) {
      LOG.warn("Reader schema was not set. Use AvroJob.setInputKeySchema() if desired.");
      LOG.info("Using a reader schema equal to the writer schema.");
    }
        
    Object c = CombinedAvroKeyRecordReader.class;
    return new CombineFileRecordReader, NullWritable>((CombineFileSplit) inputSplit, context, (Class, NullWritable>>)c);
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy