All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.avro.mapreduce.AvroSequenceFileInputFormat Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 */

package org.apache.avro.mapreduce;

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.avro.hadoop.io.AvroSequenceFile;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;

/**
 * An input format for reading from AvroSequenceFiles (sequence files that support Avro data).
 *
 * @param  The input key type.
 * @param  The input value type.
 */
public class AvroSequenceFileInputFormat extends SequenceFileInputFormat {
  /** {@inheritDoc} */
  @Override
  public RecordReader createRecordReader(InputSplit inputSplit, TaskAttemptContext context)
      throws IOException {
    return new AvroSequenceFileRecordReader();
  }

  /**
   * Reads records from a SequenceFile that supports Avro data.
   *
   * 

This class is based on Hadoop's SequenceFileRecordReader, modified to construct an * AvroSequenceFile.Reader instead of a SequenceFile.Reader.

*/ protected class AvroSequenceFileRecordReader extends RecordReader { private SequenceFile.Reader mReader; private long mStart; private long mEnd; private boolean mHasMoreData; private K mCurrentKey; private V mCurrentValue; /** {@inheritDoc} */ @Override public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { FileSplit fileSplit = (FileSplit) split; Configuration conf = context.getConfiguration(); Path path = fileSplit.getPath(); FileSystem fs = path.getFileSystem(conf); // Configure the SequenceFile reader. AvroSequenceFile.Reader.Options options = new AvroSequenceFile.Reader.Options() .withFileSystem(fs) .withInputPath(path) .withConfiguration(conf); Schema keySchema = AvroJob.getInputKeySchema(conf); if (null != keySchema) { options.withKeySchema(keySchema); } Schema valueSchema = AvroJob.getInputValueSchema(conf); if (null != valueSchema) { options.withValueSchema(valueSchema); } mReader = new AvroSequenceFile.Reader(options); mEnd = fileSplit.getStart() + fileSplit.getLength(); if (fileSplit.getStart() > mReader.getPosition()) { // Sync to the beginning of the input split. mReader.sync(fileSplit.getStart()); } mStart = mReader.getPosition(); mHasMoreData = mStart < mEnd; } /** {@inheritDoc} */ @Override @SuppressWarnings("unchecked") public boolean nextKeyValue() throws IOException, InterruptedException { if (!mHasMoreData) { return false; } long pos = mReader.getPosition(); mCurrentKey = (K) mReader.next(mCurrentKey); if (null == mCurrentKey || (pos >= mEnd && mReader.syncSeen())) { mHasMoreData = false; mCurrentKey = null; mCurrentValue = null; } else { mCurrentValue = (V) mReader.getCurrentValue(mCurrentValue); } return mHasMoreData; } /** {@inheritDoc} */ @Override public K getCurrentKey() { return mCurrentKey; } /** {@inheritDoc} */ @Override public V getCurrentValue() { return mCurrentValue; } /** {@inheritDoc} */ @Override public float getProgress() throws IOException { if (mEnd == mStart) { return 0.0f; } else { return Math.min(1.0f, (mReader.getPosition() - mStart) / (float)(mEnd - mStart)); } } /** {@inheritDoc} */ @Override public synchronized void close() throws IOException { mReader.close(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy