All Downloads are FREE. Search and download functionalities are using the official Maven repository.

co.cask.hydrator.plugin.batch.CopybookInputFormat Maven / Gradle / Ivy

The newest version!
/*
 * Copyright © 2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin.batch;

import net.sf.JRecord.Common.AbstractFieldValue;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import java.io.IOException;
import java.util.LinkedHashMap;

/**
 * InputFormat class for CopybookReader plugin.
 */
public class CopybookInputFormat extends FileInputFormat> {

  public static final String COPYBOOK_INPUTFORMAT_CBL_CONTENTS = "copybook.inputformat.cbl.contents";

  public static final String COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH = "copybook.inputformat.data.hdfs.path";

  public static void setCopybookInputformatCblContents(Job job, String copybookCOntents) {
    job.getConfiguration().set(COPYBOOK_INPUTFORMAT_CBL_CONTENTS, copybookCOntents);
  }

  public static void setBinaryFilePath(Job job, String binaryFile) {
    job.getConfiguration().set(COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH, binaryFile);
  }

  @Override
  public RecordReader>
  createRecordReader(InputSplit split, TaskAttemptContext context)
    throws IOException, InterruptedException {
    return new CopybookRecordReader();
  }

  @Override
  protected boolean isSplitable(JobContext context, Path file) {
    Configuration conf = context.getConfiguration();
    Path path = new Path(conf.get(COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH));
    final CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path);
    return (null == codec) ? true : codec instanceof SplittableCompressionCodec;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy