All Downloads are FREE. Search and download functionalities are using the official Maven repository.

datafu.hourglass.jobs.PartitionCollapsingIncrementalJob Maven / Gradle / Ivy

The newest version!
/**
* Copyright 2013 LinkedIn, Inc
* 
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
* 
* http://www.apache.org/licenses/LICENSE-2.0
* 
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/

package datafu.hourglass.jobs;

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.conf.Configuration;

import datafu.hourglass.model.Accumulator;
import datafu.hourglass.model.Mapper;
import datafu.hourglass.model.Merger;

/**
 * A concrete version of {@link AbstractPartitionCollapsingIncrementalJob}.
 * 
 * This provides an alternative to extending {@link AbstractPartitionCollapsingIncrementalJob}.
 * Instead of extending that class and implementing its abstract methods, this concrete version
 * can be used directly.  Getters and setters have been provided for the abstract methods.
 * 
 * @author "Matthew Hayes"
 *
 */
public class PartitionCollapsingIncrementalJob extends AbstractPartitionCollapsingIncrementalJob
{
  private Mapper _mapper;
  private Accumulator _combiner;
  private Accumulator _reducer;
  private Schema _keySchema;
  private Schema _intermediateValueSchema;
  private Schema _outputValueSchema;
  private Merger _merger;
  private Merger _oldMerger;
  private Setup _setup;

  /**
   * Initializes the job.  The job name is derived from the name of a provided class.
   * 
   * @param cls class to base job name on
   * @throws IOException
   */
  public PartitionCollapsingIncrementalJob(@SuppressWarnings("rawtypes") Class cls) throws IOException
  {
    setName(cls.getName());
  }

  @Override
  public Mapper getMapper()
  {
    return _mapper;
  }

  @Override
  public Accumulator getCombinerAccumulator()
  {
    return _combiner;
  }
  
  @Override
  public Accumulator getReducerAccumulator()
  {
    return _reducer;
  }

  @Override
  protected Schema getKeySchema()
  {
    return _keySchema;
  }

  @Override
  protected Schema getIntermediateValueSchema()
  {
    return _intermediateValueSchema;
  }

  @Override
  protected Schema getOutputValueSchema()
  {
    return _outputValueSchema;
  }

  @Override
  public Merger getRecordMerger()
  {
    return _merger;
  }

  @Override
  public Merger getOldRecordMerger()
  {
    return _oldMerger;
  }

  /**
   * Set the mapper.
   * 
   * @param mapper
   */
  public void setMapper(Mapper mapper)
  {
    this._mapper = mapper;
  }

  /**
   * Set the accumulator for the combiner
   * 
   * @param combiner accumulator for the combiner
   */
  public void setCombinerAccumulator(Accumulator combiner)
  {
    this._combiner = combiner;
  }

  /**
   * Set the accumulator for the reducer.
   * 
   * @param reducer accumulator for the reducer
   */
  public void setReducerAccumulator(Accumulator reducer)
  {
    this._reducer = reducer;
  }

  /**
   * Sets the Avro schema for the key.
   * 

* This is also used as the key for the map output. * * @param keySchema key schema */ public void setKeySchema(Schema keySchema) { this._keySchema = keySchema; } /** * Sets the Avro schema for the intermediate value. *

* This is also used for the value for the map output. * * @param intermediateValueSchema intermediate value schema */ public void setIntermediateValueSchema(Schema intermediateValueSchema) { this._intermediateValueSchema = intermediateValueSchema; } /** * Sets the Avro schema for the output data. * * @param outputValueSchema output value schema */ public void setOutputValueSchema(Schema outputValueSchema) { this._outputValueSchema = outputValueSchema; } /** * Sets the record merger that is capable of merging previous output with a new partial output. * This is only needed when reusing previous output where the intermediate and output schemas are different. * New partial output is produced by the reducer from new input that is after the previous output. * * @param merger */ public void setMerger(Merger merger) { this._merger = merger; } /** * Sets the record merger that is capable of unmerging old partial output from the new output. * This is only needed when reusing previous output for a fixed-length sliding window. * The new output is the result of merging the previous output with the new partial output. * The old partial output is produced by the reducer from old input data before the time range of * the previous output. * * @param oldMerger merger */ public void setOldMerger(Merger oldMerger) { this._oldMerger = oldMerger; } /** * Set callback to provide custom configuration before job begins execution. * * @param setup object with callback method */ public void setOnSetup(Setup setup) { _setup = setup; } @Override public void config(Configuration conf) { super.config(conf); if (_setup != null) { _setup.setup(conf); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy