/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.contrib.utils.join;
import java.io.IOException;
import java.util.Iterator;
import java.util.SortedMap;
import java.util.TreeMap;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
/**
* This abstract class serves as the base class for the reducer class of a data
* join job. The reduce function will first group the values according to their
* input tags, and then compute the cross product over the groups. For each
* tuple in the cross product, it calls the following method, which is expected
* to be implemented in a subclass.
*
* protected abstract TaggedMapOutput combine(Object[] tags, Object[] values);
*
* The above method is expected to produce one output value from an array of
* records from different sources. The user code can also perform filtering
* here: it can return null if it decides that the records do not meet certain
* conditions.
*
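* As a rough illustration only (the SampleTaggedMapOutput subclass, the
* tab-joined value format, and the inner-join filter below are assumptions
* made for the sake of the example, not part of this class), a combine()
* implementation might look like:
*
* <pre>{@code
* protected TaggedMapOutput combine(Object[] tags, Object[] values) {
*   // Inner join: drop tuples that lack a record from every tagged source.
*   if (tags.length < 2) {
*     return null;
*   }
*   StringBuilder joined = new StringBuilder();
*   for (int i = 0; i < values.length; i++) {
*     if (i > 0) {
*       joined.append("\t");
*     }
*     // Each value is a TaggedMapOutput wrapping one source record.
*     TaggedMapOutput tmo = (TaggedMapOutput) values[i];
*     joined.append(tmo.getData().toString());
*   }
*   // SampleTaggedMapOutput is assumed to be a TaggedMapOutput subclass
*   // that wraps a Text record.
*   TaggedMapOutput result = new SampleTaggedMapOutput(new Text(joined.toString()));
*   result.setTag((Text) tags[0]);
*   return result;
* }
* }</pre>
*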
*/
public abstract class DataJoinReducerBase extends JobBase {
/** Reporter for the current reduce task; used to publish the final status in close(). */
protected Reporter reporter = null;
/** Cap on the number of values processed per reduce group, from datajoin.maxNumOfValuesPerGroup. */
private long maxNumOfValuesPerGroup = 100;
/** Largest number of values seen for any single key so far. */
protected long largestNumOfValues = 0;
/** Number of values seen for the current key. */
protected long numOfValues = 0;
/** Count of output records collected. */
protected long collected = 0;
/** The job configuration, saved in configure(). */
protected JobConf job;
public void close() throws IOException {
if (this.reporter != null) {
this.reporter.setStatus(super.getReport());
}
}
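/**
 * Saves the job configuration and reads the per-group value cap from
 * datajoin.maxNumOfValuesPerGroup (default 100).
 */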
public void configure(JobConf job) {
super.configure(job);
this.job = job;
this.maxNumOfValuesPerGroup = job.getLong("datajoin.maxNumOfValuesPerGroup", 100);
}
/**
* The subclass can provide a different implementation of ResetableIterator.
* This is necessary if the number of values in a reduce call is very high.
*
* The default provided here uses ArrayListBackedIterator
*
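* For example, if the values for a key may not fit in memory, a subclass
* might return an iterator that spills to disk instead. A minimal sketch
* (DiskBackedIterator below is a hypothetical implementation, not provided
* by this package):
*
* <pre>{@code
* protected ResetableIterator createResetableIterator() {
*   // Hypothetical ResetableIterator that buffers values on local disk
*   // rather than in an in-memory ArrayList.
*   return new DiskBackedIterator();
* }
* }</pre>
*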
* @return a ResetableIterator instance.
*/
protected ResetableIterator createResetableIterator() {
return new ArrayListBackedIterator();
}
/**
* This is the function that re-groups values for a key into sub-groups based
* on a secondary key (input tag).
*
* @param arg1 the iterator over the values for the current key
* @return a map from input tag to the sub-group of values carrying that tag
*/
private SortedMap