// org.apache.pig.impl.builtin.GFCross Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pig.impl.builtin;
import java.io.IOException;
import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.pig.EvalFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.UDFContext;
public class GFCross extends EvalFunc {
// Per-instance state derived on the first call to exec():
//   numInputs         - read from field 0 of the input tuple.
//   myNumber          - read from field 1 of the input tuple.
//   numGroupsPerInput - ceil(parallelism ^ (1 / numInputs)).
//   numGroupsGoingTo  - numGroupsPerInput ^ (numInputs - 1).
private int numInputs, myNumber, numGroupsPerInput, numGroupsGoingTo;
// Factory used by exec() to create its output bag.
private BagFactory mBagFactory = BagFactory.getInstance();
// Tuple factory; NOTE(review): its use is outside the visible part of this file.
private TupleFactory mTupleFactory = TupleFactory.getInstance();
// 0 means "not yet initialized". exec() fills this in on its first call, from the
// "mapred.reduce.tasks" job-conf entry when a job conf is available.
private int parallelism = 0;
// Random source; NOTE(review): its use is outside the visible part of this file.
private Random r = new Random();
// Parallelism used when UDFContext returns no job configuration (null).
static private final int DEFAULT_PARALLELISM = 96;
@Override
public DataBag exec(Tuple input) throws IOException {
if (parallelism == 0) {
parallelism = DEFAULT_PARALLELISM;
Configuration cfg = UDFContext.getUDFContext().getJobConf();
if (cfg != null) {
String s = cfg.get("mapred.reduce.tasks");
if (s == null) {
throw new IOException("Unable to determine parallelism from job conf");
}
parallelism = Integer.valueOf(s);
}
numInputs = (Integer)input.get(0);
myNumber = (Integer)input.get(1);
numGroupsPerInput = (int) Math.ceil(Math.pow(parallelism, 1.0/numInputs));
numGroupsGoingTo = (int) Math.pow(numGroupsPerInput,numInputs - 1);
}
DataBag output = mBagFactory.newDefaultBag();
try{
int[] digits = new int[numInputs];
for (int i=0; i