All downloads are free. The search and download functionality uses the official Maven repository.

eu.stratosphere.pact.runtime.task.CoGroupWithSolutionSetSecondDriver Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.pact.runtime.task;

import java.util.Iterator;

import eu.stratosphere.api.common.functions.GenericCoGrouper;
import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypePairComparator;
import eu.stratosphere.api.common.typeutils.TypePairComparatorFactory;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.pact.runtime.hash.CompactingHashTable;
import eu.stratosphere.pact.runtime.iterative.concurrent.SolutionSetBroker;
import eu.stratosphere.pact.runtime.iterative.task.AbstractIterativePactTask;
import eu.stratosphere.pact.runtime.util.EmptyIterator;
import eu.stratosphere.pact.runtime.util.KeyGroupedIterator;
import eu.stratosphere.pact.runtime.util.SingleElementIterator;
import eu.stratosphere.util.Collector;

public class CoGroupWithSolutionSetSecondDriver implements ResettablePactDriver, OT> {
	
	private PactTaskContext, OT> taskContext;
	
	private CompactingHashTable hashTable;
	
	private TypeSerializer probeSideSerializer;
	
	private TypeComparator probeSideComparator;
	
	private TypePairComparator pairComparator;
	
	private IT2 solutionSideRecord;
	
	protected volatile boolean running;

	// --------------------------------------------------------------------------------------------
	
	@Override
	public void setup(PactTaskContext, OT> context) {
		this.taskContext = context;
		this.running = true;
	}
	
	@Override
	public int getNumberOfInputs() {
		return 1;
	}
	
	@Override
	public Class> getStubType() {
		@SuppressWarnings("unchecked")
		final Class> clazz = (Class>) (Class) GenericCoGrouper.class;
		return clazz;
	}
	
	@Override
	public boolean requiresComparatorOnInput() {
		return true;
	}
	
	@Override
	public boolean isInputResettable(int inputNum) {
		if (inputNum < 0 || inputNum > 1) {
			throw new IndexOutOfBoundsException();
		}
		
		// from the perspective of the task that runs this operator, there is only one input, which is not resettable
		// we implement the resettable interface only in order to avoid that this class is re-instantiated for
		// every iteration
		return false;
	}
	
	// --------------------------------------------------------------------------------------------

	@SuppressWarnings("unchecked")
	@Override
	public void initialize() {
		// grab a handle to the hash table from the iteration broker
		if (taskContext instanceof AbstractIterativePactTask) {
			AbstractIterativePactTask iterativeTaskContext = (AbstractIterativePactTask) taskContext;
			String identifier = iterativeTaskContext.brokerKey();
			this.hashTable = (CompactingHashTable) SolutionSetBroker.instance().get(identifier);
		} else {
			throw new RuntimeException("The task context of this driver is no iterative task context.");
		}
		
		TypeSerializer buildSideSerializer = hashTable.getBuildSideSerializer();
		TypeComparator buildSideComparator = hashTable.getBuildSideComparator().duplicate();
		
		probeSideSerializer = taskContext.getInputSerializer(0).getSerializer();
		probeSideComparator = taskContext.getInputComparator(0);
		
		solutionSideRecord = buildSideSerializer.createInstance();
		
		TypePairComparatorFactory pairCompFactory = taskContext.getTaskConfig().getPairComparatorFactory(taskContext.getUserCodeClassLoader());
		pairComparator = pairCompFactory.createComparator12(probeSideComparator, buildSideComparator);
	}

	@Override
	public void prepare() {
		// nothing to prepare in each iteration
		// later, if we support out-of-core operation, we need to put the code in here
		// that brings the initial in-memory partitions into memory
	}

	@Override
	public void run() throws Exception {

		final GenericCoGrouper coGroupStub = taskContext.getStub();
		final Collector collector = taskContext.getOutputCollector();
		
		IT2 buildSideRecord = solutionSideRecord;
			
		final CompactingHashTable join = hashTable;
		
		final KeyGroupedIterator probeSideInput = new KeyGroupedIterator(taskContext.getInput(0), probeSideSerializer, probeSideComparator);
		final SingleElementIterator siIter = new SingleElementIterator();
		final Iterator emptySolutionSide = EmptyIterator.get();
		
		final CompactingHashTable.HashTableProber prober = join.getProber(this.probeSideComparator, this.pairComparator);
		
		while (this.running && probeSideInput.nextKey()) {
			IT1 current = probeSideInput.getCurrent();
			
			if (prober.getMatchFor(current, buildSideRecord)) {
				siIter.set(buildSideRecord);
				coGroupStub.coGroup(probeSideInput.getValues(), siIter, collector);
			}
			else {
				coGroupStub.coGroup(probeSideInput.getValues(), emptySolutionSide, collector);
			}
		}
	}

	@Override
	public void cleanup() {}
	
	@Override
	public void reset() {}

	@Override
	public void teardown() {
		// hash table is torn down by the iteration head task
	}

	@Override
	public void cancel() {
		this.running = false;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy