All downloads are free. The search and download functionality uses the official Maven repository.

eu.stratosphere.pact.runtime.hash.BuildSecondHashMatchIterator Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 **********************************************************************************************************************/

package eu.stratosphere.pact.runtime.hash;

import java.io.IOException;
import java.util.List;

import eu.stratosphere.api.common.functions.GenericJoiner;
import eu.stratosphere.api.common.typeutils.TypeComparator;
import eu.stratosphere.api.common.typeutils.TypePairComparator;
import eu.stratosphere.api.common.typeutils.TypeSerializer;
import eu.stratosphere.core.memory.MemorySegment;
import eu.stratosphere.nephele.services.iomanager.IOManager;
import eu.stratosphere.nephele.services.memorymanager.MemoryAllocationException;
import eu.stratosphere.nephele.services.memorymanager.MemoryManager;
import eu.stratosphere.nephele.template.AbstractInvokable;
import eu.stratosphere.pact.runtime.task.util.JoinTaskIterator;
import eu.stratosphere.util.Collector;
import eu.stratosphere.util.MutableObjectIterator;


/**
 * An implementation of the {@link eu.stratosphere.pact.runtime.task.util.JoinTaskIterator} that uses a hybrid-hash-join
 * internally to match the records with equal key. The build side of the hash is the second input of the match.
 * <p>
 * The second input is handed to the hash table as the build side and the first input as the probe side. Each call
 * to {@link #callWithNextKey(GenericJoiner, Collector)} advances the hash table by one record and invokes the join
 * function once for every build-side record the table returns for it.
 * <p>
 * NOTE(review): the type-argument lists of the project types used here ({@code MutableHashTable<BT, PT>},
 * {@code JoinTaskIterator<V1, V2, O>}, {@code TypePairComparator<PT, BT>}) were restored from how the values are
 * used in this class -- confirm against the declarations of those types.
 *
 * @param <V1> The type of the records from the first input (the probe side).
 * @param <V2> The type of the records from the second input (the build side).
 * @param <O> The type of the records handed to the collector by the join function.
 */
public final class BuildSecondHashMatchIterator<V1, V2, O> implements JoinTaskIterator<V1, V2, O> {
	
	/** The hash table; built from the second input and probed with the first input. */
	private final MutableHashTable<V2, V1> hashJoin;
	
	/** Reusable instance passed to the build-side iterator for the first (and every further) match. */
	private final V2 nextBuildSideObject;
	
	/** Reusable instance passed to the build-side iterator when looking ahead for a second match. */
	private final V2 tempBuildSideRecord;
	
	/** Reusable instance that receives copies of the current probe record. */
	private final V1 probeCopy;
	
	/** Serializer used to copy the probe record before each call to the join function. */
	private final TypeSerializer<V1> probeSideSerializer;
	
	/** The memory manager that receives the hash table's memory back in {@link #close()}. */
	private final MemoryManager memManager;
	
	private final MutableObjectIterator<V1> firstInput;
	
	private final MutableObjectIterator<V2> secondInput;
	
	/** Cleared by {@link #abort()}; checked between join calls while iterating over multiple matches. */
	private volatile boolean running = true;
	
	// --------------------------------------------------------------------------------------------
	
	/**
	 * Creates the iterator and the hash table behind it. The memory for the hash table is allocated here,
	 * through {@link #getHashJoin}.
	 *
	 * @param firstInput The first input; used as the probe side.
	 * @param secondInput The second input; used as the build side.
	 * @param serializer1 The serializer for records of the first input.
	 * @param comparator1 The comparator for records of the first input.
	 * @param serializer2 The serializer for records of the second input.
	 * @param comparator2 The comparator for records of the second input.
	 * @param pairComparator The comparator between records of the two inputs.
	 * @param memManager The memory manager to allocate the hash table's memory from.
	 * @param ioManager The I/O manager passed on to the hash table.
	 * @param ownerTask The task on whose behalf the memory is allocated.
	 * @param totalMemory The amount of memory from which the number of pages to allocate is computed.
	 * @throws MemoryAllocationException Declared for the page allocation done through the memory manager.
	 */
	public BuildSecondHashMatchIterator(MutableObjectIterator<V1> firstInput, MutableObjectIterator<V2> secondInput,
			TypeSerializer<V1> serializer1, TypeComparator<V1> comparator1,
			TypeSerializer<V2> serializer2, TypeComparator<V2> comparator2,
			TypePairComparator<V1, V2> pairComparator,
			MemoryManager memManager, IOManager ioManager, AbstractInvokable ownerTask, long totalMemory)
	throws MemoryAllocationException
	{		
		this.memManager = memManager;
		this.firstInput = firstInput;
		this.secondInput = secondInput;
		this.probeSideSerializer = serializer1;
		
		this.nextBuildSideObject = serializer2.createInstance();
		this.tempBuildSideRecord = serializer2.createInstance();
		this.probeCopy = serializer1.createInstance();
		
		// the second input is the build side, so its serializer and comparator come first
		this.hashJoin = getHashJoin(serializer2, comparator2, serializer1, comparator1, pairComparator,
			memManager, ioManager, ownerTask, totalMemory);
	}
	
	// --------------------------------------------------------------------------------------------
	
	/**
	 * Opens the hash table with the second input as the build side and the first input as the probe side.
	 */
	@Override
	public void open() throws IOException, MemoryAllocationException, InterruptedException {
		this.hashJoin.open(this.secondInput, this.firstInput);
	}

	/**
	 * Closes the hash table and hands the memory it reports as freed back to the memory manager.
	 */
	@Override
	public void close() {
		// close the join
		this.hashJoin.close();
		
		// free the memory
		final List<MemorySegment> segments = this.hashJoin.getFreedMemory();
		this.memManager.release(segments);
	}

	/**
	 * Advances the hash table by one record and calls the join function for every build-side record that the
	 * table's build-side iterator returns for it.
	 *
	 * @param matchFunction The join function to call for each matching pair.
	 * @param collector The collector passed to the join function.
	 * @return {@code true} if the hash table had a next record (whether or not any build-side record matched),
	 *         {@code false} if it had none.
	 * @throws Exception Declared for the hash table access and the join function calls.
	 */
	@Override
	public boolean callWithNextKey(GenericJoiner<V1, V2, O> matchFunction, Collector<O> collector)
	throws Exception
	{
		if (this.hashJoin.nextRecord())
		{
			// we have a next record, get the iterators to the probe and build side values
			final MutableHashTable.HashBucketIterator<V2, V1> buildSideIterator = this.hashJoin.getBuildSideIterator();
			V2 nextBuildSideRecord = this.nextBuildSideObject;
			
			// get the first build side value
			if ((nextBuildSideRecord = buildSideIterator.next(nextBuildSideRecord)) != null) {
				V2 tmpRec = this.tempBuildSideRecord;
				final V1 probeRecord = this.hashJoin.getCurrentProbeRecord();
				
				// check if there is another build-side value
				if ((tmpRec = buildSideIterator.next(tmpRec)) != null) {
					// more than one build-side value --> hand a fresh copy of the probe record to every call,
					// presumably because the join function may modify the objects it is given
					V1 probe = this.probeSideSerializer.copy(probeRecord, this.probeCopy);
					
					// call match on the first pair
					matchFunction.join(probe, nextBuildSideRecord, collector);
					
					// call match on the second pair
					probe = this.probeSideSerializer.copy(probeRecord, probe);
					matchFunction.join(probe, tmpRec, collector);
					
					// remaining pairs; stop early once abort() has been called
					while (this.running && ((nextBuildSideRecord = buildSideIterator.next(nextBuildSideRecord)) != null)) {
						// make sure we restore the value of the probe side record
						probe = this.probeSideSerializer.copy(probeRecord, probe);
						matchFunction.join(probe, nextBuildSideRecord, collector);
					}
				}
				else {
					// only single pair matches, the probe record is passed on without copying
					matchFunction.join(probeRecord, nextBuildSideRecord, collector);
				}
			}
			return true;
		}
		else {
			return false;
		}
	}
	
	/**
	 * Clears the running flag, which ends the loop over multiple matches in
	 * {@link #callWithNextKey(GenericJoiner, Collector)}, and aborts the hash table.
	 */
	@Override
	public void abort() {
		this.running = false;
		this.hashJoin.abort();
	}
	
	/**
	 * Allocates the memory pages for a hash table and creates the table on top of them.
	 *
	 * @param <BT> The type of the build-side records.
	 * @param <PT> The type of the probe-side records.
	 * @param buildSideSerializer The serializer for the build-side records.
	 * @param buildSideComparator The comparator for the build-side records.
	 * @param probeSideSerializer The serializer for the probe-side records.
	 * @param probeSideComparator The comparator for the probe-side records.
	 * @param pairComparator The comparator between probe-side and build-side records.
	 * @param memManager The memory manager to allocate the pages from.
	 * @param ioManager The I/O manager passed on to the hash table.
	 * @param ownerTask The task on whose behalf the pages are allocated.
	 * @param totalMemory The amount of memory from which the number of pages is computed.
	 * @return The newly created hash table.
	 * @throws MemoryAllocationException Declared for the page allocation done through the memory manager.
	 */
	public <BT, PT> MutableHashTable<BT, PT> getHashJoin(TypeSerializer<BT> buildSideSerializer, TypeComparator<BT> buildSideComparator,
			TypeSerializer<PT> probeSideSerializer, TypeComparator<PT> probeSideComparator,
			TypePairComparator<PT, BT> pairComparator,
			MemoryManager memManager, IOManager ioManager, AbstractInvokable ownerTask, long totalMemory)
	throws MemoryAllocationException
	{
		final int numPages = memManager.computeNumberOfPages(totalMemory);
		final List<MemorySegment> memorySegments = memManager.allocatePages(ownerTask, numPages);
		return new MutableHashTable<BT, PT>(buildSideSerializer, probeSideSerializer, buildSideComparator, probeSideComparator, pairComparator, memorySegments, ioManager);
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy