All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.operators.hash.ReOpenableMutableHashTable Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.flink.runtime.operators.hash;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypePairComparator;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.runtime.io.disk.iomanager.FileIOChannel;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.util.MutableObjectIterator;

public class ReOpenableMutableHashTable extends MutableHashTable {

	/** Channel for the spilled partitions */
	private final FileIOChannel.Enumerator spilledInMemoryPartitions;
	
	/** Stores the initial partitions and a list of the files that contain the spilled contents */
	private List> initialPartitions;
	
	/**
	 * The values of these variables are stored here after the initial open()
	 * Required to restore the initial state before each additional probe phase.
	 */
	private int initialBucketCount;
	private byte initialPartitionFanOut;
	
	private boolean spilled = false;
	
	public ReOpenableMutableHashTable(TypeSerializer buildSideSerializer,
			TypeSerializer probeSideSerializer,
			TypeComparator buildSideComparator,
			TypeComparator probeSideComparator,
			TypePairComparator comparator,
			List memorySegments, IOManager ioManager,
			boolean useBitmapFilters) {
		
		super(buildSideSerializer, probeSideSerializer, buildSideComparator,
				probeSideComparator, comparator, memorySegments, ioManager, useBitmapFilters);
		keepBuildSidePartitions = true;
		spilledInMemoryPartitions = ioManager.createChannelEnumerator();
	}
	
	@Override
	public void open(MutableObjectIterator buildSide, MutableObjectIterator probeSide, boolean buildSideOuterJoin) throws IOException {
		super.open(buildSide, probeSide, buildSideOuterJoin);
		initialPartitions = new ArrayList>( partitionsBeingBuilt );
		initialPartitionFanOut = (byte) partitionsBeingBuilt.size();
		initialBucketCount = this.numBuckets;
	}

	public void reopenProbe(MutableObjectIterator probeInput) throws IOException {
		if (this.closed.get()) {
			throw new IllegalStateException("Cannot open probe input because hash join has already been closed");
		}
		partitionsBeingBuilt.clear();
		probeIterator = new ProbeIterator(probeInput, probeSideSerializer.createInstance());
		// We restore the same "partitionsBeingBuild" state as after the initial open call.
		partitionsBeingBuilt.addAll(initialPartitions);
		
		if (spilled) {
			this.currentRecursionDepth = 0;
			initTable(initialBucketCount, initialPartitionFanOut);
			
			//setup partitions for insertion:
			for (int i = 0; i < this.partitionsBeingBuilt.size(); i++) {
				ReOpenableHashPartition part = (ReOpenableHashPartition) this.partitionsBeingBuilt.get(i);
				if (part.isInMemory()) {
					ensureNumBuffersReturned(part.initialPartitionBuffersCount);
					part.restorePartitionBuffers(ioManager, availableMemory);
					// now, index the partition through a hash table
					final HashPartition.PartitionIterator pIter = part.getPartitionIterator(this.buildSideComparator);
					BT record = this.buildSideSerializer.createInstance();
					
					while ((record = pIter.next(record)) != null) {
						final int hashCode = hash(pIter.getCurrentHashCode(), 0);
						final int posHashCode = hashCode % initialBucketCount;
						final long pointer = pIter.getPointer();
						// get the bucket for the given hash code
						final int bucketArrayPos = posHashCode >> this.bucketsPerSegmentBits;
						final int bucketInSegmentPos = (posHashCode & this.bucketsPerSegmentMask) << NUM_INTRA_BUCKET_BITS;
						final MemorySegment bucket = this.buckets[bucketArrayPos];
						insertBucketEntry(part, bucket, bucketInSegmentPos, hashCode, pointer, true);
					}
				} else {
					this.writeBehindBuffersAvailable--; // we are not in-memory, thus the probe side buffer will grab one wbb.
					if (this.writeBehindBuffers.size() == 0) { // prepareProbePhase always requires one buffer in the writeBehindBuffers-Queue.
						this.writeBehindBuffers.add(getNextBuffer());
						this.writeBehindBuffersAvailable++;
					}
					part.prepareProbePhase(ioManager,currentEnumerator,writeBehindBuffers);
				}
			}
			// spilled partitions are automatically added as pending partitions after in-memory has been handled
		} else {
			// the build input completely fits into memory, hence everything is still in memory.
			for (int partIdx = 0; partIdx < partitionsBeingBuilt.size(); partIdx++) {
				final HashPartition p = partitionsBeingBuilt.get(partIdx);
				p.prepareProbePhase(ioManager,currentEnumerator,writeBehindBuffers);
			}
		}
	}
	

	/**
	 * This method stores the initial hash table's contents on disk if hash join needs the memory
	 * for further partition processing.
	 * The initial hash table is rebuild before a new secondary input is opened.
	 * 
	 * For the sake of simplicity we iterate over all in-memory elements and store them in one file.
	 * The file is hashed into memory upon opening a new probe input.
	 * @throws IOException 
	 */
	void storeInitialHashTable() throws IOException {
		if (spilled) {
			return; // we create the initialHashTable only once. Later calls are caused by deeper recursion lvls
		}
		spilled = true;
		
		for (int partIdx = 0; partIdx < initialPartitions.size(); partIdx++) {
			final ReOpenableHashPartition p = (ReOpenableHashPartition) initialPartitions.get(partIdx);
			if (p.isInMemory()) { // write memory resident partitions to disk
				this.writeBehindBuffersAvailable += p.spillInMemoryPartition(spilledInMemoryPartitions.next(), ioManager, writeBehindBuffers);
			}
		}
	}
	
	@Override
	protected boolean prepareNextPartition() throws IOException {
		// check if there will be further partition processing.
		this.furtherPartitioning = false;
		for (int i = 0; i < this.partitionsBeingBuilt.size(); i++) {
			final HashPartition p = this.partitionsBeingBuilt.get(i);
			if (!p.isInMemory() && p.getProbeSideRecordCount() != 0) {
				furtherPartitioning = true;
				break;
			}
		}
		if (furtherPartitioning) {
			((ReOpenableMutableHashTable) this).storeInitialHashTable();
		}
		return super.prepareNextPartition();
	}
	
	
	@Override
	protected void releaseTable() {
		if(furtherPartitioning | this.currentRecursionDepth > 0) {
			super.releaseTable();
		}
	}
	
	@Override
	protected HashPartition getNewInMemoryPartition(int number, int recursionLevel) {
		return new ReOpenableHashPartition(this.buildSideSerializer, this.probeSideSerializer,
				number, recursionLevel, this.availableMemory.remove(this.availableMemory.size() - 1),
				this, this.segmentSize);
	}
	@Override
	public void close() {
		if (partitionsBeingBuilt.size() == 0) { // partitions are cleared after the build phase. But we need to drop
			// memory with them.
			this.partitionsBeingBuilt.addAll(initialPartitions);
		}
		this.furtherPartitioning = true; // fake, to release table properly (close() will call releaseTable())
		super.close();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy