All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.operators.CoGroupWithSolutionSetFirstDriver Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.operators;

import java.util.Collections;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.functions.CoGroupFunction;
import org.apache.flink.api.common.operators.util.JoinHashMap;
import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeComparatorFactory;
import org.apache.flink.api.common.typeutils.TypePairComparator;
import org.apache.flink.api.common.typeutils.TypePairComparatorFactory;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.runtime.iterative.concurrent.SolutionSetBroker;
import org.apache.flink.runtime.iterative.task.AbstractIterativeTask;
import org.apache.flink.runtime.operators.hash.CompactingHashTable;
import org.apache.flink.runtime.operators.util.TaskConfig;
import org.apache.flink.runtime.util.NonReusingKeyGroupedIterator;
import org.apache.flink.runtime.util.ReusingKeyGroupedIterator;
import org.apache.flink.runtime.util.SingleElementIterator;
import org.apache.flink.util.Collector;

public class CoGroupWithSolutionSetFirstDriver implements ResettableDriver, OT> {
	
	private TaskContext, OT> taskContext;
	
	private CompactingHashTable hashTable;
	
	private JoinHashMap objectMap;
	
	private TypeSerializer probeSideSerializer;
	
	private TypeComparator probeSideComparator;

	private TypeSerializer solutionSetSerializer;


	private TypePairComparator pairComparator;
	
	private IT1 solutionSideRecord;
	
	protected volatile boolean running;

	private boolean objectReuseEnabled = false;

	// --------------------------------------------------------------------------------------------
	
	@Override
	public void setup(TaskContext, OT> context) {
		this.taskContext = context;
		this.running = true;
	}
	
	@Override
	public int getNumberOfInputs() {
		return 1;
	}
	
	@Override
	public Class> getStubType() {
		@SuppressWarnings("unchecked")
		final Class> clazz = (Class>) (Class) CoGroupFunction.class;
		return clazz;
	}
	
	@Override
	public int getNumberOfDriverComparators() {
		return 1;
	}
	
	@Override
	public boolean isInputResettable(int inputNum) {
		if (inputNum < 0 || inputNum > 1) {
			throw new IndexOutOfBoundsException();
		}
		
		// from the perspective of the task that runs this operator, there is only one input, which is not resettable
		// we implement the resettable interface only in order to avoid that this class is re-instantiated for
		// every iteration
		return false;
	}
	
	// --------------------------------------------------------------------------------------------

	@Override
	@SuppressWarnings("unchecked")
	public void initialize() {
		
		final TypeComparator solutionSetComparator;
		
		// grab a handle to the hash table from the iteration broker
		if (taskContext instanceof AbstractIterativeTask) {
			AbstractIterativeTask iterativeTaskContext = (AbstractIterativeTask) taskContext;
			String identifier = iterativeTaskContext.brokerKey();
			
			Object table = SolutionSetBroker.instance().get(identifier);
			if (table instanceof CompactingHashTable) {
				this.hashTable = (CompactingHashTable) table;
				solutionSetSerializer = this.hashTable.getBuildSideSerializer();
				solutionSetComparator = this.hashTable.getBuildSideComparator().duplicate();
			}
			else if (table instanceof JoinHashMap) {
				this.objectMap = (JoinHashMap) table;
				solutionSetSerializer = this.objectMap.getBuildSerializer();
				solutionSetComparator = this.objectMap.getBuildComparator().duplicate();
			}
			else {
				throw new RuntimeException("Unrecognized solution set index: " + table);
			}
		} else {
			throw new RuntimeException("The task context of this driver is no iterative task context.");
		}
		
		TaskConfig config = taskContext.getTaskConfig();
		ClassLoader classLoader = taskContext.getUserCodeClassLoader();
		
		TypeComparatorFactory probeSideComparatorFactory = config.getDriverComparator(0, classLoader);
		
		this.probeSideSerializer = taskContext.getInputSerializer(0).getSerializer();
		this.probeSideComparator = probeSideComparatorFactory.createComparator();
		
		ExecutionConfig executionConfig = taskContext.getExecutionConfig();
		objectReuseEnabled = executionConfig.isObjectReuseEnabled();

		if (objectReuseEnabled) {
			solutionSideRecord = solutionSetSerializer.createInstance();
		}
		
		TypePairComparatorFactory factory = taskContext.getTaskConfig().getPairComparatorFactory(taskContext.getUserCodeClassLoader());
		pairComparator = factory.createComparator21(solutionSetComparator, this.probeSideComparator);
	}

	@Override
	public void prepare() {
		// nothing to prepare in each iteration
		// later, if we support out-of-core operation, we need to put the code in here
		// that brings the initial in-memory partitions into memory
	}

	@Override
	public void run() throws Exception {

		final CoGroupFunction coGroupStub = taskContext.getStub();
		final Collector collector = taskContext.getOutputCollector();
		
		final SingleElementIterator siIter = new SingleElementIterator();
		final Iterable emptySolutionSide = Collections.emptySet();

		if (objectReuseEnabled) {
			final ReusingKeyGroupedIterator probeSideInput = new ReusingKeyGroupedIterator(taskContext.getInput(0), probeSideSerializer, probeSideComparator);
			if (this.hashTable != null) {
				final CompactingHashTable join = hashTable;
				final CompactingHashTable.HashTableProber prober = join.getProber(this.probeSideComparator, this.pairComparator);


				IT1 buildSideRecord = solutionSideRecord;

				while (this.running && probeSideInput.nextKey()) {
					IT2 current = probeSideInput.getCurrent();

					IT1 matchedRecord = prober.getMatchFor(current, buildSideRecord);
					if (matchedRecord != null) {
						siIter.set(matchedRecord);
						coGroupStub.coGroup(siIter, probeSideInput.getValues(), collector);
					} else {
						coGroupStub.coGroup(emptySolutionSide, probeSideInput.getValues(), collector);
					}
				}
			} else {
				final JoinHashMap join = this.objectMap;
				final JoinHashMap.Prober prober = join.createProber(this.probeSideComparator, this.pairComparator);
				final TypeSerializer serializer = join.getBuildSerializer();

				while (this.running && probeSideInput.nextKey()) {
					IT2 current = probeSideInput.getCurrent();

					IT1 buildSideRecord = prober.lookupMatch(current);
					if (buildSideRecord != null) {
						siIter.set(serializer.copy(buildSideRecord));
						coGroupStub.coGroup(siIter, probeSideInput.getValues(), collector);
					} else {
						coGroupStub.coGroup(emptySolutionSide, probeSideInput.getValues(), collector);
					}
				}
			}
		} else {
			final NonReusingKeyGroupedIterator probeSideInput = new NonReusingKeyGroupedIterator(taskContext.getInput(0), probeSideComparator);
			if (this.hashTable != null) {
				final CompactingHashTable join = hashTable;
				final CompactingHashTable.HashTableProber prober = join.getProber(this
						.probeSideComparator, this.pairComparator);

				IT1 buildSideRecord;

				while (this.running && probeSideInput.nextKey()) {
					IT2 current = probeSideInput.getCurrent();

					buildSideRecord = prober.getMatchFor(current);
					if (buildSideRecord != null) {
						siIter.set(solutionSetSerializer.copy(buildSideRecord));
						coGroupStub.coGroup(siIter, probeSideInput.getValues(), collector);
					} else {
						coGroupStub.coGroup(emptySolutionSide, probeSideInput.getValues(), collector);
					}
				}
			} else {
				final JoinHashMap join = this.objectMap;
				final JoinHashMap.Prober prober = join.createProber(this.probeSideComparator, this.pairComparator);
				final TypeSerializer serializer = join.getBuildSerializer();

				while (this.running && probeSideInput.nextKey()) {
					IT2 current = probeSideInput.getCurrent();

					IT1 buildSideRecord = prober.lookupMatch(current);
					if (buildSideRecord != null) {
						siIter.set(serializer.copy(buildSideRecord));
						coGroupStub.coGroup(siIter, probeSideInput.getValues(), collector);
					} else {
						coGroupStub.coGroup(emptySolutionSide, probeSideInput.getValues(), collector);
					}
				}
			}

		}
	}

	@Override
	public void cleanup() {}
	
	@Override
	public void reset() {}

	@Override
	public void teardown() {
		// hash table is torn down by the iteration head task
	}

	@Override
	public void cancel() {
		this.running = false;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy