All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.broadcast.BroadcastVariableMaterialization Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.broadcast;

import org.apache.flink.api.common.functions.BroadcastVariableInitializer;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.runtime.io.network.api.reader.MutableReader;
import org.apache.flink.runtime.operators.BatchTask;
import org.apache.flink.runtime.operators.util.ReaderIterator;
import org.apache.flink.runtime.plugable.DeserializationDelegate;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * This class represents a single materialization of a broadcast variable and maintains a reference count for it. If the
 * reference count reaches zero the variable is no longer accessible and will eventually be garbage-collected.
 *
 * @param  The type of the elements in the broadcast data set.
 */
public class BroadcastVariableMaterialization {

	private static final Logger LOG = LoggerFactory.getLogger(BroadcastVariableMaterialization.class);

	private final Set> references = new HashSet>();

	private final Object materializationMonitor = new Object();

	private final BroadcastVariableKey key;

	private ArrayList data;

	private C transformed;

	private boolean materialized;

	private boolean disposed;

	public BroadcastVariableMaterialization(BroadcastVariableKey key) {
		this.key = key;
	}

	// --------------------------------------------------------------------------------------------

	public void materializeVariable(MutableReader reader, TypeSerializerFactory serializerFactory, BatchTask referenceHolder)
			throws MaterializationExpiredException, IOException {
		Preconditions.checkNotNull(reader);
		Preconditions.checkNotNull(serializerFactory);
		Preconditions.checkNotNull(referenceHolder);

		final boolean materializer;

		// hold the reference lock only while we track references and decide who should be the materializer
		// that way, other tasks can de-register (in case of failure) while materialization is happening
		synchronized (references) {
			if (disposed) {
				throw new MaterializationExpiredException();
			}

			// sanity check
			if (!references.add(referenceHolder)) {
				throw new IllegalStateException(
						String.format("The task %s already holds a reference to the broadcast variable %s.",
								referenceHolder.getEnvironment().getTaskInfo().getTaskNameWithSubtasks(),
								key.toString()));
			}

			materializer = references.size() == 1;
		}

		try {
			@SuppressWarnings("unchecked")
			final MutableReader> typedReader = (MutableReader>) reader;

			@SuppressWarnings("unchecked")
			final TypeSerializer serializer = ((TypeSerializerFactory) serializerFactory).getSerializer();

			final ReaderIterator readerIterator = new ReaderIterator(typedReader, serializer);

			if (materializer) {
				// first one, so we need to materialize;
				if (LOG.isDebugEnabled()) {
					LOG.debug("Getting Broadcast Variable (" + key + ") - First access, materializing.");
				}

				ArrayList data = new ArrayList();

				T element;
				while ((element = readerIterator.next()) != null) {
					data.add(element);
				}

				synchronized (materializationMonitor) {
					this.data = data;
					this.materialized = true;
					materializationMonitor.notifyAll();
				}

				if (LOG.isDebugEnabled()) {
					LOG.debug("Materialization of Broadcast Variable (" + key + ") finished.");
				}
			}
			else {
				// successor: discard all data and refer to the shared variable

				if (LOG.isDebugEnabled()) {
					LOG.debug("Getting Broadcast Variable (" + key + ") - shared access.");
				}

				T element = serializer.createInstance();
				while ((element = readerIterator.next(element)) != null) {
				}

				synchronized (materializationMonitor) {
					while (!this.materialized && !disposed) {
						materializationMonitor.wait();
					}
				}

			}
		}
		catch (Throwable t) {
			// in case of an exception, we need to clean up big time
			decrementReferenceIfHeld(referenceHolder);

			if (t instanceof IOException) {
				throw (IOException) t;
			} else {
				throw new IOException("Materialization of the broadcast variable failed.", t);
			}
		}
	}

	public boolean decrementReference(BatchTask referenceHolder) {
		return decrementReferenceInternal(referenceHolder, true);
	}

	public boolean decrementReferenceIfHeld(BatchTask referenceHolder) {
		return decrementReferenceInternal(referenceHolder, false);
	}

	private boolean decrementReferenceInternal(BatchTask referenceHolder, boolean errorIfNoReference) {
		synchronized (references) {
			if (disposed || references.isEmpty()) {
				if (errorIfNoReference) {
					throw new IllegalStateException("Decrementing reference to broadcast variable that is no longer alive.");
				} else {
					return false;
				}
			}

			if (!references.remove(referenceHolder)) {
				if (errorIfNoReference) {
					throw new IllegalStateException(
							String.format("The task %s did not hold a reference to the broadcast variable %s.",
									referenceHolder.getEnvironment().getTaskInfo().getTaskNameWithSubtasks(),
									key.toString()));
				} else {
					return false;
				}
			}

			if (references.isEmpty()) {
				disposed = true;
				data = null;
				transformed = null;
				return true;
			} else {
				return false;
			}
		}
	}

	// --------------------------------------------------------------------------------------------

	public List getVariable() throws InitializationTypeConflictException {
		if (!materialized) {
			throw new IllegalStateException("The Broadcast Variable has not yet been materialized.");
		}
		if (disposed) {
			throw new IllegalStateException("The Broadcast Variable has been disposed");
		}

		synchronized (references) {
			if (transformed != null) {
				if (transformed instanceof List) {
					@SuppressWarnings("unchecked")
					List casted = (List) transformed;
					return casted;
				} else {
					throw new InitializationTypeConflictException(transformed.getClass());
				}
			}
			else {
				return data;
			}
		}
	}

	public C getVariable(BroadcastVariableInitializer initializer) {
		if (!materialized) {
			throw new IllegalStateException("The Broadcast Variable has not yet been materialized.");
		}
		if (disposed) {
			throw new IllegalStateException("The Broadcast Variable has been disposed");
		}

		synchronized (references) {
			if (transformed == null) {
				transformed = initializer.initializeBroadcastVariable(data);
				data = null;
			}
			return transformed;
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy