All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.broadcast.BroadcastVariableMaterialization Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.broadcast;

import org.apache.flink.api.common.functions.BroadcastVariableInitializer;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerFactory;
import org.apache.flink.runtime.io.network.api.reader.MutableReader;
import org.apache.flink.runtime.operators.BatchTask;
import org.apache.flink.runtime.operators.util.ReaderIterator;
import org.apache.flink.runtime.plugable.DeserializationDelegate;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * This class represents a single materialization of a broadcast variable and maintains a reference
 * count for it. If the reference count reaches zero the variable is no longer accessible and will
 * eventually be garbage-collected.
 *
 * @param  The type of the elements in the broadcast data set.
 */
public class BroadcastVariableMaterialization {

    private static final Logger LOG =
            LoggerFactory.getLogger(BroadcastVariableMaterialization.class);

    private final Set> references = new HashSet>();

    private final Object materializationMonitor = new Object();

    private final BroadcastVariableKey key;

    private ArrayList data;

    private C transformed;

    private boolean materialized;

    private boolean disposed;

    public BroadcastVariableMaterialization(BroadcastVariableKey key) {
        this.key = key;
    }

    // --------------------------------------------------------------------------------------------

    public void materializeVariable(
            MutableReader reader,
            TypeSerializerFactory serializerFactory,
            BatchTask referenceHolder)
            throws MaterializationExpiredException, IOException {
        Preconditions.checkNotNull(reader);
        Preconditions.checkNotNull(serializerFactory);
        Preconditions.checkNotNull(referenceHolder);

        final boolean materializer;

        // hold the reference lock only while we track references and decide who should be the
        // materializer
        // that way, other tasks can de-register (in case of failure) while materialization is
        // happening
        synchronized (references) {
            if (disposed) {
                throw new MaterializationExpiredException();
            }

            // sanity check
            if (!references.add(referenceHolder)) {
                throw new IllegalStateException(
                        String.format(
                                "The task %s already holds a reference to the broadcast variable %s.",
                                referenceHolder
                                        .getEnvironment()
                                        .getTaskInfo()
                                        .getTaskNameWithSubtasks(),
                                key.toString()));
            }

            materializer = references.size() == 1;
        }

        try {
            @SuppressWarnings("unchecked")
            final MutableReader> typedReader =
                    (MutableReader>) reader;

            @SuppressWarnings("unchecked")
            final TypeSerializer serializer =
                    ((TypeSerializerFactory) serializerFactory).getSerializer();

            final ReaderIterator readerIterator = new ReaderIterator(typedReader, serializer);

            if (materializer) {
                // first one, so we need to materialize;
                if (LOG.isDebugEnabled()) {
                    LOG.debug(
                            "Getting Broadcast Variable ("
                                    + key
                                    + ") - First access, materializing.");
                }

                ArrayList data = new ArrayList();

                T element;
                while ((element = readerIterator.next()) != null) {
                    data.add(element);
                }

                synchronized (materializationMonitor) {
                    this.data = data;
                    this.materialized = true;
                    materializationMonitor.notifyAll();
                }

                if (LOG.isDebugEnabled()) {
                    LOG.debug("Materialization of Broadcast Variable (" + key + ") finished.");
                }
            } else {
                // successor: discard all data and refer to the shared variable

                if (LOG.isDebugEnabled()) {
                    LOG.debug("Getting Broadcast Variable (" + key + ") - shared access.");
                }

                T element = serializer.createInstance();
                while ((element = readerIterator.next(element)) != null) {}

                synchronized (materializationMonitor) {
                    while (!this.materialized && !disposed) {
                        materializationMonitor.wait();
                    }
                }
            }
        } catch (Throwable t) {
            // in case of an exception, we need to clean up big time
            decrementReferenceIfHeld(referenceHolder);

            if (t instanceof IOException) {
                throw (IOException) t;
            } else {
                throw new IOException("Materialization of the broadcast variable failed.", t);
            }
        }
    }

    public boolean decrementReference(BatchTask referenceHolder) {
        return decrementReferenceInternal(referenceHolder, true);
    }

    public boolean decrementReferenceIfHeld(BatchTask referenceHolder) {
        return decrementReferenceInternal(referenceHolder, false);
    }

    private boolean decrementReferenceInternal(
            BatchTask referenceHolder, boolean errorIfNoReference) {
        synchronized (references) {
            if (disposed || references.isEmpty()) {
                if (errorIfNoReference) {
                    throw new IllegalStateException(
                            "Decrementing reference to broadcast variable that is no longer alive.");
                } else {
                    return false;
                }
            }

            if (!references.remove(referenceHolder)) {
                if (errorIfNoReference) {
                    throw new IllegalStateException(
                            String.format(
                                    "The task %s did not hold a reference to the broadcast variable %s.",
                                    referenceHolder
                                            .getEnvironment()
                                            .getTaskInfo()
                                            .getTaskNameWithSubtasks(),
                                    key.toString()));
                } else {
                    return false;
                }
            }

            if (references.isEmpty()) {
                disposed = true;
                data = null;
                transformed = null;
                return true;
            } else {
                return false;
            }
        }
    }

    // --------------------------------------------------------------------------------------------

    public List getVariable() throws InitializationTypeConflictException {
        if (!materialized) {
            throw new IllegalStateException(
                    "The Broadcast Variable has not yet been materialized.");
        }
        if (disposed) {
            throw new IllegalStateException("The Broadcast Variable has been disposed");
        }

        synchronized (references) {
            if (transformed != null) {
                if (transformed instanceof List) {
                    @SuppressWarnings("unchecked")
                    List casted = (List) transformed;
                    return casted;
                } else {
                    throw new InitializationTypeConflictException(transformed.getClass());
                }
            } else {
                return data;
            }
        }
    }

    public C getVariable(BroadcastVariableInitializer initializer) {
        if (!materialized) {
            throw new IllegalStateException(
                    "The Broadcast Variable has not yet been materialized.");
        }
        if (disposed) {
            throw new IllegalStateException("The Broadcast Variable has been disposed");
        }

        synchronized (references) {
            if (transformed == null) {
                transformed = initializer.initializeBroadcastVariable(data);
                data = null;
            }
            return transformed;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy