All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.streaming.api.functions.source.FromElementsFunction Maven / Gradle / Ivy

There is a newer version: 1.14.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.functions.source;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.IntSerializer;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.operators.OutputTypeConfigurable;
import org.apache.flink.util.IterableUtils;
import org.apache.flink.util.Preconditions;

import javax.annotation.Nullable;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Objects;

/**
 * A stream source function that returns a sequence of elements.
 *
 * 

This source function serializes the elements using Flink's type information. That way, any * object transport using Java serialization will not be affected by the serializability of the * elements. * *

NOTE: This source has a parallelism of 1. * * @param The type of elements returned by this function. */ @PublicEvolving public class FromElementsFunction implements SourceFunction, CheckpointedFunction, OutputTypeConfigurable { private static final long serialVersionUID = 1L; /** The (de)serializer to be used for the data elements. */ private @Nullable TypeSerializer serializer; /** The actual data elements, in serialized form. */ private byte[] elementsSerialized; /** The number of serialized elements. */ private final int numElements; /** The number of elements emitted already. */ private volatile int numElementsEmitted; /** The number of elements to skip initially. */ private volatile int numElementsToSkip; /** Flag to make the source cancelable. */ private volatile boolean isRunning = true; private final transient Iterable elements; private transient ListState checkpointedState; @SafeVarargs public FromElementsFunction(TypeSerializer serializer, T... elements) throws IOException { this(serializer, Arrays.asList(elements)); } public FromElementsFunction(TypeSerializer serializer, Iterable elements) throws IOException { this.serializer = Preconditions.checkNotNull(serializer); this.elements = elements; this.numElements = elements instanceof Collection ? ((Collection) elements).size() : (int) IterableUtils.toStream(elements).count(); serializeElements(); } @SafeVarargs public FromElementsFunction(T... elements) { this(Arrays.asList(elements)); } public FromElementsFunction(Iterable elements) { this.serializer = null; this.elements = elements; this.numElements = elements instanceof Collection ? ((Collection) elements).size() : (int) IterableUtils.toStream(elements).count(); checkIterable(elements, Object.class); } @VisibleForTesting @Nullable public TypeSerializer getSerializer() { return serializer; } private void serializeElements() throws IOException { Preconditions.checkState(serializer != null, "serializer not set"); ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputViewStreamWrapper wrapper = new DataOutputViewStreamWrapper(baos); try { for (T element : elements) { serializer.serialize(element, wrapper); } } catch (Exception e) { throw new IOException("Serializing the source elements failed: " + e.getMessage(), e); } this.elementsSerialized = baos.toByteArray(); } /** * Set element type and re-serialize element if required. Should only be called before * serialization/deserialization of this function. */ @Override public void setOutputType(TypeInformation outTypeInfo, ExecutionConfig executionConfig) { Preconditions.checkState( elements != null, "The output type should've been specified before shipping the graph to the cluster"); checkIterable(elements, outTypeInfo.getTypeClass()); TypeSerializer newSerializer = outTypeInfo.createSerializer(executionConfig); if (Objects.equals(serializer, newSerializer)) { return; } serializer = newSerializer; try { serializeElements(); } catch (IOException ex) { throw new UncheckedIOException(ex); } } @Override public void initializeState(FunctionInitializationContext context) throws Exception { Preconditions.checkState( this.checkpointedState == null, "The " + getClass().getSimpleName() + " has already been initialized."); this.checkpointedState = context.getOperatorStateStore() .getListState( new ListStateDescriptor<>( "from-elements-state", IntSerializer.INSTANCE)); if (context.isRestored()) { List retrievedStates = new ArrayList<>(); for (Integer entry : this.checkpointedState.get()) { retrievedStates.add(entry); } // given that the parallelism of the function is 1, we can only have 1 state Preconditions.checkArgument( retrievedStates.size() == 1, getClass().getSimpleName() + " retrieved invalid state."); this.numElementsToSkip = retrievedStates.get(0); } } @Override public void run(SourceContext ctx) throws Exception { Preconditions.checkState(serializer != null, "serializer not configured"); ByteArrayInputStream bais = new ByteArrayInputStream(elementsSerialized); final DataInputView input = new DataInputViewStreamWrapper(bais); // if we are restored from a checkpoint and need to skip elements, skip them now. int toSkip = numElementsToSkip; if (toSkip > 0) { try { while (toSkip > 0) { serializer.deserialize(input); toSkip--; } } catch (Exception e) { throw new IOException( "Failed to deserialize an element from the source. " + "If you are using user-defined serialization (Value and Writable types), check the " + "serialization functions.\nSerializer is " + serializer, e); } this.numElementsEmitted = this.numElementsToSkip; } final Object lock = ctx.getCheckpointLock(); while (isRunning && numElementsEmitted < numElements) { T next; try { next = serializer.deserialize(input); } catch (Exception e) { throw new IOException( "Failed to deserialize an element from the source. " + "If you are using user-defined serialization (Value and Writable types), check the " + "serialization functions.\nSerializer is " + serializer, e); } synchronized (lock) { ctx.collect(next); numElementsEmitted++; } } } @Override public void cancel() { isRunning = false; } /** * Gets the number of elements produced in total by this function. * * @return The number of elements produced in total. */ public int getNumElements() { return numElements; } /** * Gets the number of elements emitted so far. * * @return The number of elements emitted so far. */ public int getNumElementsEmitted() { return numElementsEmitted; } // ------------------------------------------------------------------------ // Checkpointing // ------------------------------------------------------------------------ @Override public void snapshotState(FunctionSnapshotContext context) throws Exception { Preconditions.checkState( this.checkpointedState != null, "The " + getClass().getSimpleName() + " has not been properly initialized."); this.checkpointedState.clear(); this.checkpointedState.add(this.numElementsEmitted); } // ------------------------------------------------------------------------ // Utilities // ------------------------------------------------------------------------ /** * Verifies that all elements in the collection are non-null, and are of the given class, or a * subclass thereof. * * @param elements The collection to check. * @param viewedAs The class to which the elements must be assignable to. * @param The generic type of the collection to be checked. */ public static void checkCollection(Collection elements, Class viewedAs) { checkIterable(elements, viewedAs); } private static void checkIterable(Iterable elements, Class viewedAs) { for (OUT elem : elements) { if (elem == null) { throw new IllegalArgumentException("The collection contains a null element"); } if (!viewedAs.isAssignableFrom(elem.getClass())) { throw new IllegalArgumentException( "The elements in the collection are not all subclasses of " + viewedAs.getCanonicalName()); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy