All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.connector.base.source.hybrid.HybridSource Maven / Gradle / Ivy

There is a newer version: 1.13.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.connector.base.source.hybrid;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.connector.source.Boundedness;
import org.apache.flink.api.connector.source.Source;
import org.apache.flink.api.connector.source.SourceReader;
import org.apache.flink.api.connector.source.SourceReaderContext;
import org.apache.flink.api.connector.source.SplitEnumerator;
import org.apache.flink.api.connector.source.SplitEnumeratorContext;
import org.apache.flink.api.java.ClosureCleaner;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.util.Preconditions;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

/**
 * Hybrid source that switches underlying sources based on configured source chain.
 *
 * 

A simple example with FileSource and KafkaSource with fixed Kafka start position: * *

{@code
 * FileSource fileSource =
 *   FileSource.forRecordStreamFormat(new TextLineFormat(), Path.fromLocalFile(testDir)).build();
 * KafkaSource kafkaSource =
 *           KafkaSource.builder()
 *                   .setBootstrapServers("localhost:9092")
 *                   .setGroupId("MyGroup")
 *                   .setTopics(Arrays.asList("quickstart-events"))
 *                   .setDeserializer(
 *                           KafkaRecordDeserializer.valueOnly(StringDeserializer.class))
 *                   .setStartingOffsets(OffsetsInitializer.earliest())
 *                   .build();
 * HybridSource hybridSource =
 *           HybridSource.builder(fileSource)
 *                   .addSource(kafkaSource)
 *                   .build();
 * }
* *

A more complex example with Kafka start position derived from previous source: * *

{@code
 * HybridSource hybridSource =
 *     HybridSource.builder(fileSource)
 *         .addSource(
 *             switchContext -> {
 *               StaticFileSplitEnumerator previousEnumerator =
 *                   switchContext.getPreviousEnumerator();
 *               // how to get timestamp depends on specific enumerator
 *               long timestamp = previousEnumerator.getEndTimestamp();
 *               OffsetsInitializer offsets =
 *                   OffsetsInitializer.timestamp(timestamp);
 *               KafkaSource kafkaSource =
 *                   KafkaSource.builder()
 *                       .setBootstrapServers("localhost:9092")
 *                       .setGroupId("MyGroup")
 *                       .setTopics(Arrays.asList("quickstart-events"))
 *                       .setDeserializer(
 *                           KafkaRecordDeserializer.valueOnly(StringDeserializer.class))
 *                       .setStartingOffsets(offsets)
 *                       .build();
 *               return kafkaSource;
 *             },
 *             Boundedness.CONTINUOUS_UNBOUNDED)
 *         .build();
 * }
*/ @PublicEvolving public class HybridSource implements Source { private final List sources; /** Protected for subclass, use {@link #builder(Source)} to construct source. */ protected HybridSource(List sources) { Preconditions.checkArgument(!sources.isEmpty()); this.sources = sources; } /** Builder for {@link HybridSource}. */ public static HybridSourceBuilder builder( Source firstSource) { HybridSourceBuilder builder = new HybridSourceBuilder<>(); return builder.addSource(firstSource); } @Override public Boundedness getBoundedness() { return sources.get(sources.size() - 1).boundedness; } @Override public SourceReader createReader(SourceReaderContext readerContext) throws Exception { return new HybridSourceReader(readerContext); } @Override public SplitEnumerator createEnumerator( SplitEnumeratorContext enumContext) { return new HybridSourceSplitEnumerator(enumContext, sources, 0, null); } @Override public SplitEnumerator restoreEnumerator( SplitEnumeratorContext enumContext, HybridSourceEnumeratorState checkpoint) throws Exception { return new HybridSourceSplitEnumerator( enumContext, sources, checkpoint.getCurrentSourceIndex(), checkpoint); } @Override public SimpleVersionedSerializer getSplitSerializer() { return new HybridSourceSplitSerializer(); } @Override public SimpleVersionedSerializer getEnumeratorCheckpointSerializer() { return new HybridSourceEnumeratorStateSerializer(); } /** * Context provided to source factory. * *

To derive a start position at switch time, the source can be initialized from context of * the previous enumerator. A specific enumerator implementation may carry state such as an end * timestamp, that can be used to derive the start position of the next source. * *

Currently only the previous enumerator is exposed. The context interface allows for * backward compatible extension, i.e. additional information about the previous source can be * supplied in the future. */ public interface SourceSwitchContext { EnumT getPreviousEnumerator(); } /** * Factory for underlying sources of {@link HybridSource}. * *

This factory permits building of a source at graph construction time or deferred at switch * time. Provides the ability to set a start position in any way a specific source allows. * Future convenience could be built on top of it, for example a default implementation that * recognizes optional interfaces to transfer position in a universal format. * *

Called when the current enumerator has finished. The previous source's final state can * thus be used to construct the next source, as required for dynamic position transfer at time * of switching. * *

If start position is known at job submission time, the source can be constructed in the * entry point and simply wrapped into the factory, providing the benefit of validation during * submission. */ @FunctionalInterface public interface SourceFactory< T, SourceT extends Source, FromEnumT extends SplitEnumerator> extends Serializable { SourceT create(SourceSwitchContext context); } private static class PassthroughSourceFactory< T, SourceT extends Source, FromEnumT extends SplitEnumerator> implements SourceFactory { private final SourceT source; private PassthroughSourceFactory(SourceT source) { this.source = source; } @Override public SourceT create(SourceSwitchContext context) { return source; } } /** Entry for list of underlying sources. */ static class SourceListEntry implements Serializable { protected final SourceFactory factory; protected final Boundedness boundedness; private SourceListEntry(SourceFactory factory, Boundedness boundedness) { this.factory = Preconditions.checkNotNull(factory); this.boundedness = Preconditions.checkNotNull(boundedness); } static SourceListEntry of(SourceFactory configurer, Boundedness boundedness) { return new SourceListEntry(configurer, boundedness); } } /** Builder for HybridSource. */ public static class HybridSourceBuilder implements Serializable { private final List sources; public HybridSourceBuilder() { sources = new ArrayList<>(); } /** Add pre-configured source (without switch time modification). */ public > HybridSourceBuilder addSource(NextSourceT source) { return addSource(new PassthroughSourceFactory<>(source), source.getBoundedness()); } /** Add source with deferred instantiation based on previous enumerator. 
*/ public > HybridSourceBuilder addSource( SourceFactory sourceFactory, Boundedness boundedness) { if (!sources.isEmpty()) { Preconditions.checkArgument( Boundedness.BOUNDED.equals(sources.get(sources.size() - 1).boundedness), "All sources except the final source need to be bounded."); } ClosureCleaner.clean( sourceFactory, ExecutionConfig.ClosureCleanerLevel.RECURSIVE, true); sources.add(SourceListEntry.of(sourceFactory, boundedness)); return (HybridSourceBuilder) this; } /** Build the source. */ public HybridSource build() { return new HybridSource(sources); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy