com.hazelcast.jet.impl.pipeline.Planner Maven / Gradle / Ivy
/*
* Copyright (c) 2008-2023, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.impl.pipeline;
import com.hazelcast.function.FunctionEx;
import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.Traverser;
import com.hazelcast.jet.Traversers;
import com.hazelcast.jet.core.DAG;
import com.hazelcast.jet.core.Edge;
import com.hazelcast.jet.core.EventTimePolicy;
import com.hazelcast.jet.core.Processor;
import com.hazelcast.jet.core.ProcessorMetaSupplier;
import com.hazelcast.jet.core.ProcessorSupplier;
import com.hazelcast.jet.core.Vertex;
import com.hazelcast.jet.impl.pipeline.PipelineImpl.Context;
import com.hazelcast.jet.impl.pipeline.transform.FlatMapTransform;
import com.hazelcast.jet.impl.pipeline.transform.MapTransform;
import com.hazelcast.jet.impl.pipeline.transform.SinkTransform;
import com.hazelcast.jet.impl.pipeline.transform.StreamSourceTransform;
import com.hazelcast.jet.impl.pipeline.transform.TimestampTransform;
import com.hazelcast.jet.impl.pipeline.transform.Transform;
import com.hazelcast.jet.impl.util.LoggingUtil;
import com.hazelcast.jet.impl.util.Util;
import com.hazelcast.logging.ILogger;
import com.hazelcast.logging.Logger;
import javax.annotation.Nonnull;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.Consumer;
import java.util.function.ObjIntConsumer;
import java.util.stream.Collectors;
import static com.hazelcast.jet.core.Edge.from;
import static com.hazelcast.jet.core.EventTimePolicy.eventTimePolicy;
import static com.hazelcast.jet.impl.TopologicalSorter.checkTopologicalSort;
import static com.hazelcast.jet.impl.util.Util.toList;
import static java.util.stream.Collectors.toList;
@SuppressWarnings("unchecked")
public class Planner {
/** Logger used by this class, e.g. to report the chosen watermark throttling frame. */
private static final ILogger LOGGER = Logger.getLogger(Planner.class);
/**
* Maximum gap between two consecutive watermarks. This is not technically
* necessary, but improves debugging by avoiding too large gaps between WMs
* and the user can better observe if input to WM coalescing is lagging.
*/
private static final int MAXIMUM_WATERMARK_GAP = 1000;
/** The DAG instance returned by {@code createDag}; created empty together with the planner. */
public final DAG dag = new DAG();
/**
* Lookup keyed by transform, judging by the field name.
* NOTE(review): declared as a raw {@code Map}; the generic type arguments
* look like they were lost when this file was extracted - confirm the
* intended key/value types against the upstream source.
*/
public final Map transform2vertex = new HashMap<>();
/** The pipeline this planner was created for; assigned once in the constructor. */
private final PipelineImpl pipeline;
/**
* Creates a planner bound to the given pipeline.
*
* @param pipeline the pipeline whose transforms this planner reads when
*                 building the DAG
*/
Planner(PipelineImpl pipeline) {
this.pipeline = pipeline;
}
/**
* Reports the preserve-order setting of the underlying pipeline.
*
* @return the value of {@code pipeline.isPreserveOrder()}
*/
public boolean isPreserveOrder() {
return pipeline.isPreserveOrder();
}
@SuppressWarnings("rawtypes")
DAG createDag(Context context) {
pipeline.makeNamesUnique();
Map> adjacencyMap = pipeline.adjacencyMap();
validateNoLeakage(adjacencyMap);
checkTopologicalSort(adjacencyMap.entrySet());
// Find the greatest common denominator of all frame lengths
// appearing in the pipeline
long frameSizeGcd = Util.gcd(adjacencyMap.keySet().stream()
.map(Transform::preferredWatermarkStride)
.filter(frameSize -> frameSize > 0)
.mapToLong(i -> i)
.toArray());
if (frameSizeGcd == 0) {
// even if there are no window aggregations, we want the watermarks for latency debugging
frameSizeGcd = MAXIMUM_WATERMARK_GAP;
}
if (frameSizeGcd > MAXIMUM_WATERMARK_GAP) {
frameSizeGcd = Util.gcd(frameSizeGcd, MAXIMUM_WATERMARK_GAP);
}
LoggingUtil.logFine(LOGGER, "Watermarks in the pipeline will be throttled to %d", frameSizeGcd);
// Update watermark throttling frame length on all transforms with the determined length
for (Transform transform : adjacencyMap.keySet()) {
if (transform instanceof StreamSourceTransform) {
StreamSourceTransform t = (StreamSourceTransform) transform;
EventTimePolicy policy = t.getEventTimePolicy();
if (policy != null) {
t.setEventTimePolicy(withFrameSize(policy, frameSizeGcd));
}
} else if (transform instanceof TimestampTransform) {
TimestampTransform t = (TimestampTransform) transform;
t.setEventTimePolicy(withFrameSize(t.getEventTimePolicy(), frameSizeGcd));
}
}
// fuse subsequent map/filter/flatMap transforms into one
Map> originalParents = new HashMap<>();
List transforms = new ArrayList<>(adjacencyMap.keySet());
for (int i = 0; i < transforms.size(); i++) {
Transform transform = transforms.get(i);
List chain = findFusibleChain(transform, adjacencyMap);
if (chain == null) {
continue;
}
// remove transforms in the chain and replace the parent with a fused transform
transforms.removeAll(chain.subList(1, chain.size()));
Transform fused = fuseFlatMapTransforms(chain);
transforms.set(i, fused);
Transform lastInChain = chain.get(chain.size() - 1);
for (Transform downstream : adjacencyMap.get(lastInChain)) {
originalParents.put(downstream, new ArrayList<>(downstream.upstream()));
downstream.upstream().replaceAll(p -> p == lastInChain ? fused : p);
}
}
for (Transform transform : transforms) {
transform.addToDag(this, context);
}
// restore original parents
for (Entry> en : originalParents.entrySet()) {
List upstream = en.getKey().upstream();
for (int i = 0; i < upstream.size(); i++) {
en.getKey().upstream().set(i, en.getValue().get(i));
}
}
return dag;
}
private static List findFusibleChain(
@Nonnull Transform transform,
@Nonnull Map> adjacencyMap
) {
ArrayList chain = new ArrayList<>();
for (;;) {
if (!(transform instanceof MapTransform || transform instanceof FlatMapTransform)) {
break;
}
chain.add(transform);
List downstream = adjacencyMap.get(transform);
if (downstream.size() != 1) {
break;
}
Transform nextTransform = downstream.get(0);
if (nextTransform.localParallelism() != transform.localParallelism()
|| nextTransform.shouldRebalanceInput(0)
) {
break;
}
transform = nextTransform;
}
return chain.size() > 1 ? chain : null;
}
@SuppressWarnings("rawtypes")
private static Transform fuseFlatMapTransforms(List chain) {
assert chain.size() > 1 : "chain.size()=" + chain.size();
assert chain.get(0).upstream().size() == 1;
int lastFlatMap = 0;
FunctionEx
© 2015 - 2024 Weber Informatics LLC | Privacy Policy