// org.apache.beam.fn.harness.WindowMergingFnRunner (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.fn.harness;
import com.google.auto.service.AutoService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.beam.model.pipeline.v1.RunnerApi;
import org.apache.beam.model.pipeline.v1.RunnerApi.PTransform;
import org.apache.beam.sdk.function.ThrowingFunction;
import org.apache.beam.sdk.transforms.windowing.BoundedWindow;
import org.apache.beam.sdk.transforms.windowing.WindowFn;
import org.apache.beam.sdk.transforms.windowing.WindowFn.MergeContext;
import org.apache.beam.sdk.util.construction.PTransformTranslation;
import org.apache.beam.sdk.util.construction.WindowingStrategyTranslation;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Lists;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets;
/**
* Merges windows using a {@link org.apache.beam.sdk.transforms.windowing.WindowFn}.
*
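* <p>Window merging function:
*
* <ul>
*   <li>Input: {@code KV<nonce, iterable<OriginalWindow>>}
*   <li>Output: {@code KV<nonce, KV<iterable<UnmergedOriginalWindow>, iterable<KV<MergedWindow,
*       iterable<ConsumedOriginalWindow>>>>>}
* </ul>
*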
* For each set of original windows, a list of all unmerged windows is output alongside a map of
* merged window to set of consumed windows. All original windows must be contained in either the
* unmerged original window set or one of the consumed original window sets. Each original window
* can only be part of one output set. The nonce is used by a runner to associate each input with
* its output. The nonce is represented as an opaque set of bytes.
*/
@SuppressWarnings({
"rawtypes" // TODO(https://github.com/apache/beam/issues/20447)
})
public abstract class WindowMergingFnRunner {
static final String URN = PTransformTranslation.MERGE_WINDOWS_TRANSFORM_URN;
/**
* A registrar which provides a factory to handle merging windows based upon the {@link WindowFn}.
*/
@AutoService(PTransformRunnerFactory.Registrar.class)
public static class Registrar implements PTransformRunnerFactory.Registrar {
@Override
public Map getPTransformRunnerFactories() {
return ImmutableMap.of(
URN,
MapFnRunners.forValueMapFnFactory(WindowMergingFnRunner::createMapFunctionForPTransform));
}
}
static
ThrowingFunction>, KV, Iterable>>>>>
createMapFunctionForPTransform(String ptransformId, PTransform ptransform)
throws IOException {
RunnerApi.FunctionSpec payload =
RunnerApi.FunctionSpec.parseFrom(ptransform.getSpec().getPayload());
WindowFn, W> windowFn =
(WindowFn, W>) WindowingStrategyTranslation.windowFnFromProto(payload);
return WindowMergingFnRunner.create(windowFn)::mergeWindows;
}
static WindowMergingFnRunner create(WindowFn, W> windowFn) {
if (windowFn.isNonMerging()) {
return new NonMergingWindowFnRunner();
} else {
return new MergingViaWindowFnRunner(windowFn);
}
}
/**
* Returns the set of unmerged windows and a mapping from merged windows to sets of original
* windows.
*/
abstract KV, Iterable>>>> mergeWindows(
KV> windowsToMerge) throws Exception;
/////////////////////////////////////////////////////////////////////////////////////////////////
/**
* An optimized version of window merging where the {@link WindowFn} does not do any window
* merging.
*
* Note that this is likely to never be invoked and the identity mapping will be handled
* directly by runners. We have this here because runners may not perform this optimization.
*/
private static class NonMergingWindowFnRunner
extends WindowMergingFnRunner {
@Override
KV, Iterable>>>> mergeWindows(
KV> windowsToMerge) {
return KV.of(
windowsToMerge.getKey(), KV.of(windowsToMerge.getValue(), Collections.emptyList()));
}
}
/** An implementation which uses a {@link WindowFn} to merge windows. */
private static class MergingViaWindowFnRunner
extends WindowMergingFnRunner {
private final WindowFn windowFn;
private final WindowFn, W>.MergeContext mergeContext;
private Collection currentWindows;
private List>> mergedWindows;
private MergingViaWindowFnRunner(WindowFn windowFn) {
this.windowFn = windowFn;
this.mergedWindows = new ArrayList<>();
this.currentWindows = new ArrayList<>();
this.mergeContext =
windowFn.new MergeContext() {
@Override
public Collection windows() {
return currentWindows;
}
@Override
public void merge(Collection toBeMerged, W mergeResult) throws Exception {
mergedWindows.add(KV.of(mergeResult, toBeMerged));
}
};
}
@Override
KV, Iterable>>>> mergeWindows(
KV> windowsToMerge) throws Exception {
currentWindows = Sets.newHashSet(windowsToMerge.getValue());
windowFn.mergeWindows((MergeContext) mergeContext);
for (KV> mergedWindow : mergedWindows) {
currentWindows.removeAll(mergedWindow.getValue());
}
KV, Iterable>>>> result =
KV.of(
windowsToMerge.getKey(),
KV.of(Sets.newHashSet(currentWindows), (Iterable) Lists.newArrayList(mergedWindows)));
currentWindows.clear();
mergedWindows.clear();
return result;
}
}
}