// org.apache.beam.fn.harness.FnHarness Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.fn.harness;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Collections;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.function.Function;
import javax.annotation.Nullable;
import org.apache.beam.fn.harness.control.BeamFnControlClient;
import org.apache.beam.fn.harness.control.ExecutionStateSampler;
import org.apache.beam.fn.harness.control.FinalizeBundleHandler;
import org.apache.beam.fn.harness.control.HarnessMonitoringInfosInstructionHandler;
import org.apache.beam.fn.harness.control.ProcessBundleHandler;
import org.apache.beam.fn.harness.data.BeamFnDataGrpcClient;
import org.apache.beam.fn.harness.debug.DataSampler;
import org.apache.beam.fn.harness.logging.BeamFnLoggingClient;
import org.apache.beam.fn.harness.state.BeamFnStateGrpcClientCache;
import org.apache.beam.fn.harness.status.BeamFnStatusClient;
import org.apache.beam.fn.harness.stream.HarnessStreamObserverFactories;
import org.apache.beam.model.fnexecution.v1.BeamFnApi;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor;
import org.apache.beam.model.fnexecution.v1.BeamFnControlGrpc;
import org.apache.beam.model.pipeline.v1.Endpoints;
import org.apache.beam.runners.core.metrics.MetricsContainerImpl;
import org.apache.beam.runners.core.metrics.ShortIdMap;
import org.apache.beam.sdk.fn.IdGenerator;
import org.apache.beam.sdk.fn.IdGenerators;
import org.apache.beam.sdk.fn.JvmInitializers;
import org.apache.beam.sdk.fn.channel.AddHarnessIdInterceptor;
import org.apache.beam.sdk.fn.channel.ManagedChannelFactory;
import org.apache.beam.sdk.fn.stream.OutboundObserverFactory;
import org.apache.beam.sdk.function.ThrowingFunction;
import org.apache.beam.sdk.io.FileSystems;
import org.apache.beam.sdk.metrics.MetricsEnvironment;
import org.apache.beam.sdk.options.ExecutorOptions;
import org.apache.beam.sdk.options.ExperimentalOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.util.construction.CoderTranslation;
import org.apache.beam.sdk.util.construction.PipelineOptionsTranslation;
import org.apache.beam.vendor.grpc.v1p60p1.com.google.protobuf.TextFormat;
import org.apache.beam.vendor.grpc.v1p60p1.io.grpc.ManagedChannel;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.annotations.VisibleForTesting;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.util.concurrent.MoreExecutors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Main entry point into the Beam SDK Fn Harness for Java.
 *
 * <p>This entry point expects the following environment variables:
 *
 * <ul>
 *   <li>HARNESS_ID: A String representing the ID of this FnHarness. This will be added to the
 *       headers of calls to the Beam Control Service.
 *   <li>LOGGING_API_SERVICE_DESCRIPTOR: A {@link
 *       org.apache.beam.model.pipeline.v1.Endpoints.ApiServiceDescriptor} encoded as text
 *       representing the endpoint that is to be connected to for the Beam Fn Logging service.
 *   <li>CONTROL_API_SERVICE_DESCRIPTOR: A {@link Endpoints.ApiServiceDescriptor} encoded as text
 *       representing the endpoint that is to be connected to for the Beam Fn Control service.
 *   <li>PIPELINE_OPTIONS: A serialized form of {@link PipelineOptions}. See {@link PipelineOptions}
 *       for further details.
 * </ul>
 */
@SuppressWarnings({
"nullness" // TODO(https://github.com/apache/beam/issues/20497)
})
public class FnHarness {
// Names of the environment variables read by main(Function) to configure the harness.

// Identifier of this harness; printed at start-up and passed on as the harness id.
private static final String HARNESS_ID = "HARNESS_ID";
// Text-encoded ApiServiceDescriptor for the Beam Fn Control service.
private static final String CONTROL_API_SERVICE_DESCRIPTOR = "CONTROL_API_SERVICE_DESCRIPTOR";
// Text-encoded ApiServiceDescriptor for the Beam Fn Logging service.
private static final String LOGGING_API_SERVICE_DESCRIPTOR = "LOGGING_API_SERVICE_DESCRIPTOR";
// Text-encoded ApiServiceDescriptor for the status service; optional, treated as absent when unset.
private static final String STATUS_API_SERVICE_DESCRIPTOR = "STATUS_API_SERVICE_DESCRIPTOR";
// Path of a file holding the pipeline options JSON; when the file exists its contents replace
// the value of PIPELINE_OPTIONS.
private static final String PIPELINE_OPTIONS_FILE = "PIPELINE_OPTIONS_FILE";
// Pipeline options serialized as JSON.
private static final String PIPELINE_OPTIONS = "PIPELINE_OPTIONS";
// Whitespace-separated list of runner capabilities; optional.
private static final String RUNNER_CAPABILITIES = "RUNNER_CAPABILITIES";
// SLF4J logger for this class.
private static final Logger LOG = LoggerFactory.getLogger(FnHarness.class);
/**
 * Parses the protobuf text-format representation of an {@link Endpoints.ApiServiceDescriptor}.
 *
 * @param descriptor the descriptor encoded as protobuf text format
 * @return the descriptor built from the parsed text
 * @throws TextFormat.ParseException if {@code descriptor} cannot be parsed as text format
 */
private static Endpoints.ApiServiceDescriptor getApiServiceDescriptor(String descriptor)
    throws TextFormat.ParseException {
  Endpoints.ApiServiceDescriptor.Builder parsed = Endpoints.ApiServiceDescriptor.newBuilder();
  // Merge the textual fields into the empty builder, then freeze it into a message.
  TextFormat.merge(descriptor, parsed);
  return parsed.build();
}
/**
 * Parses {@code jsonString}, strips the fields named {@code keyToRemove} from the parsed tree by
 * delegating to {@code removeKeyRecursively}, and serializes the resulting tree back to a string.
 *
 * @param jsonString the JSON document to process
 * @param keyToRemove the field name to strip from the document
 * @return the re-serialized JSON document
 * @throws Exception if the input cannot be parsed or the result cannot be serialized
 */
public static String removeNestedKey(String jsonString, String keyToRemove) throws Exception {
  ObjectMapper jsonMapper = new ObjectMapper();
  JsonNode document = jsonMapper.readTree(jsonString);
  // The tree is edited in place, so the same node is written back out afterwards.
  removeKeyRecursively(document, keyToRemove);
  return jsonMapper.writeValueAsString(document);
}
/**
 * Removes, in place, every field named {@code keyToRemove} from {@code node} and from all JSON
 * objects nested beneath it, including objects that are elements of JSON arrays.
 *
 * <p>A field whose name matches is dropped together with its whole value; the value is not
 * searched further. Nodes that are neither objects nor arrays are left untouched.
 *
 * @param node the root of the JSON subtree to clean; modified in place
 * @param keyToRemove the field name to remove
 */
private static void removeKeyRecursively(JsonNode node, String keyToRemove) {
  if (node.isObject()) {
    Iterator<Map.Entry<String, JsonNode>> fields = node.fields();
    while (fields.hasNext()) {
      Map.Entry<String, JsonNode> field = fields.next();
      if (field.getKey().equals(keyToRemove)) {
        // Remove through the iterator so the object's field map is not modified behind its back.
        fields.remove();
      } else {
        removeKeyRecursively(field.getValue(), keyToRemove);
      }
    }
  } else if (node.isArray()) {
    // Arrays carry no field names themselves, but their elements may be objects that do.
    for (JsonNode element : node) {
      removeKeyRecursively(element, keyToRemove);
    }
  }
}
/**
 * JVM entry point: starts the harness using the environment variables of the current process.
 *
 * @param args command-line arguments; not used
 * @throws Exception if the delegated start-up fails
 */
public static void main(String[] args) throws Exception {
  Function<String, String> processEnvironment = System::getenv;
  main(processEnvironment);
}
/**
 * Starts the harness from configuration looked up through {@code environmentVarGetter}.
 *
 * <p>The following variables are read: {@code HARNESS_ID}, {@code LOGGING_API_SERVICE_DESCRIPTOR},
 * {@code CONTROL_API_SERVICE_DESCRIPTOR}, {@code STATUS_API_SERVICE_DESCRIPTOR} (optional),
 * {@code PIPELINE_OPTIONS}, {@code PIPELINE_OPTIONS_FILE} (optional) and {@code
 * RUNNER_CAPABILITIES} (optional, whitespace separated). When {@code PIPELINE_OPTIONS_FILE} names
 * an existing file, its UTF-8 contents replace the value of {@code PIPELINE_OPTIONS}. A failure
 * while reading that file is reported on standard output and the value of {@code
 * PIPELINE_OPTIONS} is kept.
 *
 * @param environmentVarGetter maps an environment variable name to its value, or to {@code null}
 *     when the variable is not set
 * @throws Exception if the options or service descriptors cannot be parsed, or if the delegated
 *     harness start-up fails
 */
@VisibleForTesting
public static void main(Function<String, String> environmentVarGetter) throws Exception {
  JvmInitializers.runOnStartup();
  System.out.format("SDK Fn Harness started%n");
  System.out.format("Harness ID %s%n", environmentVarGetter.apply(HARNESS_ID));
  System.out.format(
      "Logging location %s%n", environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
  System.out.format(
      "Control location %s%n", environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));
  System.out.format(
      "Status location %s%n", environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));
  String id = environmentVarGetter.apply(HARNESS_ID);

  String pipelineOptionsJson = environmentVarGetter.apply(PIPELINE_OPTIONS);
  // Try looking for a file first. If that exists it should override PIPELINE_OPTIONS to avoid
  // maxing out the kernel's environment space
  try {
    String pipelineOptionsPath = environmentVarGetter.apply(PIPELINE_OPTIONS_FILE);
    System.out.format("Pipeline Options File %s%n", pipelineOptionsPath);
    if (pipelineOptionsPath != null) {
      Path filePath = Paths.get(pipelineOptionsPath);
      if (Files.exists(filePath)) {
        System.out.format(
            "Pipeline Options File %s exists. Overriding existing options.%n",
            pipelineOptionsPath);
        pipelineOptionsJson = new String(Files.readAllBytes(filePath), StandardCharsets.UTF_8);
      }
    }
  } catch (Exception e) {
    // Best effort: fall back to the PIPELINE_OPTIONS value when the file cannot be used.
    System.out.format("Problem loading pipeline options from file: %s%n", e.getMessage());
  }

  System.out.format("Pipeline options %s%n", pipelineOptionsJson);
  // TODO: https://github.com/apache/beam/issues/30301
  pipelineOptionsJson = removeNestedKey(pipelineOptionsJson, "impersonateServiceAccount");
  PipelineOptions options = PipelineOptionsTranslation.fromJson(pipelineOptionsJson);

  Endpoints.ApiServiceDescriptor loggingApiServiceDescriptor =
      getApiServiceDescriptor(environmentVarGetter.apply(LOGGING_API_SERVICE_DESCRIPTOR));
  Endpoints.ApiServiceDescriptor controlApiServiceDescriptor =
      getApiServiceDescriptor(environmentVarGetter.apply(CONTROL_API_SERVICE_DESCRIPTOR));
  // The status service is optional; leave the descriptor null when the variable is unset.
  Endpoints.ApiServiceDescriptor statusApiServiceDescriptor =
      environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR) == null
          ? null
          : getApiServiceDescriptor(environmentVarGetter.apply(STATUS_API_SERVICE_DESCRIPTOR));

  String runnerCapabilitesOrNull = environmentVarGetter.apply(RUNNER_CAPABILITIES);
  Set<String> runnerCapabilites =
      runnerCapabilitesOrNull == null
          ? Collections.emptySet()
          : ImmutableSet.copyOf(runnerCapabilitesOrNull.split("\\s+"));

  main(
      id,
      options,
      runnerCapabilites,
      loggingApiServiceDescriptor,
      controlApiServiceDescriptor,
      statusApiServiceDescriptor);
}
/**
 * Run a FnHarness with the given id and options that attaches to the specified logging and
 * control API service descriptors.
 *
 * <p>The channel factory is the epoll variant when the {@code beam_fn_api_epoll} experiment is
 * present in {@code options} and the default one otherwise. The outbound observer factory and
 * the process wide cache are both derived from {@code options}.
 *
 * @param id Harness ID
 * @param options The options for this pipeline
 * @param runnerCapabilities the runner capabilities, passed through unchanged
 * @param loggingApiServiceDescriptor endpoint of the Beam Fn Logging service
 * @param controlApiServiceDescriptor endpoint of the Beam Fn Control service
 * @param statusApiServiceDescriptor endpoint of the status service, or {@code null} if there is
 *     none
 * @throws Exception if the delegated harness start-up fails
 */
public static void main(
    String id,
    PipelineOptions options,
    Set<String> runnerCapabilities,
    Endpoints.ApiServiceDescriptor loggingApiServiceDescriptor,
    Endpoints.ApiServiceDescriptor controlApiServiceDescriptor,
    @Nullable Endpoints.ApiServiceDescriptor statusApiServiceDescriptor)
    throws Exception {
  ManagedChannelFactory channelFactory =
      ExperimentalOptions.hasExperiment(options, "beam_fn_api_epoll")
          ? ManagedChannelFactory.createEpoll()
          : ManagedChannelFactory.createDefault();
  OutboundObserverFactory outboundObserverFactory =
      HarnessStreamObserverFactories.fromOptions(options);

  main(
      id,
      options,
      runnerCapabilities,
      loggingApiServiceDescriptor,
      controlApiServiceDescriptor,
      statusApiServiceDescriptor,
      channelFactory,
      outboundObserverFactory,
      Caches.fromOptions(options));
}
/**
* Run a FnHarness with the given id and options that attaches to the specified logging and
* control API service descriptors using the given channel factory and outbound observer factory.
*
* @param id Harness ID
* @param options The options for this pipeline
* @param runnerCapabilites
* @param loggingApiServiceDescriptor
* @param controlApiServiceDescriptor
* @param statusApiServiceDescriptor
* @param channelFactory
* @param outboundObserverFactory
* @param processWideCache
* @throws Exception
*/
public static void main(
String id,
PipelineOptions options,
Set runnerCapabilites,
Endpoints.ApiServiceDescriptor loggingApiServiceDescriptor,
Endpoints.ApiServiceDescriptor controlApiServiceDescriptor,
Endpoints.ApiServiceDescriptor statusApiServiceDescriptor,
ManagedChannelFactory channelFactory,
OutboundObserverFactory outboundObserverFactory,
Cache
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy