All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.beam.fn.harness.control.ExecutionStateSampler Maven / Gradle / Ivy

There is a newer version: 2.60.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.fn.harness.control;

import com.google.auto.value.AutoValue;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.concurrent.GuardedBy;
import org.apache.beam.fn.harness.control.ProcessBundleHandler.BundleProcessor;
import org.apache.beam.fn.harness.logging.BeamFnLoggingMDC;
import org.apache.beam.model.pipeline.v1.MetricsApi.MonitoringInfo;
import org.apache.beam.runners.core.metrics.MetricsContainerStepMap;
import org.apache.beam.runners.core.metrics.MonitoringInfoEncodings;
import org.apache.beam.sdk.metrics.Counter;
import org.apache.beam.sdk.metrics.Distribution;
import org.apache.beam.sdk.metrics.Gauge;
import org.apache.beam.sdk.metrics.Histogram;
import org.apache.beam.sdk.metrics.MetricName;
import org.apache.beam.sdk.metrics.MetricsContainer;
import org.apache.beam.sdk.metrics.StringSet;
import org.apache.beam.sdk.options.ExecutorOptions;
import org.apache.beam.sdk.options.ExperimentalOptions;
import org.apache.beam.sdk.options.PipelineOptions;
import org.apache.beam.sdk.util.HistogramData;
import org.apache.beam.vendor.grpc.v1p60p1.com.google.protobuf.ByteString;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Joiner;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.joda.time.DateTimeUtils.MillisProvider;
import org.joda.time.Duration;
import org.joda.time.format.PeriodFormatter;
import org.joda.time.format.PeriodFormatterBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/** Monitors the execution of one or more execution threads. */
public class ExecutionStateSampler {

  private static final Logger LOG = LoggerFactory.getLogger(ExecutionStateSampler.class);
  private static final int DEFAULT_SAMPLING_PERIOD_MS = 200;
  private static final long MAX_LULL_TIME_MS = TimeUnit.MINUTES.toMillis(5);
  private static final PeriodFormatter DURATION_FORMATTER =
      new PeriodFormatterBuilder()
          .appendDays()
          .appendSuffix("d")
          .minimumPrintedDigits(2)
          .appendHours()
          .appendSuffix("h")
          .printZeroAlways()
          .appendMinutes()
          .appendSuffix("m")
          .appendSeconds()
          .appendSuffix("s")
          .toFormatter();
  private final int periodMs;
  private final MillisProvider clock;

  @GuardedBy("activeStateTrackers")
  private final Set activeStateTrackers;

  private final Future stateSamplingThread;

  @SuppressWarnings("methodref.receiver.bound" /* Synchronization ensures proper initialization */)
  public ExecutionStateSampler(PipelineOptions options, MillisProvider clock) {
    String samplingPeriodMills =
        ExperimentalOptions.getExperimentValue(
            options, ExperimentalOptions.STATE_SAMPLING_PERIOD_MILLIS);
    this.periodMs =
        samplingPeriodMills == null
            ? DEFAULT_SAMPLING_PERIOD_MS
            : Integer.parseInt(samplingPeriodMills);
    this.clock = clock;
    this.activeStateTrackers = new HashSet<>();
    // We specifically synchronize to ensure that this object can complete
    // being published before the state sampler thread starts.
    synchronized (this) {
      this.stateSamplingThread =
          options
              .as(ExecutorOptions.class)
              .getScheduledExecutorService()
              .submit(this::stateSampler);
    }
  }

  /** An {@link ExecutionState} represents the current state of an execution thread. */
  public interface ExecutionState {

    /**
     * Activates this execution state within the {@link ExecutionStateTracker}.
     *
     * 

Must only be invoked by the bundle processing thread. */ void activate(); /** * Restores the previously active execution state within the {@link ExecutionStateTracker}. * *

Must only be invoked by the bundle processing thread. */ void deactivate(); /** * Sets the error state to the currently executing state. Returns true if this was the first * time the error was set. Returns false otherwise. * *

This can only be set once. */ boolean error(); } /** Stops the execution of the state sampler. */ public void stop() { stateSamplingThread.cancel(true); try { stateSamplingThread.get(5L * periodMs, TimeUnit.MILLISECONDS); } catch (CancellationException e) { // This was expected -- we were cancelling the thread. } catch (InterruptedException | TimeoutException e) { throw new RuntimeException( "Failed to stop state sampling after waiting 5 sampling periods.", e); } catch (ExecutionException e) { throw new RuntimeException("Exception in state sampler", e); } } /** Entry point for the state sampling thread. */ private Void stateSampler() throws Exception { // Ensure the object finishes being published safely. synchronized (this) { if (stateSamplingThread == null) { throw new IllegalStateException("Underinitialized ExecutionStateSampler instance"); } } long lastSampleTimeMillis = clock.getMillis(); long targetTimeMillis = lastSampleTimeMillis + periodMs; while (!Thread.interrupted()) { long currentTimeMillis = clock.getMillis(); long difference = targetTimeMillis - currentTimeMillis; if (difference > 0) { Thread.sleep(difference); } else { long millisSinceLastSample = currentTimeMillis - lastSampleTimeMillis; synchronized (activeStateTrackers) { for (ExecutionStateTracker activeTracker : activeStateTrackers) { activeTracker.takeSample(currentTimeMillis, millisSinceLastSample); } } lastSampleTimeMillis = currentTimeMillis; targetTimeMillis = lastSampleTimeMillis + periodMs; } } return null; } /** Returns a new {@link ExecutionStateTracker} associated with this state sampler. */ public ExecutionStateTracker create() { return new ExecutionStateTracker(); } /** * A {@link MetricsContainer} that uses the current {@link ExecutionState} tracked by the provided * {@link ExecutionStateTracker}. */ private static class MetricsContainerForTracker implements MetricsContainer { private final transient ExecutionStateTracker tracker; private MetricsContainerForTracker(ExecutionStateTracker tracker) { this.tracker = tracker; } @Override public Counter getCounter(MetricName metricName) { if (tracker.currentState != null) { return tracker.currentState.metricsContainer.getCounter(metricName); } return tracker.metricsContainerRegistry.getUnboundContainer().getCounter(metricName); } @Override public Distribution getDistribution(MetricName metricName) { if (tracker.currentState != null) { return tracker.currentState.metricsContainer.getDistribution(metricName); } return tracker.metricsContainerRegistry.getUnboundContainer().getDistribution(metricName); } @Override public Gauge getGauge(MetricName metricName) { if (tracker.currentState != null) { return tracker.currentState.metricsContainer.getGauge(metricName); } return tracker.metricsContainerRegistry.getUnboundContainer().getGauge(metricName); } @Override public StringSet getStringSet(MetricName metricName) { if (tracker.currentState != null) { return tracker.currentState.metricsContainer.getStringSet(metricName); } return tracker.metricsContainerRegistry.getUnboundContainer().getStringSet(metricName); } @Override public Histogram getHistogram(MetricName metricName, HistogramData.BucketType bucketType) { if (tracker.currentState != null) { return tracker.currentState.metricsContainer.getHistogram(metricName, bucketType); } return tracker .metricsContainerRegistry .getUnboundContainer() .getHistogram(metricName, bucketType); } @Override public Iterable getMonitoringInfos() { if (tracker.currentState != null) { return tracker.currentState.metricsContainer.getMonitoringInfos(); } return tracker.metricsContainerRegistry.getUnboundContainer().getMonitoringInfos(); } } /** Tracks the current state of a single execution thread. */ public class ExecutionStateTracker implements BundleProgressReporter { // Used to create and store metrics containers for the execution states. private final MetricsContainerStepMap metricsContainerRegistry; private final MetricsContainer metricsContainer; // The set of execution states that this tracker is responsible for. Effectively // final since create() should not be invoked once any bundle starts processing. private final List executionStates; // Read by multiple threads, written by the bundle processing thread lazily. private final AtomicReference<@Nullable String> processBundleId; // Read by multiple threads, written by the bundle processing thread lazily. private final AtomicReference<@Nullable Thread> trackedThread; // Read by multiple threads, read and written by the ExecutionStateSampler thread lazily. private final AtomicLong lastTransitionTime; // Used to throttle lull logging. private long lastLullReport; // Read and written by the bundle processing thread frequently. private long numTransitions; // Read by the ExecutionStateSampler, written by the bundle processing thread lazily and // frequently. private final AtomicLong numTransitionsLazy; // Read and written by the bundle processing thread frequently. private @Nullable ExecutionStateImpl currentState; // Read by multiple threads, written by the bundle processing thread lazily. private final AtomicReference<@Nullable ExecutionStateImpl> currentStateLazy; // If an exception occurs, this will be to state at the time of exception. private boolean inErrorState = false; // Read and written by the ExecutionStateSampler thread private long transitionsAtLastSample; // Ignore the @UnderInitialization for ExecutionStateTracker since it will be initialized by the // time this method returns and no references are leaked to other threads during construction. @SuppressWarnings({"assignment", "argument"}) private ExecutionStateTracker() { this.metricsContainerRegistry = new MetricsContainerStepMap(); this.executionStates = new ArrayList<>(); this.trackedThread = new AtomicReference<>(); this.lastTransitionTime = new AtomicLong(); this.numTransitionsLazy = new AtomicLong(); this.currentStateLazy = new AtomicReference<>(); this.processBundleId = new AtomicReference<>(); this.metricsContainer = new MetricsContainerForTracker(this); } /** * Returns the {@link MetricsContainerStepMap} that is managed by this {@link * ExecutionStateTracker}. This metrics container registry stores all the user counters * associated for the current bundle execution. */ public MetricsContainerStepMap getMetricsContainerRegistry() { return metricsContainerRegistry; } /** * Returns a {@link MetricsContainer} that delegates based upon the current execution state to * the appropriate metrics container that is bound to the current {@link ExecutionState} or to * the unbound {@link MetricsContainer} if no execution state is currently running. */ public MetricsContainer getMetricsContainer() { return metricsContainer; } /** * Returns an {@link ExecutionState} bound to this tracker for the specified transform and * processing state. */ public ExecutionState create( String shortId, String ptransformId, String ptransformUniqueName, String stateName) { ExecutionStateImpl newState = new ExecutionStateImpl( shortId, ptransformId, ptransformUniqueName, stateName, metricsContainerRegistry.getContainer(ptransformId)); executionStates.add(newState); return newState; } /** * Called periodically by the {@link ExecutionStateSampler} to report time spent in this state. * * @param currentTimeMillis the current time. * @param millisSinceLastSample the time since the last sample was reported. As an * approximation, all of that time should be associated with this state. */ private void takeSample(long currentTimeMillis, long millisSinceLastSample) { ExecutionStateImpl currentExecutionState = currentStateLazy.get(); if (currentExecutionState != null) { currentExecutionState.takeSample(millisSinceLastSample); } long transitionsAtThisSample = numTransitionsLazy.get(); if (transitionsAtThisSample != transitionsAtLastSample) { lastTransitionTime.lazySet(currentTimeMillis); transitionsAtLastSample = transitionsAtThisSample; } else { long lullTimeMs = currentTimeMillis - lastTransitionTime.get(); if (lullTimeMs > MAX_LULL_TIME_MS) { if (lullTimeMs < lastLullReport // This must be a new report. || lullTimeMs > 1.2 * lastLullReport // Exponential backoff. || lullTimeMs > MAX_LULL_TIME_MS + lastLullReport // At least once every MAX_LULL_TIME_MS. ) { lastLullReport = lullTimeMs; Thread thread = trackedThread.get(); if (thread == null) { LOG.warn( String.format( "Operation ongoing in bundle %s for at least %s without outputting " + "or completing (stack trace unable to be generated).", processBundleId.get(), DURATION_FORMATTER.print(Duration.millis(lullTimeMs).toPeriod()))); } else if (currentExecutionState == null) { LOG.warn( String.format( "Operation ongoing in bundle %s for at least %s without outputting " + "or completing:%n at %s", processBundleId.get(), DURATION_FORMATTER.print(Duration.millis(lullTimeMs).toPeriod()), Joiner.on("\n at ").join(thread.getStackTrace()))); } else { LOG.warn( String.format( "Operation ongoing in bundle %s for PTransform{id=%s, name=%s, state=%s} " + "for at least %s without outputting or completing:%n at %s", processBundleId.get(), currentExecutionState.ptransformId, currentExecutionState.ptransformUniqueName, currentExecutionState.stateName, DURATION_FORMATTER.print(Duration.millis(lullTimeMs).toPeriod()), Joiner.on("\n at ").join(thread.getStackTrace()))); } } } } } /** Returns status information related to this tracker or null if not tracking a bundle. */ public @Nullable ExecutionStateTrackerStatus getStatus() { Thread thread = trackedThread.get(); if (thread == null) { return null; } long lastTransitionTimeMs = lastTransitionTime.get(); // We are actively processing a bundle but may have not yet entered into a state. ExecutionStateImpl current = currentStateLazy.get(); if (current != null) { return ExecutionStateTrackerStatus.create( current.ptransformId, current.ptransformUniqueName, thread, lastTransitionTimeMs, processBundleId.get()); } else { return ExecutionStateTrackerStatus.create( null, null, thread, lastTransitionTimeMs, processBundleId.get()); } } /** Returns the ptransform id of the currently executing thread. */ public @Nullable String getCurrentThreadsPTransformId() { if (currentState == null) { return null; } return currentState.ptransformId; } /** {@link ExecutionState} represents the current state of an execution thread. */ private class ExecutionStateImpl implements ExecutionState { private final String shortId; private final String ptransformId; private final String ptransformUniqueName; private final String stateName; private final MetricsContainer metricsContainer; // Read and written by the bundle processing thread frequently. private long msecs; // Read by the ExecutionStateSampler, written by the bundle processing thread frequently. private final AtomicLong lazyMsecs; /** Guarded by {@link BundleProcessor#getProgressRequestLock}. */ private boolean hasReportedValue; /** Guarded by {@link BundleProcessor#getProgressRequestLock}. */ private long lastReportedValue; // Read and written by the bundle processing thread frequently. private @Nullable ExecutionStateImpl previousState; private ExecutionStateImpl( String shortId, String ptransformId, String ptransformName, String stateName, MetricsContainer metricsContainer) { this.shortId = shortId; this.ptransformId = ptransformId; this.ptransformUniqueName = ptransformName; this.stateName = stateName; this.metricsContainer = metricsContainer; this.lazyMsecs = new AtomicLong(); } /** * Called periodically by the {@link ExecutionStateTracker} to report time spent in this * state. * * @param millisSinceLastSample the time since the last sample was reported. As an * approximation, all of that time should be associated with this state. */ public void takeSample(long millisSinceLastSample) { msecs += millisSinceLastSample; lazyMsecs.set(msecs); } /** Updates the monitoring data for this {@link ExecutionState}. */ public void updateMonitoringData(Map monitoringData) { long msecsReads = lazyMsecs.get(); if (hasReportedValue && lastReportedValue == msecsReads) { return; } monitoringData.put(shortId, MonitoringInfoEncodings.encodeInt64Counter(msecsReads)); lastReportedValue = msecsReads; hasReportedValue = true; } public void reset() { if (hasReportedValue) { msecs = 0; lazyMsecs.set(0); lastReportedValue = 0; } } @Override public void activate() { previousState = currentState; currentState = this; currentStateLazy.lazySet(this); numTransitions += 1; numTransitionsLazy.lazySet(numTransitions); } @Override public void deactivate() { currentState = previousState; currentStateLazy.lazySet(previousState); previousState = null; numTransitions += 1; numTransitionsLazy.lazySet(numTransitions); } @Override public boolean error() { if (!inErrorState) { inErrorState = true; return true; } return false; } } /** * Starts tracking execution states for specified {@code processBundleId}. * *

Only invoked by the bundle processing thread. */ public void start(String processBundleId) { BeamFnLoggingMDC.setStateTracker(this); this.processBundleId.lazySet(processBundleId); this.lastTransitionTime.lazySet(clock.getMillis()); this.trackedThread.lazySet(Thread.currentThread()); synchronized (activeStateTrackers) { activeStateTrackers.add(this); } } @Override public void updateIntermediateMonitoringData(Map monitoringData) { for (ExecutionStateImpl executionState : executionStates) { executionState.updateMonitoringData(monitoringData); } } @Override public void updateFinalMonitoringData(Map monitoringData) { for (ExecutionStateImpl executionState : executionStates) { executionState.updateMonitoringData(monitoringData); } } /** * Stops tracking execution states allowing for the {@link ExecutionStateTracker} to be re-used * for another bundle. */ @Override public void reset() { synchronized (activeStateTrackers) { activeStateTrackers.remove(this); for (ExecutionStateImpl executionState : executionStates) { executionState.reset(); } this.transitionsAtLastSample = 0; } this.processBundleId.lazySet(null); this.trackedThread.lazySet(null); this.numTransitions = 0; this.numTransitionsLazy.lazySet(0); this.lastTransitionTime.lazySet(0); this.metricsContainerRegistry.reset(); this.inErrorState = false; BeamFnLoggingMDC.setStateTracker(null); } } @AutoValue public abstract static class ExecutionStateTrackerStatus { public static ExecutionStateTrackerStatus create( @Nullable String ptransformId, @Nullable String ptransformUniqueName, Thread trackedThread, long lastTransitionTimeMs, @Nullable String processBundleId) { return new AutoValue_ExecutionStateSampler_ExecutionStateTrackerStatus( ptransformId, ptransformUniqueName, trackedThread, lastTransitionTimeMs, processBundleId); } public abstract @Nullable String getPTransformId(); public abstract @Nullable String getPTransformUniqueName(); public abstract Thread getTrackedThread(); public abstract long getLastTransitionTimeMillis(); public abstract @Nullable String getProcessBundleId(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy