/*
* Copyright 2014-2024 Netflix, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.netflix.atlas.eval.stream;
import org.apache.pekko.actor.ActorSystem;
import org.apache.pekko.http.javadsl.model.Uri;
import org.apache.pekko.stream.Materializer;
import org.apache.pekko.stream.ThrottleMode;
import org.apache.pekko.stream.javadsl.Flow;
import org.apache.pekko.stream.javadsl.Framing;
import org.apache.pekko.stream.javadsl.Source;
import org.apache.pekko.stream.javadsl.StreamConverters;
import org.apache.pekko.util.ByteString;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.netflix.atlas.core.util.Strings$;
import com.netflix.atlas.json.JsonSupport;
import com.netflix.spectator.api.NoopRegistry;
import com.netflix.spectator.api.Registry;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import org.reactivestreams.Processor;
import org.reactivestreams.Publisher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Public API for streaming evaluation of expressions.
*/
public final class Evaluator extends EvaluatorImpl {
/**
* Create a new instance. It is assumed this is created via the injector and the
* signature may change over time.
*/
public Evaluator(Config config, Registry registry, ActorSystem system) {
super(config, registry, system, Materializer.createMaterializer(system));
}
/**
* Collects the data from LWC for the specified duration and stores it to a file. The
* file can then be used as an input to replay evaluation.
*
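* <p>A minimal usage sketch, assuming an {@code evaluator} instance is in scope; the
* URI, output file, and duration below are illustrative values:
*
* <pre>{@code
* evaluator.writeInputToFile(
*     "http://localhost:7101/api/v1/graph?q=name,ssCpuUser,:eq,:avg&step=5s",
*     Paths.get("/tmp/replay.dat"),
*     Duration.ofMinutes(5));
* }</pre>
*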
* @param uri
* Source URI for collecting the data.
* @param file
* Output file to store the results.
* @param duration
* Specifies how long the collection will run. Note this method will block until
* the collection is complete.
*/
public void writeInputToFile(String uri, Path file, Duration duration) {
writeInputToFileImpl(uri, file, duration);
}
/**
* Creates a publisher stream for a given URI.
*
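* <p>A minimal sketch of consuming the publisher with Pekko Streams, assuming an
* {@code evaluator} and {@code materializer} are in scope; the URI is illustrative:
*
* <pre>{@code
* Publisher<JsonSupport> publisher =
*     evaluator.createPublisher("http://localhost:7101/api/v1/graph?q=name,ssCpuUser,:eq,:avg");
* Source.fromPublisher(publisher)
*     .runForeach(msg -> System.out.println(msg.toJson()), materializer);
* }</pre>
*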
* @param uri
* Source URI for collecting the data.
* @return
* Publisher that produces events representing the evaluation results for the
* expression in the URI.
*/
public Publisher<JsonSupport> createPublisher(String uri) {
return createPublisherImpl(uri);
}
/**
* Creates a processor that can multiplex many streams at once. This can be used to
* help reduce the overhead in terms of number of connections and duplicate work on
* the backend producing the data.
*
* It takes a stream of data sources as an input and returns the output of evaluating
* those streams. Each {@code DataSources} object should be the complete set of
* sources that should be evaluated at a given time. The output messages can be
* correlated with a particular data source using the id on the {@code MessageEnvelope}.
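*
* <p>A minimal wiring sketch using Pekko Streams; the URI is illustrative, and
* {@code Source.never()} just keeps the stream open so updated sets could follow:
*
* <pre>{@code
* DataSources sources = DataSources.of(
*     new DataSource("id1", "http://localhost:7101/api/v1/graph?q=name,ssCpuUser,:eq,:avg"));
* Source.single(sources)
*     .concat(Source.never())
*     .via(Flow.fromProcessor(evaluator::createStreamsProcessor))
*     .runForeach(msg -> System.out.println(msg.id() + ": " + msg.message().toJson()), materializer);
* }</pre>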
*/
public Processor<DataSources, MessageEnvelope> createStreamsProcessor() {
return createStreamsProcessorImpl();
}
/**
* Creates a processor that evaluates a fixed set of expressions against the stream of
* time grouped datapoints. The caller must ensure:
*
* <ul>
* <li>All data sources have the same step size. This is required because there is a
* single input datapoint stream with a fixed step.</li>
* <li>There should be a single group for each step interval with timestamps aligned to
* the step boundary.</li>
* <li>Operations are feasible for the input. Time based operators may or may not work
* correctly depending on how the timestamps for the input groups are determined.
* For use-cases where the timestamps are generated arbitrarily, such as incrementing
* for each group once a certain number of datapoints is available, operators based
* on a clock such as {@code :time} will have bogus values based on the arbitrary
* times for the group. If the data is a snapshot of an actual stream, then time based
* operators should work correctly.</li>
* </ul>
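*
* <p>A sketch of constructing valid input for the processor, assuming an
* {@code evaluator} is in scope; the step, URI format, tags, and timestamp are
* illustrative:
*
* <pre>{@code
* DataSources sources = DataSources.of(
*     new DataSource("id1", Duration.ofSeconds(5), "synthetic://test?q=name,cpu,:eq,:sum"));
* Processor<DatapointGroup, MessageEnvelope> processor =
*     evaluator.createDatapointProcessor(sources);
* long t = 1704067200000L; // 2024-01-01T00:00:00Z, aligned to the 5s step boundary
* DatapointGroup group = new DatapointGroup(
*     t, List.of(new Datapoint(Map.of("name", "cpu", "node", "i-1"), 42.0)));
* }</pre>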
*/
public Processor<DatapointGroup, MessageEnvelope> createDatapointProcessor(DataSources sources) {
return createDatapointProcessorImpl(sources);
}
/**
* Perform static analysis checks to ensure that the provided data source is supported
* by this evaluator instance.
*
* @param ds
* Data source to validate.
*/
public void validate(DataSource ds) {
validateImpl(ds);
}
/**
* Immutable set of data sources that should be consumed.
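*
* <p>For example, diffing two sets to find what changed; the sources {@code ds1} and
* {@code ds2} are hypothetical:
*
* <pre>{@code
* DataSources previous = DataSources.of(ds1);
* DataSources current = DataSources.of(ds1, ds2);
* current.addedSources(previous);   // {ds2}
* current.removedSources(previous); // empty set
* }</pre>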
*/
public record DataSources(Set<DataSource> sources) {
/** Create a new instance that is empty. */
public static DataSources empty() {
return new DataSources(Collections.emptySet());
}
/** Create a new instance. */
public static DataSources of(DataSource... sources) {
Set<DataSource> set = new HashSet<>(Arrays.asList(sources));
return new DataSources(set);
}
/** Create a new instance. */
public DataSources {
sources = Set.copyOf(sources);
}
/** Compares with another set and returns the new data sources that have been added. */
public Set<DataSource> addedSources(DataSources other) {
Set<DataSource> copy = new HashSet<>(sources);
copy.removeAll(other.sources());
return copy;
}
/** Compares with another set and returns the data sources that have been removed. */
public Set<DataSource> removedSources(DataSources other) {
return other.addedSources(this);
}
DataSources remoteOnly() {
Set<DataSource> remote = new HashSet<>(sources);
remote.removeAll(localSources());
return new DataSources(remote);
}
DataSources localOnly() {
return new DataSources(localSources());
}
private Set<DataSource> localSources() {
return sources.stream()
.filter(DataSource::isLocal)
.collect(Collectors.toSet());
}
/**
* Return the step size for the sources in this set. If there are mixed step sizes, an
* IllegalStateException will be thrown. If the set is empty, then -1 will be returned.
*/
long stepSize() {
long step = -1L;
for (DataSource source : sources) {
long sourceStep = source.step().toMillis();
if (step != -1L && step != sourceStep) {
throw new IllegalStateException("inconsistent step sizes, expected "
+ step + ", found " + sourceStep + " on " + source);
}
step = sourceStep;
}
return step;
}
}
/**
* Triple mapping an id to a step size and a URI that should be consumed. The
* id can be used to find the messages in the output that correspond to this
* data source.
*
* @param id
* An identifier for this {@code DataSource}. It will be added to
* corresponding MessageEnvelope objects in the output, facilitating
* matching output messages with the data source.
* @param step
* The requested step size for this {@code DataSource}. NOTE:
* This may result in rejection of this {@code DataSource} if the
* backing metrics producers do not support the requested step size.
* @param uri
* The URI for this {@code DataSource} (in atlas backend form).
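*
* <p>For example, the id and URI being illustrative:
*
* <pre>{@code
* DataSource ds = new DataSource(
*     "cpu", Duration.ofMinutes(1),
*     "http://localhost:7101/api/v1/graph?q=name,ssCpuUser,:eq,:avg");
* }</pre>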
*/
public record DataSource(String id, Duration step, String uri) {
/**
* Create a new instance.
*/
public DataSource {
step = step == null ? extractStepFromUri(uri) : step;
}
/**
* Create a new instance with the step size being derived from the URI or falling
* back to {@link #DEFAULT_STEP}.
*
* @param id
* An identifier for this {@code DataSource}. It will be added to
* corresponding MessageEnvelope objects in the output, facilitating
* matching output messages with the data source.
* @param uri
* The URI for this {@code DataSource} (in atlas backend form).
*/
public DataSource(String id, String uri) {
this(id, null, uri);
}
/** Returns true if the URI is for a local file or classpath resource. */
@JsonIgnore
public boolean isLocal() {
return uri.startsWith("/")
|| uri.startsWith("file:")
|| uri.startsWith("resource:")
|| uri.startsWith("synthetic:");
}
}
/**
* Wraps the output messages from the evaluation with the id of the data source. This
* can be used to route the message back to the appropriate consumer.
*/
public record MessageEnvelope(String id, JsonSupport message) {
}
/**
* Group of datapoints for the same time.
*/
public record DatapointGroup(long timestamp, List<Datapoint> datapoints) {
}
/**
* Represents a datapoint for a dataset that is generated independently of the LWC clusters.
* This can be used for synthetic data or if there is an alternative data source such as a
* stream of events that are being mapped into streaming time series.
*/
public record Datapoint(Map<String, String> tags, double value) {
}
/** The default step size. */
private static final Duration DEFAULT_STEP = Duration.ofSeconds(60L);
/**
* Try to extract the step size based on the URI query parameter. If this fails for any
* reason or the parameter is not present, then use the default of 1 minute.
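*
* <p>For example, the host and query being illustrative:
*
* <pre>{@code
* extractStepFromUri("http://localhost:7101/api/v1/graph?q=name,cpu,:eq&step=PT10S"); // 10 seconds
* }</pre>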
*/
private static Duration extractStepFromUri(String uriString) {
try {
Uri uri = Uri.create(uriString, Uri.RELAXED);
return uri.query()
.get("step")
.map(Strings$.MODULE$::parseDuration)
.orElse(DEFAULT_STEP);
} catch (Exception e) {
return DEFAULT_STEP;
}
}
/**
* Helper used for simple tests. A set of URIs will be read from {@code stdin}, one per line,
* and sent through a processor created via {@link #createStreamsProcessor()}. The output will
* be sent to {@code stdout}.
*
* @param args
* Paths to additional configuration files that should be loaded. The configs will be
* loaded in order with later entries overriding earlier ones if there are conflicts.
*/
public static void main(String[] args) throws Exception {
final Logger logger = LoggerFactory.getLogger(Evaluator.class);
// Load any additional configurations if present
Config config = ConfigFactory.load();
for (String file : args) {
logger.info("loading configuration file {}", file);
config = ConfigFactory.parseFile(new File(file))
.withFallback(config)
.resolve();
}
// Setup evaluator
ActorSystem system = ActorSystem.create();
Materializer mat = Materializer.createMaterializer(system);
Evaluator evaluator = new Evaluator(config, new NoopRegistry(), system);
// Process URIs
StreamConverters // Read in URIs from stdin
.fromInputStream(() -> System.in)
.via(Framing.delimiter(ByteString.fromString("\n"), 16384))
.map(b -> b.decodeString(StandardCharsets.UTF_8))
.zipWithIndex() // Use line number as id for output
.map(p -> new DataSource(p.second().toString(), p.first()))
.fold(new HashSet<DataSource>(), (vs, v) -> { vs.add(v); return vs; })
.map(DataSources::new)
.flatMapConcat(Source::repeat) // Repeat so stream doesn't shutdown
.throttle( // Throttle to avoid wasting CPU
1, Duration.ofMinutes(1),
1, ThrottleMode.shaping()
)
.via(Flow.fromProcessor(evaluator::createStreamsProcessor))
.runForeach(
msg -> System.out.printf("%10s: %s%n", msg.id(), msg.message().toJson()),
mat
)
.toCompletableFuture()
.get();
}
}