zipkin2.dependencies.opensearch.OpensearchDependenciesJob Maven / Gradle / Ivy
/*
* Copyright The OpenZipkin Authors
* SPDX-License-Identifier: Apache-2.0
*/
package zipkin2.dependencies.opensearch;
import com.google.gson.stream.JsonReader;
import com.google.gson.stream.MalformedJsonException;
import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.nio.charset.Charset;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TimeZone;
import javax.annotation.Nullable;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.PairFunction;
import org.opensearch.spark.rdd.api.java.JavaOpenSearchSpark;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;
import zipkin2.DependencyLink;
import zipkin2.codec.SpanBytesDecoder;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_INDEX_READ_MISSING_AS_EMPTY;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_HTTP_AUTH_PASS;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_HTTP_AUTH_USER;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_KEYSTORE_LOCATION;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_KEYSTORE_PASS;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_TRUST_STORE_LOCATION;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_SSL_TRUST_STORE_PASS;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NET_USE_SSL;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NODES;
import static org.opensearch.hadoop.cfg.ConfigurationOptions.OPENSEARCH_NODES_WAN_ONLY;
import static zipkin2.internal.DateUtil.midnightUTC;
public final class OpensearchDependenciesJob {
/** UTF-8 charset, looked up by name once when the class is initialized. */
static final Charset UTF_8 = Charset.forName("UTF-8");
/** SLF4J logger scoped to this class. */
private static final Logger log = LoggerFactory.getLogger(OpensearchDependenciesJob.class);
/** Returns a new {@link Builder} populated with its default settings. */
public static Builder builder() {
return new Builder();
}
/**
 * Fluent configuration for {@link OpensearchDependenciesJob}.
 *
 * <p>Defaults for the index, hosts, credentials and Spark master are looked up through
 * {@code getEnv} when the builder is created; each can be overridden with the matching setter.
 */
public static final class Builder {
  String index = getEnv("ES_INDEX", "zipkin");
  String hosts = getEnv("ES_HOSTS", "127.0.0.1");
  String username = getEnv("ES_USERNAME", null);
  String password = getEnv("ES_PASSWORD", null);
  /** Properties copied onto the Spark configuration by the job, kept in insertion order. */
  final Map<String, String> sparkProperties = new LinkedHashMap<>();

  Builder() {
    sparkProperties.put("spark.ui.enabled", "false");
    // don't die if there are no spans
    sparkProperties.put(OPENSEARCH_INDEX_READ_MISSING_AS_EMPTY, "true");
    sparkProperties.put(OPENSEARCH_NODES_WAN_ONLY, getEnv("ES_NODES_WAN_ONLY", "false"));
    // Key/trust store settings mirror the standard javax.net.ssl.* system properties.
    sparkProperties.put(OPENSEARCH_NET_SSL_KEYSTORE_LOCATION,
        getSystemPropertyAsFileResource("javax.net.ssl.keyStore"));
    sparkProperties.put(OPENSEARCH_NET_SSL_KEYSTORE_PASS,
        System.getProperty("javax.net.ssl.keyStorePassword", ""));
    sparkProperties.put(OPENSEARCH_NET_SSL_TRUST_STORE_LOCATION,
        getSystemPropertyAsFileResource("javax.net.ssl.trustStore"));
    sparkProperties.put(OPENSEARCH_NET_SSL_TRUST_STORE_PASS,
        System.getProperty("javax.net.ssl.trustStorePassword", ""));
  }

  // local[*] master lets us run & test the job locally without setting a Spark cluster
  String sparkMaster = getEnv("SPARK_MASTER", "local[*]");
  // needed when not in local mode
  String[] jars;
  Runnable logInitializer;
  // By default, the job only works on traces whose first timestamp is today
  long day = midnightUTC(System.currentTimeMillis());

  /** When set, this indicates which jars to distribute to the cluster. */
  public Builder jars(String... jars) {
    this.jars = jars;
    return this;
  }

  /** The index prefix to use when generating daily index names. Defaults to "zipkin" */
  public Builder index(String index) {
    this.index = checkNotNull(index, "index");
    return this;
  }

  /**
   * Sets the hosts the job connects to. Defaults to "127.0.0.1".
   *
   * <p>Calling this also switches the WAN-only property to "true", replacing whatever value the
   * {@code ES_NODES_WAN_ONLY} setting supplied.
   *
   * @throws NullPointerException if {@code hosts} is null
   */
  public Builder hosts(String hosts) {
    this.hosts = checkNotNull(hosts, "hosts");
    // Same key the constructor populates; use the constant so the two cannot drift apart.
    sparkProperties.put(OPENSEARCH_NODES_WAN_ONLY, "true");
    return this;
  }

  /** Username used for basic auth. Leave unset (null) when the cluster needs no credentials. */
  public Builder username(String username) {
    this.username = username;
    return this;
  }

  /** Password used for basic auth. Leave unset (null) when the cluster needs no credentials. */
  public Builder password(String password) {
    this.password = password;
    return this;
  }

  /** Day (in epoch milliseconds) to process dependencies for. Defaults to today. */
  public Builder day(long day) {
    this.day = midnightUTC(day);
    return this;
  }

  /**
   * Adds extra Spark properties. Entries override any property of the same name that was set
   * earlier on this builder.
   *
   * @throws NullPointerException if {@code conf} is null
   */
  public Builder conf(Map<String, String> conf) {
    sparkProperties.putAll(checkNotNull(conf, "conf"));
    return this;
  }

  /** Ensures that logging is set up. Particularly important when in cluster mode. */
  public Builder logInitializer(Runnable logInitializer) {
    this.logInitializer = checkNotNull(logInitializer, "logInitializer");
    return this;
  }

  /** Creates the job from the current settings. */
  public OpensearchDependenciesJob build() {
    return new OpensearchDependenciesJob(this);
  }
}
/**
 * Looks up the system property {@code key} and turns a non-empty value into a {@code file:}
 * resource string.
 *
 * @return {@code "file:" + value} when the property has a non-empty value, otherwise the empty
 *     string
 */
private static String getSystemPropertyAsFileResource(String key) {
  String location = System.getProperty(key, "");
  if (location == null || location.isEmpty()) {
    // Nothing configured: hand the value back unchanged.
    return location;
  }
  return "file:" + location;
}
/** Index prefix copied from the builder; run() appends "-span-" or "-dependency-" plus the date stamp. */
final String index;
/** The builder's day formatted in UTC as yyyy-MM-dd, with "-" swapped for the ES_DATE_SEPARATOR setting. */
final String dateStamp;
/** Spark configuration assembled by the constructor from the builder's settings. */
final SparkConf conf;
/** Logging hook copied from the builder; null when none was supplied. */
@Nullable final Runnable logInitializer;
/**
 * Builds the job state from {@code builder}: the index prefix, the UTC date stamp of the day to
 * process, and a {@link SparkConf} carrying the master, jars, credentials, hosts and every entry
 * of the builder's Spark properties.
 *
 * <p>Builder properties are applied last, so they override the values set directly here.
 */
OpensearchDependenciesJob(Builder builder) {
  this.index = builder.index;

  // Daily indices are named with a yyyy-MM-dd stamp whose separator is configurable.
  String dateSeparator = getEnv("ES_DATE_SEPARATOR", "-");
  SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd".replace("-", dateSeparator));
  df.setTimeZone(TimeZone.getTimeZone("UTC"));
  this.dateStamp = df.format(new Date(builder.day));

  this.conf = new SparkConf(true).setMaster(builder.sparkMaster).setAppName(getClass().getName());
  if (builder.sparkMaster.startsWith("local[")) {
    conf.set("spark.driver.bindAddress", "127.0.0.1");
  }
  if (builder.jars != null) {
    conf.setJars(builder.jars);
  }
  if (builder.username != null) {
    conf.set(OPENSEARCH_NET_HTTP_AUTH_USER, builder.username);
  }
  if (builder.password != null) {
    conf.set(OPENSEARCH_NET_HTTP_AUTH_PASS, builder.password);
  }
  conf.set(OPENSEARCH_NODES, parseHosts(builder.hosts));
  // NOTE(review): plain substring match, so any hosts value containing "https" enables SSL.
  if (builder.hosts.contains("https")) {
    conf.set(OPENSEARCH_NET_USE_SSL, "true");
  }

  // Typed view so keys and values can be handed to SparkConf.set(String, String).
  Map<String, String> sparkProperties = builder.sparkProperties;
  for (Map.Entry<String, String> entry : sparkProperties.entrySet()) {
    String key = entry.getKey();
    String value = entry.getValue();
    conf.set(key, value);
    // Key/trust store and auth passwords travel through these properties; keep them out of logs.
    String loggedValue = key.contains("pass") ? "<redacted>" : value;
    log.debug("Spark conf properties: {}={}", key, loggedValue);
  }

  this.logInitializer = builder.logInitializer;
}
public void run() {
String spanResource = index + "-span-" + dateStamp;
String dependencyLinkResource = index + "-dependency-" + dateStamp;
SpanBytesDecoder decoder = SpanBytesDecoder.JSON_V2;
log.info("Processing spans from {}", spanResource);
JavaRDD
© 2015 - 2025 Weber Informatics LLC | Privacy Policy