
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.ingest;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.util.Strings;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.bulk.TransportBulkAction;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.ingest.DeletePipelineRequest;
import org.elasticsearch.action.ingest.PutPipelineRequest;
import org.elasticsearch.action.support.RefCountingRunnable;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.internal.Client;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateApplier;
import org.elasticsearch.cluster.ClusterStateTaskExecutor;
import org.elasticsearch.cluster.ClusterStateTaskListener;
import org.elasticsearch.cluster.metadata.DataStream.TimestampField;
import org.elasticsearch.cluster.metadata.IndexAbstraction;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.metadata.IndexTemplateMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.metadata.MetadataIndexTemplateService;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.cluster.service.MasterServiceTaskQueue;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.core.Releasable;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.env.Environment;
import org.elasticsearch.gateway.GatewayService;
import org.elasticsearch.grok.MatcherWatchdog;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.node.ReportingService;
import org.elasticsearch.plugins.IngestPlugin;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.threadpool.Scheduler;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.IntConsumer;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import static org.elasticsearch.core.Strings.format;
/**
* Holder class for several ingest related services.
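*
* <p>As an illustrative, hedged sketch (the {@code ingestService} reference is assumed to be obtained during node
* construction and is not part of this file), callers typically interact with the service like this:
* <pre>{@code
* Pipeline pipeline = ingestService.getPipeline("my-pipeline"); // null if the pipeline does not exist
* IngestStats stats = ingestService.stats();                    // cumulative ingest metrics for this node
* }</pre>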
*/
public class IngestService implements ClusterStateApplier, ReportingService<IngestInfo> {
public static final String NOOP_PIPELINE_NAME = "_none";
public static final String INGEST_ORIGIN = "ingest";
private static final Logger logger = LogManager.getLogger(IngestService.class);
private final MasterServiceTaskQueue<PipelineClusterStateUpdateTask> taskQueue;
private final ClusterService clusterService;
private final ScriptService scriptService;
private final Map<String, Processor.Factory> processorFactories;
// Ideally this should be in the IngestMetadata class, but we don't have the processor factories around there.
// We only know all the processor factories once a node and all of its plugins have been initialized. Also, some
// processor factories rely on other node services. Custom metadata is statically registered when classes
// are loaded, so in the cluster state we just save the pipeline config, and here we keep the actual pipelines around.
private volatile Map<String, PipelineHolder> pipelines = Map.of();
private final ThreadPool threadPool;
private final IngestMetric totalMetrics = new IngestMetric();
private final List<Consumer<ClusterState>> ingestClusterStateListeners = new CopyOnWriteArrayList<>();
private volatile ClusterState state;
private static BiFunction<Long, Runnable, Scheduler.ScheduledCancellable> createScheduler(ThreadPool threadPool) {
return (delay, command) -> threadPool.schedule(command, TimeValue.timeValueMillis(delay), ThreadPool.Names.GENERIC);
}
public static MatcherWatchdog createGrokThreadWatchdog(Environment env, ThreadPool threadPool) {
final Settings settings = env.settings();
final BiFunction<Long, Runnable, Scheduler.ScheduledCancellable> scheduler = createScheduler(threadPool);
long intervalMillis = IngestSettings.GROK_WATCHDOG_INTERVAL.get(settings).getMillis();
long maxExecutionTimeMillis = IngestSettings.GROK_WATCHDOG_MAX_EXECUTION_TIME.get(settings).getMillis();
return MatcherWatchdog.newInstance(intervalMillis, maxExecutionTimeMillis, threadPool::relativeTimeInMillis, scheduler::apply);
}
/**
* Cluster state task executor for ingest pipeline operations
*/
static final ClusterStateTaskExecutor<PipelineClusterStateUpdateTask> PIPELINE_TASK_EXECUTOR = batchExecutionContext -> {
final var allIndexMetadata = batchExecutionContext.initialState().metadata().indices().values();
final IngestMetadata initialIngestMetadata = batchExecutionContext.initialState().metadata().custom(IngestMetadata.TYPE);
var currentIngestMetadata = initialIngestMetadata;
for (final var taskContext : batchExecutionContext.taskContexts()) {
try {
final var task = taskContext.getTask();
try (var ignored = taskContext.captureResponseHeaders()) {
currentIngestMetadata = task.execute(currentIngestMetadata, allIndexMetadata);
}
taskContext.success(() -> task.listener.onResponse(AcknowledgedResponse.TRUE));
} catch (Exception e) {
taskContext.onFailure(e);
}
}
final var finalIngestMetadata = currentIngestMetadata;
return finalIngestMetadata == initialIngestMetadata
? batchExecutionContext.initialState()
: batchExecutionContext.initialState().copyAndUpdateMetadata(b -> b.putCustom(IngestMetadata.TYPE, finalIngestMetadata));
};
/**
* Specialized cluster state update task specifically for ingest pipeline operations.
* These operations all receive an AcknowledgedResponse.
*/
public abstract static class PipelineClusterStateUpdateTask implements ClusterStateTaskListener {
final ActionListener<AcknowledgedResponse> listener;
PipelineClusterStateUpdateTask(ActionListener<AcknowledgedResponse> listener) {
this.listener = listener;
}
public abstract IngestMetadata execute(IngestMetadata currentIngestMetadata, Collection<IndexMetadata> allIndexMetadata);
@Override
public void onFailure(Exception e) {
listener.onFailure(e);
}
}
public IngestService(
ClusterService clusterService,
ThreadPool threadPool,
Environment env,
ScriptService scriptService,
AnalysisRegistry analysisRegistry,
List<IngestPlugin> ingestPlugins,
Client client,
MatcherWatchdog matcherWatchdog
) {
this.clusterService = clusterService;
this.scriptService = scriptService;
this.processorFactories = processorFactories(
ingestPlugins,
new Processor.Parameters(
env,
scriptService,
analysisRegistry,
threadPool.getThreadContext(),
threadPool::relativeTimeInMillis,
createScheduler(threadPool),
this,
client,
threadPool.generic()::execute,
matcherWatchdog
)
);
this.threadPool = threadPool;
this.taskQueue = clusterService.createTaskQueue("ingest-pipelines", Priority.NORMAL, PIPELINE_TASK_EXECUTOR);
}
private static Map<String, Processor.Factory> processorFactories(List<IngestPlugin> ingestPlugins, Processor.Parameters parameters) {
Map<String, Processor.Factory> processorFactories = new TreeMap<>();
for (IngestPlugin ingestPlugin : ingestPlugins) {
Map<String, Processor.Factory> newProcessors = ingestPlugin.getProcessors(parameters);
for (Map.Entry<String, Processor.Factory> entry : newProcessors.entrySet()) {
if (processorFactories.put(entry.getKey(), entry.getValue()) != null) {
throw new IllegalArgumentException("Ingest processor [" + entry.getKey() + "] is already registered");
}
}
}
logger.debug("registered ingest processor types: {}", processorFactories.keySet());
return Map.copyOf(processorFactories);
}
/**
* Resolves the potential pipelines (default and final) from the request or the templates associated with the index, and
* then **mutates** the passed {@link org.elasticsearch.action.index.IndexRequest} object with the pipeline information.
*
* Also, this method marks the request as `isPipelinesResolved = true`: because the request can be rerouted from a
* coordinating node to an ingest node, we have to avoid resolving the pipelines twice, and we must be able to distinguish
* whether the pipeline came as part of the request or was resolved by this method. All of this makes it possible to later
* reject the request in case the pipeline was set by a required pipeline **and** the request also specifies its own pipeline.
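*
* <p>A hedged usage sketch (the request and cluster state values here are hypothetical placeholders):
* <pre>{@code
* IndexRequest request = new IndexRequest("my-index").id("1").source(Map.of("field", "value"));
* IngestService.resolvePipelinesAndUpdateIndexRequest(request, request, clusterState.metadata());
* boolean needsIngest = IngestService.hasPipeline(request); // false if both pipelines resolved to "_none"
* }</pre>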
*
* @param originalRequest Original write request received.
* @param indexRequest The {@link org.elasticsearch.action.index.IndexRequest} object to update.
* @param metadata Cluster metadata from where the pipeline information could be derived.
*/
public static void resolvePipelinesAndUpdateIndexRequest(
final DocWriteRequest<?> originalRequest,
final IndexRequest indexRequest,
final Metadata metadata
) {
resolvePipelinesAndUpdateIndexRequest(originalRequest, indexRequest, metadata, System.currentTimeMillis());
}
static void resolvePipelinesAndUpdateIndexRequest(
final DocWriteRequest<?> originalRequest,
final IndexRequest indexRequest,
final Metadata metadata,
final long epochMillis
) {
if (indexRequest.isPipelineResolved()) {
return;
}
String requestPipeline = indexRequest.getPipeline();
Pipelines pipelines = resolvePipelinesFromMetadata(originalRequest, indexRequest, metadata, epochMillis) //
.or(() -> resolvePipelinesFromIndexTemplates(indexRequest, metadata))
.orElse(Pipelines.NO_PIPELINES_DEFINED);
// The pipeline coming as part of the request always has priority over the resolved one from metadata or templates
if (requestPipeline != null) {
indexRequest.setPipeline(requestPipeline);
} else {
indexRequest.setPipeline(pipelines.defaultPipeline);
}
indexRequest.setFinalPipeline(pipelines.finalPipeline);
indexRequest.isPipelineResolved(true);
}
public ClusterService getClusterService() {
return clusterService;
}
public ScriptService getScriptService() {
return scriptService;
}
/**
* Deletes the pipeline specified by id in the request.
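* For example (caller-side sketch; note that the id may be a wildcard pattern such as {@code "logs-*"}):
* <pre>{@code
* ingestService.delete(
*     new DeletePipelineRequest("logs-*"),
*     ActionListener.wrap(ack -> logger.info("acknowledged: {}", ack.isAcknowledged()), e -> logger.warn("failed", e))
* );
* }</pre>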
*/
public void delete(DeletePipelineRequest request, ActionListener<AcknowledgedResponse> listener) {
taskQueue.submitTask(
"delete-pipeline-" + request.getId(),
new DeletePipelineClusterStateUpdateTask(listener, request),
request.masterNodeTimeout()
);
}
/**
* Used by this class and {@link org.elasticsearch.action.ingest.ReservedPipelineAction}
*/
public static class DeletePipelineClusterStateUpdateTask extends PipelineClusterStateUpdateTask {
private final DeletePipelineRequest request;
DeletePipelineClusterStateUpdateTask(ActionListener<AcknowledgedResponse> listener, DeletePipelineRequest request) {
super(listener);
this.request = request;
}
/**
* Used by the {@link org.elasticsearch.action.ingest.ReservedPipelineAction}
*/
public DeletePipelineClusterStateUpdateTask(String id) {
this(null, new DeletePipelineRequest(id));
}
@Override
public IngestMetadata execute(IngestMetadata currentIngestMetadata, Collection<IndexMetadata> allIndexMetadata) {
if (currentIngestMetadata == null) {
return null;
}
Map<String, PipelineConfiguration> pipelines = currentIngestMetadata.getPipelines();
Set<String> toRemove = new HashSet<>();
for (String pipelineKey : pipelines.keySet()) {
if (Regex.simpleMatch(request.getId(), pipelineKey)) {
toRemove.add(pipelineKey);
}
}
if (toRemove.isEmpty() && Regex.isMatchAllPattern(request.getId()) == false) {
throw new ResourceNotFoundException("pipeline [{}] is missing", request.getId());
} else if (toRemove.isEmpty()) {
return currentIngestMetadata;
}
final Map<String, PipelineConfiguration> pipelinesCopy = new HashMap<>(pipelines);
for (String key : toRemove) {
validateNotInUse(key, allIndexMetadata);
pipelinesCopy.remove(key);
}
return new IngestMetadata(pipelinesCopy);
}
}
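/**
* Throws an {@link IllegalArgumentException} if the given pipeline is referenced as the default or final
* pipeline in the settings of any existing index, naming up to three of the affected indices per role.
*/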
static void validateNotInUse(String pipeline, Collection<IndexMetadata> allIndexMetadata) {
List<String> defaultPipelineIndices = new ArrayList<>();
List<String> finalPipelineIndices = new ArrayList<>();
for (IndexMetadata indexMetadata : allIndexMetadata) {
String defaultPipeline = IndexSettings.DEFAULT_PIPELINE.get(indexMetadata.getSettings());
String finalPipeline = IndexSettings.FINAL_PIPELINE.get(indexMetadata.getSettings());
if (pipeline.equals(defaultPipeline)) {
defaultPipelineIndices.add(indexMetadata.getIndex().getName());
}
if (pipeline.equals(finalPipeline)) {
finalPipelineIndices.add(indexMetadata.getIndex().getName());
}
}
if (defaultPipelineIndices.size() > 0 || finalPipelineIndices.size() > 0) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"pipeline [%s] cannot be deleted because it is %s%s%s",
pipeline,
defaultPipelineIndices.size() > 0
? String.format(
Locale.ROOT,
"the default pipeline for %s index(es) including [%s]",
defaultPipelineIndices.size(),
defaultPipelineIndices.stream().sorted().limit(3).collect(Collectors.joining(","))
)
: Strings.EMPTY,
defaultPipelineIndices.size() > 0 && finalPipelineIndices.size() > 0 ? " and " : Strings.EMPTY,
finalPipelineIndices.size() > 0
? String.format(
Locale.ROOT,
"the final pipeline for %s index(es) including [%s]",
finalPipelineIndices.size(),
finalPipelineIndices.stream().sorted().limit(3).collect(Collectors.joining(","))
)
: Strings.EMPTY
)
);
}
}
/**
* @return pipeline configuration specified by id. If multiple ids or wildcards are specified multiple pipelines
* may be returned
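* For example (sketch, assuming a {@code ClusterState} reference named {@code clusterState}):
* <pre>{@code
* List<PipelineConfiguration> configs = IngestService.getPipelines(clusterState, "logs-*", "metrics-pipeline");
* }</pre>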
*/
// Returning PipelineConfiguration instead of Pipeline, because Pipeline and Processor interface don't
// know how to serialize themselves.
public static List<PipelineConfiguration> getPipelines(ClusterState clusterState, String... ids) {
IngestMetadata ingestMetadata = clusterState.getMetadata().custom(IngestMetadata.TYPE);
return innerGetPipelines(ingestMetadata, ids);
}
static List<PipelineConfiguration> innerGetPipelines(IngestMetadata ingestMetadata, String... ids) {
if (ingestMetadata == null) {
return List.of();
}
// if we didn't ask for _any_ ID, then we get them all (this is the same as if they ask for '*')
if (ids.length == 0) {
return new ArrayList<>(ingestMetadata.getPipelines().values());
}
List<PipelineConfiguration> result = new ArrayList<>(ids.length);
for (String id : ids) {
if (Regex.isSimpleMatchPattern(id)) {
for (Map.Entry<String, PipelineConfiguration> entry : ingestMetadata.getPipelines().entrySet()) {
if (Regex.simpleMatch(id, entry.getKey())) {
result.add(entry.getValue());
}
}
} else {
PipelineConfiguration pipeline = ingestMetadata.getPipelines().get(id);
if (pipeline != null) {
result.add(pipeline);
}
}
}
return result;
}
/**
* Stores the pipeline definition specified by the request in the cluster state.
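*
* <p>A hedged caller-side sketch (assumes a {@code client} for fetching node infos, which carry the set of
* installed processor types used for validation):
* <pre>{@code
* ingestService.putPipeline(request, listener,
*     nodeInfoListener -> client.admin().cluster().prepareNodesInfo().execute(nodeInfoListener));
* }</pre>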
*/
public void putPipeline(
PutPipelineRequest request,
ActionListener<AcknowledgedResponse> listener,
Consumer<ActionListener<NodesInfoResponse>> nodeInfoListener
) throws Exception {
if (isNoOpPipelineUpdate(state, request)) {
// existing pipeline matches request pipeline -- no need to update
listener.onResponse(AcknowledgedResponse.TRUE);
return;
}
nodeInfoListener.accept(ActionListener.wrap(nodeInfos -> {
validatePipelineRequest(request, nodeInfos);
taskQueue.submitTask(
"put-pipeline-" + request.getId(),
new PutPipelineClusterStateUpdateTask(listener, request),
request.masterNodeTimeout()
);
}, listener::onFailure));
}
public void validatePipelineRequest(PutPipelineRequest request, NodesInfoResponse nodeInfos) throws Exception {
final Map<String, Object> config = XContentHelper.convertToMap(request.getSource(), false, request.getXContentType()).v2();
Map<DiscoveryNode, IngestInfo> ingestInfos = new HashMap<>();
for (NodeInfo nodeInfo : nodeInfos.getNodes()) {
ingestInfos.put(nodeInfo.getNode(), nodeInfo.getInfo(IngestInfo.class));
}
validatePipeline(ingestInfos, request.getId(), config);
}
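/**
* Returns true if the given put request would not change the stored pipeline (i.e. a pipeline with the same id
* and identical content already exists and no explicit version was requested), in which case the cluster state
* update can be skipped entirely.
*/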
public static boolean isNoOpPipelineUpdate(ClusterState state, PutPipelineRequest request) {
IngestMetadata currentIngestMetadata = state.metadata().custom(IngestMetadata.TYPE);
if (request.getVersion() == null
&& currentIngestMetadata != null
&& currentIngestMetadata.getPipelines().containsKey(request.getId())) {
var pipelineConfig = XContentHelper.convertToMap(request.getSource(), false, request.getXContentType()).v2();
var currentPipeline = currentIngestMetadata.getPipelines().get(request.getId());
if (currentPipeline.getConfigAsMap().equals(pipelineConfig)) {
return true;
}
}
return false;
}
/**
* Returns the pipeline by the specified id
*/
public Pipeline getPipeline(String id) {
if (id == null) {
return null;
}
PipelineHolder holder = pipelines.get(id);
if (holder != null) {
return holder.pipeline;
} else {
return null;
}
}
public Map<String, Processor.Factory> getProcessorFactories() {
return processorFactories;
}
@Override
public IngestInfo info() {
Map<String, Processor.Factory> processorFactories = getProcessorFactories();
List<ProcessorInfo> processorInfoList = new ArrayList<>(processorFactories.size());
for (Map.Entry<String, Processor.Factory> entry : processorFactories.entrySet()) {
processorInfoList.add(new ProcessorInfo(entry.getKey()));
}
return new IngestInfo(processorInfoList);
}
Map<String, PipelineHolder> pipelines() {
return pipelines;
}
/**
* Recursive method to obtain all the non-failure processors for the given compound processor.
*
* 'if' and 'ignore_failure'/'on_failure' are implemented as wrappers around the actual processor (via {@link ConditionalProcessor}
* and {@link OnFailureProcessor}, respectively), so we unwrap these processors internally in order to expose the underlying
* 'actual' processor via the metrics. This corresponds best to the customer intent -- e.g. they used a 'set' processor that has an
* 'on_failure', so we report metrics for the set processor, not an on_failure processor.
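* For example, a {@code set} processor wrapped first in an {@code if} condition and then in an {@code on_failure}
* handler is unwrapped twice by the loop below and surfaces as a single {@code set} entry in the stats output.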
*
* @param compoundProcessor The compound processor to start walking the non-failure processors
* @param processorMetrics The list to populate with {@link Processor} {@link IngestMetric} tuples.
*/
private static void collectProcessorMetrics(
CompoundProcessor compoundProcessor,
List<Tuple<Processor, IngestMetric>> processorMetrics
) {
// only surface the top level non-failure processors, on-failure processor times will be included in the top level non-failure
for (Tuple<Processor, IngestMetric> processorWithMetric : compoundProcessor.getProcessorsWithMetrics()) {
Processor processor = processorWithMetric.v1();
IngestMetric metric = processorWithMetric.v2();
// unwrap 'if' and 'ignore_failure/on_failure' wrapping, so that we expose the underlying actual processor
boolean unwrapped;
do {
unwrapped = false;
if (processor instanceof ConditionalProcessor conditional) {
processor = conditional.getInnerProcessor();
metric = conditional.getMetric(); // prefer the conditional's metric, it only covers when the conditional was true
unwrapped = true;
}
if (processor instanceof OnFailureProcessor onFailure) {
processor = onFailure.getInnerProcessor();
metric = onFailure.getInnerMetric(); // the wrapped processor records the failure count
unwrapped = true;
}
} while (unwrapped);
if (processor instanceof CompoundProcessor cp) {
collectProcessorMetrics(cp, processorMetrics);
} else {
processorMetrics.add(new Tuple<>(processor, metric));
}
}
}
/**
* Used in this class and externally by the {@link org.elasticsearch.action.ingest.ReservedPipelineAction}
*/
public static class PutPipelineClusterStateUpdateTask extends PipelineClusterStateUpdateTask {
private final PutPipelineRequest request;
PutPipelineClusterStateUpdateTask(ActionListener<AcknowledgedResponse> listener, PutPipelineRequest request) {
super(listener);
this.request = request;
}
/**
* Used by {@link org.elasticsearch.action.ingest.ReservedPipelineAction}
*/
public PutPipelineClusterStateUpdateTask(PutPipelineRequest request) {
this(null, request);
}
@Override
public IngestMetadata execute(IngestMetadata currentIngestMetadata, Collection<IndexMetadata> allIndexMetadata) {
BytesReference pipelineSource = request.getSource();
if (request.getVersion() != null) {
var currentPipeline = currentIngestMetadata != null ? currentIngestMetadata.getPipelines().get(request.getId()) : null;
if (currentPipeline == null) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"version conflict, required version [%s] for pipeline [%s] but no pipeline was found",
request.getVersion(),
request.getId()
)
);
}
final Integer currentVersion = currentPipeline.getVersion();
if (Objects.equals(request.getVersion(), currentVersion) == false) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"version conflict, required version [%s] for pipeline [%s] but current version is [%s]",
request.getVersion(),
request.getId(),
currentVersion
)
);
}
var pipelineConfig = XContentHelper.convertToMap(request.getSource(), false, request.getXContentType()).v2();
final Integer specifiedVersion = (Integer) pipelineConfig.get("version");
if (pipelineConfig.containsKey("version") && Objects.equals(specifiedVersion, currentVersion)) {
throw new IllegalArgumentException(
String.format(
Locale.ROOT,
"cannot update pipeline [%s] with the same version [%s]",
request.getId(),
request.getVersion()
)
);
}
// if no version specified in the pipeline definition, inject a version of [request.getVersion() + 1]
if (specifiedVersion == null) {
pipelineConfig.put("version", request.getVersion() == null ? 1 : request.getVersion() + 1);
try {
var builder = XContentBuilder.builder(request.getXContentType().xContent()).map(pipelineConfig);
pipelineSource = BytesReference.bytes(builder);
} catch (IOException e) {
throw new IllegalStateException(e);
}
}
}
Map<String, PipelineConfiguration> pipelines;
if (currentIngestMetadata != null) {
pipelines = new HashMap<>(currentIngestMetadata.getPipelines());
} else {
pipelines = new HashMap<>();
}
pipelines.put(request.getId(), new PipelineConfiguration(request.getId(), pipelineSource, request.getXContentType()));
return new IngestMetadata(pipelines);
}
}
void validatePipeline(Map<DiscoveryNode, IngestInfo> ingestInfos, String pipelineId, Map<String, Object> pipelineConfig)
throws Exception {
if (ingestInfos.isEmpty()) {
throw new IllegalStateException("Ingest info is empty");
}
Pipeline pipeline = Pipeline.create(pipelineId, pipelineConfig, processorFactories, scriptService);
List exceptions = new ArrayList<>();
for (Processor processor : pipeline.flattenAllProcessors()) {
// run post-construction extra validation (if any, the default implementation from the Processor interface is a no-op)
try {
processor.extraValidation();
} catch (Exception e) {
exceptions.add(e);
}
for (Map.Entry<DiscoveryNode, IngestInfo> entry : ingestInfos.entrySet()) {
String type = processor.getType();
if (entry.getValue().containsProcessor(type) == false && ConditionalProcessor.TYPE.equals(type) == false) {
String message = "Processor type [" + processor.getType() + "] is not installed on node [" + entry.getKey() + "]";
exceptions.add(ConfigurationUtils.newConfigurationException(processor.getType(), processor.getTag(), null, message));
}
}
}
ExceptionsHelper.rethrowAndSuppress(exceptions);
}
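/**
* Runs the applicable ingest pipelines for each {@link IndexRequest} in the given bulk. {@code onDropped} and
* {@code onFailure} are invoked with the slot of each document that is dropped or fails, and {@code onCompletion}
* is invoked exactly once after every document has been handled (or with the exception if the run fails to start).
*/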
public void executeBulkRequest(
final int numberOfActionRequests,
final Iterable<DocWriteRequest<?>> actionRequests,
final IntConsumer onDropped,
final BiConsumer<Integer, Exception> onFailure,
final BiConsumer<Thread, Exception> onCompletion,
final String executorName
) {
assert numberOfActionRequests > 0 : "numberOfActionRequests must be greater than 0 but was [" + numberOfActionRequests + "]";
threadPool.executor(executorName).execute(new AbstractRunnable() {
@Override
public void onFailure(Exception e) {
onCompletion.accept(null, e);
}
@Override
protected void doRun() {
final Thread originalThread = Thread.currentThread();
try (var refs = new RefCountingRunnable(() -> onCompletion.accept(originalThread, null))) {
int i = 0;
for (DocWriteRequest<?> actionRequest : actionRequests) {
IndexRequest indexRequest = TransportBulkAction.getIndexWriteRequest(actionRequest);
if (indexRequest == null) {
i++;
continue;
}
PipelineIterator pipelines = getAndResetPipelines(indexRequest);
if (pipelines.hasNext() == false) {
i++;
continue;
}
// start the stopwatch and acquire a ref to indicate that we're working on this document
final long startTimeInNanos = System.nanoTime();
totalMetrics.preIngest();
final int slot = i;
final Releasable ref = refs.acquire();
// the document listener gives us three-way logic: a document can fail processing (1), or it can
// be successfully processed. a successfully processed document can be kept (2) or dropped (3).
final ActionListener<Boolean> documentListener = ActionListener.runAfter(new ActionListener<>() {
@Override
public void onResponse(Boolean kept) {
assert kept != null;
if (kept == false) {
onDropped.accept(slot);
}
}
@Override
public void onFailure(Exception e) {
totalMetrics.ingestFailed();
onFailure.accept(slot, e);
}
}, () -> {
// regardless of success or failure, we always stop the ingest "stopwatch" and release the ref to indicate
// that we're finished with this document
final long ingestTimeInNanos = System.nanoTime() - startTimeInNanos;
totalMetrics.postIngest(ingestTimeInNanos);
ref.close();
});
IngestDocument ingestDocument = newIngestDocument(indexRequest);
executePipelines(pipelines, indexRequest, ingestDocument, documentListener);
i++;
}
}
}
});
}
/**
* Returns the pipelines of the request, and updates the request so that it no longer references
* any pipelines (both the default and final pipeline are set to the noop pipeline).
*/
private PipelineIterator getAndResetPipelines(IndexRequest indexRequest) {
final String pipelineId = indexRequest.getPipeline();
indexRequest.setPipeline(NOOP_PIPELINE_NAME);
final String finalPipelineId = indexRequest.getFinalPipeline();
indexRequest.setFinalPipeline(NOOP_PIPELINE_NAME);
return new PipelineIterator(pipelineId, finalPipelineId);
}
/**
* A triple for tracking the non-null id of a pipeline, the pipeline itself, and whether the pipeline is a final pipeline.
*
* @param id the non-null id of the pipeline
* @param pipeline a possibly-null reference to the pipeline for the given pipeline id
* @param isFinal true if the pipeline is a final pipeline
*/
private record PipelineSlot(String id, @Nullable Pipeline pipeline, boolean isFinal) {
public PipelineSlot {
Objects.requireNonNull(id);
}
}
private class PipelineIterator implements Iterator<PipelineSlot> {
private final String defaultPipeline;
private final String finalPipeline;
private final Iterator<PipelineSlot> pipelineSlotIterator;
private PipelineIterator(String defaultPipeline, String finalPipeline) {
this.defaultPipeline = NOOP_PIPELINE_NAME.equals(defaultPipeline) ? null : defaultPipeline;
this.finalPipeline = NOOP_PIPELINE_NAME.equals(finalPipeline) ? null : finalPipeline;
this.pipelineSlotIterator = iterator();
}
public PipelineIterator withoutDefaultPipeline() {
return new PipelineIterator(null, finalPipeline);
}
private Iterator<PipelineSlot> iterator() {
PipelineSlot defaultPipelineSlot = null, finalPipelineSlot = null;
if (defaultPipeline != null) {
defaultPipelineSlot = new PipelineSlot(defaultPipeline, getPipeline(defaultPipeline), false);
}
if (finalPipeline != null) {
finalPipelineSlot = new PipelineSlot(finalPipeline, getPipeline(finalPipeline), true);
}
if (defaultPipeline != null && finalPipeline != null) {
return List.of(defaultPipelineSlot, finalPipelineSlot).iterator();
} else if (finalPipeline != null) {
return List.of(finalPipelineSlot).iterator();
} else if (defaultPipeline != null) {
return List.of(defaultPipelineSlot).iterator();
} else {
return Collections.emptyIterator();
}
}
@Override
public boolean hasNext() {
return pipelineSlotIterator.hasNext();
}
@Override
public PipelineSlot next() {
return pipelineSlotIterator.next();
}
}
private void executePipelines(
final PipelineIterator pipelines,
final IndexRequest indexRequest,
final IngestDocument ingestDocument,
final ActionListener<Boolean> listener
) {
assert pipelines.hasNext();
PipelineSlot slot = pipelines.next();
final String pipelineId = slot.id();
final Pipeline pipeline = slot.pipeline();
final boolean isFinalPipeline = slot.isFinal();
// reset the reroute flag, at the start of a new pipeline execution this document hasn't been rerouted yet
ingestDocument.resetReroute();
try {
if (pipeline == null) {
throw new IllegalArgumentException("pipeline with id [" + pipelineId + "] does not exist");
}
final String originalIndex = indexRequest.indices()[0];
executePipeline(ingestDocument, pipeline, (keep, e) -> {
assert keep != null;
if (e != null) {
logger.debug(
() -> format(
"failed to execute pipeline [%s] for document [%s/%s]",
pipelineId,
indexRequest.index(),
indexRequest.id()
),
e
);
listener.onFailure(e);
return; // document failed!
}
if (keep == false) {
listener.onResponse(false);
return; // document dropped!
}
// update the index request so that we can execute additional pipelines (if any), etc
updateIndexRequestMetadata(indexRequest, ingestDocument.getMetadata());
try {
// check for self-references if necessary, (i.e. if a script processor has run), and clear the bit
if (ingestDocument.doNoSelfReferencesCheck()) {
CollectionUtils.ensureNoSelfReferences(ingestDocument.getSource(), null);
ingestDocument.doNoSelfReferencesCheck(false);
}
} catch (IllegalArgumentException ex) {
// An IllegalArgumentException can be thrown when an ingest processor creates a source map that is self-referencing.
// In that case, we catch and wrap the exception, so we can include more details
listener.onFailure(
new IllegalArgumentException(
format(
"Failed to generate the source document for ingest pipeline [%s] for document [%s/%s]",
pipelineId,
indexRequest.index(),
indexRequest.id()
),
ex
)
);
return; // document failed!
}
PipelineIterator newPipelines = pipelines;
final String newIndex = indexRequest.indices()[0];
if (Objects.equals(originalIndex, newIndex) == false) {
// final pipelines cannot change the target index (either directly or by way of a reroute)
if (isFinalPipeline) {
listener.onFailure(
new IllegalStateException(
format(
"final pipeline [%s] can't change the target index (from [%s] to [%s]) for document [%s]",
pipelineId,
originalIndex,
newIndex,
indexRequest.id()
)
)
);
return; // document failed!
}
// add the index to the document's index history, and check for cycles in the visited indices
boolean cycle = ingestDocument.updateIndexHistory(newIndex) == false;
if (cycle) {
List<String> indexCycle = new ArrayList<>(ingestDocument.getIndexHistory());
indexCycle.add(newIndex);
listener.onFailure(
new IllegalStateException(
format(
"index cycle detected while processing pipeline [%s] for document [%s]: %s",
pipelineId,
indexRequest.id(),
indexCycle
)
)
);
return; // document failed!
}
// clear the current pipeline, then re-resolve the pipelines for this request
indexRequest.setPipeline(null);
indexRequest.isPipelineResolved(false);
resolvePipelinesAndUpdateIndexRequest(null, indexRequest, state.metadata());
newPipelines = getAndResetPipelines(indexRequest);
// for backwards compatibility, when a pipeline changes the target index for a document without using the reroute
// mechanism, do not invoke the default pipeline of the new target index
if (ingestDocument.isReroute() == false) {
newPipelines = newPipelines.withoutDefaultPipeline();
}
}
if (newPipelines.hasNext()) {
executePipelines(newPipelines, indexRequest, ingestDocument, listener);
} else {
// update the index request's source and (potentially) cache the timestamp for TSDB
updateIndexRequestSource(indexRequest, ingestDocument);
cacheRawTimestamp(indexRequest, ingestDocument);
listener.onResponse(true); // document succeeded!
}
});
} catch (Exception e) {
logger.debug(
() -> format("failed to execute pipeline [%s] for document [%s/%s]", pipelineId, indexRequest.index(), indexRequest.id()),
e
);
listener.onFailure(e); // document failed!
}
}
private void executePipeline(
final IngestDocument ingestDocument,
final Pipeline pipeline,
final BiConsumer<Boolean, Exception> handler
) {
// adapt our {@code BiConsumer<Boolean, Exception>} handler shape to the
// {@code BiConsumer<IngestDocument, Exception>} handler shape used internally
// by ingest pipelines and processors
ingestDocument.executePipeline(pipeline, (result, e) -> {
if (e != null) {
handler.accept(true, e);
} else {
handler.accept(result != null, null);
}
});
}
public IngestStats stats() {
IngestStats.Builder statsBuilder = new IngestStats.Builder();
statsBuilder.addTotalMetrics(totalMetrics);
pipelines.forEach((id, holder) -> {
Pipeline pipeline = holder.pipeline;
CompoundProcessor rootProcessor = pipeline.getCompoundProcessor();
statsBuilder.addPipelineMetrics(id, pipeline.getMetrics());
List<Tuple<Processor, IngestMetric>> processorMetrics = new ArrayList<>();
collectProcessorMetrics(rootProcessor, processorMetrics);
processorMetrics.forEach(t -> {
Processor processor = t.v1();
IngestMetric processorMetric = t.v2();
statsBuilder.addProcessorMetrics(id, getProcessorName(processor), processor.getType(), processorMetric);
});
});
return statsBuilder.build();
}
/**
* Adds a listener that gets invoked with the current cluster state before processor factories
* get invoked.
*
* This is useful for components that are used by ingest processors, so that they have the opportunity to update
* before these components get used by the ingest processor factory.
*/
public void addIngestClusterStateListener(Consumer<ClusterState> listener) {
ingestClusterStateListeners.add(listener);
}
// package private for testing
static String getProcessorName(Processor processor) {
// conditionals are implemented as wrappers around the real processor, so get the real processor for the correct type for the name
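// e.g. a pipeline processor invoking the pipeline "inner" with tag "t1" is reported as "pipeline:inner:t1"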
if (processor instanceof ConditionalProcessor conditionalProcessor) {
processor = conditionalProcessor.getInnerProcessor();
}
StringBuilder sb = new StringBuilder(5);
sb.append(processor.getType());
if (processor instanceof PipelineProcessor pipelineProcessor) {
String pipelineName = pipelineProcessor.getPipelineTemplate().newInstance(Map.of()).execute();
sb.append(":");
sb.append(pipelineName);
}
String tag = processor.getTag();
if (tag != null && tag.isEmpty() == false) {
sb.append(":");
sb.append(tag);
}
return sb.toString();
}
/**
* Builds a new ingest document from the passed-in index request.
*/
private static IngestDocument newIngestDocument(final IndexRequest request) {
return new IngestDocument(
request.index(),
request.id(),
request.version(),
request.routing(),
request.versionType(),
request.sourceAsMap()
);
}
/**
* Updates an index request based on the metadata of an ingest document.
*/
private static void updateIndexRequestMetadata(final IndexRequest request, final org.elasticsearch.script.Metadata metadata) {
// it's fine to set all metadata fields all the time, as ingest document holds their starting values
// before ingestion, which might also get modified during ingestion.
request.index(metadata.getIndex());
request.id(metadata.getId());
request.routing(metadata.getRouting());
request.version(metadata.getVersion());
if (metadata.getVersionType() != null) {
request.versionType(VersionType.fromString(metadata.getVersionType()));
}
Number number;
if ((number = metadata.getIfSeqNo()) != null) {
request.setIfSeqNo(number.longValue());
}
if ((number = metadata.getIfPrimaryTerm()) != null) {
request.setIfPrimaryTerm(number.longValue());
}
Map<String, String> map;
if ((map = metadata.getDynamicTemplates()) != null) {
Map<String, String> mergedDynamicTemplates = new HashMap<>(request.getDynamicTemplates());
mergedDynamicTemplates.putAll(map);
request.setDynamicTemplates(mergedDynamicTemplates);
}
}
/**
* Updates an index request based on the source of an ingest document, guarding against self-references if necessary.
*/
private static void updateIndexRequestSource(final IndexRequest request, final IngestDocument document) {
boolean ensureNoSelfReferences = document.doNoSelfReferencesCheck();
// we already check for self references elsewhere (and clear the bit), so this should always be false,
// keeping the check and assert as a guard against extraordinarily surprising circumstances
assert ensureNoSelfReferences == false;
request.source(document.getSource(), request.getContentType(), ensureNoSelfReferences);
}
/**
* Grab the @timestamp and store it on the index request so that TSDB can use it without needing to parse
* the source for this document.
*/
private static void cacheRawTimestamp(final IndexRequest request, final IngestDocument document) {
if (request.getRawTimestamp() == null) {
// cache the @timestamp from the ingest document's source map if there is one
Object rawTimestamp = document.getSource().get(TimestampField.FIXED_TIMESTAMP_FIELD);
if (rawTimestamp != null) {
request.setRawTimestamp(rawTimestamp);
}
}
}
@Override
public void applyClusterState(final ClusterChangedEvent event) {
state = event.state();
if (state.blocks().hasGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK)) {
return;
}
// Publish cluster state to components that are used by processor factories before letting
// processor factories create new processor instances.
// (Note that this also needs to happen when there is no change to the ingest metadata, because a listening
// component may be interested in a different part of the cluster state, and that part may have changed.)
ingestClusterStateListeners.forEach(consumer -> consumer.accept(state));
IngestMetadata newIngestMetadata = state.getMetadata().custom(IngestMetadata.TYPE);
if (newIngestMetadata == null) {
return;
}
try {
innerUpdatePipelines(newIngestMetadata);
} catch (ElasticsearchParseException e) {
logger.warn("failed to update ingest pipelines", e);
}
}
synchronized void innerUpdatePipelines(IngestMetadata newIngestMetadata) {
Map<String, PipelineHolder> existingPipelines = this.pipelines;
// Lazy initialize these variables in order to favour the most likely scenario that there are no pipeline changes:
Map<String, PipelineHolder> newPipelines = null;
List<ElasticsearchParseException> exceptions = null;
// Iterate over the pipeline configurations in the ingest metadata and construct a new pipeline if there is no
// existing pipeline or the pipeline configuration has been modified
for (PipelineConfiguration newConfiguration : newIngestMetadata.getPipelines().values()) {
PipelineHolder previous = existingPipelines.get(newConfiguration.getId());
if (previous != null && previous.configuration.equals(newConfiguration)) {
continue;
}
if (newPipelines == null) {
newPipelines = new HashMap<>(existingPipelines);
}
try {
Pipeline newPipeline = Pipeline.create(
newConfiguration.getId(),
newConfiguration.getConfigAsMap(),
processorFactories,
scriptService
);
newPipelines.put(newConfiguration.getId(), new PipelineHolder(newConfiguration, newPipeline));
if (previous == null) {
continue;
}
Pipeline oldPipeline = previous.pipeline;
newPipeline.getMetrics().add(oldPipeline.getMetrics());
List<Tuple<Processor, IngestMetric>> oldPerProcessMetrics = new ArrayList<>();
List<Tuple<Processor, IngestMetric>> newPerProcessMetrics = new ArrayList<>();
collectProcessorMetrics(oldPipeline.getCompoundProcessor(), oldPerProcessMetrics);
collectProcessorMetrics(newPipeline.getCompoundProcessor(), newPerProcessMetrics);
// Best attempt to populate new processor metrics using a parallel array of the old metrics. This is not ideal since
// the per processor metrics may get reset when the arrays don't match. However, to get to an ideal model, unique and
// consistent id's per processor and/or semantic equals for each processor will be needed.
if (newPerProcessMetrics.size() == oldPerProcessMetrics.size()) {
Iterator<Tuple<Processor, IngestMetric>> oldMetricsIterator = oldPerProcessMetrics.iterator();
for (Tuple<Processor, IngestMetric> compositeMetric : newPerProcessMetrics) {
String type = compositeMetric.v1().getType();
IngestMetric metric = compositeMetric.v2();
if (oldMetricsIterator.hasNext()) {
Tuple<Processor, IngestMetric> oldCompositeMetric = oldMetricsIterator.next();
String oldType = oldCompositeMetric.v1().getType();
IngestMetric oldMetric = oldCompositeMetric.v2();
if (type.equals(oldType)) {
metric.add(oldMetric);
}
}
}
}
} catch (ElasticsearchParseException e) {
Pipeline pipeline = substitutePipeline(newConfiguration.getId(), e);
newPipelines.put(newConfiguration.getId(), new PipelineHolder(newConfiguration, pipeline));
if (exceptions == null) {
exceptions = new ArrayList<>();
}
exceptions.add(e);
} catch (Exception e) {
ElasticsearchParseException parseException = new ElasticsearchParseException(
"Error updating pipeline with id [" + newConfiguration.getId() + "]",
e
);
Pipeline pipeline = substitutePipeline(newConfiguration.getId(), parseException);
newPipelines.put(newConfiguration.getId(), new PipelineHolder(newConfiguration, pipeline));
if (exceptions == null) {
exceptions = new ArrayList<>();
}
exceptions.add(parseException);
}
}
// Iterate over the current active pipelines and check whether they are missing in the pipeline configuration and
// if so delete the pipeline from new Pipelines map:
for (Map.Entry<String, PipelineHolder> entry : existingPipelines.entrySet()) {
if (newIngestMetadata.getPipelines().get(entry.getKey()) == null) {
if (newPipelines == null) {
newPipelines = new HashMap<>(existingPipelines);
}
newPipelines.remove(entry.getKey());
}
}
if (newPipelines != null) {
// Update the pipelines:
this.pipelines = Map.copyOf(newPipelines);
// Rethrow errors that may have occurred during creating new pipeline instances:
if (exceptions != null) {
ExceptionsHelper.rethrowAndSuppress(exceptions);
}
}
}
/**
* Gets all the Processors of the given type from within a Pipeline.
* @param pipelineId the pipeline to inspect
* @param clazz the Processor class to look for
* @return a list of the Processor instances of the given class found within the pipeline
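*
* <p>Sketch (hedged; {@code GrokProcessor} stands in for any concrete processor class available on the classpath):
* <pre>{@code
* List<GrokProcessor> groks = ingestService.getProcessorsInPipeline("my-pipeline", GrokProcessor.class);
* }</pre>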
*/
public <P extends Processor> List<P> getProcessorsInPipeline(String pipelineId, Class<P> clazz) {
Pipeline pipeline = getPipeline(pipelineId);
if (pipeline == null) {
throw new IllegalArgumentException("pipeline with id [" + pipelineId + "] does not exist");
}
List<P> processors = new ArrayList<>();
for (Processor processor : pipeline.flattenAllProcessors()) {
if (clazz.isAssignableFrom(processor.getClass())) {
processors.add(clazz.cast(processor));
}
while (processor instanceof WrappingProcessor wrappingProcessor) {
if (clazz.isAssignableFrom(wrappingProcessor.getInnerProcessor().getClass())) {
processors.add(clazz.cast(wrappingProcessor.getInnerProcessor()));
}
processor = wrappingProcessor.getInnerProcessor();
// break in the case of self referencing processors in the event a processor author creates a
// wrapping processor that has its inner processor refer to itself.
if (wrappingProcessor == processor) {
break;
}
}
}
return processors;
}
public <P extends Processor> Collection<String> getPipelineWithProcessorType(Class<P> clazz, Predicate<P> predicate) {
List<String> matchedPipelines = new LinkedList<>();
for (PipelineHolder holder : pipelines.values()) {
String pipelineId = holder.pipeline.getId();
List<P> processors = getProcessorsInPipeline(pipelineId, clazz);
if (processors.isEmpty() == false && processors.stream().anyMatch(predicate)) {
matchedPipelines.add(pipelineId);
}
}
return matchedPipelines;
}
public synchronized void reloadPipeline(String id) throws Exception {
PipelineHolder holder = pipelines.get(id);
Pipeline updatedPipeline = Pipeline.create(id, holder.configuration.getConfigAsMap(), processorFactories, scriptService);
Map<String, PipelineHolder> updatedPipelines = new HashMap<>(this.pipelines);
updatedPipelines.put(id, new PipelineHolder(holder.configuration, updatedPipeline));
this.pipelines = Map.copyOf(updatedPipelines);
}
private static Pipeline substitutePipeline(String id, ElasticsearchParseException e) {
String tag = e.getHeaderKeys().contains("processor_tag") ? e.getHeader("processor_tag").get(0) : null;
String type = e.getHeaderKeys().contains("processor_type") ? e.getHeader("processor_type").get(0) : "unknown";
String errorMessage = "pipeline with id [" + id + "] could not be loaded, caused by [" + e.getDetailedMessage() + "]";
Processor failureProcessor = new AbstractProcessor(tag, "this is a placeholder processor") {
@Override
public IngestDocument execute(IngestDocument ingestDocument) {
throw new IllegalStateException(errorMessage);
}
@Override
public String getType() {
return type;
}
};
String description = "this is a place holder pipeline, because pipeline with id [" + id + "] could not be loaded";
return new Pipeline(id, description, null, null, new CompoundProcessor(failureProcessor));
}
record PipelineHolder(PipelineConfiguration configuration, Pipeline pipeline) {
public PipelineHolder {
Objects.requireNonNull(configuration);
Objects.requireNonNull(pipeline);
}
}
private static Optional<Pipelines> resolvePipelinesFromMetadata(
DocWriteRequest<?> originalRequest,
IndexRequest indexRequest,
Metadata metadata,
long epochMillis
) {
IndexMetadata indexMetadata = null;
// start to look for default or final pipelines via settings found in the cluster metadata
if (originalRequest != null) {
indexMetadata = metadata.indices()
.get(IndexNameExpressionResolver.resolveDateMathExpression(originalRequest.index(), epochMillis));
}
// check the alias for the index request (this is how normal index requests are modeled)
if (indexMetadata == null && indexRequest.index() != null) {
IndexAbstraction indexAbstraction = metadata.getIndicesLookup().get(indexRequest.index());
if (indexAbstraction != null && indexAbstraction.getWriteIndex() != null) {
indexMetadata = metadata.index(indexAbstraction.getWriteIndex());
}
}
// check the alias for the action request (this is how upserts are modeled)
if (indexMetadata == null && originalRequest != null && originalRequest.index() != null) {
IndexAbstraction indexAbstraction = metadata.getIndicesLookup().get(originalRequest.index());
if (indexAbstraction != null && indexAbstraction.getWriteIndex() != null) {
indexMetadata = metadata.index(indexAbstraction.getWriteIndex());
}
}
if (indexMetadata == null) {
return Optional.empty();
}
final Settings settings = indexMetadata.getSettings();
return Optional.of(new Pipelines(IndexSettings.DEFAULT_PIPELINE.get(settings), IndexSettings.FINAL_PIPELINE.get(settings)));
}
private static Optional<Pipelines> resolvePipelinesFromIndexTemplates(IndexRequest indexRequest, Metadata metadata) {
if (indexRequest.index() == null) {
return Optional.empty();
}
// the index does not exist yet (and this is a valid request), so match index
// templates to look for pipelines in either a matching V2 template (which takes
// precedence), or if a V2 template does not match, any V1 templates
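// e.g. if a matching V2 template resolves index.default_pipeline=logs-default, that pipeline is used even if a
// matching V1 template specifies a different one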
String v2Template = MetadataIndexTemplateService.findV2Template(metadata, indexRequest.index(), false);
if (v2Template != null) {
final Settings settings = MetadataIndexTemplateService.resolveSettings(metadata, v2Template);
return Optional.of(new Pipelines(IndexSettings.DEFAULT_PIPELINE.get(settings), IndexSettings.FINAL_PIPELINE.get(settings)));
}
String defaultPipeline = null;
String finalPipeline = null;
List<IndexTemplateMetadata> templates = MetadataIndexTemplateService.findV1Templates(metadata, indexRequest.index(), null);
// the templates are ordered with the highest order first
for (final IndexTemplateMetadata template : templates) {
final Settings settings = template.settings();
// note: the exists/get trickiness here is because we explicitly *don't* want the default value
// of the settings -- a non-null value would terminate the search too soon
if (defaultPipeline == null && IndexSettings.DEFAULT_PIPELINE.exists(settings)) {
defaultPipeline = IndexSettings.DEFAULT_PIPELINE.get(settings);
// we can not break in case a lower-order template has a final pipeline that we need to collect
}
if (finalPipeline == null && IndexSettings.FINAL_PIPELINE.exists(settings)) {
finalPipeline = IndexSettings.FINAL_PIPELINE.get(settings);
// we can not break in case a lower-order template has a default pipeline that we need to collect
}
if (defaultPipeline != null && finalPipeline != null) {
// we can break if we have already collected a default and final pipeline
break;
}
}
// having exhausted the search, if nothing was found, then use the default noop pipeline names
defaultPipeline = Objects.requireNonNullElse(defaultPipeline, NOOP_PIPELINE_NAME);
finalPipeline = Objects.requireNonNullElse(finalPipeline, NOOP_PIPELINE_NAME);
return Optional.of(new Pipelines(defaultPipeline, finalPipeline));
}
/**
* Checks whether an IndexRequest has at least one pipeline defined.
*
* This method assumes that the pipelines have already been resolved.
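*
* <p>For example, after {@link #resolvePipelinesAndUpdateIndexRequest} has run:
* <pre>{@code
* if (IngestService.hasPipeline(indexRequest)) {
*     // route this request through the ingest path before indexing
* }
* }</pre>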
*/
public static boolean hasPipeline(IndexRequest indexRequest) {
assert indexRequest.isPipelineResolved();
assert indexRequest.getPipeline() != null;
assert indexRequest.getFinalPipeline() != null;
return NOOP_PIPELINE_NAME.equals(indexRequest.getPipeline()) == false
|| NOOP_PIPELINE_NAME.equals(indexRequest.getFinalPipeline()) == false;
}
private record Pipelines(String defaultPipeline, String finalPipeline) {
private static final Pipelines NO_PIPELINES_DEFINED = new Pipelines(NOOP_PIPELINE_NAME, NOOP_PIPELINE_NAME);
public Pipelines {
Objects.requireNonNull(defaultPipeline);
Objects.requireNonNull(finalPipeline);
}
}
}