/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.ingest;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.apache.logging.log4j.util.Strings;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.ResourceNotFoundException;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.admin.cluster.node.info.NodeInfo;
import org.elasticsearch.action.admin.cluster.node.info.NodesInfoResponse;
import org.elasticsearch.action.bulk.TransportBulkAction;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.ingest.DeletePipelineRequest;
import org.elasticsearch.action.ingest.PutPipelineRequest;
import org.elasticsearch.action.support.master.AcknowledgedResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.cluster.AckedClusterStateUpdateTask;
import org.elasticsearch.cluster.ClusterChangedEvent;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.ClusterStateApplier;
import org.elasticsearch.cluster.metadata.IndexAbstraction;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.metadata.IndexTemplateMetadata;
import org.elasticsearch.cluster.metadata.Metadata;
import org.elasticsearch.cluster.metadata.MetadataIndexTemplateService;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.collect.ImmutableOpenMap;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.env.Environment;
import org.elasticsearch.gateway.GatewayService;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.node.ReportingService;
import org.elasticsearch.plugins.IngestPlugin;
import org.elasticsearch.script.ScriptService;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.xcontent.XContentBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.IntConsumer;
import java.util.stream.Collectors;

/**
 * Holder class for several ingest related services.
 */
public class IngestService implements ClusterStateApplier, ReportingService<IngestInfo> {

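    // sentinel pipeline name meaning "no pipeline to execute"; pipeline resolution assigns it when neither a
    // default nor a final pipeline applies to a request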
    public static final String NOOP_PIPELINE_NAME = "_none";

    public static final String INGEST_ORIGIN = "ingest";

    private static final Logger logger = LogManager.getLogger(IngestService.class);
    private static final IndexNameExpressionResolver.DateMathExpressionResolver DATE_MATH_EXPRESSION_RESOLVER =
        new IndexNameExpressionResolver.DateMathExpressionResolver();

    private final ClusterService clusterService;
    private final ScriptService scriptService;
    private final Map<String, Processor.Factory> processorFactories;
    // Ideally this should be in IngestMetadata class, but we don't have the processor factories around there.
    // We know of all the processor factories when a node with all its plugins has been initialized. Also some
    // processor factories rely on other node services. Custom metadata is statically registered when classes
    // are loaded, so in the cluster state we just save the pipeline config and here we keep the actual pipelines around.
    private volatile Map<String, PipelineHolder> pipelines = Collections.emptyMap();
    private final ThreadPool threadPool;
    private final IngestMetric totalMetrics = new IngestMetric();
    private final List<Consumer<ClusterState>> ingestClusterStateListeners = new CopyOnWriteArrayList<>();
    private volatile ClusterState state;

    public IngestService(
        ClusterService clusterService,
        ThreadPool threadPool,
        Environment env,
        ScriptService scriptService,
        AnalysisRegistry analysisRegistry,
        List<IngestPlugin> ingestPlugins,
        Client client
    ) {
        this.clusterService = clusterService;
        this.scriptService = scriptService;
        this.processorFactories = processorFactories(
            ingestPlugins,
            new Processor.Parameters(
                env,
                scriptService,
                analysisRegistry,
                threadPool.getThreadContext(),
                threadPool::relativeTimeInMillis,
                (delay, command) -> threadPool.schedule(command, TimeValue.timeValueMillis(delay), ThreadPool.Names.GENERIC),
                this,
                client,
                threadPool.generic()::execute
            )
        );

        this.threadPool = threadPool;
    }

    private static Map<String, Processor.Factory> processorFactories(List<IngestPlugin> ingestPlugins, Processor.Parameters parameters) {
        Map<String, Processor.Factory> processorFactories = new HashMap<>();
        for (IngestPlugin ingestPlugin : ingestPlugins) {
            Map<String, Processor.Factory> newProcessors = ingestPlugin.getProcessors(parameters);
            for (Map.Entry<String, Processor.Factory> entry : newProcessors.entrySet()) {
                if (processorFactories.put(entry.getKey(), entry.getValue()) != null) {
                    throw new IllegalArgumentException("Ingest processor [" + entry.getKey() + "] is already registered");
                }
            }
        }
        return Collections.unmodifiableMap(processorFactories);
    }

    public static boolean resolvePipelines(
        final DocWriteRequest<?> originalRequest,
        final IndexRequest indexRequest,
        final Metadata metadata
    ) {
        return resolvePipelines(originalRequest, indexRequest, metadata, System.currentTimeMillis());
    }

    public static boolean resolvePipelines(
        final DocWriteRequest<?> originalRequest,
        final IndexRequest indexRequest,
        final Metadata metadata,
        final long epochMillis
    ) {
        if (indexRequest.isPipelineResolved() == false) {
            final String requestPipeline = indexRequest.getPipeline();
            indexRequest.setPipeline(NOOP_PIPELINE_NAME);
            indexRequest.setFinalPipeline(NOOP_PIPELINE_NAME);
            String defaultPipeline = null;
            String finalPipeline = null;
            IndexMetadata indexMetadata = null;
            // start to look for default or final pipelines via settings found in the index meta data
            if (originalRequest != null) {
                indexMetadata = metadata.indices().get(resolveIndexName(originalRequest.index(), epochMillis));
            }
            // check the alias for the index request (this is how normal index requests are modeled)
            if (indexMetadata == null && indexRequest.index() != null) {
                IndexAbstraction indexAbstraction = metadata.getIndicesLookup().get(indexRequest.index());
                if (indexAbstraction != null && indexAbstraction.getWriteIndex() != null) {
                    indexMetadata = metadata.index(indexAbstraction.getWriteIndex());
                }
            }
            // check the alias for the action request (this is how upserts are modeled)
            if (indexMetadata == null && originalRequest != null && originalRequest.index() != null) {
                IndexAbstraction indexAbstraction = metadata.getIndicesLookup().get(originalRequest.index());
                if (indexAbstraction != null && indexAbstraction.getWriteIndex() != null) {
                    indexMetadata = metadata.index(indexAbstraction.getWriteIndex());
                }
            }
            if (indexMetadata != null) {
                final Settings indexSettings = indexMetadata.getSettings();
                if (IndexSettings.DEFAULT_PIPELINE.exists(indexSettings)) {
                    // find the default pipeline if one is defined from an existing index setting
                    defaultPipeline = IndexSettings.DEFAULT_PIPELINE.get(indexSettings);
                    indexRequest.setPipeline(defaultPipeline);
                }
                if (IndexSettings.FINAL_PIPELINE.exists(indexSettings)) {
                    // find the final pipeline if one is defined from an existing index setting
                    finalPipeline = IndexSettings.FINAL_PIPELINE.get(indexSettings);
                    indexRequest.setFinalPipeline(finalPipeline);
                }
            } else if (indexRequest.index() != null) {
                // the index does not exist yet (and this is a valid request), so match index
                // templates to look for pipelines in either a matching V2 template (which takes
                // precedence), or if a V2 template does not match, any V1 templates
                String v2Template = MetadataIndexTemplateService.findV2Template(metadata, indexRequest.index(), false);
                if (v2Template != null) {
                    Settings settings = MetadataIndexTemplateService.resolveSettings(metadata, v2Template);
                    if (IndexSettings.DEFAULT_PIPELINE.exists(settings)) {
                        defaultPipeline = IndexSettings.DEFAULT_PIPELINE.get(settings);
                        // we can not break in case a lower-order template has a final pipeline that we need to collect
                    }
                    if (IndexSettings.FINAL_PIPELINE.exists(settings)) {
                        finalPipeline = IndexSettings.FINAL_PIPELINE.get(settings);
                        // we can not break in case a lower-order template has a default pipeline that we need to collect
                    }
                    indexRequest.setPipeline(defaultPipeline != null ? defaultPipeline : NOOP_PIPELINE_NAME);
                    indexRequest.setFinalPipeline(finalPipeline != null ? finalPipeline : NOOP_PIPELINE_NAME);
                } else {
                    List<IndexTemplateMetadata> templates = MetadataIndexTemplateService.findV1Templates(
                        metadata,
                        indexRequest.index(),
                        null
                    );
                    // the order of templates is highest order first
                    for (final IndexTemplateMetadata template : templates) {
                        final Settings settings = template.settings();
                        if (defaultPipeline == null && IndexSettings.DEFAULT_PIPELINE.exists(settings)) {
                            defaultPipeline = IndexSettings.DEFAULT_PIPELINE.get(settings);
                            // we can not break in case a lower-order template has a final pipeline that we need to collect
                        }
                        if (finalPipeline == null && IndexSettings.FINAL_PIPELINE.exists(settings)) {
                            finalPipeline = IndexSettings.FINAL_PIPELINE.get(settings);
                            // we can not break in case a lower-order template has a default pipeline that we need to collect
                        }
                        if (defaultPipeline != null && finalPipeline != null) {
                            // we can break if we have already collected a default and final pipeline
                            break;
                        }
                    }
                    indexRequest.setPipeline(defaultPipeline != null ? defaultPipeline : NOOP_PIPELINE_NAME);
                    indexRequest.setFinalPipeline(finalPipeline != null ? finalPipeline : NOOP_PIPELINE_NAME);
                }
            }

            if (requestPipeline != null) {
                indexRequest.setPipeline(requestPipeline);
            }

            /*
             * We have to track whether or not the pipeline for this request has already been resolved. It can happen that the
             * pipeline for this request has already been derived yet we execute this loop again. That occurs if the bulk request
             * has been forwarded by a non-ingest coordinating node to an ingest node. In this case, the coordinating node will have
             * already resolved the pipeline for this request. It is important that we are able to distinguish this situation as we
             * can not double-resolve the pipeline because we will not be able to distinguish the case of the pipeline having been
             * set from a request pipeline parameter versus having been set by the resolution. We need to be able to distinguish
             * these cases as we need to reject the request if the pipeline was set by a required pipeline and there is a request
             * pipeline parameter too.
             */
            indexRequest.isPipelineResolved(true);
        }

        // return whether this index request has a pipeline
        return NOOP_PIPELINE_NAME.equals(indexRequest.getPipeline()) == false
            || NOOP_PIPELINE_NAME.equals(indexRequest.getFinalPipeline()) == false;
    }

    private static String resolveIndexName(final String unresolvedIndexName, final long epochMillis) {
        List<String> resolvedNames = DATE_MATH_EXPRESSION_RESOLVER.resolve(
            new IndexNameExpressionResolver.ResolverContext(epochMillis),
            org.elasticsearch.core.List.of(unresolvedIndexName)
        );
        assert resolvedNames.size() == 1;
        return resolvedNames.get(0);
    }

    public ClusterService getClusterService() {
        return clusterService;
    }

    public ScriptService getScriptService() {
        return scriptService;
    }

    /**
     * Deletes the pipeline specified by id in the request.
     */
    public void delete(DeletePipelineRequest request, ActionListener<AcknowledgedResponse> listener) {
        clusterService.submitStateUpdateTask("delete-pipeline-" + request.getId(), new AckedClusterStateUpdateTask(request, listener) {
            @Override
            public ClusterState execute(ClusterState currentState) {
                return innerDelete(request, currentState);
            }
        });
    }

    static ClusterState innerDelete(DeletePipelineRequest request, ClusterState currentState) {
        IngestMetadata currentIngestMetadata = currentState.metadata().custom(IngestMetadata.TYPE);
        if (currentIngestMetadata == null) {
            return currentState;
        }
        Map<String, PipelineConfiguration> pipelines = currentIngestMetadata.getPipelines();
        Set<String> toRemove = new HashSet<>();
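        // request.getId() may be a wildcard pattern such as "logs-*"; every pipeline id matching it is collected for removal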
        for (String pipelineKey : pipelines.keySet()) {
            if (Regex.simpleMatch(request.getId(), pipelineKey)) {
                toRemove.add(pipelineKey);
            }
        }
        if (toRemove.isEmpty() && Regex.isMatchAllPattern(request.getId()) == false) {
            throw new ResourceNotFoundException("pipeline [{}] is missing", request.getId());
        } else if (toRemove.isEmpty()) {
            return currentState;
        }
        final Map<String, PipelineConfiguration> pipelinesCopy = new HashMap<>(pipelines);
        ImmutableOpenMap<String, IndexMetadata> indices = currentState.metadata().indices();
        for (String key : toRemove) {
            validateNotInUse(key, indices);
            pipelinesCopy.remove(key);
        }
        ClusterState.Builder newState = ClusterState.builder(currentState);
        newState.metadata(
            Metadata.builder(currentState.getMetadata()).putCustom(IngestMetadata.TYPE, new IngestMetadata(pipelinesCopy)).build()
        );
        return newState.build();
    }

    static void validateNotInUse(String pipeline, ImmutableOpenMap<String, IndexMetadata> indices) {
        List<String> defaultPipelineIndices = new ArrayList<>();
        List<String> finalPipelineIndices = new ArrayList<>();
        for (IndexMetadata indexMetadata : indices.values()) {
            String defaultPipeline = IndexSettings.DEFAULT_PIPELINE.get(indexMetadata.getSettings());
            String finalPipeline = IndexSettings.FINAL_PIPELINE.get(indexMetadata.getSettings());
            if (pipeline.equals(defaultPipeline)) {
                defaultPipelineIndices.add(indexMetadata.getIndex().getName());
            }

            if (pipeline.equals(finalPipeline)) {
                finalPipelineIndices.add(indexMetadata.getIndex().getName());
            }
        }

        if (defaultPipelineIndices.size() > 0 || finalPipelineIndices.size() > 0) {
            throw new IllegalArgumentException(
                String.format(
                    Locale.ROOT,
                    "pipeline [%s] cannot be deleted because it is %s%s%s",
                    pipeline,
                    defaultPipelineIndices.size() > 0
                        ? String.format(
                            Locale.ROOT,
                            "the default pipeline for %s index(es) including [%s]",
                            defaultPipelineIndices.size(),
                            defaultPipelineIndices.stream().sorted().limit(3).collect(Collectors.joining(","))
                        )
                        : Strings.EMPTY,
                    defaultPipelineIndices.size() > 0 && finalPipelineIndices.size() > 0 ? " and " : Strings.EMPTY,
                    finalPipelineIndices.size() > 0
                        ? String.format(
                            Locale.ROOT,
                            "the final pipeline for %s index(es) including [%s]",
                            finalPipelineIndices.size(),
                            finalPipelineIndices.stream().sorted().limit(3).collect(Collectors.joining(","))
                        )
                        : Strings.EMPTY
                )
            );
        }
    }

    /**
     * @return pipeline configuration specified by id. If multiple ids or wildcards are specified multiple pipelines
     * may be returned
     */
    // Returning PipelineConfiguration instead of Pipeline, because Pipeline and Processor interface don't
    // know how to serialize themselves.
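    // For example, getPipelines(clusterState, "logs-*") returns every pipeline whose id matches the
    // wildcard, and getPipelines(clusterState) with no ids returns all pipelines.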
    public static List<PipelineConfiguration> getPipelines(ClusterState clusterState, String... ids) {
        IngestMetadata ingestMetadata = clusterState.getMetadata().custom(IngestMetadata.TYPE);
        return innerGetPipelines(ingestMetadata, ids);
    }

    static List<PipelineConfiguration> innerGetPipelines(IngestMetadata ingestMetadata, String... ids) {
        if (ingestMetadata == null) {
            return Collections.emptyList();
        }

        // if we didn't ask for _any_ ID, then we get them all (this is the same as if they ask for '*')
        if (ids.length == 0) {
            return new ArrayList<>(ingestMetadata.getPipelines().values());
        }

        List<PipelineConfiguration> result = new ArrayList<>(ids.length);
        for (String id : ids) {
            if (Regex.isSimpleMatchPattern(id)) {
                for (Map.Entry<String, PipelineConfiguration> entry : ingestMetadata.getPipelines().entrySet()) {
                    if (Regex.simpleMatch(id, entry.getKey())) {
                        result.add(entry.getValue());
                    }
                }
            } else {
                PipelineConfiguration pipeline = ingestMetadata.getPipelines().get(id);
                if (pipeline != null) {
                    result.add(pipeline);
                }
            }
        }
        return result;
    }

    /**
     * Stores the specified pipeline definition in the request.
     */
    public void putPipeline(
        PutPipelineRequest request,
        ActionListener<AcknowledgedResponse> listener,
        Consumer<ActionListener<NodesInfoResponse>> nodeInfoListener
    ) throws Exception {

        Map<String, Object> pipelineConfig = null;
        IngestMetadata currentIngestMetadata = state.metadata().custom(IngestMetadata.TYPE);
        if (request.getVersion() == null
            && currentIngestMetadata != null
            && currentIngestMetadata.getPipelines().containsKey(request.getId())) {
            pipelineConfig = XContentHelper.convertToMap(request.getSource(), false, request.getXContentType()).v2();
            PipelineConfiguration currentPipeline = currentIngestMetadata.getPipelines().get(request.getId());
            if (currentPipeline.getConfigAsMap().equals(pipelineConfig)) {
                // existing pipeline matches request pipeline -- no need to update
                listener.onResponse(AcknowledgedResponse.TRUE);
                return;
            }
        }

        if (state.getNodes().getMinNodeVersion().before(Version.V_7_15_0)) {
            pipelineConfig = pipelineConfig == null
                ? XContentHelper.convertToMap(request.getSource(), false, request.getXContentType()).v2()
                : pipelineConfig;
            if (pipelineConfig.containsKey(Pipeline.META_KEY)) {
                throw new IllegalStateException("pipelines with _meta field require minimum node version of " + Version.V_7_15_0);
            }
        }

        final Map<String, Object> config = pipelineConfig == null
            ? XContentHelper.convertToMap(request.getSource(), false, request.getXContentType()).v2()
            : pipelineConfig;
        nodeInfoListener.accept(ActionListener.wrap(nodeInfos -> {
            Map<DiscoveryNode, IngestInfo> ingestInfos = new HashMap<>();
            for (NodeInfo nodeInfo : nodeInfos.getNodes()) {
                ingestInfos.put(nodeInfo.getNode(), nodeInfo.getInfo(IngestInfo.class));
            }

            validatePipeline(ingestInfos, request.getId(), config);
            clusterService.submitStateUpdateTask("put-pipeline-" + request.getId(), new AckedClusterStateUpdateTask(request, listener) {
                @Override
                public ClusterState execute(ClusterState currentState) {
                    return innerPut(request, currentState);
                }
            });
        }, listener::onFailure));
    }

    /**
     * Returns the pipeline by the specified id
     */
    public Pipeline getPipeline(String id) {
        PipelineHolder holder = pipelines.get(id);
        if (holder != null) {
            return holder.pipeline;
        } else {
            return null;
        }
    }

    public Map<String, Processor.Factory> getProcessorFactories() {
        return processorFactories;
    }

    @Override
    public IngestInfo info() {
        Map<String, Processor.Factory> processorFactories = getProcessorFactories();
        List<ProcessorInfo> processorInfoList = new ArrayList<>(processorFactories.size());
        for (Map.Entry<String, Processor.Factory> entry : processorFactories.entrySet()) {
            processorInfoList.add(new ProcessorInfo(entry.getKey()));
        }
        return new IngestInfo(processorInfoList);
    }

    Map<String, PipelineHolder> pipelines() {
        return pipelines;
    }

    /**
     * Recursive method to obtain all of the non-failure processors for given compoundProcessor. Since conditionals are implemented as
     * wrappers to the actual processor, always prefer the actual processor's metric over the conditional processor's metric.
     * @param compoundProcessor The compound processor to start walking the non-failure processors
     * @param processorMetrics The list of {@link Processor} {@link IngestMetric} tuples.
     * @return the processorMetrics for all non-failure processor that belong to the original compoundProcessor
     */
    private static List<Tuple<Processor, IngestMetric>> getProcessorMetrics(
        CompoundProcessor compoundProcessor,
        List<Tuple<Processor, IngestMetric>> processorMetrics
    ) {
        // only surface the top level non-failure processors, on-failure processor times will be included in the top level non-failure
        for (Tuple<Processor, IngestMetric> processorWithMetric : compoundProcessor.getProcessorsWithMetrics()) {
            Processor processor = processorWithMetric.v1();
            IngestMetric metric = processorWithMetric.v2();
            if (processor instanceof CompoundProcessor) {
                getProcessorMetrics((CompoundProcessor) processor, processorMetrics);
            } else {
                // Prefer the conditional's metric since it only includes metrics when the conditional evaluated to true.
                if (processor instanceof ConditionalProcessor) {
                    metric = ((ConditionalProcessor) processor).getMetric();
                }
                processorMetrics.add(new Tuple<>(processor, metric));
            }
        }
        return processorMetrics;
    }

    // visible for testing
    public static ClusterState innerPut(PutPipelineRequest request, ClusterState currentState) {
        IngestMetadata currentIngestMetadata = currentState.metadata().custom(IngestMetadata.TYPE);

        BytesReference pipelineSource = request.getSource();
        if (request.getVersion() != null) {
            PipelineConfiguration currentPipeline = currentIngestMetadata != null
                ? currentIngestMetadata.getPipelines().get(request.getId())
                : null;
            if (currentPipeline == null) {
                throw new IllegalArgumentException(
                    String.format(
                        Locale.ROOT,
                        "version conflict, required version [%s] for pipeline [%s] but no pipeline was found",
                        request.getVersion(),
                        request.getId()
                    )
                );
            }

            final Integer currentVersion = currentPipeline.getVersion();
            if (Objects.equals(request.getVersion(), currentVersion) == false) {
                throw new IllegalArgumentException(
                    String.format(
                        Locale.ROOT,
                        "version conflict, required version [%s] for pipeline [%s] but current version is [%s]",
                        request.getVersion(),
                        request.getId(),
                        currentVersion
                    )
                );
            }

            Map<String, Object> pipelineConfig = XContentHelper.convertToMap(request.getSource(), false, request.getXContentType()).v2();
            final Integer specifiedVersion = (Integer) pipelineConfig.get("version");
            if (pipelineConfig.containsKey("version") && Objects.equals(specifiedVersion, currentVersion)) {
                throw new IllegalArgumentException(
                    String.format(
                        Locale.ROOT,
                        "cannot update pipeline [%s] with the same version [%s]",
                        request.getId(),
                        request.getVersion()
                    )
                );
            }

            // if no version specified in the pipeline definition, inject a version of [request.getVersion() + 1]
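            // for example, a request carrying required version 5 whose definition omits "version" has "version: 6"
            // injected into the stored configuration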
            if (specifiedVersion == null) {
                pipelineConfig.put("version", request.getVersion() == null ? 1 : request.getVersion() + 1);
                try {
                    XContentBuilder builder = XContentBuilder.builder(request.getXContentType().xContent()).map(pipelineConfig);
                    pipelineSource = BytesReference.bytes(builder);
                } catch (IOException e) {
                    throw new IllegalStateException(e);
                }
            }
        }

        Map<String, PipelineConfiguration> pipelines;
        if (currentIngestMetadata != null) {
            pipelines = new HashMap<>(currentIngestMetadata.getPipelines());
        } else {
            pipelines = new HashMap<>();
        }

        pipelines.put(request.getId(), new PipelineConfiguration(request.getId(), pipelineSource, request.getXContentType()));
        ClusterState.Builder newState = ClusterState.builder(currentState);
        newState.metadata(
            Metadata.builder(currentState.getMetadata()).putCustom(IngestMetadata.TYPE, new IngestMetadata(pipelines)).build()
        );
        return newState.build();
    }

    void validatePipeline(Map<DiscoveryNode, IngestInfo> ingestInfos, String pipelineId, Map<String, Object> pipelineConfig)
        throws Exception {
        if (ingestInfos.isEmpty()) {
            throw new IllegalStateException("Ingest info is empty");
        }

        Pipeline pipeline = Pipeline.create(pipelineId, pipelineConfig, processorFactories, scriptService);
        List<Exception> exceptions = new ArrayList<>();
        for (Processor processor : pipeline.flattenAllProcessors()) {
            for (Map.Entry<DiscoveryNode, IngestInfo> entry : ingestInfos.entrySet()) {
                String type = processor.getType();
                if (entry.getValue().containsProcessor(type) == false && ConditionalProcessor.TYPE.equals(type) == false) {
                    String message = "Processor type [" + processor.getType() + "] is not installed on node [" + entry.getKey() + "]";
                    exceptions.add(ConfigurationUtils.newConfigurationException(processor.getType(), processor.getTag(), null, message));
                }
            }
        }
        ExceptionsHelper.rethrowAndSuppress(exceptions);
    }

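    /**
     * Runs the ingest pipelines over a batch of requests on the given executor. {@code onDropped} is
     * invoked with the slot of each document dropped by a pipeline, {@code onFailure} with the slot and
     * cause of each failed document, and {@code onCompletion} exactly once after all
     * {@code numberOfActionRequests} requests have been handled.
     */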
    public void executeBulkRequest(
        int numberOfActionRequests,
        Iterable<DocWriteRequest<?>> actionRequests,
        BiConsumer<Integer, Exception> onFailure,
        BiConsumer<Thread, Exception> onCompletion,
        IntConsumer onDropped,
        String executorName
    ) {

        threadPool.executor(executorName).execute(new AbstractRunnable() {

            @Override
            public void onFailure(Exception e) {
                onCompletion.accept(null, e);
            }

            @Override
            protected void doRun() {
                final Thread originalThread = Thread.currentThread();
                final AtomicInteger counter = new AtomicInteger(numberOfActionRequests);
                int i = 0;
                for (DocWriteRequest<?> actionRequest : actionRequests) {
                    IndexRequest indexRequest = TransportBulkAction.getIndexWriteRequest(actionRequest);
                    if (indexRequest == null) {
                        if (counter.decrementAndGet() == 0) {
                            onCompletion.accept(originalThread, null);
                        }
                        assert counter.get() >= 0;
                        i++;
                        continue;
                    }

                    final String pipelineId = indexRequest.getPipeline();
                    indexRequest.setPipeline(NOOP_PIPELINE_NAME);
                    final String finalPipelineId = indexRequest.getFinalPipeline();
                    indexRequest.setFinalPipeline(NOOP_PIPELINE_NAME);
                    boolean hasFinalPipeline = true;
                    final List<String> pipelines;
                    if (IngestService.NOOP_PIPELINE_NAME.equals(pipelineId) == false
                        && IngestService.NOOP_PIPELINE_NAME.equals(finalPipelineId) == false) {
                        pipelines = Arrays.asList(pipelineId, finalPipelineId);
                    } else if (IngestService.NOOP_PIPELINE_NAME.equals(pipelineId) == false) {
                        pipelines = Collections.singletonList(pipelineId);
                        hasFinalPipeline = false;
                    } else if (IngestService.NOOP_PIPELINE_NAME.equals(finalPipelineId) == false) {
                        pipelines = Collections.singletonList(finalPipelineId);
                    } else {
                        if (counter.decrementAndGet() == 0) {
                            onCompletion.accept(originalThread, null);
                        }
                        assert counter.get() >= 0;
                        i++;
                        continue;
                    }

                    executePipelines(
                        i,
                        pipelines.iterator(),
                        hasFinalPipeline,
                        indexRequest,
                        onDropped,
                        onFailure,
                        counter,
                        onCompletion,
                        originalThread
                    );

                    i++;
                }
            }
        });
    }

    private void executePipelines(
        final int slot,
        final Iterator<String> it,
        final boolean hasFinalPipeline,
        final IndexRequest indexRequest,
        final IntConsumer onDropped,
        final BiConsumer<Integer, Exception> onFailure,
        final AtomicInteger counter,
        final BiConsumer<Thread, Exception> onCompletion,
        final Thread originalThread
    ) {
        assert it.hasNext();
        final String pipelineId = it.next();
        try {
            PipelineHolder holder = pipelines.get(pipelineId);
            if (holder == null) {
                throw new IllegalArgumentException("pipeline with id [" + pipelineId + "] does not exist");
            }
            Pipeline pipeline = holder.pipeline;
            String originalIndex = indexRequest.indices()[0];
            innerExecute(slot, indexRequest, pipeline, onDropped, e -> {
                if (e != null) {
                    logger.debug(
                        () -> new ParameterizedMessage(
                            "failed to execute pipeline [{}] for document [{}/{}]",
                            pipelineId,
                            indexRequest.index(),
                            indexRequest.id()
                        ),
                        e
                    );
                    onFailure.accept(slot, e);
                }

                Iterator<String> newIt = it;
                boolean newHasFinalPipeline = hasFinalPipeline;
                String newIndex = indexRequest.indices()[0];
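                // a processor may have rewritten the target index (via the _index metadata field); unless the
                // pipeline that just ran was the final one, re-resolve pipelines so that the new target index's
                // final pipeline still runs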

                if (Objects.equals(originalIndex, newIndex) == false) {
                    if (hasFinalPipeline && it.hasNext() == false) {
                        totalMetrics.ingestFailed();
                        onFailure.accept(
                            slot,
                            new IllegalStateException("final pipeline [" + pipelineId + "] can't change the target index")
                        );
                    } else {
                        indexRequest.isPipelineResolved(false);
                        resolvePipelines(null, indexRequest, state.metadata());
                        if (IngestService.NOOP_PIPELINE_NAME.equals(indexRequest.getFinalPipeline()) == false) {
                            newIt = Collections.singleton(indexRequest.getFinalPipeline()).iterator();
                            newHasFinalPipeline = true;
                        } else {
                            newIt = Collections.emptyIterator();
                        }
                    }
                }

                if (newIt.hasNext()) {
                    executePipelines(
                        slot,
                        newIt,
                        newHasFinalPipeline,
                        indexRequest,
                        onDropped,
                        onFailure,
                        counter,
                        onCompletion,
                        originalThread
                    );
                } else {
                    if (counter.decrementAndGet() == 0) {
                        onCompletion.accept(originalThread, null);
                    }
                    assert counter.get() >= 0;
                }
            });
        } catch (Exception e) {
            logger.debug(
                () -> new ParameterizedMessage(
                    "failed to execute pipeline [{}] for document [{}/{}]",
                    pipelineId,
                    indexRequest.index(),
                    indexRequest.id()
                ),
                e
            );
            onFailure.accept(slot, e);
            if (counter.decrementAndGet() == 0) {
                onCompletion.accept(originalThread, null);
            }
            assert counter.get() >= 0;
        }
    }

    public IngestStats stats() {
        IngestStats.Builder statsBuilder = new IngestStats.Builder();
        statsBuilder.addTotalMetrics(totalMetrics);
        pipelines.forEach((id, holder) -> {
            Pipeline pipeline = holder.pipeline;
            CompoundProcessor rootProcessor = pipeline.getCompoundProcessor();
            statsBuilder.addPipelineMetrics(id, pipeline.getMetrics());
            List<Tuple<Processor, IngestMetric>> processorMetrics = new ArrayList<>();
            getProcessorMetrics(rootProcessor, processorMetrics);
            processorMetrics.forEach(t -> {
                Processor processor = t.v1();
                IngestMetric processorMetric = t.v2();
                statsBuilder.addProcessorMetrics(id, getProcessorName(processor), processor.getType(), processorMetric);
            });
        });
        return statsBuilder.build();
    }

    /**
     * Adds a listener that gets invoked with the current cluster state before processor factories
     * get invoked.
     *
     * This is useful for components that are used by ingest processors, so that they have the opportunity to update
     * before these components get used by the ingest processor factory.
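     *
     * A hypothetical usage sketch (the component name is illustrative only):
     * <pre>{@code
     * ingestService.addIngestClusterStateListener(clusterState -> databaseRegistry.update(clusterState));
     * }</pre>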
     */
    public void addIngestClusterStateListener(Consumer<ClusterState> listener) {
        ingestClusterStateListeners.add(listener);
    }

    // package private for testing
    static String getProcessorName(Processor processor) {
        // conditionals are implemented as wrappers around the real processor, so get the real processor for the correct type for the name
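        // e.g. a "set" processor with tag "t1" is reported as "set:t1", and an untagged pipeline
        // processor that invokes pipeline "p" is reported as "pipeline:p"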
        if (processor instanceof ConditionalProcessor) {
            processor = ((ConditionalProcessor) processor).getInnerProcessor();
        }
        StringBuilder sb = new StringBuilder(5);
        sb.append(processor.getType());

        if (processor instanceof PipelineProcessor) {
            String pipelineName = ((PipelineProcessor) processor).getPipelineTemplate().newInstance(Collections.emptyMap()).execute();
            sb.append(":");
            sb.append(pipelineName);
        }
        String tag = processor.getTag();
        if (tag != null && tag.isEmpty() == false) {
            sb.append(":");
            sb.append(tag);
        }
        return sb.toString();
    }

    private void innerExecute(
        int slot,
        IndexRequest indexRequest,
        Pipeline pipeline,
        IntConsumer itemDroppedHandler,
        Consumer handler
    ) {
        if (pipeline.getProcessors().isEmpty()) {
            handler.accept(null);
            return;
        }

        long startTimeInNanos = System.nanoTime();
        // the pipeline-specific stat holder may not exist, and that is fine
        // (e.g. the pipeline may have been removed while we're ingesting a document)
        totalMetrics.preIngest();
        String index = indexRequest.index();
        String type = indexRequest.type();
        String id = indexRequest.id();
        String routing = indexRequest.routing();
        Long version = indexRequest.version();
        VersionType versionType = indexRequest.versionType();
        Map<String, Object> sourceAsMap = indexRequest.sourceAsMap();
        IngestDocument ingestDocument = new IngestDocument(index, type, id, routing, version, versionType, sourceAsMap);
        ingestDocument.executePipeline(pipeline, (result, e) -> {
            long ingestTimeInNanos = System.nanoTime() - startTimeInNanos;
            totalMetrics.postIngest(ingestTimeInNanos);
            if (e != null) {
                totalMetrics.ingestFailed();
                handler.accept(e);
            } else if (result == null) {
                itemDroppedHandler.accept(slot);
                handler.accept(null);
            } else {
                Map<IngestDocument.Metadata, Object> metadataMap = ingestDocument.extractMetadata();
                // it's fine to set all metadata fields all the time, as ingest document holds their starting values
                // before ingestion, which might also get modified during ingestion.
                indexRequest.index((String) metadataMap.get(IngestDocument.Metadata.INDEX));
                indexRequest.type((String) metadataMap.get(IngestDocument.Metadata.TYPE));
                indexRequest.id((String) metadataMap.get(IngestDocument.Metadata.ID));
                indexRequest.routing((String) metadataMap.get(IngestDocument.Metadata.ROUTING));
                indexRequest.version(((Number) metadataMap.get(IngestDocument.Metadata.VERSION)).longValue());
                if (metadataMap.get(IngestDocument.Metadata.VERSION_TYPE) != null) {
                    indexRequest.versionType(VersionType.fromString((String) metadataMap.get(IngestDocument.Metadata.VERSION_TYPE)));
                }
                if (metadataMap.get(IngestDocument.Metadata.IF_SEQ_NO) != null) {
                    indexRequest.setIfSeqNo(((Number) metadataMap.get(IngestDocument.Metadata.IF_SEQ_NO)).longValue());
                }
                if (metadataMap.get(IngestDocument.Metadata.IF_PRIMARY_TERM) != null) {
                    indexRequest.setIfPrimaryTerm(((Number) metadataMap.get(IngestDocument.Metadata.IF_PRIMARY_TERM)).longValue());
                }
                indexRequest.source(ingestDocument.getSourceAndMetadata(), indexRequest.getContentType());
                if (metadataMap.get(IngestDocument.Metadata.DYNAMIC_TEMPLATES) != null) {
                    Map<String, String> mergedDynamicTemplates = new HashMap<>(indexRequest.getDynamicTemplates());
                    @SuppressWarnings("unchecked")
                    Map<String, String> map = (Map<String, String>) metadataMap.get(IngestDocument.Metadata.DYNAMIC_TEMPLATES);
                    mergedDynamicTemplates.putAll(map);
                    indexRequest.setDynamicTemplates(mergedDynamicTemplates);
                }
                handler.accept(null);
            }
        });
    }

    @Override
    public void applyClusterState(final ClusterChangedEvent event) {
        state = event.state();
        if (state.blocks().hasGlobalBlock(GatewayService.STATE_NOT_RECOVERED_BLOCK)) {
            return;
        }

        // Publish cluster state to components that are used by processor factories before letting
        // processor factories create new processor instances.
        // (Note that this needs to be done also in the case when there is no change to ingest metadata, because in the case
        // when only the part of the cluster state that a component is interested in, is updated.)
        ingestClusterStateListeners.forEach(consumer -> consumer.accept(state));

        IngestMetadata newIngestMetadata = state.getMetadata().custom(IngestMetadata.TYPE);
        if (newIngestMetadata == null) {
            return;
        }

        try {
            innerUpdatePipelines(newIngestMetadata);
        } catch (ElasticsearchParseException e) {
            logger.warn("failed to update ingest pipelines", e);
        }
    }

    void innerUpdatePipelines(IngestMetadata newIngestMetadata) {
        Map<String, PipelineHolder> existingPipelines = this.pipelines;

        // Lazily initialize these variables in order to favour the most likely scenario, namely that there are no pipeline changes:
        Map<String, PipelineHolder> newPipelines = null;
        List<ElasticsearchParseException> exceptions = null;
        // Iterate over the pipeline configurations in ingest metadata and construct a new pipeline if there is no existing pipeline
        // or the pipeline configuration has been modified
        for (PipelineConfiguration newConfiguration : newIngestMetadata.getPipelines().values()) {
            PipelineHolder previous = existingPipelines.get(newConfiguration.getId());
            if (previous != null && previous.configuration.equals(newConfiguration)) {
                continue;
            }

            if (newPipelines == null) {
                newPipelines = new HashMap<>(existingPipelines);
            }
            try {
                Pipeline newPipeline = Pipeline.create(
                    newConfiguration.getId(),
                    newConfiguration.getConfigAsMap(),
                    processorFactories,
                    scriptService
                );
                newPipelines.put(newConfiguration.getId(), new PipelineHolder(newConfiguration, newPipeline));

                if (previous == null) {
                    continue;
                }
                Pipeline oldPipeline = previous.pipeline;
                newPipeline.getMetrics().add(oldPipeline.getMetrics());
                List<Tuple<Processor, IngestMetric>> oldPerProcessMetrics = new ArrayList<>();
                List<Tuple<Processor, IngestMetric>> newPerProcessMetrics = new ArrayList<>();
                getProcessorMetrics(oldPipeline.getCompoundProcessor(), oldPerProcessMetrics);
                getProcessorMetrics(newPipeline.getCompoundProcessor(), newPerProcessMetrics);
                // Best attempt to populate new processor metrics using a parallel array of the old metrics. This is not ideal since
                // the per processor metrics may get reset when the arrays don't match. However, to get to an ideal model, unique and
                // consistent id's per processor and/or semantic equals for each processor will be needed.
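                // for example, old processors [set, rename] and new processors [set, rename] carry their metrics
                // over position by position, whereas old [set, rename] and new [rename, set] mismatch on type at
                // every position, so those metrics are dropped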
                if (newPerProcessMetrics.size() == oldPerProcessMetrics.size()) {
                    Iterator<Tuple<Processor, IngestMetric>> oldMetricsIterator = oldPerProcessMetrics.iterator();
                    for (Tuple<Processor, IngestMetric> compositeMetric : newPerProcessMetrics) {
                        String type = compositeMetric.v1().getType();
                        IngestMetric metric = compositeMetric.v2();
                        if (oldMetricsIterator.hasNext()) {
                            Tuple<Processor, IngestMetric> oldCompositeMetric = oldMetricsIterator.next();
                            String oldType = oldCompositeMetric.v1().getType();
                            IngestMetric oldMetric = oldCompositeMetric.v2();
                            if (type.equals(oldType)) {
                                metric.add(oldMetric);
                            }
                        }
                    }
                }
            } catch (ElasticsearchParseException e) {
                Pipeline pipeline = substitutePipeline(newConfiguration.getId(), e);
                newPipelines.put(newConfiguration.getId(), new PipelineHolder(newConfiguration, pipeline));
                if (exceptions == null) {
                    exceptions = new ArrayList<>();
                }
                exceptions.add(e);
            } catch (Exception e) {
                ElasticsearchParseException parseException = new ElasticsearchParseException(
                    "Error updating pipeline with id [" + newConfiguration.getId() + "]",
                    e
                );
                Pipeline pipeline = substitutePipeline(newConfiguration.getId(), parseException);
                newPipelines.put(newConfiguration.getId(), new PipelineHolder(newConfiguration, pipeline));
                if (exceptions == null) {
                    exceptions = new ArrayList<>();
                }
                exceptions.add(parseException);
            }
        }

        // Iterate over the current active pipelines and check whether they are missing in the pipeline configuration and
        // if so delete the pipeline from new Pipelines map:
        for (Map.Entry<String, PipelineHolder> entry : existingPipelines.entrySet()) {
            if (newIngestMetadata.getPipelines().get(entry.getKey()) == null) {
                if (newPipelines == null) {
                    newPipelines = new HashMap<>(existingPipelines);
                }
                newPipelines.remove(entry.getKey());
            }
        }

        if (newPipelines != null) {
            // Update the pipelines:
            this.pipelines = Collections.unmodifiableMap(newPipelines);

            // Rethrow errors that may have occurred during creating new pipeline instances:
            if (exceptions != null) {
                ExceptionsHelper.rethrowAndSuppress(exceptions);
            }
        }
    }

    /**
     * Gets all the Processors of the given type from within a Pipeline.
     * @param pipelineId the pipeline to inspect
     * @param clazz the Processor class to look for
     * @return the list of Processor instances of the given class found within the pipeline
     */
    public <P extends Processor> List<P> getProcessorsInPipeline(String pipelineId, Class<P> clazz) {
        Pipeline pipeline = getPipeline(pipelineId);
        if (pipeline == null) {
            throw new IllegalArgumentException("pipeline with id [" + pipelineId + "] does not exist");
        }

        List<P> processors = new ArrayList<>();
        for (Processor processor : pipeline.flattenAllProcessors()) {
            if (clazz.isAssignableFrom(processor.getClass())) {
                processors.add(clazz.cast(processor));
            }

            while (processor instanceof WrappingProcessor) {
                WrappingProcessor wrappingProcessor = (WrappingProcessor) processor;
                if (clazz.isAssignableFrom(wrappingProcessor.getInnerProcessor().getClass())) {
                    processors.add(clazz.cast(wrappingProcessor.getInnerProcessor()));
                }
                processor = wrappingProcessor.getInnerProcessor();
                // break in the case of self referencing processors in the event a processor author creates a
                // wrapping processor that has its inner processor refer to itself.
                if (wrappingProcessor == processor) {
                    break;
                }
            }
        }

        return processors;
    }

    private static Pipeline substitutePipeline(String id, ElasticsearchParseException e) {
        String tag = e.getHeaderKeys().contains("processor_tag") ? e.getHeader("processor_tag").get(0) : null;
        String type = e.getHeaderKeys().contains("processor_type") ? e.getHeader("processor_type").get(0) : "unknown";
        String errorMessage = "pipeline with id [" + id + "] could not be loaded, caused by [" + e.getDetailedMessage() + "]";
        Processor failureProcessor = new AbstractProcessor(tag, "this is a placeholder processor") {
            @Override
            public IngestDocument execute(IngestDocument ingestDocument) {
                throw new IllegalStateException(errorMessage);
            }

            @Override
            public String getType() {
                return type;
            }
        };
        String description = "this is a place holder pipeline, because pipeline with id [" + id + "] could not be loaded";
        return new Pipeline(id, description, null, null, new CompoundProcessor(failureProcessor));
    }

    static class PipelineHolder {

        final PipelineConfiguration configuration;
        final Pipeline pipeline;

        PipelineHolder(PipelineConfiguration configuration, Pipeline pipeline) {
            this.configuration = Objects.requireNonNull(configuration);
            this.pipeline = Objects.requireNonNull(pipeline);
        }
    }
}




