All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.ingest.CompoundProcessor Maven / Gradle / Ivy

There is a newer version: 8.14.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.ingest;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.core.Tuple;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BiConsumer;
import java.util.function.LongSupplier;
import java.util.stream.Collectors;

/**
 * A Processor that executes a list of other "processors". It executes a separate list of
 * "onFailureProcessors" when any of the processors throw an {@link Exception}.
 */
public class CompoundProcessor implements Processor {
    public static final String ON_FAILURE_MESSAGE_FIELD = "on_failure_message";
    public static final String ON_FAILURE_PROCESSOR_TYPE_FIELD = "on_failure_processor_type";
    public static final String ON_FAILURE_PROCESSOR_TAG_FIELD = "on_failure_processor_tag";
    public static final String ON_FAILURE_PIPELINE_FIELD = "on_failure_pipeline";

    private static final Logger logger = LogManager.getLogger(CompoundProcessor.class);

    private final boolean ignoreFailure;
    private final List processors;
    private final List onFailureProcessors;
    private final List> processorsWithMetrics;
    private final LongSupplier relativeTimeProvider;

    CompoundProcessor(LongSupplier relativeTimeProvider, Processor... processor) {
        this(false, Arrays.asList(processor), Collections.emptyList(), relativeTimeProvider);
    }

    public CompoundProcessor(Processor... processor) {
        this(false, Arrays.asList(processor), Collections.emptyList());
    }

    public CompoundProcessor(boolean ignoreFailure, List processors, List onFailureProcessors) {
        this(ignoreFailure, processors, onFailureProcessors, System::nanoTime);
    }

    CompoundProcessor(
        boolean ignoreFailure,
        List processors,
        List onFailureProcessors,
        LongSupplier relativeTimeProvider
    ) {
        super();
        this.ignoreFailure = ignoreFailure;
        this.processors = processors;
        this.onFailureProcessors = onFailureProcessors;
        this.relativeTimeProvider = relativeTimeProvider;
        this.processorsWithMetrics = new ArrayList<>(processors.size());
        processors.forEach(p -> processorsWithMetrics.add(new Tuple<>(p, new IngestMetric())));
    }

    List> getProcessorsWithMetrics() {
        return processorsWithMetrics;
    }

    public boolean isIgnoreFailure() {
        return ignoreFailure;
    }

    public List getOnFailureProcessors() {
        return onFailureProcessors;
    }

    public List getProcessors() {
        return processors;
    }

    public List flattenProcessors() {
        List allProcessors = new ArrayList<>(flattenProcessors(processors));
        allProcessors.addAll(flattenProcessors(onFailureProcessors));
        return allProcessors;
    }

    private static List flattenProcessors(List processors) {
        List flattened = new ArrayList<>();
        for (Processor processor : processors) {
            if (processor instanceof CompoundProcessor) {
                flattened.addAll(((CompoundProcessor) processor).flattenProcessors());
            } else {
                flattened.add(processor);
            }
        }
        return flattened;
    }

    @Override
    public String getType() {
        return "compound";
    }

    @Override
    public String getTag() {
        return "CompoundProcessor-" + flattenProcessors().stream().map(Processor::getTag).collect(Collectors.joining("-"));
    }

    @Override
    public String getDescription() {
        return null;
    }

    @Override
    public IngestDocument execute(IngestDocument ingestDocument) throws Exception {
        throw new UnsupportedOperationException("this method should not get executed");
    }

    @Override
    public void execute(IngestDocument ingestDocument, BiConsumer handler) {
        innerExecute(0, ingestDocument, handler);
    }

    void innerExecute(int currentProcessor, IngestDocument ingestDocument, BiConsumer handler) {
        if (currentProcessor == processorsWithMetrics.size()) {
            handler.accept(ingestDocument, null);
            return;
        }

        Tuple processorWithMetric = processorsWithMetrics.get(currentProcessor);
        final Processor processor = processorWithMetric.v1();
        final IngestMetric metric = processorWithMetric.v2();
        final long startTimeInNanos = relativeTimeProvider.getAsLong();
        /*
         * Our assumption is that the listener passed to the processor is only ever called once. However, there is no way to enforce
         * that in all processors and all of the code that they call. If the listener is called more than once it causes problems
         * such as the metrics being wrong. The listenerHasBeenCalled variable is used to make sure that the code in the listener
         * is only executed once.
         */
        final AtomicBoolean listenerHasBeenCalled = new AtomicBoolean(false);
        metric.preIngest();
        final AtomicBoolean postIngestHasBeenCalled = new AtomicBoolean(false);
        try {
            processor.execute(ingestDocument, (result, e) -> {
                if (listenerHasBeenCalled.getAndSet(true)) {
                    logger.warn("A listener was unexpectedly called more than once", new RuntimeException());
                    assert false : "A listener was unexpectedly called more than once";
                } else {
                    long ingestTimeInNanos = relativeTimeProvider.getAsLong() - startTimeInNanos;
                    metric.postIngest(ingestTimeInNanos);
                    postIngestHasBeenCalled.set(true);
                    if (e != null) {
                        executeOnFailure(currentProcessor, ingestDocument, handler, processor, metric, e);
                    } else {
                        if (result != null) {
                            innerExecute(currentProcessor + 1, result, handler);
                        } else {
                            handler.accept(null, null);
                        }
                    }
                }
            });
        } catch (Exception e) {
            long ingestTimeInNanos = relativeTimeProvider.getAsLong() - startTimeInNanos;
            if (postIngestHasBeenCalled.get()) {
                logger.warn("Preventing postIngest from being called more than once", new RuntimeException());
                assert false : "Attempt to call postIngest more than once";
            } else {
                metric.postIngest(ingestTimeInNanos);
            }
            executeOnFailure(currentProcessor, ingestDocument, handler, processor, metric, e);
        }
    }

    private void executeOnFailure(
        int currentProcessor,
        IngestDocument ingestDocument,
        BiConsumer handler,
        Processor processor,
        IngestMetric metric,
        Exception e
    ) {
        metric.ingestFailed();
        if (ignoreFailure) {
            innerExecute(currentProcessor + 1, ingestDocument, handler);
        } else {
            IngestProcessorException compoundProcessorException = newCompoundProcessorException(e, processor, ingestDocument);
            if (onFailureProcessors.isEmpty()) {
                handler.accept(null, compoundProcessorException);
            } else {
                executeOnFailureAsync(0, ingestDocument, compoundProcessorException, handler);
            }
        }
    }

    void executeOnFailureAsync(
        int currentOnFailureProcessor,
        IngestDocument ingestDocument,
        ElasticsearchException exception,
        BiConsumer handler
    ) {
        if (currentOnFailureProcessor == 0) {
            putFailureMetadata(ingestDocument, exception);
        }

        if (currentOnFailureProcessor == onFailureProcessors.size()) {
            removeFailureMetadata(ingestDocument);
            handler.accept(ingestDocument, null);
            return;
        }

        final Processor onFailureProcessor = onFailureProcessors.get(currentOnFailureProcessor);
        onFailureProcessor.execute(ingestDocument, (result, e) -> {
            if (e != null) {
                removeFailureMetadata(ingestDocument);
                handler.accept(null, newCompoundProcessorException(e, onFailureProcessor, ingestDocument));
                return;
            }
            if (result == null) {
                removeFailureMetadata(ingestDocument);
                handler.accept(null, null);
                return;
            }
            executeOnFailureAsync(currentOnFailureProcessor + 1, ingestDocument, exception, handler);
        });
    }

    private void putFailureMetadata(IngestDocument ingestDocument, ElasticsearchException cause) {
        List processorTypeHeader = cause.getHeader("processor_type");
        List processorTagHeader = cause.getHeader("processor_tag");
        List processorOriginHeader = cause.getHeader("pipeline_origin");
        String failedProcessorType = (processorTypeHeader != null) ? processorTypeHeader.get(0) : null;
        String failedProcessorTag = (processorTagHeader != null) ? processorTagHeader.get(0) : null;
        String failedPipelineId = (processorOriginHeader != null) ? processorOriginHeader.get(0) : null;
        Map ingestMetadata = ingestDocument.getIngestMetadata();
        ingestMetadata.put(ON_FAILURE_MESSAGE_FIELD, cause.getRootCause().getMessage());
        ingestMetadata.put(ON_FAILURE_PROCESSOR_TYPE_FIELD, failedProcessorType);
        ingestMetadata.put(ON_FAILURE_PROCESSOR_TAG_FIELD, failedProcessorTag);
        if (failedPipelineId != null) {
            ingestMetadata.put(ON_FAILURE_PIPELINE_FIELD, failedPipelineId);
        }
    }

    private void removeFailureMetadata(IngestDocument ingestDocument) {
        Map ingestMetadata = ingestDocument.getIngestMetadata();
        ingestMetadata.remove(ON_FAILURE_MESSAGE_FIELD);
        ingestMetadata.remove(ON_FAILURE_PROCESSOR_TYPE_FIELD);
        ingestMetadata.remove(ON_FAILURE_PROCESSOR_TAG_FIELD);
        ingestMetadata.remove(ON_FAILURE_PIPELINE_FIELD);
    }

    static IngestProcessorException newCompoundProcessorException(Exception e, Processor processor, IngestDocument document) {
        if (e instanceof IngestProcessorException && ((IngestProcessorException) e).getHeader("processor_type") != null) {
            return (IngestProcessorException) e;
        }

        IngestProcessorException exception = new IngestProcessorException(e);

        String processorType = processor.getType();
        if (processorType != null) {
            exception.addHeader("processor_type", processorType);
        }
        String processorTag = processor.getTag();
        if (processorTag != null) {
            exception.addHeader("processor_tag", processorTag);
        }
        if (document != null) {
            List pipelineStack = document.getPipelineStack();
            if (pipelineStack.isEmpty() == false) {
                exception.addHeader("pipeline_origin", pipelineStack);
            }
        }

        return exception;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy