All downloads are free. Search and download functionality uses the official Maven repository.

org.opensearch.ingest.Processor Maven / Gradle / Ivy

There is a newer version: 2.18.0
Show newest version
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.ingest;

import org.opensearch.client.Client;
import org.opensearch.common.util.concurrent.AtomicArray;
import org.opensearch.common.util.concurrent.ThreadContext;
import org.opensearch.env.Environment;
import org.opensearch.index.analysis.AnalysisRegistry;
import org.opensearch.indices.IndicesService;
import org.opensearch.script.ScriptService;
import org.opensearch.threadpool.Scheduler;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.LongSupplier;

/**
 * A processor implementation may modify the data belonging to a document.
 * Whether changes are made and what exactly is modified is up to the implementation.
 * 

* Processors may get called concurrently and thus need to be thread-safe. * * @opensearch.internal */ public interface Processor { /** * Introspect and potentially modify the incoming data. *

* Expert method: only override this method if a processor implementation needs to make an asynchronous call, * otherwise just overwrite {@link #execute(IngestDocument)}. */ default void execute(IngestDocument ingestDocument, BiConsumer handler) { final IngestDocument result; try { result = execute(ingestDocument); } catch (Exception e) { handler.accept(null, e); return; } handler.accept(result, null); } /** * Introspect and potentially modify the incoming data. * * @return If null is returned then the current document will be dropped and not be indexed, * otherwise this document will be kept and indexed */ IngestDocument execute(IngestDocument ingestDocument) throws Exception; /** * Process batched documents and they could be potentially modified by processors. * Only override this method if the processor can benefit from processing documents in batches, otherwise, please * use default implementation. * * @param ingestDocumentWrappers a list of wrapped IngestDocument * @param handler callback with IngestDocument result and exception wrapped in IngestDocumentWrapper. */ default void batchExecute(List ingestDocumentWrappers, Consumer> handler) { if (ingestDocumentWrappers.isEmpty()) { handler.accept(Collections.emptyList()); return; } int size = ingestDocumentWrappers.size(); AtomicInteger counter = new AtomicInteger(size); AtomicArray results = new AtomicArray<>(size); for (int i = 0; i < size; ++i) { innerExecute(i, ingestDocumentWrappers.get(i), results, counter, handler); } } private void innerExecute( int slot, IngestDocumentWrapper ingestDocumentWrapper, AtomicArray results, AtomicInteger counter, Consumer> handler ) { execute(ingestDocumentWrapper.getIngestDocument(), (doc, ex) -> { results.set(slot, new IngestDocumentWrapper(ingestDocumentWrapper.getSlot(), doc, ex)); if (counter.decrementAndGet() == 0) { handler.accept(results.asList()); } }); } /** * Gets the type of a processor */ String getType(); /** * Gets the tag of a processor. 
*/ String getTag(); /** * Gets the description of a processor. */ String getDescription(); /** * A factory that knows how to construct a processor based on a map of maps. */ interface Factory { /** * Creates a processor based on the specified map of maps config. * @param processorFactories Other processors which may be created inside this processor * @param tag The tag for the processor * @param description A short description of what this processor does * @param config The configuration for the processor * * Note: Implementations are responsible for removing the used configuration keys, so that after */ Processor create(Map processorFactories, String tag, String description, Map config) throws Exception; } /** * Infrastructure class that holds services that can be used by processor factories to create processor instances * and that gets passed around to all {@link org.opensearch.plugins.IngestPlugin}s. */ class Parameters { /** * Useful to provide access to the node's environment like config directory to processor factories. */ public final Environment env; /** * Provides processors script support. */ public final ScriptService scriptService; /** * Provide analyzer support */ public final AnalysisRegistry analysisRegistry; /** * Allows processors to read headers set by {@link org.opensearch.action.support.ActionFilter} * instances that have run prior to in ingest. 
*/ public final ThreadContext threadContext; public final LongSupplier relativeTimeSupplier; public final IngestService ingestService; public final Consumer genericExecutor; /** * Provides scheduler support */ public final BiFunction scheduler; /** * Provides access to the node client */ public final Client client; public final IndicesService indicesService; public Parameters( Environment env, ScriptService scriptService, AnalysisRegistry analysisRegistry, ThreadContext threadContext, LongSupplier relativeTimeSupplier, BiFunction scheduler, IngestService ingestService, Client client, Consumer genericExecutor, IndicesService indicesService ) { this.env = env; this.scriptService = scriptService; this.threadContext = threadContext; this.analysisRegistry = analysisRegistry; this.relativeTimeSupplier = relativeTimeSupplier; this.scheduler = scheduler; this.ingestService = ingestService; this.client = client; this.genericExecutor = genericExecutor; this.indicesService = indicesService; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy