All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.nifi.processor.ProcessSession Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processor;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Path;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import org.apache.nifi.components.state.Scope;
import org.apache.nifi.components.state.StateMap;
import org.apache.nifi.controller.queue.QueueSize;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.exception.FlowFileAccessException;
import org.apache.nifi.processor.exception.FlowFileHandlingException;
import org.apache.nifi.processor.exception.MissingFlowFileException;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.io.InputStreamCallback;
import org.apache.nifi.processor.io.OutputStreamCallback;
import org.apache.nifi.processor.io.StreamCallback;
import org.apache.nifi.provenance.ProvenanceReporter;
import org.apache.nifi.provenance.ProvenanceEventType;

/**
 * A process session encompasses all the behaviors a processor can perform to
 * obtain, clone, read, modify, and remove {@link FlowFile}s in an atomic unit.
 * A process session is always tied to a single {@link Processor} at any one time
 * and ensures no FlowFile can ever be accessed by any more than one processor at a given time.
 * The session also ensures that all FlowFiles are always accounted for.
 * The creator of a process session is always required to manage the session.
 * 

 * <p>
 * A session is not considered thread safe.
 * The session supports a unit of work that is either committed or rolled back.
 * </p>

 * <p>
 * As noted on specific methods and for specific exceptions
 * automated rollback will occur to ensure consistency of the repository.
 * However, several situations can result in exceptions yet not cause automated rollback.
 * In these cases the consistency of the repository will be retained
 * but callers will be able to indicate whether it should result in rollback or continue on toward a commit.
 * </p>

 * <p>
 * A process session has two types of 'terminal' methods that will result in the session being in a 'fresh' state,
 * containing no knowledge of any FlowFile, as if the session were newly created.
 * After one of these methods is called, the instance may be used again.
 * The terminal methods for a process session are {@link #commitAsync()} and {@link #rollback()} (and their overloads).
 * </p>

* Additionally, the {@link #migrate(ProcessSession, Collection)} method transfers all knowledge of the provided FlowFiles * to the other process session, as if the FlowFiles never existed in this process session. * Note, however, that even if all FlowFiles are migrated via the {@link #migrate(ProcessSession, Collection)} method, * the session is not necessarily entirely cleared, as it still may have knowledge of counter adjustments or state changes, * e.g. see methods {@link #adjustCounter(String, long, boolean)} or {@link #setState(Map, Scope)}. * A commit or rollback will clear these changes as well. */ public interface ProcessSession { /** * Commits the current session ensuring all operations against {@link FlowFile}s within this session are atomically persisted. * All FlowFiles operated on within this session must be accounted for by transfer or removal or the commit will fail. *

* As soon as the commit completes the session is again ready to be used. *

* See {@link #commitAsync()}, {@link #commitAsync(Runnable)}, and {@link #commitAsync(Runnable, Consumer)}. * The asynchronous session commit methods are preferable to this commit call for most cases * as it defers when the actual commit happens to the framework so that it can optimize when the commit call takes place. * * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}). * @throws FlowFileHandlingException if not all {@link FlowFile}s acted upon within this session are accounted for * such that they have a transfer identified or where marked for removal. Automated rollback occurs. * @throws ProcessException if some general fault occurs while persisting the session. * Initiates automatic rollback. The root cause can be obtained via {@link Exception#getCause}. */ void commit(); /** * Commits the current session ensuring all operations against {@link FlowFile}s within this session are atomically persisted. * All FlowFiles operated on within this session must be accounted for by transfer or removal or the commit will fail. *

* Unlike the {@link #commit()} method, the persistence of data to the repositories * is not guaranteed to have occurred by the time that this method returns. * Therefore, if any follow-on actions are necessary after the data has been persisted to the repository * (for example, acknowledging receipt from a source system, removing a source file, etc.) that logic * should be performed only by invoking {@link #commitAsync(Runnable)} or {@link #commitAsync(Runnable, Consumer)} * and implementing that action in the provided callback. *

     * <p>
     * As a result, the following very common idiom:
     * </p>
     * <pre><code>
     * getDataFromSource();
     * session.commit();
     * acknowledgeReceiptOfData();
     * </code></pre>
     * <p>
     * Cannot be simply changed to:
     * </p>
     * <pre><code>
     * getDataFromSource();
     * session.commitAsync();
     * acknowledgeReceiptOfData();
     * </code></pre>
     * <p>
     * Doing so could result in acknowledging receipt of data from the source system before data has been committed to the repositories.
     * If NiFi were to then be restarted, there is potential for data loss.
     * Rather, the following idiom should take its place to ensure that there is no data loss:
     * </p>
     * <pre><code>
     * getDataFromSource();
     * session.commitAsync( () -&gt; acknowledgeReceiptOfData() );
     * </code></pre>

* If the session cannot be committed, an error will be logged and the session will be rolled back instead. * * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}). * @throws FlowFileHandlingException if not all {@link FlowFile}s acted upon within this session are accounted for * such that they have a transfer identified or where marked for removal. Automated rollback occurs. */ void commitAsync(); /** * Commits the current session ensuring all operations against {@link FlowFile}s within this session are atomically persisted. * All FlowFiles operated on within this session must be accounted for by transfer or removal or the commit will fail. *

* If the session is successfully committed, the given {@code onSuccess} {@link Runnable} will be called. * At the point that the session commit is completed, any calls to {@link #rollback()} / {@link #rollback(boolean)} * will not undo that session commit but instead roll back any changes that may have occurred since. *

* If, for any reason, the session could not be committed, an error-level log message will be generated, * but the caller will not have a chance to perform any cleanup logic. * If such logic is necessary, use {@link #commitAsync(Runnable, Consumer)} instead. *

* Unlike the {@link #commit()} method, the persistence of data to the repositories * is not guaranteed to have occurred by the time that this method returns. * * @param onSuccess {@link Runnable} that will be called if and when the session is successfully committed; may be null * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}). * @throws FlowFileHandlingException if not all {@link FlowFile}s acted upon within this session are accounted for * such that they have a transfer identified or where marked for removal. Automated rollback occurs. */ default void commitAsync(Runnable onSuccess) { commitAsync(onSuccess, null); } /** * Commits the current session ensuring all operations against FlowFiles within this session are atomically persisted. * All FlowFiles operated on within this session must be accounted for by transfer or removal or the commit will fail. *

* If the session is successfully committed, the given {@code onSuccess} {@link Runnable} will be called. * At the point that the session commit is completed, any calls to {@link #rollback()} / {@link #rollback(boolean)} * will not undo that session commit but instead roll back any changes that may have occurred since. *

* If, for any reason, the session could not be committed, the given {@code onFailure} {@link Consumer} will be called * instead of the {@code onSuccess} {@link Runnable}. * The Consumer will be provided the Throwable that prevented the session commit from completing. *

* Unlike the {@link #commit()} method, the persistence of data to the repositories * is not guaranteed to have occurred by the time that this method returns. * * @param onSuccess {@link Runnable} that will be called if and when the session is successfully committed; may be null * @param onFailure {@link Consumer} that will be called if, for any reason, the session could not be committed; may be null * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}). * @throws FlowFileHandlingException if not all {@link FlowFile}s acted upon within this session are accounted for * such that they have a transfer identified or where marked for removal. Automated rollback occurs. */ void commitAsync(Runnable onSuccess, Consumer onFailure); /** * Reverts any changes made during this session. * All {@link FlowFile}s are restored back to their initial session state and back to their original queues. * If no changes were made since this session was last committed or rolled back, then this method has no effect. * This method can be called any number of times. * Calling this method is identical to calling {@link #rollback(boolean)} passing {@code false} as the parameter. */ void rollback(); /** * Reverts any changes made during this session. * All {@link FlowFile}s are restored back to their initial session state and back to their original queues, * after optionally being penalized. * If no changes were made since this session was last committed or rolled back, then this method has no effect. * This method can be called any number of times. 
* * @param penalize whether the {@link FlowFile}s that are being restored back to their queues should be penalized */ void rollback(boolean penalize); /** * Migrates ownership of the given {@code flowFiles} {@link FlowFile}s from {@code this} session to the given {@code newOwner} {@link ProcessSession}. *

* Note, that for any provided FlowFile, if the FlowFile has any child (e.g., by calling {@link #create(FlowFile)} * and passing the FlowFile as the argument), then all children that were created must also be in the Collection of provided FlowFiles. *

* Also note, that if any FlowFile given is not the most up-to-date version of that FlowFile, * then the most up-to-date version of the FlowFile will be migrated to the new owner. * For example, if a call to {@link #putAttribute(FlowFile, String, String)} is made, * passing {@code flowFile1} as the FlowFile, and then {@code flowFile1} is passed to this method, * then the newest version (including the newly added attribute) will be migrated, * not the outdated version of the FlowFile that {@code flowFile1} points to. * * @param newOwner the {@link ProcessSession} that is to become the new owner of the given {@link FlowFile}s * @param flowFiles the {@link FlowFile}s to migrate * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for any of the affected {@link FlowFile}s */ void migrate(ProcessSession newOwner, Collection flowFiles); /** * Migrates ownership of all {@link FlowFile}s from {@code this} session to the given {@code newOwner} {@link ProcessSession}. * Calling this method is identical to calling {@link #migrate(ProcessSession, Collection)} * passing all FlowFiles owned by this session as the parameter, * this encompasses both FlowFiles retrieved from the work queue and newly created or cloned ones. 
* * @param newOwner the {@link ProcessSession} that is to become the new owner of all {@link FlowFile}s * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for any of the affected {@link FlowFile}s */ void migrate(ProcessSession newOwner); /** * Adjusts counter data for the given counter name and takes care of registering the counter if not already present. * The adjustment occurs only if and when the process session is committed. * * @param name the name of the counter * @param delta the delta by which to modify the counter (+ or -) * @param immediate if true, the counter will be updated immediately, without regard to whether the session is committed or rolled back; * otherwise, the counter will be incremented only if and when the session is committed. */ void adjustCounter(String name, long delta, boolean immediate); /** * Returns the {@link FlowFile} from the work queue that is next highest priority to process. * If no FlowFiles are available, returns {@code null}. * * @return the {@link FlowFile} from the work queue that is next highest priority to process or {@code null}, if none available */ FlowFile get(); /** * Returns the next up to {@code maxResults} {@link FlowFile}s from the work queue that are the highest priority to process. * If no FlowFiles are available, returns an empty list. Will not return {@code null}. *

* If multiple incoming queues are present, the behavior is unspecified in terms of * whether all queues or only a single queue will be polled in a single call. * * @param maxResults the maximum number of {@link FlowFile}s to return * @return up to {@code maxResults} {@link FlowFile}s from the work queue * @throws IllegalArgumentException if {@code maxResults} is less than 0 */ List get(int maxResults); /** * Returns all {@link FlowFile}s from all the incoming queues which the given {@link FlowFileFilter} accepts. *

* Calls to this method provide exclusive access to the underlying queues. * That is, no other thread will be permitted to pull FlowFiles from or add FlowFiles * to this {@link Processor}'s incoming queues until this method call has returned. * * @param filter a {@link FlowFileFilter} to limit which {@link FlowFile}s are returned * @return all {@link FlowFile}s from all the incoming queues which the given {@link FlowFileFilter} {@code filter} accepts. */ List get(FlowFileFilter filter); /** * Returns the {@link QueueSize} that represents the number of {@link FlowFile}s and their combined data size * for all FlowFiles waiting to be processed by the {@link Processor} that owns {@code this} {@link ProcessSession}, * regardless of which connection the FlowFiles live on. * * @return the number of {@link FlowFile}s and their combined data size in the work queue */ QueueSize getQueueSize(); /** * Creates a new {@link FlowFile} in the repository with no content and without any linkage to a parent FlowFile. *

* This method is appropriate only when data is received or created from an external system. * Otherwise, this method should be avoided and instead {@link #create(FlowFile)} or {@link #create(Collection)} be used. *

* When this method is used, a {@link ProvenanceEventType#CREATE} or {@link ProvenanceEventType#RECEIVE} event should be generated. * See the {@link #getProvenanceReporter()} method and {@link ProvenanceReporter} class for more information. * * @return newly created FlowFile */ FlowFile create(); /** * Creates a new {@link FlowFile} in the repository with no content but with a parent linkage to the {@code parent}. * The newly created FlowFile will inherit all the parent's attributes, except for the UUID. *

* This method will automatically generate a {@link ProvenanceEventType#FORK} or a {@link ProvenanceEventType#JOIN} event, * depending on whether other FlowFiles are generated from the same parent before the session is committed. * * @param parent to base the new {@link FlowFile} on, inheriting attributes from * @return newly created {@link FlowFile} */ FlowFile create(FlowFile parent); /** * Creates a new {@link FlowFile} in the repository with no content but with a parent linkage to all {@code parents}. * The newly created FlowFile will inherit all the attributes that are in common to all parents, except for the UUID. *

* This method will automatically generate a {@link ProvenanceEventType#JOIN} event. * * @param parents to base the new {@link FlowFile} on, inheriting shared attributes from * @return newly created {@link FlowFile} */ FlowFile create(Collection parents); /** * Creates a new {@link FlowFile} with a parent linkage to the {@code example} FlowFile. * It is a clone of the given FlowFile as of the time this is called, both in attributes and content. *

* This method will automatically generate a {@link ProvenanceEventType#CLONE} event. * * @param example {@link FlowFile} to be the source of cloning - given FlowFile must be a part of the given session * @return {@link FlowFile} that is a clone of the given {@code example} FlowFile * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code example} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found. * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back. * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content */ FlowFile clone(FlowFile example); /** * Creates a new {@link FlowFile} with a parent linkage to the {@code parent} FlowFile. * It is a clone of the given FlowFile as of the time this is called, both in attributes and a subset of the content. * The content of the new FlowFile will be a subset of the byte sequence of the given FlowFile, * starting at the specified offset and with the length specified. *

* This method will automatically generate a {@link ProvenanceEventType#FORK} or a {@link ProvenanceEventType#CLONE} event, * if the offset is 0 and the size is exactly equal to the size of the example FlowFile. * * @param parent {@link FlowFile} to be the source of cloning - given FlowFile must be a part of the given session * @param offset of the parent {@link FlowFile}'s content to base the cloned FlowFile's content on * @param size in bytes of the parent {@link FlowFile}'s content to clone starting from the {@code offset} * @return {@link FlowFile} that is a partial clone of the given {@code parent} FlowFile whose content has the specified {@code size} * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code parent} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Or if the specified {@code offset} + {@code size} exceeds the size of the {@code parent} FlowFile's content. * Automatic rollback will occur. * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found. * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back. * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content */ FlowFile clone(FlowFile parent, long offset, long size); /** * Sets a penalty for the given {@link FlowFile}, * which will make it unavailable to be operated on any further during the penalty period. 
* * @param flowFile to penalize * @return the penalized {@link FlowFile} * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. */ FlowFile penalize(FlowFile flowFile); /** * Updates the given {@link FlowFile}'s attributes with the given {@code key} / {@code value} pair. *

* If the {@code key} is named {@code uuid}, this attribute will be ignored. * * @param flowFile to update * @param key of attribute to add or modify * @param value of attribute to add or modify * @return the updated {@link FlowFile} with the attribute added or modified * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. */ FlowFile putAttribute(FlowFile flowFile, String key, String value); /** * Updates the given {@link FlowFile}'s attributes with the given {@code key} / {@code value} pairs. *

* If the map contains a key named {@code uuid}, this attribute will be ignored. * * @param flowFile to update * @param attributes the attributes to add or modify * @return the updated {@link FlowFile} with the attributes added or modified * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. */ FlowFile putAllAttributes(FlowFile flowFile, Map attributes); /** * Removes the attribute with the given {@code key} from the given {@link FlowFile}. *

* The attributes with the keys {@code uuid}, {@code path}, and {@code filename} will not be removed. * If the {@code key} is one of those, this method will return the same FlowFile without removing any attribute. * * @param flowFile to update * @param key of attribute to remove * @return the updated {@link FlowFile} with the matching attribute removed * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. */ FlowFile removeAttribute(FlowFile flowFile, String key); /** * Removes the attributes with the given {@code keys} from the given {@link FlowFile}. *

* The attributes with the keys {@code uuid}, {@code path}, and {@code filename} will not be removed. * * @param flowFile to update * @param keys of attributes to remove * @return the updated {@link FlowFile} with the matching attributes removed * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. */ FlowFile removeAllAttributes(FlowFile flowFile, Set keys); /** * Removes all attributes from the given {@link FlowFile} whose key matches the given pattern. *

* The attributes with the keys {@code uuid}, {@code path}, and {@code filename} will not be removed. * * @param flowFile to update * @param keyPattern pattern to match each {@link FlowFile} attribute against; may be null, in which case no attribute is removed * @return the updated {@link FlowFile} with the matching attributes removed * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. */ FlowFile removeAllAttributes(FlowFile flowFile, Pattern keyPattern); /** * Transfers the given {@link FlowFile} back to the work queue from which it was pulled. *

* The processor will not be able to operate on the given FlowFile until this session is committed. * Any modifications that have been made to the FlowFile will be maintained. * FlowFiles that are created by the processor cannot be transferred back to themselves via this method. * * @param flowFile to transfer * @throws IllegalStateException if detected that this method is being called from within a read or write callback * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)}, * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile} * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session. * Automatic rollback will occur. * @throws IllegalArgumentException if the given {@link FlowFile} was created by this processor */ void transfer(FlowFile flowFile); /** * Transfers the given {@link FlowFile}s back to the work queues from which the FlowFiles were pulled. *

* The processor will not be able to operate on the given FlowFiles until this session is committed.
     * Any modifications that have been made to the FlowFiles will be maintained.
     * FlowFiles that are created by the processor cannot be transferred back to themselves via this method.
     *
     * @param flowFiles to transfer
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile}
     * @throws FlowFileHandlingException if any of the given {@link FlowFile}s is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws IllegalArgumentException if any of the given {@link FlowFile}s was created by this processor
     */
    // NOTE(review): raw Collection; the Javadoc implies the elements are FlowFiles - confirm the intended type argument.
    void transfer(Collection flowFiles);

    /**
     * Transfers the given {@link FlowFile} to the appropriate destination processor work queue(s) based on the given {@code relationship}.
     * <p>
     * If the relationship leads to more than one destination the state of the FlowFile is replicated
     * such that each destination receives an exact copy of the FlowFile though each will have its own unique identity.
     * The destination processors will not be able to operate on the given FlowFile until this session is committed or
     * until the ownership of the session is migrated to another processor.
     * If ownership of the session is passed to a destination processor then that destination processor will have immediate visibility
     * of the transferred FlowFiles within the session.
     *
     * @param flowFile to transfer
     * @param relationship to transfer to
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the {@code flowFile} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws IllegalArgumentException if the given relationship is not a known or registered relationship
     */
    void transfer(FlowFile flowFile, Relationship relationship);

    /**
     * Transfers the given {@link FlowFile}s to the appropriate destination processor work queue(s) based on the given {@code relationship}.
     * <p>
     * If the relationship leads to more than one destination the state of each FlowFile is replicated
     * such that each destination receives an exact copy of the FlowFile though each will have its own unique identity.
     * The destination processors will not be able to operate on the given FlowFiles until this session is committed or
     * until the ownership of the session is migrated to another processor.
     * If ownership of the session is passed to a destination processor then that destination processor will have immediate visibility
     * of the transferred FlowFiles within the session.
     *
     * @param flowFiles to transfer
     * @param relationship to transfer to
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for any of the {@code flowFiles} {@link FlowFile}s
     * @throws FlowFileHandlingException if any of the given {@link FlowFile}s is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws IllegalArgumentException if the given relationship is not a known or registered relationship
     */
    // NOTE(review): raw Collection; the Javadoc implies the elements are FlowFiles - confirm the intended type argument.
    void transfer(Collection flowFiles, Relationship relationship);

    /**
     * Ends the managed persistence for the given {@link FlowFile}.
     * <p>
     * The persistent attributes for the FlowFile are deleted and so is the content assuming nothing else references it.
     * This FlowFile will no longer be available for further operation.
     *
     * @param flowFile to remove
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code flowFile} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     */
    void remove(FlowFile flowFile);

    /**
     * Ends the managed persistence for the given {@link FlowFile}s.
     *

* The persistent attributes for the FlowFiles are deleted and so is the content assuming nothing else references it.
     * The FlowFiles will no longer be available for further operation.
     *
     * @param flowFiles to remove
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for any of the given {@code flowFiles} {@link FlowFile}s
     * @throws FlowFileHandlingException if any of the given {@link FlowFile}s is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     */
    // NOTE(review): raw Collection; the Javadoc implies the elements are FlowFiles - confirm the intended type argument.
    void remove(Collection flowFiles);

    /**
     * Executes the given {@code reader} {@link InputStreamCallback} against the content of the given {@link FlowFile}.
     *
     * @param source the {@link FlowFile} to retrieve the content from
     * @param reader {@link InputStreamCallback} that will be called to read the {@link FlowFile} content
     * @throws IllegalStateException if detected that this method is being called from within a write callback
     * (see {@link #write(FlowFile, StreamCallback)}, {@link #write(FlowFile, OutputStreamCallback)})
     * or while a write stream is open (see {@link #write(FlowFile)}) for the given {@code source} {@link FlowFile}.
     * Said another way, it is not permissible to call this method while writing to the same FlowFile.
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * if an attempt is made to access the {@link InputStream} provided to the given {@link InputStreamCallback}
     * after this method completed its execution
     */
    void read(FlowFile source, InputStreamCallback reader) throws FlowFileAccessException;

    /**
     * Provides an {@link InputStream} that can be used to read the content of the given {@link FlowFile}.
     * <p>
     * This method differs from those that make use of callbacks in that this method returns an InputStream and expects the caller
     * to properly handle the lifecycle of the InputStream (i.e., the caller is responsible for ensuring that the InputStream is closed appropriately).
     * The session may or may not handle closing the stream when the session is committed or rolled back,
     * but the responsibility of doing so belongs to the caller.
     *
     * @param flowFile the {@link FlowFile} to retrieve the content from
     * @return an {@link InputStream} that can be used to read the content of the {@link FlowFile}
     * @throws IllegalStateException if detected that this method is being called from within a write callback
     * (see {@link #write(FlowFile, StreamCallback)}, {@link #write(FlowFile, OutputStreamCallback)})
     * or while a write stream is open (see {@link #write(FlowFile)}) for the given {@code flowFile} {@link FlowFile}.
     * Said another way, it is not permissible to call this method while writing to the same FlowFile.
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * if an attempt is made to read from the stream after the session is committed or rolled back.
     */
    InputStream read(FlowFile flowFile);

    /**
     * Combines the content of all given {@code sources} {@link FlowFile}s into a single given destination FlowFile.
     *
     * @param sources the {@link FlowFile}s whose content to merge
     * @param destination the {@link FlowFile} to use as the merged result
     * @return the updated {@code destination} {@link FlowFile} with changed content
     * @throws IllegalArgumentException if the given destination is contained within the sources
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for any of the given {@code sources} and {@code destination} {@link FlowFile}s
     * @throws FlowFileHandlingException if any of the given {@link FlowFile}s is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if any of the given {@link FlowFile}'s content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * the state of the {@code destination} {@link FlowFile} will be as it was prior to this call.
     */
    // NOTE(review): raw Collection; the Javadoc implies the elements are FlowFiles - confirm the intended type argument.
    FlowFile merge(Collection sources, FlowFile destination);

    /**
     * Combines the content of all given {@code sources} {@link FlowFile}s into a single given destination FlowFile.
*
     * @param sources the {@link FlowFile}s whose content to merge
     * @param destination the {@link FlowFile} to use as the merged result
     * @param header bytes that will be added to the beginning of the merged output; may be null or empty
     * @param footer bytes that will be added to the end of the merged output; may be null or empty
     * @param demarcator bytes that will be placed in between each object merged together; may be null or empty
     * @return the updated {@code destination} {@link FlowFile} with changed content
     * @throws IllegalArgumentException if the given destination is contained within the sources
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for any of the given {@code sources} and {@code destination} {@link FlowFile}s
     * @throws FlowFileHandlingException if any of the given {@link FlowFile}s is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if any of the given {@link FlowFile}'s content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * the state of the {@code destination} {@link FlowFile} will be as it was prior to this call.
     */
    // NOTE(review): raw Collection; the Javadoc implies the elements are FlowFiles - confirm the intended type argument.
    FlowFile merge(Collection sources, FlowFile destination, byte[] header, byte[] footer, byte[] demarcator);

    /**
     * Executes the given {@code writer} {@link OutputStreamCallback} against the content of the given {@link FlowFile}.
     *
     * @param source the {@link FlowFile} to write the content of
     * @param writer {@link OutputStreamCallback} that will be called to write the {@link FlowFile} content
     * @return the updated {@code source} {@link FlowFile} with changed content
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code source} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * if an attempt is made to access the {@link OutputStream} provided to the given {@link OutputStreamCallback}
     * after this method completed its execution
     */
    FlowFile write(FlowFile source, OutputStreamCallback writer) throws FlowFileAccessException;

    /**
     * Provides an {@link OutputStream} that can be used to write the content of the given {@link FlowFile}.
     * <p>
     * This method differs from those that make use of callbacks in that this method returns an OutputStream and expects the caller
     * to properly handle the lifecycle of the OutputStream (i.e., the caller is responsible for ensuring that the OutputStream is closed appropriately).
     * The session may or may not handle closing the stream when the session is committed or rolled back,
     * but the responsibility of doing so belongs to the caller.
     *
     * @param source the {@link FlowFile} to write the content of
     * @return an {@link OutputStream} that can be used to write the content of the {@link FlowFile}
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code source} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * if an attempt is made to write to the stream after the session is committed or rolled back.
     */
    OutputStream write(FlowFile source);

    /**
     * Executes the given {@code writer} {@link StreamCallback} against the content of the given {@link FlowFile}.
     *
     * @param source the {@link FlowFile} to read and write the content of
     * @param writer {@link StreamCallback} that will be called to read and write the {@link FlowFile} content
     * @return the updated {@code source} {@link FlowFile} with changed content
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code source} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * if an attempt is made to access the {@link InputStream} or {@link OutputStream}
     * provided to the given {@link StreamCallback} after this method completed its execution
     */
    FlowFile write(FlowFile source, StreamCallback writer) throws FlowFileAccessException;

    /**
     * Executes the given {@code writer} {@link OutputStreamCallback} against the content of the given {@link FlowFile},
     * such that any data written to the OutputStream will be appended to the end of FlowFile's content.
     *
     * @param source the {@link FlowFile} to extend the content of
     * @param writer {@link OutputStreamCallback} that will be called to append the {@link FlowFile}'s content
     * @return the updated {@code source} {@link FlowFile} with changed content
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code source} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content;
     * if an attempt is made to access the {@link OutputStream} provided to the given {@link OutputStreamCallback}
     * after this method completed its execution
     */
    FlowFile append(FlowFile source, OutputStreamCallback writer) throws FlowFileAccessException;

    /**
     * Writes the contents of the file at the {@code source} {@link Path} to the given {@link FlowFile}'s content.
     *
     * @param source the {@link Path} to the file from which content will be obtained
     * @param keepSourceFile if true the content is simply copied;
     * if false the original content might be used in a destructive way for efficiency,
     * such that the repository will have the data but the original data will be gone.
     * If false the source object will be removed or gone once imported.
     * It will not be restored if the session is rolled back so this must be used with caution.
     * In some cases it can result in tremendous efficiency gains but is also dangerous.
     * @param destination the {@link FlowFile} whose content will be updated
     * @return the updated {@code destination} {@link FlowFile} with changed content
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code destination} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content
     */
    FlowFile importFrom(Path source, boolean keepSourceFile, FlowFile destination);

    /**
     * Writes the contents of the {@code source} {@link InputStream} to the given {@link FlowFile}'s content.
     *
     * @param source the {@link InputStream} from which content will be obtained
     * @param destination the {@link FlowFile} whose content will be updated
     * @return the updated {@code destination} {@link FlowFile} with changed content
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code destination} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content
     */
    FlowFile importFrom(InputStream source, FlowFile destination);

    /**
     * Writes the content of the given {@link FlowFile} to the file at the given {@code destination} {@link Path}.
*
     * @param flowFile the {@link FlowFile} to export the content of
     * @param destination the {@link Path} to a file to export the {@link FlowFile}'s content to
     * @param append if true will append to the current content of the file at the given path;
     * if false will replace any current content
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code flowFile} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content
     */
    void exportTo(FlowFile flowFile, Path destination, boolean append);

    /**
     * Writes the content of the given {@link FlowFile} to the given {@code destination} {@link OutputStream}.
     *
     * @param flowFile the {@link FlowFile} to export the content of
     * @param destination the {@link OutputStream} to export the {@link FlowFile}'s content to
     * @throws IllegalStateException if detected that this method is being called from within a read or write callback
     * (see {@link #read(FlowFile, InputStreamCallback)}, {@link #write(FlowFile, StreamCallback)},
     * {@link #write(FlowFile, OutputStreamCallback)}) or while a read or write stream is open
     * (see {@link #read(FlowFile)}, {@link #write(FlowFile)}) for the given {@code flowFile} {@link FlowFile}
     * @throws FlowFileHandlingException if the given {@link FlowFile} is already transferred or removed or doesn't belong to this session.
     * Automatic rollback will occur.
     * @throws MissingFlowFileException if the given {@link FlowFile} content cannot be found.
     * The FlowFile should no longer be referenced, will be internally destroyed. The session is automatically rolled back.
     * @throws FlowFileAccessException if some IO problem occurs accessing {@link FlowFile} content
     */
    void exportTo(FlowFile flowFile, OutputStream destination);

    /**
     * Returns the {@link ProvenanceReporter} that is tied to {@code this} {@link ProcessSession}.
     *
     * @return the {@link ProvenanceReporter} that is tied to {@code this} {@link ProcessSession}
     */
    ProvenanceReporter getProvenanceReporter();

    /**
     * Updates the value of the component's state, setting it to given value.
     *

* This method does not update the remote State Provider immediately but rather caches the value until the session is committed.
     * At that point, it will publish the state to the remote State Provider, if the state is the latest according to the remote State Provider.
     *
     * @param state the value to change the state to
     * @param scope the {@link Scope} to use when storing the state
     * @throws IOException if unable to communicate with the underlying storage mechanism
     */
    // NOTE(review): raw Map; the key/value types are not visible here - confirm the intended type arguments.
    void setState(Map state, Scope scope) throws IOException;

    /**
     * Returns the current state for the component.
     * <p>
     * This return value will never be {@code null}.
     * If the state has not yet been set, the StateMap's version will be -1, and the map of values will be empty.
     *
     * @param scope the {@link Scope} to use when fetching the state
     * @return the current state for the component
     * @throws IOException if unable to communicate with the underlying storage mechanism
     */
    StateMap getState(Scope scope) throws IOException;

    /**
     * Updates the value of the component's state, setting it to the given {@code newValue},
     * if and only if the current value is the same as the given {@code oldValue}.
     * <p>
     * The oldValue will be compared against the value of the state as it is known to {@code this} {@link ProcessSession}.
     * If the Process Session does not currently know the state, it will be fetched from the StateProvider.
     * <p>
     * This method does not update the remote State Provider immediately but rather caches the value until the session is committed.
     * At that point, it will publish the state to the remote State Provider, if the state is the latest according to the remote State Provider.
     *
     * @param oldValue the value to compare the state's current value against
     * @param newValue the new value to use if and only if the state's current value is the same as the given {@code oldValue}
     * @param scope the {@link Scope} to use for fetching the current and storing the new state
     * @return {@code true} if the state was updated to the {@code newValue},
     * {@code false} if the state's current value was not equal to {@code oldValue}
     * @throws IOException if unable to communicate with the underlying storage mechanism
     */
    // NOTE(review): raw Map for newValue; the key/value types are not visible here - confirm the intended type arguments.
    boolean replaceState(StateMap oldValue, Map newValue, Scope scope) throws IOException;

    /**
     * Clears all keys and values from the component's state.
     * <p>
     * This method does not update the remote State Provider immediately but rather caches the value until the session is committed.
     * At that point, it will publish the state to the remote State Provider, if the state is the latest according to the remote State Provider.
     *
     * @param scope the {@link Scope} to use for clearing the state
     * @throws IOException if unable to communicate with the underlying storage mechanism.
     */
    void clearState(Scope scope) throws IOException;
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy