All downloads are free. The search and download features use the official Maven repository.

com.hazelcast.jet.core.AbstractProcessor Maven / Gradle / Ivy

There is a newer version: 5.5.0
Show newest version
/*
 * Copyright (c) 2008-2023, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.core;

import com.hazelcast.jet.Traverser;
import com.hazelcast.jet.Traversers;
import com.hazelcast.logging.ILogger;

import javax.annotation.CheckReturnValue;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.Map.Entry;
import java.util.function.Function;

import static com.hazelcast.internal.util.ExceptionUtil.sneakyThrow;


/**
 * Base class to implement custom processors. Simplifies the contract of
 * {@code Processor} with several levels of convenience:
 * 
  1. * {@link Processor#init(Outbox, Context)} retains the supplied outbox * and the logger retrieved from the context. *
  2. * {@link #process(int, Inbox) process(n, inbox)} delegates to the matching * {@code tryProcessN()} with each item received in the inbox. *
  3. * There is also the general {@link #tryProcess(int, Object)} to which * the {@code tryProcessN} methods delegate by default. It is convenient * to override it when the processor doesn't care which edge an item * originates from. Another convenient idiom is to override {@code * tryProcessN()} for one or two specially treated edges and override * {@link #tryProcess(int, Object)} to process the rest of the edges, which * are treated uniformly. *
  4. * The {@code tryEmit(...)} methods avoid the need to deal with {@code Outbox} * directly. *
  5. * The {@code emitFromTraverser(...)} methods handle the boilerplate of * emission from a traverser. They are especially useful in the * {@link #complete()} step when there is a collection of items to emit. * The {@link Traversers} class contains traversers tailored to simplify * the implementation of {@code complete()}. *
  6. * The {@link FlatMapper FlatMapper} class additionally simplifies the * usage of {@code emitFromTraverser()} inside {@code tryProcess()}, in * a scenario where an input item results in a collection of output * items. {@code FlatMapper} is obtained from one of the factory methods * {@link #flatMapper(Function) flatMapper(...)}. *
* * @since Jet 3.0 */ public abstract class AbstractProcessor implements Processor { private ILogger logger; private Outbox outbox; private Object pendingItem; private Entry pendingSnapshotItem; // final implementations of the Processor API @Override public final void init(@Nonnull Outbox outbox, @Nonnull Context context) throws Exception { this.outbox = outbox; this.logger = context.logger(); init(context); } /** * Implements the boilerplate of polling the inbox, casting the items to * {@code Map.Entry}, and extracting the key and value. Forwards each * key-value pair to {@link #restoreFromSnapshot(Object, Object)}. */ @Override public final void restoreFromSnapshot(@Nonnull Inbox inbox) { for (Entry entry; (entry = (Entry) inbox.poll()) != null; ) { restoreFromSnapshot(entry.getKey(), entry.getValue()); } } /** * Implements the boilerplate of dispatching against the ordinal, * taking items from the inbox one by one, and invoking the * processing logic on each. */ @Override @SuppressWarnings("checkstyle:magicnumber") public void process(int ordinal, @Nonnull Inbox inbox) { try { switch (ordinal) { case 0: process0(inbox); break; case 1: process1(inbox); break; case 2: process2(inbox); break; case 3: process3(inbox); break; case 4: process4(inbox); break; default: processAny(ordinal, inbox); } } catch (Exception e) { throw sneakyThrow(e); } } // Callback methods designed to be overridden by subclasses /** * Method that can be overridden to perform any necessary initialization * for the processor. It is called exactly once and strictly before any of * the processing methods ({@link #process(int, Inbox) process()} and * {@link #complete()}), but after the outbox and {@link #getLogger() * logger} have been initialized. *

* Subclasses are not required to call this superclass method, it does * nothing. * * @param context the {@link Processor.Context context} associated with this * processor */ protected void init(@Nonnull Context context) throws Exception { } /** * Tries to process the supplied input item, which was received from the * edge with the supplied ordinal. May choose to process only partially * and return {@code false}, in which case it will be called again later * with the same {@code (ordinal, item)} combination before any other * processing method is called. *

* The default implementation throws an {@code UnsupportedOperationException}. *

* NOTE: unless the processor doesn't differentiate between * its inbound edges, the first choice should be leaving this method alone * and instead overriding the specific {@code tryProcessN()} methods for * each ordinal the processor expects. * * @param ordinal ordinal of the edge that delivered the item * @param item item to be processed * @return {@code true} if this item has now been processed, * {@code false} otherwise. */ protected boolean tryProcess(int ordinal, @Nonnull Object item) throws Exception { throw new UnsupportedOperationException("Missing implementation in " + getClass()); } /** * Tries to process the supplied input item, which was received from the * edge with ordinal 0. May choose to process only partially and return * {@code false}, in which case it will be called again later with the same * item before any other processing method is called. *

* The default implementation delegates to {@link #tryProcess(int, Object) * tryProcess(0, item)}. * * @param item item to be processed * @return {@code true} if this item has now been processed, * {@code false} otherwise. */ protected boolean tryProcess0(@Nonnull Object item) throws Exception { return tryProcess(0, item); } /** * Tries to process the supplied input item, which was received from the * edge with ordinal 1. May choose to process only partially and return * {@code false}, in which case it will be called again later with the same * item before any other processing method is called. *

* The default implementation delegates to {@link #tryProcess(int, Object) * tryProcess(1, item)}. * * @param item item to be processed * @return {@code true} if this item has now been processed, * {@code false} otherwise. */ protected boolean tryProcess1(@Nonnull Object item) throws Exception { return tryProcess(1, item); } /** * Tries to process the supplied input item, which was received from the * edge with ordinal 2. May choose to process only partially and return * {@code false}, in which case it will be called again later with the same * item before any other processing method is called. *

* The default implementation delegates to {@link #tryProcess(int, Object) * tryProcess(2, item)}. * * @param item item to be processed * @return {@code true} if this item has now been processed, * {@code false} otherwise. */ protected boolean tryProcess2(@Nonnull Object item) throws Exception { return tryProcess(2, item); } /** * Tries to process the supplied input item, which was received from the * edge with ordinal 3. May choose to process only partially and return * {@code false}, in which case it will be called again later with the same * item before any other processing method is called. *

* The default implementation delegates to {@link #tryProcess(int, Object) * tryProcess(3, item)}. * * @param item item to be processed * @return {@code true} if this item has now been processed, * {@code false} otherwise. */ protected boolean tryProcess3(@Nonnull Object item) throws Exception { return tryProcess(3, item); } /** * Tries to process the supplied input item, which was received from the * edge with ordinal 4. May choose to process only partially and return * {@code false}, in which case it will be called again later with the same * item before any other processing method is called. *

* The default implementation delegates to {@link #tryProcess(int, Object) * tryProcess(4, item)}. * * @param item item to be processed * @return {@code true} if this item has now been processed, * {@code false} otherwise. */ @SuppressWarnings("checkstyle:magicnumber") protected boolean tryProcess4(@Nonnull Object item) throws Exception { return tryProcess(4, item); } /** * Called to restore one key-value pair from the snapshot to processor's * internal state. *

* The default implementation throws an {@code * UnsupportedOperationException}, but it will not be called unless you * override {@link #saveToSnapshot()}. * * @param key key of the entry from the snapshot * @param value value of the entry from the snapshot */ protected void restoreFromSnapshot(@Nonnull Object key, @Nonnull Object value) { throw new UnsupportedOperationException("Missing implementation in " + getClass()); } /** * This basic implementation only forwards the passed watermark. */ @Override public boolean tryProcessWatermark(@Nonnull Watermark watermark) { return tryEmit(watermark); } // Convenience methods for subclasses, non-overridable /** * Returns the logger associated with this processor instance. */ protected final ILogger getLogger() { return logger; } protected final Outbox getOutbox() { return outbox; } /** * Offers the item to the outbox bucket at the supplied ordinal. *

* Emitted items should not be subsequently mutated because the same * instance might be used by a downstream processor in a different thread, * causing concurrent access. * * @return {@code true}, if the item was accepted. If {@code false} is * returned, the call must be retried later with the same (or equal) item. */ @CheckReturnValue protected final boolean tryEmit(int ordinal, @Nonnull Object item) { return outbox.offer(ordinal, item); } /** * Offers the item to all the outbox buckets (except the snapshot outbox). *

* Emitted items should not be subsequently mutated because the same * instance might be used by a downstream processor in a different thread, * causing concurrent access. * * @return {@code true}, if the item was accepted. If {@code false} is * returned, the call must be retried later with the same (or equal) item. */ @CheckReturnValue protected final boolean tryEmit(@Nonnull Object item) { return outbox.offer(item); } /** * Offers the item to the outbox buckets identified in the supplied array. *

* Emitted items should not be subsequently mutated because the same * instance might be used by a downstream processor in a different thread, * causing concurrent access. * * @return {@code true}, if the item was accepted. If {@code false} is * returned, the call must be retried later with the same (or equal) item. */ @CheckReturnValue protected final boolean tryEmit(@Nonnull int[] ordinals, @Nonnull Object item) { return outbox.offer(ordinals, item); } /** * Obtains items from the traverser and offers them to the outbox's buckets * identified in the supplied array. If the outbox refuses an item, it backs * off and returns {@code false}. *

* Emitted items should not be subsequently mutated because the same * instance might be used by a downstream processor in a different thread, * causing concurrent access. *

* If this method returns {@code false}, then the caller must retain the * traverser and pass it again in the subsequent invocation of this method, * so as to resume emitting where it left off. *

* For simplified usage from {@link #tryProcess(int, Object) * tryProcess(ordinal, item)} methods, see {@link FlatMapper}. * * @param ordinals ordinals of the target bucket * @param traverser traverser over items to emit * @return whether the traverser has been exhausted */ @SuppressWarnings("unchecked") protected final boolean emitFromTraverser(@Nonnull int[] ordinals, @Nonnull Traverser traverser) { E item; if (pendingItem != null) { item = (E) pendingItem; pendingItem = null; } else { item = traverser.next(); } for (; item != null; item = traverser.next()) { if (!tryEmit(ordinals, item)) { pendingItem = item; return false; } } return true; } /** * Obtains items from the traverser and offers them to the outbox's buckets * identified in the supplied array. If the outbox refuses an item, it backs * off and returns {@code false}. *

* Do not mutate the items you emit because the downstream processor may be * using them in a different thread, resulting in concurrent access. *

* If this method returns {@code false}, then you must retain the traverser * and pass it again in the subsequent invocation of this method, so as to * resume emitting where you left off. *

* For simplified usage in {@link #tryProcess(int, Object) * tryProcess(ordinal, item)} methods, see {@link FlatMapper}. * * @param ordinal ordinal of the target bucket * @param traverser traverser over items to emit * @return whether the traverser has been exhausted */ protected final boolean emitFromTraverser(int ordinal, @Nonnull Traverser traverser) { E item; if (pendingItem != null) { item = (E) pendingItem; pendingItem = null; } else { item = traverser.next(); } for (; item != null; item = traverser.next()) { if (!tryEmit(ordinal, item)) { pendingItem = item; return false; } } return true; } /** * Convenience for {@link #emitFromTraverser(int, Traverser)} * which emits to all ordinals. */ protected final boolean emitFromTraverser(@Nonnull Traverser traverser) { return emitFromTraverser(-1, traverser); } /** * Offers one key-value pair to the snapshot bucket. *

* The type of the offered key determines which processors receive the key * and value pair when it is restored. If the key is of type {@link * BroadcastKey}, the entry will be restored to all processor instances. * Otherwise, the key will be distributed according to default partitioning * and only a single processor instance will receive the key. *

* Keys and values offered to snapshot are serialized and can be further * mutated as soon as this method returns. * * @return {@code true}, if the item was accepted. If {@code false} is * returned, the call must be retried later with the same (or equal) key * and value. */ @CheckReturnValue protected final boolean tryEmitToSnapshot(@Nonnull Object key, @Nonnull Object value) { return outbox.offerToSnapshot(key, value); } /** * Obtains items from the traverser and offers them to the snapshot bucket * of the outbox. Each item is a {@code Map.Entry} and its key and value * are passed as the two arguments of {@link #tryEmitToSnapshot(Object, Object)}. * If the outbox refuses an item, it backs off and returns {@code false}. *

* Keys and values offered to snapshot are serialized and can be further * mutated as soon as this method returns. *

* If this method returns {@code false}, then the caller must retain the * traverser and pass it again in the subsequent invocation of this method, * so as to resume emitting where it left off. *

* The type of the offered key determines which processors receive the key * and value pair when it is restored. If the key is of type {@link * BroadcastKey}, the entry will be restored to all processor instances. * Otherwise, the key will be distributed according to default partitioning * and only a single processor instance will receive the key. * * @param traverser traverser over the items to emit to the snapshot * @return whether the traverser has been exhausted */ protected final > boolean emitFromTraverserToSnapshot(@Nonnull Traverser traverser) { Entry item; if (pendingSnapshotItem != null) { item = pendingSnapshotItem; pendingSnapshotItem = null; } else { item = traverser.next(); } for (; item != null; item = traverser.next()) { if (!tryEmitToSnapshot(item.getKey(), item.getValue())) { pendingSnapshotItem = item; return false; } } return true; } /** * Factory of {@link FlatMapper}. The {@code FlatMapper} will emit items to * the given output ordinal. */ @Nonnull protected final FlatMapper flatMapper( int ordinal, @Nonnull Function> mapper ) { return ordinal != -1 ? flatMapper(new int[]{ordinal}, mapper) : flatMapper(mapper); } /** * Factory of {@link FlatMapper}. The {@code FlatMapper} will emit items to * all defined output ordinals. */ @Nonnull protected final FlatMapper flatMapper( @Nonnull Function> mapper ) { return new FlatMapper<>(null, mapper); } /** * Factory of {@link FlatMapper}. The {@code FlatMapper} will emit items to * the ordinals identified in the array. */ @Nonnull protected final FlatMapper flatMapper( @Nonnull int[] ordinals, @Nonnull Function> mapper ) { return new FlatMapper<>(ordinals, mapper); } // End of convenience methods for subclass, non-overridable /** * A helper that simplifies the implementation of {@link #tryProcess(int, * Object) tryProcess(ordinal, item)} for emitting collections. User * supplies a {@code mapper} which takes an item and returns a traverser * over all output items that should be emitted. 
The {@link * #tryProcess(Object)} method obtains and passes the traverser to {@link * #emitFromTraverser(int, Traverser)}. *

* Example: *

     * public static class SplitWordsP extends AbstractProcessor {
     *
     *    {@code private FlatMapper flatMapper =
     *             flatMapper(item -> Traverser.over(item.split("\\W")));}
     *
     *    {@code @Override}
     *     protected boolean tryProcess(int ordinal, Object item) throws Exception {
     *         return flatMapper.tryProcess((String) item);
     *     }
     * }
* * @param type of the input item * @param type of the emitted item */ protected final class FlatMapper { private final int[] outputOrdinals; private final Function> mapper; private Traverser outputTraverser; private FlatMapper(@Nullable int[] outputOrdinals, @Nonnull Function> mapper) { this.outputOrdinals = outputOrdinals; this.mapper = mapper; } /** * Method designed to be called from one of {@code AbstractProcessor#tryProcessX()} * methods. The calling method must return this method's return * value. * * @param item the item to process * @return what the calling {@code tryProcessX()} method should return */ public boolean tryProcess(@Nonnull T item) { if (outputTraverser == null) { outputTraverser = mapper.apply(item); } if (emit()) { outputTraverser = null; return true; } return false; } private boolean emit() { return outputOrdinals != null ? emitFromTraverser(outputOrdinals, outputTraverser) : emitFromTraverser(outputTraverser); } } /** * Throws {@link UnsupportedOperationException} if watermark has non-zero * key. *

* Supposed to be used by processors that don't function properly with keyed * watermarks. */ protected void keyedWatermarkCheck(Watermark watermark) { if (watermark.key() != 0) { throw new UnsupportedOperationException("Keyed watermarks are not supported for " + this.getClass().getName()); } } // The processN methods contain repeated looping code in order to give an // easier job to the JIT compiler to optimize each case independently, and // to ensure that ordinal is dispatched on just once per process(ordinal, // inbox) call. private void process0(@Nonnull Inbox inbox) throws Exception { for (Object item; (item = inbox.peek()) != null && tryProcess0(item); ) { inbox.remove(); } } private void process1(@Nonnull Inbox inbox) throws Exception { for (Object item; (item = inbox.peek()) != null && tryProcess1(item); ) { inbox.remove(); } } private void process2(@Nonnull Inbox inbox) throws Exception { for (Object item; (item = inbox.peek()) != null && tryProcess2(item); ) { inbox.remove(); } } private void process3(@Nonnull Inbox inbox) throws Exception { for (Object item; (item = inbox.peek()) != null && tryProcess3(item); ) { inbox.remove(); } } private void process4(@Nonnull Inbox inbox) throws Exception { for (Object item; (item = inbox.peek()) != null && tryProcess4(item); ) { inbox.remove(); } } private void processAny(int ordinal, @Nonnull Inbox inbox) throws Exception { for (Object item; (item = inbox.peek()) != null && tryProcess(ordinal, item); ) { inbox.remove(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy