/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.cep.nfa;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.functions.DefaultOpenContext;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.api.common.functions.util.FunctionUtils;
import org.apache.flink.api.common.typeutils.CompositeTypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.base.StringSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.cep.nfa.aftermatch.AfterMatchSkipStrategy;
import org.apache.flink.cep.nfa.compiler.NFACompiler;
import org.apache.flink.cep.nfa.sharedbuffer.EventId;
import org.apache.flink.cep.nfa.sharedbuffer.NodeId;
import org.apache.flink.cep.nfa.sharedbuffer.SharedBuffer;
import org.apache.flink.cep.nfa.sharedbuffer.SharedBufferAccessor;
import org.apache.flink.cep.pattern.conditions.IterativeCondition;
import org.apache.flink.cep.time.TimerService;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.CollectionUtil;
import org.apache.flink.util.FlinkRuntimeException;
import org.apache.flink.util.Preconditions;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Stack;
import static org.apache.flink.cep.nfa.MigrationUtils.deserializeComputationStates;
/**
* Non-deterministic finite automaton implementation.
*
* <p>The {@link org.apache.flink.cep.operator.CepOperator CEP operator} keeps one NFA per key, for
* keyed input streams, and a single global NFA for non-keyed ones. When an event gets processed, it
* updates the NFA's internal state machine.
*
* <p>An event that belongs to a partially matched sequence is kept in an internal {@link
* SharedBuffer buffer}, which is a memory-optimized data-structure exactly for this purpose. Events
* in the buffer are removed when all the matched sequences that contain them are:
*
* <ul>
*   <li>emitted (success)
*   <li>discarded (patterns containing NOT)
*   <li>timed-out (windowed patterns)
* </ul>
*
* <p>The implementation is strongly based on the paper "Efficient Pattern Matching over Event
* Streams".
*
* @param <T> Type of the processed events
* @see <a href="https://people.cs.umass.edu/~yanlei/publications/sase-sigmod08.pdf">
*     https://people.cs.umass.edu/~yanlei/publications/sase-sigmod08.pdf</a>
*/
public class NFA {
/**
* All the valid NFA states, as returned by the {@link NFACompiler NFACompiler}, keyed by state
* name. These are directly derived from the user-specified pattern. The map is built once in the
* constructor and wrapped as unmodifiable.
*/
private final Map> states;
/**
* The lengths of a windowed pattern, as specified using the {@link
* org.apache.flink.cep.pattern.Pattern#within(Time, WithinType) Pattern.within(Time,
* WithinType)} method with {@code WithinType.PREVIOUS_AND_CURRENT}.
*/
private final Map windowTimes;
/**
* The length of a windowed pattern, as specified using the {@link
* org.apache.flink.cep.pattern.Pattern#within(Time) Pattern.within(Time)} method.
*/
private final long windowTime;
/**
* A flag indicating if we want timed-out patterns (in case of windowed patterns) to be emitted
* ({@code true}), or silently discarded ({@code false}).
*/
private final boolean handleTimeout;
/**
 * Creates an NFA over the given states.
 *
 * @param validStates the states of the automaton; they are indexed by name into an
 *     unmodifiable map
 * @param windowTimes the per-pattern window lengths; the passed map is stored as-is (no copy)
 * @param windowTime the window length of the pattern
 * @param handleTimeout whether timed-out patterns are emitted ({@code true}) or discarded
 */
public NFA(
final Collection> validStates,
final Map windowTimes,
final long windowTime,
final boolean handleTimeout) {
    // The four assignments are independent of each other; the state index is built first.
    this.states = loadStates(validStates);
    this.windowTimes = windowTimes;
    this.windowTime = windowTime;
    this.handleTimeout = handleTimeout;
}
/**
 * Indexes the given states by their name.
 *
 * <p>If two states share a name, the one iterated later replaces the earlier one.
 *
 * @param validStates the states to index
 * @return an unmodifiable map from state name to state
 */
private Map> loadStates(final Collection> validStates) {
    // Size the map from the number of states actually supplied rather than a fixed guess,
    // so patterns with many states do not trigger rehashing while the index is built.
    Map> statesByName = CollectionUtil.newHashMapWithExpectedSize(validStates.size());
    for (State state : validStates) {
        statesByName.put(state.getName(), state);
    }
    return Collections.unmodifiableMap(statesByName);
}
/**
 * Returns the window length this NFA was constructed with.
 *
 * @return the value of {@code windowTime}
 */
public long getWindowTime() {
    return this.windowTime;
}
/**
 * Returns the states of this NFA.
 *
 * @return the value view of the internal name-to-state map
 */
@VisibleForTesting
public Collection> getStates() {
    return this.states.values();
}
/**
 * Builds the initial {@link NFAState}: one start computation state for every state of this NFA
 * that is flagged as a start state.
 *
 * @return a new NFA state holding the start computation states
 */
public NFAState createInitialNFAState() {
    final Queue initialComputations = new LinkedList<>();
    for (State candidate : states.values()) {
        if (!candidate.isStart()) {
            // Only start states seed the automaton.
            continue;
        }
        initialComputations.add(ComputationState.createStartState(candidate.getName()));
    }
    return new NFAState(initialComputations);
}
/**
 * Looks up the NFA state a computation state currently points to.
 *
 * @param state the computation state whose current state name is resolved
 * @return the matching state, or {@code null} if no state is registered under that name
 */
private State getState(ComputationState state) {
    final String currentName = state.getCurrentStateName();
    return states.get(currentName);
}
/**
 * Tells whether the given computation state currently points to a start state.
 *
 * @param state the computation state to inspect
 * @return {@code true} if the referenced NFA state is a start state
 * @throws FlinkRuntimeException if the referenced state does not exist in this NFA
 */
private boolean isStartState(ComputationState state) {
    return getExistingState(state).isStart();
}

/**
 * Tells whether the given computation state currently points to a stop state.
 *
 * @param state the computation state to inspect
 * @return {@code true} if the referenced NFA state is a stop state
 * @throws FlinkRuntimeException if the referenced state does not exist in this NFA
 */
private boolean isStopState(ComputationState state) {
    return getExistingState(state).isStop();
}

/**
 * Tells whether the given computation state currently points to a final state.
 *
 * @param state the computation state to inspect
 * @return {@code true} if the referenced NFA state is a final state
 * @throws FlinkRuntimeException if the referenced state does not exist in this NFA
 */
private boolean isFinalState(ComputationState state) {
    return getExistingState(state).isFinal();
}

/**
 * Resolves the NFA state referenced by a computation state, failing if it is unknown. Shared
 * by the {@code is*State} predicates so the lookup and the error message live in one place.
 *
 * @param state the computation state whose current state name is resolved
 * @return the matching state, never {@code null}
 * @throws FlinkRuntimeException if no state is registered under the referenced name
 */
private State getExistingState(ComputationState state) {
    State stateObject = getState(state);
    if (stateObject == null) {
        throw new FlinkRuntimeException(
                "State "
                        + state.getCurrentStateName()
                        + " does not exist in the NFA. NFA has states "
                        + states.values());
    }
    return stateObject;
}
/**
 * Initialization method for the NFA. For every transition of every state, the transition's
 * condition receives the given runtime context and is then opened with {@link
 * DefaultOpenContext#INSTANCE}.
 *
 * @param cepRuntimeContext runtime context of the enclosing operator, handed to each condition
 * @param conf The configuration containing the parameters attached to the contract; it is not
 *     read by this method
 * @throws Exception if setting the context on, or opening, a condition fails
 */
public void open(RuntimeContext cepRuntimeContext, Configuration conf) throws Exception {
    for (State nfaState : getStates()) {
        for (StateTransition edge : nfaState.getStateTransitions()) {
            IterativeCondition edgeCondition = edge.getCondition();
            // The context must be in place before the condition is opened.
            FunctionUtils.setFunctionRuntimeContext(edgeCondition, cepRuntimeContext);
            FunctionUtils.openFunction(edgeCondition, DefaultOpenContext.INSTANCE);
        }
    }
}
/**
 * Tear-down method for the NFA. Closes the condition of every transition of every state.
 *
 * <p>A failure while closing one condition does not prevent the remaining conditions from
 * being closed: the first failure is remembered and rethrown once all conditions have been
 * visited, with any later failures attached to it as suppressed exceptions.
 *
 * @throws Exception the first exception raised while closing a condition
 */
public void close() throws Exception {
    Exception firstFailure = null;
    for (State state : getStates()) {
        for (StateTransition transition : state.getStateTransitions()) {
            IterativeCondition condition = transition.getCondition();
            try {
                FunctionUtils.closeFunction(condition);
            } catch (Exception e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else if (e != firstFailure) {
                    // addSuppressed rejects self-suppression, hence the identity check.
                    firstFailure.addSuppressed(e);
                }
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}
/**
* Processes the next input event. If some of the computations reach a final state then the
* resulting event sequences are returned. If computations time out and timeout handling is
* activated, then the timed out event patterns are returned.
*
* <p>If computations reach a stop state, the path forward is discarded and the currently
* constructed path is returned with the element that resulted in the stop state.
*
* @param sharedBufferAccessor the accessor to SharedBuffer object that we need to work upon
* while processing
* @param nfaState The NFAState object that we need to affect while processing
* @param event The current event to be processed or null if only pruning shall be done
* @param timestamp The timestamp of the current event
* @param afterMatchSkipStrategy The skip strategy to use after each match
* @param timerService gives access to processing time and time characteristic, needed for
* condition evaluation
* @return Tuple of the collection of matched patterns (e.g. the result of computations which
* have reached a final state) and the collection of timed out patterns (if timeout handling
* is activated)
* @throws Exception Thrown if the system cannot access the state.
*/
public Collection