All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.cep.PatternStream Maven / Gradle / Ivy

There is a newer version: 2.0-preview1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.cep;

import org.apache.flink.annotation.Internal;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.typeutils.EitherTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.cep.functions.PatternProcessFunction;
import org.apache.flink.cep.functions.TimedOutPartialMatchHandler;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.functions.co.CoMapFunction;
import org.apache.flink.types.Either;
import org.apache.flink.util.OutputTag;

import java.util.UUID;

import static org.apache.flink.cep.PatternProcessFunctionBuilder.fromFlatSelect;
import static org.apache.flink.cep.PatternProcessFunctionBuilder.fromSelect;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * Stream abstraction for CEP pattern detection. A pattern stream is a stream which emits detected
 * pattern sequences as a map of events associated with their names. The pattern is detected using a
 * {@link org.apache.flink.cep.nfa.NFA}. In order to process the detected sequences, the user has to
 * specify a {@link PatternSelectFunction} or a {@link PatternFlatSelectFunction}.
 *
 * 

Additionally it allows to handle partially matched event patterns which have timed out. For * this the user has to specify a {@link PatternTimeoutFunction} or a {@link * PatternFlatTimeoutFunction}. * * @param Type of the events */ public class PatternStream { private final PatternStreamBuilder builder; private PatternStream(final PatternStreamBuilder builder) { this.builder = checkNotNull(builder); } PatternStream(final DataStream inputStream, final Pattern pattern) { this(PatternStreamBuilder.forStreamAndPattern(inputStream, pattern)); } PatternStream withComparator(final EventComparator comparator) { return new PatternStream<>(builder.withComparator(comparator)); } /** * Send late arriving data to the side output identified by the given {@link OutputTag}. A * record is considered late after the watermark has passed its timestamp. * *

You can get the stream of late data using {@link * SingleOutputStreamOperator#getSideOutput(OutputTag)} on the {@link * SingleOutputStreamOperator} resulting from the pattern processing operations. */ public PatternStream sideOutputLateData(OutputTag lateDataOutputTag) { return new PatternStream<>(builder.withLateDataOutputTag(lateDataOutputTag)); } /** Sets the time characteristic to processing time. */ public PatternStream inProcessingTime() { return new PatternStream<>(builder.inProcessingTime()); } /** Sets the time characteristic to event time. */ public PatternStream inEventTime() { return new PatternStream<>(builder.inEventTime()); } /** * Applies a process function to the detected pattern sequence. For each pattern sequence the * provided {@link PatternProcessFunction} is called. In order to process timed out partial * matches as well one can use {@link TimedOutPartialMatchHandler} as additional interface. * * @param patternProcessFunction The pattern process function which is called for each detected * pattern sequence. * @param Type of the resulting elements * @return {@link DataStream} which contains the resulting elements from the pattern process * function. */ public SingleOutputStreamOperator process( final PatternProcessFunction patternProcessFunction) { final TypeInformation returnType = TypeExtractor.getUnaryOperatorReturnType( patternProcessFunction, PatternProcessFunction.class, 0, 1, TypeExtractor.NO_INDEX, builder.getInputType(), null, false); return process(patternProcessFunction, returnType); } /** * Applies a process function to the detected pattern sequence. For each pattern sequence the * provided {@link PatternProcessFunction} is called. In order to process timed out partial * matches as well one can use {@link TimedOutPartialMatchHandler} as additional interface. * * @param patternProcessFunction The pattern process function which is called for each detected * pattern sequence. * @param Type of the resulting elements * @param outTypeInfo Explicit specification of output type. * @return {@link DataStream} which contains the resulting elements from the pattern process * function. */ public SingleOutputStreamOperator process( final PatternProcessFunction patternProcessFunction, final TypeInformation outTypeInfo) { return builder.build(outTypeInfo, builder.clean(patternProcessFunction)); } /** * Applies a select function to the detected pattern sequence. For each pattern sequence the * provided {@link PatternSelectFunction} is called. The pattern select function can produce * exactly one resulting element. * * @param patternSelectFunction The pattern select function which is called for each detected * pattern sequence. * @param Type of the resulting elements * @return {@link DataStream} which contains the resulting elements from the pattern select * function. */ public SingleOutputStreamOperator select( final PatternSelectFunction patternSelectFunction) { // we have to extract the output type from the provided pattern selection function manually // because the TypeExtractor cannot do that if the method is wrapped in a MapFunction final TypeInformation returnType = TypeExtractor.getUnaryOperatorReturnType( patternSelectFunction, PatternSelectFunction.class, 0, 1, TypeExtractor.NO_INDEX, builder.getInputType(), null, false); return select(patternSelectFunction, returnType); } /** * Applies a select function to the detected pattern sequence. For each pattern sequence the * provided {@link PatternSelectFunction} is called. The pattern select function can produce * exactly one resulting element. * * @param patternSelectFunction The pattern select function which is called for each detected * pattern sequence. * @param Type of the resulting elements * @param outTypeInfo Explicit specification of output type. * @return {@link DataStream} which contains the resulting elements from the pattern select * function. */ public SingleOutputStreamOperator select( final PatternSelectFunction patternSelectFunction, final TypeInformation outTypeInfo) { final PatternProcessFunction processFunction = fromSelect(builder.clean(patternSelectFunction)).build(); return process(processFunction, outTypeInfo); } /** * Applies a select function to the detected pattern sequence. For each pattern sequence the * provided {@link PatternSelectFunction} is called. The pattern select function can produce * exactly one resulting element. * *

Applies a timeout function to a partial pattern sequence which has timed out. For each * partial pattern sequence the provided {@link PatternTimeoutFunction} is called. The pattern * timeout function can produce exactly one resulting element. * *

You can get the stream of timed-out data resulting from the {@link * SingleOutputStreamOperator#getSideOutput(OutputTag)} on the {@link * SingleOutputStreamOperator} resulting from the select operation with the same {@link * OutputTag}. * * @param timedOutPartialMatchesTag {@link OutputTag} that identifies side output with timed out * patterns * @param patternTimeoutFunction The pattern timeout function which is called for each partial * pattern sequence which has timed out. * @param patternSelectFunction The pattern select function which is called for each detected * pattern sequence. * @param Type of the resulting timeout elements * @param Type of the resulting elements * @return {@link DataStream} which contains the resulting elements with the resulting timeout * elements in a side output. */ public SingleOutputStreamOperator select( final OutputTag timedOutPartialMatchesTag, final PatternTimeoutFunction patternTimeoutFunction, final PatternSelectFunction patternSelectFunction) { final TypeInformation rightTypeInfo = TypeExtractor.getUnaryOperatorReturnType( patternSelectFunction, PatternSelectFunction.class, 0, 1, TypeExtractor.NO_INDEX, builder.getInputType(), null, false); return select( timedOutPartialMatchesTag, patternTimeoutFunction, rightTypeInfo, patternSelectFunction); } /** * Applies a select function to the detected pattern sequence. For each pattern sequence the * provided {@link PatternSelectFunction} is called. The pattern select function can produce * exactly one resulting element. * *

Applies a timeout function to a partial pattern sequence which has timed out. For each * partial pattern sequence the provided {@link PatternTimeoutFunction} is called. The pattern * timeout function can produce exactly one resulting element. * *

You can get the stream of timed-out data resulting from the {@link * SingleOutputStreamOperator#getSideOutput(OutputTag)} on the {@link * SingleOutputStreamOperator} resulting from the select operation with the same {@link * OutputTag}. * * @param timedOutPartialMatchesTag {@link OutputTag} that identifies side output with timed out * patterns * @param patternTimeoutFunction The pattern timeout function which is called for each partial * pattern sequence which has timed out. * @param outTypeInfo Explicit specification of output type. * @param patternSelectFunction The pattern select function which is called for each detected * pattern sequence. * @param Type of the resulting timeout elements * @param Type of the resulting elements * @return {@link DataStream} which contains the resulting elements with the resulting timeout * elements in a side output. */ public SingleOutputStreamOperator select( final OutputTag timedOutPartialMatchesTag, final PatternTimeoutFunction patternTimeoutFunction, final TypeInformation outTypeInfo, final PatternSelectFunction patternSelectFunction) { final PatternProcessFunction processFunction = fromSelect(builder.clean(patternSelectFunction)) .withTimeoutHandler( timedOutPartialMatchesTag, builder.clean(patternTimeoutFunction)) .build(); return process(processFunction, outTypeInfo); } /** * Applies a select function to the detected pattern sequence. For each pattern sequence the * provided {@link PatternSelectFunction} is called. The pattern select function can produce * exactly one resulting element. * *

Applies a timeout function to a partial pattern sequence which has timed out. For each * partial pattern sequence the provided {@link PatternTimeoutFunction} is called. The pattern * timeout function can produce exactly one resulting element. * * @param patternTimeoutFunction The pattern timeout function which is called for each partial * pattern sequence which has timed out. * @param patternSelectFunction The pattern select function which is called for each detected * pattern sequence. * @param Type of the resulting timeout elements * @param Type of the resulting elements * @deprecated Use {@link PatternStream#select(OutputTag, PatternTimeoutFunction, * PatternSelectFunction)} that returns timed out events as a side-output * @return {@link DataStream} which contains the resulting elements or the resulting timeout * elements wrapped in an {@link Either} type. */ @Deprecated public SingleOutputStreamOperator> select( final PatternTimeoutFunction patternTimeoutFunction, final PatternSelectFunction patternSelectFunction) { final TypeInformation mainTypeInfo = TypeExtractor.getUnaryOperatorReturnType( patternSelectFunction, PatternSelectFunction.class, 0, 1, TypeExtractor.NO_INDEX, builder.getInputType(), null, false); final TypeInformation timeoutTypeInfo = TypeExtractor.getUnaryOperatorReturnType( patternTimeoutFunction, PatternTimeoutFunction.class, 0, 1, TypeExtractor.NO_INDEX, builder.getInputType(), null, false); final TypeInformation> outTypeInfo = new EitherTypeInfo<>(timeoutTypeInfo, mainTypeInfo); final OutputTag outputTag = new OutputTag<>(UUID.randomUUID().toString(), timeoutTypeInfo); final PatternProcessFunction processFunction = fromSelect(builder.clean(patternSelectFunction)) .withTimeoutHandler(outputTag, builder.clean(patternTimeoutFunction)) .build(); final SingleOutputStreamOperator mainStream = process(processFunction, mainTypeInfo); final DataStream timedOutStream = mainStream.getSideOutput(outputTag); return mainStream.connect(timedOutStream).map(new CoMapTimeout<>()).returns(outTypeInfo); } /** * Applies a flat select function to the detected pattern sequence. For each pattern sequence * the provided {@link PatternFlatSelectFunction} is called. The pattern flat select function * can produce an arbitrary number of resulting elements. * * @param patternFlatSelectFunction The pattern flat select function which is called for each * detected pattern sequence. * @param Type of the resulting elements * @return {@link DataStream} which contains the resulting elements from the pattern flat select * function. */ public SingleOutputStreamOperator flatSelect( final PatternFlatSelectFunction patternFlatSelectFunction) { // we have to extract the output type from the provided pattern selection function manually // because the TypeExtractor cannot do that if the method is wrapped in a MapFunction final TypeInformation outTypeInfo = TypeExtractor.getUnaryOperatorReturnType( patternFlatSelectFunction, PatternFlatSelectFunction.class, 0, 1, new int[] {1, 0}, builder.getInputType(), null, false); return flatSelect(patternFlatSelectFunction, outTypeInfo); } /** * Applies a flat select function to the detected pattern sequence. For each pattern sequence * the provided {@link PatternFlatSelectFunction} is called. The pattern flat select function * can produce an arbitrary number of resulting elements. * * @param patternFlatSelectFunction The pattern flat select function which is called for each * detected pattern sequence. * @param Type of the resulting elements * @param outTypeInfo Explicit specification of output type. * @return {@link DataStream} which contains the resulting elements from the pattern flat select * function. */ public SingleOutputStreamOperator flatSelect( final PatternFlatSelectFunction patternFlatSelectFunction, final TypeInformation outTypeInfo) { final PatternProcessFunction processFunction = fromFlatSelect(builder.clean(patternFlatSelectFunction)).build(); return process(processFunction, outTypeInfo); } /** * Applies a flat select function to the detected pattern sequence. For each pattern sequence * the provided {@link PatternFlatSelectFunction} is called. The pattern select function can * produce exactly one resulting element. * *

Applies a timeout function to a partial pattern sequence which has timed out. For each * partial pattern sequence the provided {@link PatternFlatTimeoutFunction} is called. The * pattern timeout function can produce exactly one resulting element. * *

You can get the stream of timed-out data resulting from the {@link * SingleOutputStreamOperator#getSideOutput(OutputTag)} on the {@link * SingleOutputStreamOperator} resulting from the select operation with the same {@link * OutputTag}. * * @param timedOutPartialMatchesTag {@link OutputTag} that identifies side output with timed out * patterns * @param patternFlatTimeoutFunction The pattern timeout function which is called for each * partial pattern sequence which has timed out. * @param patternFlatSelectFunction The pattern select function which is called for each * detected pattern sequence. * @param Type of the resulting timeout elements * @param Type of the resulting elements * @return {@link DataStream} which contains the resulting elements with the resulting timeout * elements in a side output. */ public SingleOutputStreamOperator flatSelect( final OutputTag timedOutPartialMatchesTag, final PatternFlatTimeoutFunction patternFlatTimeoutFunction, final PatternFlatSelectFunction patternFlatSelectFunction) { final TypeInformation rightTypeInfo = TypeExtractor.getUnaryOperatorReturnType( patternFlatSelectFunction, PatternFlatSelectFunction.class, 0, 1, new int[] {1, 0}, builder.getInputType(), null, false); return flatSelect( timedOutPartialMatchesTag, patternFlatTimeoutFunction, rightTypeInfo, patternFlatSelectFunction); } /** * Applies a flat select function to the detected pattern sequence. For each pattern sequence * the provided {@link PatternFlatSelectFunction} is called. The pattern select function can * produce exactly one resulting element. * *

Applies a timeout function to a partial pattern sequence which has timed out. For each * partial pattern sequence the provided {@link PatternFlatTimeoutFunction} is called. The * pattern timeout function can produce exactly one resulting element. * *

You can get the stream of timed-out data resulting from the {@link * SingleOutputStreamOperator#getSideOutput(OutputTag)} on the {@link * SingleOutputStreamOperator} resulting from the select operation with the same {@link * OutputTag}. * * @param timedOutPartialMatchesTag {@link OutputTag} that identifies side output with timed out * patterns * @param patternFlatTimeoutFunction The pattern timeout function which is called for each * partial pattern sequence which has timed out. * @param patternFlatSelectFunction The pattern select function which is called for each * detected pattern sequence. * @param outTypeInfo Explicit specification of output type. * @param Type of the resulting timeout elements * @param Type of the resulting elements * @return {@link DataStream} which contains the resulting elements with the resulting timeout * elements in a side output. */ public SingleOutputStreamOperator flatSelect( final OutputTag timedOutPartialMatchesTag, final PatternFlatTimeoutFunction patternFlatTimeoutFunction, final TypeInformation outTypeInfo, final PatternFlatSelectFunction patternFlatSelectFunction) { final PatternProcessFunction processFunction = fromFlatSelect(builder.clean(patternFlatSelectFunction)) .withTimeoutHandler( timedOutPartialMatchesTag, builder.clean(patternFlatTimeoutFunction)) .build(); return process(processFunction, outTypeInfo); } /** * Applies a flat select function to the detected pattern sequence. For each pattern sequence * the provided {@link PatternFlatSelectFunction} is called. The pattern flat select function * can produce an arbitrary number of resulting elements. * *

Applies a timeout function to a partial pattern sequence which has timed out. For each * partial pattern sequence the provided {@link PatternFlatTimeoutFunction} is called. The * pattern timeout function can produce an arbitrary number of resulting elements. * * @param patternFlatTimeoutFunction The pattern flat timeout function which is called for each * partial pattern sequence which has timed out. * @param patternFlatSelectFunction The pattern flat select function which is called for each * detected pattern sequence. * @param Type of the resulting timeout events * @param Type of the resulting events * @deprecated Use {@link PatternStream#flatSelect(OutputTag, PatternFlatTimeoutFunction, * PatternFlatSelectFunction)} that returns timed out events as a side-output * @return {@link DataStream} which contains the resulting events from the pattern flat select * function or the resulting timeout events from the pattern flat timeout function wrapped * in an {@link Either} type. */ @Deprecated public SingleOutputStreamOperator> flatSelect( final PatternFlatTimeoutFunction patternFlatTimeoutFunction, final PatternFlatSelectFunction patternFlatSelectFunction) { final TypeInformation timedOutTypeInfo = TypeExtractor.getUnaryOperatorReturnType( patternFlatTimeoutFunction, PatternFlatTimeoutFunction.class, 0, 1, new int[] {2, 0}, builder.getInputType(), null, false); final TypeInformation mainTypeInfo = TypeExtractor.getUnaryOperatorReturnType( patternFlatSelectFunction, PatternFlatSelectFunction.class, 0, 1, new int[] {1, 0}, builder.getInputType(), null, false); final OutputTag outputTag = new OutputTag<>(UUID.randomUUID().toString(), timedOutTypeInfo); final PatternProcessFunction processFunction = fromFlatSelect(builder.clean(patternFlatSelectFunction)) .withTimeoutHandler(outputTag, builder.clean(patternFlatTimeoutFunction)) .build(); final SingleOutputStreamOperator mainStream = process(processFunction, mainTypeInfo); final DataStream timedOutStream = mainStream.getSideOutput(outputTag); final TypeInformation> outTypeInfo = new EitherTypeInfo<>(timedOutTypeInfo, mainTypeInfo); return mainStream.connect(timedOutStream).map(new CoMapTimeout<>()).returns(outTypeInfo); } /** Used for joining results from timeout side-output for API backward compatibility. */ @Internal public static class CoMapTimeout implements CoMapFunction> { private static final long serialVersionUID = 2059391566945212552L; @Override public Either map1(R value) { return Either.Right(value); } @Override public Either map2(L value) { return Either.Left(value); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy