All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.espertech.esper.rowregex.EventRowRegexHelper Maven / Gradle / Ivy

There is a newer version: 7.1.0
Show newest version
/*
 ***************************************************************************************
 *  Copyright (C) 2006 EsperTech, Inc. All rights reserved.                            *
 *  http://www.espertech.com/esper                                                     *
 *  http://www.espertech.com                                                           *
 *  ---------------------------------------------------------------------------------- *
 *  The software in this package is published under the terms of the GPL license       *
 *  a copy of which has been included with this distribution in the license.txt file.  *
 ***************************************************************************************
 */
package com.espertech.esper.rowregex;

import com.espertech.esper.collection.Pair;
import com.espertech.esper.epl.expression.core.ExprNode;
import com.espertech.esper.view.View;
import com.espertech.esper.view.Viewable;

import java.util.*;

/**
 * Helper for match recognize.
 */
public class EventRowRegexHelper {
    public static EventRowRegexNFAViewService recursiveFindRegexService(Viewable top) {
        if (top instanceof EventRowRegexNFAViewService) {
            return (EventRowRegexNFAViewService) top;
        }
        for (View view : top.getViews()) {
            return recursiveFindRegexService(view);
        }
        return null;
    }

    protected static final Comparator END_STATE_COMPARATOR = new Comparator() {
        public int compare(RegexNFAStateEntry o1, RegexNFAStateEntry o2) {
            if (o1.getMatchEndEventSeqNo() > o2.getMatchEndEventSeqNo()) {
                return -1;
            }
            if (o1.getMatchEndEventSeqNo() < o2.getMatchEndEventSeqNo()) {
                return 1;
            }
            return 0;
        }
    };

    /**
     * Inspect variables recursively.
     *
     * @param parent            parent regex expression node
     * @param isMultiple        if the variable in the stack is multiple of single
     * @param variablesSingle   single variables list
     * @param variablesMultiple group variables list
     */
    protected static void recursiveInspectVariables(RowRegexExprNode parent, boolean isMultiple, Set variablesSingle, Set variablesMultiple) {
        if (parent instanceof RowRegexExprNodeNested) {
            RowRegexExprNodeNested nested = (RowRegexExprNodeNested) parent;
            for (RowRegexExprNode child : parent.getChildNodes()) {
                recursiveInspectVariables(child, nested.getType().isMultipleMatches() || isMultiple, variablesSingle, variablesMultiple);
            }
        } else if (parent instanceof RowRegexExprNodeAlteration) {
            for (RowRegexExprNode childAlteration : parent.getChildNodes()) {
                LinkedHashSet singles = new LinkedHashSet();
                LinkedHashSet multiples = new LinkedHashSet();

                recursiveInspectVariables(childAlteration, isMultiple, singles, multiples);

                variablesMultiple.addAll(multiples);
                variablesSingle.addAll(singles);
            }
            variablesSingle.removeAll(variablesMultiple);
        } else if (parent instanceof RowRegexExprNodeAtom) {
            RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) parent;
            String name = atom.getTag();
            if (variablesMultiple.contains(name)) {
                return;
            }
            if (variablesSingle.contains(name)) {
                variablesSingle.remove(name);
                variablesMultiple.add(name);
                return;
            }
            if (atom.getType().isMultipleMatches()) {
                variablesMultiple.add(name);
                return;
            }
            if (isMultiple) {
                variablesMultiple.add(name);
            } else {
                variablesSingle.add(name);
            }
        } else {
            for (RowRegexExprNode child : parent.getChildNodes()) {
                recursiveInspectVariables(child, isMultiple, variablesSingle, variablesMultiple);
            }
        }
    }

    /**
     * Build a list of start states from the parent node.
     *
     * @param parent                      to build start state for
     * @param variableDefinitions         each variable and its expressions
     * @param variableStreams             variable name and its stream number
     * @param exprRequiresMultimatchState indicator whether multi-match state required
     * @return strand of regex state nodes
     */
    protected static RegexNFAStrandResult recursiveBuildStartStates(RowRegexExprNode parent,
                                                                    Map variableDefinitions,
                                                                    Map> variableStreams,
                                                                    boolean[] exprRequiresMultimatchState
    ) {
        Stack nodeNumStack = new Stack();

        RegexNFAStrand strand = recursiveBuildStatesInternal(parent,
                variableDefinitions,
                variableStreams,
                nodeNumStack,
                exprRequiresMultimatchState);

        // add end state
        RegexNFAStateEnd end = new RegexNFAStateEnd();
        for (RegexNFAStateBase endStates : strand.getEndStates()) {
            endStates.addState(end);
        }

        // assign node num as a counter
        int nodeNumberFlat = 0;
        for (RegexNFAStateBase theBase : strand.getAllStates()) {
            theBase.setNodeNumFlat(nodeNumberFlat++);
        }

        return new RegexNFAStrandResult(new ArrayList(strand.getStartStates()), strand.getAllStates());
    }

    private static RegexNFAStrand recursiveBuildStatesInternal(RowRegexExprNode node,
                                                               Map variableDefinitions,
                                                               Map> variableStreams,
                                                               Stack nodeNumStack,
                                                               boolean[] exprRequiresMultimatchState
    ) {
        if (node instanceof RowRegexExprNodeAlteration) {
            int nodeNum = 0;

            List cumulativeStartStates = new ArrayList();
            List cumulativeStates = new ArrayList();
            List cumulativeEndStates = new ArrayList();

            boolean isPassthrough = false;
            for (RowRegexExprNode child : node.getChildNodes()) {
                nodeNumStack.push(nodeNum);
                RegexNFAStrand strand = recursiveBuildStatesInternal(child,
                        variableDefinitions,
                        variableStreams,
                        nodeNumStack,
                        exprRequiresMultimatchState);
                nodeNumStack.pop();

                cumulativeStartStates.addAll(strand.getStartStates());
                cumulativeStates.addAll(strand.getAllStates());
                cumulativeEndStates.addAll(strand.getEndStates());
                if (strand.isPassthrough()) {
                    isPassthrough = true;
                }

                nodeNum++;
            }

            return new RegexNFAStrand(cumulativeStartStates, cumulativeEndStates, cumulativeStates, isPassthrough);
        } else if (node instanceof RowRegexExprNodeConcatenation) {
            int nodeNum = 0;

            boolean isPassthrough = true;
            List cumulativeStates = new ArrayList();
            RegexNFAStrand[] strands = new RegexNFAStrand[node.getChildNodes().size()];

            for (RowRegexExprNode child : node.getChildNodes()) {
                nodeNumStack.push(nodeNum);
                strands[nodeNum] = recursiveBuildStatesInternal(child,
                        variableDefinitions,
                        variableStreams,
                        nodeNumStack,
                        exprRequiresMultimatchState);
                nodeNumStack.pop();

                cumulativeStates.addAll(strands[nodeNum].getAllStates());
                if (!strands[nodeNum].isPassthrough()) {
                    isPassthrough = false;
                }

                nodeNum++;
            }

            // determine start states: all states until the first non-passthrough start state
            List startStates = new ArrayList();
            for (int i = 0; i < strands.length; i++) {
                startStates.addAll(strands[i].getStartStates());
                if (!strands[i].isPassthrough()) {
                    break;
                }
            }

            // determine end states: all states from the back until the last non-passthrough end state
            List endStates = new ArrayList();
            for (int i = strands.length - 1; i >= 0; i--) {
                endStates.addAll(strands[i].getEndStates());
                if (!strands[i].isPassthrough()) {
                    break;
                }
            }

            // hook up the end state of each strand with the start states of each next strand
            for (int i = strands.length - 1; i >= 1; i--) {
                RegexNFAStrand current = strands[i];
                for (int j = i - 1; j >= 0; j--) {
                    RegexNFAStrand prior = strands[j];

                    for (RegexNFAStateBase endState : prior.getEndStates()) {
                        for (RegexNFAStateBase startState : current.getStartStates()) {
                            endState.addState(startState);
                        }
                    }

                    if (!prior.isPassthrough()) {
                        break;
                    }
                }
            }

            return new RegexNFAStrand(startStates, endStates, cumulativeStates, isPassthrough);
        } else if (node instanceof RowRegexExprNodeNested) {
            RowRegexExprNodeNested nested = (RowRegexExprNodeNested) node;
            nodeNumStack.push(0);
            RegexNFAStrand strand = recursiveBuildStatesInternal(node.getChildNodes().get(0),
                    variableDefinitions,
                    variableStreams,
                    nodeNumStack,
                    exprRequiresMultimatchState);
            nodeNumStack.pop();

            boolean isPassthrough = strand.isPassthrough() || nested.getType().isOptional();

            // if this is a repeating node then pipe back each end state to each begin state
            if (nested.getType().isMultipleMatches()) {
                for (RegexNFAStateBase endstate : strand.getEndStates()) {
                    for (RegexNFAStateBase startstate : strand.getStartStates()) {
                        if (!endstate.getNextStates().contains(startstate)) {
                            endstate.getNextStates().add(startstate);
                        }
                    }
                }
            }
            return new RegexNFAStrand(strand.getStartStates(), strand.getEndStates(), strand.getAllStates(), isPassthrough);
        } else {
            RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) node;

            // assign stream number for single-variables for most direct expression eval; multiple-variable gets -1
            int streamNum = variableStreams.get(atom.getTag()).getFirst();
            boolean multiple = variableStreams.get(atom.getTag()).getSecond();
            ExprNode expressionDef = variableDefinitions.get(atom.getTag());
            boolean exprRequiresMultimatch = exprRequiresMultimatchState[streamNum];

            RegexNFAStateBase nextState;
            if ((atom.getType() == RegexNFATypeEnum.ZERO_TO_MANY) || (atom.getType() == RegexNFATypeEnum.ZERO_TO_MANY_RELUCTANT)) {
                nextState = new RegexNFAStateZeroToMany(toString(nodeNumStack), atom.getTag(), streamNum, multiple, atom.getType().isGreedy(), expressionDef, exprRequiresMultimatch);
            } else if ((atom.getType() == RegexNFATypeEnum.ONE_TO_MANY) || (atom.getType() == RegexNFATypeEnum.ONE_TO_MANY_RELUCTANT)) {
                nextState = new RegexNFAStateOneToMany(toString(nodeNumStack), atom.getTag(), streamNum, multiple, atom.getType().isGreedy(), expressionDef, exprRequiresMultimatch);
            } else if ((atom.getType() == RegexNFATypeEnum.ONE_OPTIONAL) || (atom.getType() == RegexNFATypeEnum.ONE_OPTIONAL_RELUCTANT)) {
                nextState = new RegexNFAStateOneOptional(toString(nodeNumStack), atom.getTag(), streamNum, multiple, atom.getType().isGreedy(), expressionDef, exprRequiresMultimatch);
            } else if (expressionDef == null) {
                nextState = new RegexNFAStateAnyOne(toString(nodeNumStack), atom.getTag(), streamNum, multiple);
            } else {
                nextState = new RegexNFAStateFilter(toString(nodeNumStack), atom.getTag(), streamNum, multiple, expressionDef, exprRequiresMultimatch);
            }

            return new RegexNFAStrand(Collections.singletonList(nextState), Collections.singletonList(nextState),
                    Collections.singletonList(nextState), atom.getType().isOptional());
        }
    }

    private static String toString(Stack nodeNumStack) {
        StringBuilder builder = new StringBuilder();
        String delimiter = "";
        for (Integer atom : nodeNumStack) {
            builder.append(delimiter);
            builder.append(Integer.toString(atom));
            delimiter = ".";
        }
        return builder.toString();
    }

    public static Map> determineVisibility(RowRegexExprNode pattern) {
        Map> map = new HashMap>();
        ArrayDeque path = new ArrayDeque();
        recursiveFindPatternAtoms(pattern, path, map);
        return map;
    }

    private static void recursiveFindPatternAtoms(RowRegexExprNode parent, ArrayDeque path, Map> map) {
        path.add(parent);
        for (RowRegexExprNode child : parent.getChildNodes()) {
            if (child instanceof RowRegexExprNodeAtom) {
                handleAtom((RowRegexExprNodeAtom) child, path, map);
            } else {
                recursiveFindPatternAtoms(child, path, map);
            }
        }
        path.removeLast();
    }

    private static void handleAtom(RowRegexExprNodeAtom atom, ArrayDeque path, Map> map) {

        RowRegexExprNode[] patharr = path.toArray(new RowRegexExprNode[path.size()]);
        Set identifiers = null;

        for (int i = 0; i < patharr.length; i++) {
            RowRegexExprNode parent = patharr[i];
            if (!(parent instanceof RowRegexExprNodeConcatenation)) {
                continue;
            }

            RowRegexExprNodeConcatenation concat = (RowRegexExprNodeConcatenation) parent;
            int indexWithinConcat;
            if (i == patharr.length - 1) {
                indexWithinConcat = parent.getChildNodes().indexOf(atom);
            } else {
                indexWithinConcat = parent.getChildNodes().indexOf(patharr[i + 1]);
            }

            if (identifiers == null && indexWithinConcat > 0) {
                identifiers = new HashSet();
            }

            for (int j = 0; j < indexWithinConcat; j++) {
                RowRegexExprNode concatChildNode = concat.getChildNodes().get(j);
                recursiveCollectAtomsWExclude(concatChildNode, identifiers, atom.getTag());
            }
        }

        if (identifiers == null) {
            return;
        }

        Set existingVisibility = map.get(atom.getTag());
        if (existingVisibility == null) {
            map.put(atom.getTag(), identifiers);
        } else {
            existingVisibility.addAll(identifiers);
        }
    }

    private static void recursiveCollectAtomsWExclude(RowRegexExprNode node, Set identifiers, String excludedTag) {
        if (node instanceof RowRegexExprNodeAtom) {
            RowRegexExprNodeAtom atom = (RowRegexExprNodeAtom) node;
            if (!excludedTag.equals(atom.getTag())) {
                identifiers.add(atom.getTag());
            }
        }
        for (RowRegexExprNode child : node.getChildNodes()) {
            recursiveCollectAtomsWExclude(child, identifiers, excludedTag);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy