All downloads are free. The search and download functionality uses the official Maven repository.

com.oracle.truffle.regex.tregex.nfa.NFAState Maven / Gradle / Ivy

/*
 * Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * The Universal Permissive License (UPL), Version 1.0
 *
 * Subject to the condition set forth below, permission is hereby granted to any
 * person obtaining a copy of this software, associated documentation and/or
 * data (collectively the "Software"), free of charge and under any and all
 * copyright rights in the Software, and any and all patent rights owned or
 * freely licensable by each licensor hereunder covering either (i) the
 * unmodified Software as contributed to or provided by such licensor, or (ii)
 * the Larger Works (as defined below), to deal in both
 *
 * (a) the Software, and
 *
 * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
 * one is included with the Software each a "Larger Work" to which the Software
 * is contributed by such licensors),
 *
 * without restriction, including without limitation the rights to copy, create
 * derivative works of, display, perform, and distribute the Software and make,
 * use, sell, offer for sale, import, export, have made, and have sold the
 * Software and the Larger Work(s), and to sublicense the foregoing rights on
 * either these or other terms.
 *
 * This license is subject to the following condition:
 *
 * The above copyright notice and either this complete permission notice or at a
 * minimum a reference to the UPL must be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.oracle.truffle.regex.tregex.nfa;

import java.util.Arrays;
import java.util.Collection;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

import com.oracle.truffle.api.CompilerDirectives.CompilationFinal;
import com.oracle.truffle.api.CompilerDirectives.TruffleBoundary;
import com.oracle.truffle.regex.charset.CodePointSet;
import com.oracle.truffle.regex.tregex.TRegexOptions;
import com.oracle.truffle.regex.tregex.automaton.BasicState;
import com.oracle.truffle.regex.tregex.automaton.StateSet;
import com.oracle.truffle.regex.tregex.parser.ast.LookBehindAssertion;
import com.oracle.truffle.regex.tregex.parser.ast.RegexAST;
import com.oracle.truffle.regex.tregex.parser.ast.RegexASTNode;
import com.oracle.truffle.regex.tregex.util.json.Json;
import com.oracle.truffle.regex.tregex.util.json.JsonArray;
import com.oracle.truffle.regex.tregex.util.json.JsonConvertible;
import com.oracle.truffle.regex.tregex.util.json.JsonObject;
import com.oracle.truffle.regex.util.CompilationFinalBitSet;

/**
 * Represents a single state in the NFA form of a regular expression. States may either be matcher
 * states or final states, where a matcher state matches a set of characters and a final state
 * indicates that a match has been found. A state may represent multiple nodes of a regex AST, if it
 * is the result of a product automaton composition of the "regular" regular expression and its
 * lookaround assertions, e.g. the NFA of an expression like /(?=[ab])a/ will contain a state that
 * matches both the 'a' in the lookahead assertion as well as following 'a' in the expression, and
 * therefore will have a state set containing two AST nodes.
 * <p>
 * Two states are considered equal iff their IDs are equal (see {@link #equals(Object)}).
 */
// NOTE(review): the generic type arguments on BasicState, StateSet and Set were restored from the
// way this class uses them (element access, lambdas, otherwise-unused imports); confirm them against
// the declarations of BasicState and StateSet.
public final class NFAState extends BasicState<NFAState, NFAStateTransition> implements JsonConvertible {

    private static final byte FLAGS_NONE = 0;
    /** Flag bit for {@link #hasPrefixStates()}; located at bit position {@code N_FLAGS} inherited from the super class. */
    private static final byte FLAG_HAS_PREFIX_STATES = 1 << N_FLAGS;

    private static final NFAStateTransition[] EMPTY_TRANSITIONS = new NFAStateTransition[0];

    private final StateSet<RegexAST, ? extends RegexASTNode> stateSet;
    /** Index into {@link #getSuccessors()} of a transition to the anchored final state, or -1. */
    @CompilationFinal private short transitionToAnchoredFinalState = -1;
    /** Index into {@link #getSuccessors()} of a transition to the unanchored final state, or -1. */
    @CompilationFinal private short transitionToUnAnchoredFinalState = -1;
    /** Index into {@link #getPredecessors()} used as the reverse-direction counterpart, or -1. */
    @CompilationFinal private short revTransitionToAnchoredFinalState = -1;
    /** Index into {@link #getPredecessors()} used as the reverse-direction counterpart, or -1. */
    @CompilationFinal private short revTransitionToUnAnchoredFinalState = -1;
    /** Lazily allocated by {@link #addPossibleResult(int)}; {@code null} means "no results". */
    private CompilationFinalBitSet possibleResults;
    private final CodePointSet matcherBuilder;
    private final Set<LookBehindAssertion> finishedLookBehinds;

    /**
     * Creates a state without transitions and without possible results.
     *
     * @param id the state's ID.
     * @param stateSet the AST nodes represented by this state.
     * @param matcherBuilder the set of code points returned by {@link #getCharSet()}.
     * @param finishedLookBehinds the set returned by {@link #getFinishedLookBehinds()}.
     * @param hasPrefixStates initial value of {@link #hasPrefixStates()}.
     */
    public NFAState(short id,
                    StateSet<RegexAST, ? extends RegexASTNode> stateSet,
                    CodePointSet matcherBuilder,
                    Set<LookBehindAssertion> finishedLookBehinds,
                    boolean hasPrefixStates) {
        this(id, stateSet, hasPrefixStates ? FLAG_HAS_PREFIX_STATES : FLAGS_NONE, null, matcherBuilder, finishedLookBehinds);
    }

    /** Same as the public constructor, but takes the complete flags byte instead of a single boolean. */
    private NFAState(short id,
                    StateSet<RegexAST, ? extends RegexASTNode> stateSet,
                    byte flags,
                    CodePointSet matcherBuilder,
                    Set<LookBehindAssertion> finishedLookBehinds) {
        this(id, stateSet, flags, null, matcherBuilder, finishedLookBehinds);
    }

    /** Canonical constructor; all other constructors delegate here. The state starts with no transitions. */
    private NFAState(short id,
                    StateSet<RegexAST, ? extends RegexASTNode> stateSet,
                    byte flags,
                    CompilationFinalBitSet possibleResults,
                    CodePointSet matcherBuilder,
                    Set<LookBehindAssertion> finishedLookBehinds) {
        super(id, EMPTY_TRANSITIONS);
        setFlag(flags);
        this.stateSet = stateSet;
        this.possibleResults = possibleResults;
        this.matcherBuilder = matcherBuilder;
        this.finishedLookBehinds = finishedLookBehinds;
    }

    /**
     * Creates a new state with the given ID that shares this state's state set, flags, matcher and
     * finished look-behinds. Transitions and possible results are not copied.
     *
     * @param copyID ID of the new state.
     * @return the new state.
     */
    public NFAState createTraceFinderCopy(short copyID) {
        return new NFAState(copyID, getStateSet(), getFlags(), matcherBuilder, finishedLookBehinds);
    }

    /** @return the code point set this state was constructed with. */
    public CodePointSet getCharSet() {
        return matcherBuilder;
    }

    /** @return the set of look-behind assertions this state was constructed with (not copied). */
    public Set<LookBehindAssertion> getFinishedLookBehinds() {
        return finishedLookBehinds;
    }

    /** @return the set of AST nodes represented by this state. */
    public StateSet<RegexAST, ? extends RegexASTNode> getStateSet() {
        return stateSet;
    }

    public boolean hasPrefixStates() {
        return getFlag(FLAG_HAS_PREFIX_STATES);
    }

    public void setHasPrefixStates(boolean value) {
        setFlag(FLAG_HAS_PREFIX_STATES, value);
    }

    /**
     * @param forward {@code true} to query the successor direction, {@code false} for the
     *            predecessor direction.
     * @return {@code true} iff an index of a transition to the anchored final state is recorded for
     *         the given direction.
     */
    public boolean hasTransitionToAnchoredFinalState(boolean forward) {
        return (forward ? transitionToAnchoredFinalState : revTransitionToAnchoredFinalState) >= 0;
    }

    /** @return the recorded transition index for the given direction, or -1 if there is none. */
    public short getTransitionToAnchoredFinalStateId(boolean forward) {
        return forward ? transitionToAnchoredFinalState : revTransitionToAnchoredFinalState;
    }

    /**
     * Must only be called if {@link #hasTransitionToAnchoredFinalState(boolean)} is {@code true}
     * for the same direction.
     */
    public NFAStateTransition getTransitionToAnchoredFinalState(boolean forward) {
        assert hasTransitionToAnchoredFinalState(forward);
        return forward ? getSuccessors()[transitionToAnchoredFinalState] : getPredecessors()[revTransitionToAnchoredFinalState];
    }

    /**
     * @param forward {@code true} to query the successor direction, {@code false} for the
     *            predecessor direction.
     * @return {@code true} iff an index of a transition to the unanchored final state is recorded
     *         for the given direction.
     */
    @Override
    public boolean hasTransitionToUnAnchoredFinalState(boolean forward) {
        return (forward ? transitionToUnAnchoredFinalState : revTransitionToUnAnchoredFinalState) >= 0;
    }

    /**
     * Must only be called if {@link #hasTransitionToUnAnchoredFinalState(boolean)} is {@code true}
     * for the same direction.
     */
    public NFAStateTransition getTransitionToUnAnchoredFinalState(boolean forward) {
        assert hasTransitionToUnAnchoredFinalState(forward);
        return forward ? getSuccessors()[transitionToUnAnchoredFinalState] : getPredecessors()[revTransitionToUnAnchoredFinalState];
    }

    /** @return the recorded transition index for the given direction, or -1 if there is none. */
    public short getTransitionToUnAnchoredFinalStateId(boolean forward) {
        return forward ? transitionToUnAnchoredFinalState : revTransitionToUnAnchoredFinalState;
    }

    public boolean hasTransitionToFinalState(boolean forward) {
        return hasTransitionToAnchoredFinalState(forward) || hasTransitionToUnAnchoredFinalState(forward);
    }

    /**
     * Returns the smaller of the two recorded final state transition indices for the given
     * direction. Must only be called if {@link #hasTransitionToFinalState(boolean)} is
     * {@code true}.
     */
    public int getFirstTransitionToFinalStateIndex(boolean forward) {
        assert hasTransitionToFinalState(forward);
        // The unsigned conversion turns the "absent" marker -1 into 65535, so an absent index never
        // wins against a present one.
        return Math.min(Short.toUnsignedInt(getTransitionToAnchoredFinalStateId(forward)), Short.toUnsignedInt(getTransitionToUnAnchoredFinalStateId(forward)));
    }

    public NFAStateTransition getFirstTransitionToFinalState(boolean forward) {
        return getSuccessors(forward)[getFirstTransitionToFinalStateIndex(forward)];
    }

    /**
     * Appends the given transition to the end of the successors array and records its index if it
     * is the first known transition to a final state.
     */
    public void addLoopBackNext(NFAStateTransition transition) {
        // loopBack transitions always have minimal priority, so no sorting is necessary
        updateFinalStateTransitions(transition, (short) getSuccessors().length);
        setSuccessors(Arrays.copyOf(getSuccessors(), getSuccessors().length + 1));
        getSuccessors()[getSuccessors().length - 1] = transition;
    }

    /**
     * Removes the last entry of the successors array and clears any final state transition index
     * that pointed to it.
     */
    public void removeLoopBackNext() {
        setSuccessors(Arrays.copyOf(getSuccessors(), getSuccessors().length - 1));
        // after truncation, the new length equals the index of the removed transition
        if (transitionToAnchoredFinalState == getSuccessors().length) {
            transitionToAnchoredFinalState = -1;
        }
        if (transitionToUnAnchoredFinalState == getSuccessors().length) {
            transitionToUnAnchoredFinalState = -1;
        }
    }

    /**
     * Replaces the successors array and records the indices of the first transitions to the
     * anchored and unanchored final states, unless such indices are already recorded.
     *
     * @param transitions the new successors array (stored as-is).
     * @param createReverseTransitions if {@code true}, {@code incPredecessors()} is called on the
     *            target of every transition.
     */
    public void setSuccessors(NFAStateTransition[] transitions, boolean createReverseTransitions) {
        setSuccessors(transitions);
        for (short i = 0; i < transitions.length; i++) {
            NFAStateTransition t = transitions[i];
            updateFinalStateTransitions(t, i);
            if (createReverseTransitions) {
                t.getTarget().incPredecessors();
            }
        }
    }

    /**
     * Records {@code i} as the index of the transition to the anchored and/or unanchored final
     * state if the transition's target is such a state and no index has been recorded yet.
     */
    private void updateFinalStateTransitions(NFAStateTransition transition, short i) {
        if (transitionToAnchoredFinalState == -1 && transition.getTarget().isAnchoredFinalState()) {
            transitionToAnchoredFinalState = i;
        }
        if (transitionToUnAnchoredFinalState == -1 && transition.getTarget().isUnAnchoredFinalState()) {
            transitionToUnAnchoredFinalState = i;
        }
    }

    /**
     * Removes the first successor transition whose target is the given state (compared by
     * identity) and adjusts the recorded final state transition indices. Does nothing if no such
     * transition exists.
     */
    public void removeSuccessor(NFAState state) {
        int remove = indexOfTransition(state);
        if (remove == -1) {
            return;
        }
        NFAStateTransition[] newNext = new NFAStateTransition[getSuccessors().length - 1];
        System.arraycopy(getSuccessors(), 0, newNext, 0, remove);
        System.arraycopy(getSuccessors(), remove + 1, newNext, remove, newNext.length - remove);
        setSuccessors(newNext);
        // indices equal to the removed slot become invalid, larger ones shift down by one
        if (transitionToAnchoredFinalState == remove) {
            transitionToAnchoredFinalState = -1;
        } else if (transitionToAnchoredFinalState > remove) {
            transitionToAnchoredFinalState--;
        }
        if (transitionToUnAnchoredFinalState == remove) {
            transitionToUnAnchoredFinalState = -1;
        } else if (transitionToUnAnchoredFinalState > remove) {
            transitionToUnAnchoredFinalState--;
        }
    }

    /** @return index of the first successor transition targeting the given state (by identity), or -1. */
    private int indexOfTransition(NFAState target) {
        for (int i = 0; i < getSuccessors().length; i++) {
            if (getSuccessors()[i].getTarget() == target) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Registers every successor transition of this state as a predecessor of its target. If this
     * state is an anchored or unanchored initial state, the target's corresponding reverse
     * transition index is set to the value of {@code getNPredecessors()} observed right after the
     * registration.
     */
    public void linkPredecessors() {
        for (NFAStateTransition t : getSuccessors()) {
            t.getTarget().addPredecessor(t);
            // NOTE(review): this relies on getNPredecessors() denoting the slot just written by
            // addPredecessor(); both are defined in the super class - confirm there.
            if (isAnchoredInitialState()) {
                t.getTarget().revTransitionToAnchoredFinalState = (short) t.getTarget().getNPredecessors();
            }
            if (isUnAnchoredInitialState()) {
                t.getTarget().revTransitionToUnAnchoredFinalState = (short) t.getTarget().getNPredecessors();
            }
        }
    }

    /**
     * Set of possible pre-calculated result indices as generated by the
     * {@link NFATraceFinderGenerator}. This set must be sorted, since the index values indicate the
     * priority of their respective pre-calculated results. Example: /(a)|([ab])/ will yield two
     * possible results, where the one corresponding to capture group 1 will have the higher
     * priority, so when a single 'a' is encountered when searching for a match, the pre-calculated
     * result corresponding to capture group 1 must be preferred.
     *
     * @return the set of possible results, or the shared empty instance if none have been added.
     */
    public CompilationFinalBitSet getPossibleResults() {
        if (possibleResults == null) {
            return CompilationFinalBitSet.getEmptyInstance();
        }
        return possibleResults;
    }

    public boolean hasPossibleResults() {
        return !(possibleResults == null || possibleResults.isEmpty());
    }

    /** Adds the given index to the set of possible results, allocating the set on first use. */
    public void addPossibleResult(int index) {
        if (possibleResults == null) {
            possibleResults = new CompilationFinalBitSet(TRegexOptions.TRegexTraceFinderMaxNumberOfResults);
        }
        possibleResults.set(index);
    }

    /**
     * @return a string of the form {@code (a,b,...)[id]}, where {@code a,b,...} are the IDs of the
     *         elements of the state set and {@code id} is this state's ID.
     */
    @TruffleBoundary
    public String idToString() {
        return getStateSet().stream().map(x -> String.valueOf(x.getId())).collect(Collectors.joining(",", "(", ")")) + "[" + getId() + "]";
    }

    @TruffleBoundary
    @Override
    public String toString() {
        return idToString();
    }

    /** States are equal iff the other object is an {@link NFAState} with the same ID. */
    @Override
    public boolean equals(Object o) {
        return o instanceof NFAState && getId() == ((NFAState) o).getId();
    }

    @Override
    public int hashCode() {
        return getId();
    }

    @Override
    protected NFAStateTransition[] createTransitionsArray(int length) {
        return new NFAStateTransition[length];
    }

    /** Collects the source sections of all state set elements that have any, as a JSON array. */
    @TruffleBoundary
    private JsonArray sourceSectionsToJson() {
        return RegexAST.sourceSectionsToJson(getStateSet().stream().map(x -> getStateSet().getStateIndex().getSourceSections(x)).filter(Objects::nonNull).flatMap(Collection::stream));
    }

    /** JSON representation covering both directions: successors as "next", predecessors as "prev". */
    @TruffleBoundary
    @Override
    public JsonObject toJson() {
        return Json.obj(Json.prop("id", getId()),
                        Json.prop("stateSet", getStateSet().stream().map(x -> Json.val(x.getId()))),
                        Json.prop("sourceSections", sourceSectionsToJson()),
                        Json.prop("matcherBuilder", matcherBuilder.toString()),
                        Json.prop("forwardAnchoredFinalState", isAnchoredFinalState()),
                        Json.prop("forwardUnAnchoredFinalState", isUnAnchoredFinalState()),
                        Json.prop("reverseAnchoredFinalState", isAnchoredInitialState()),
                        Json.prop("reverseUnAnchoredFinalState", isUnAnchoredInitialState()),
                        Json.prop("next", Arrays.stream(getSuccessors()).map(x -> Json.val(x.getId()))),
                        Json.prop("prev", Arrays.stream(getPredecessors()).map(x -> Json.val(x.getId()))));
    }

    /** JSON representation restricted to one direction; only the transitions of that direction are listed. */
    @TruffleBoundary
    public JsonObject toJson(boolean forward) {
        return Json.obj(Json.prop("id", getId()),
                        Json.prop("stateSet", getStateSet().stream().map(x -> Json.val(x.getId()))),
                        Json.prop("sourceSections", sourceSectionsToJson()),
                        Json.prop("matcherBuilder", matcherBuilder.toString()),
                        Json.prop("anchoredFinalState", isAnchoredFinalState(forward)),
                        Json.prop("unAnchoredFinalState", isUnAnchoredFinalState(forward)),
                        Json.prop("transitions", Arrays.stream(getSuccessors(forward)).map(x -> Json.val(x.getId()))));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy