All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.regex.REProgram Maven / Gradle / Ivy

There is a newer version: 10.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2013 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Originally part of Apache's Jakarta project (downloaded January 2012),
 * this file has been extensively modified for integration into Saxon by
 * Michael Kay, Saxonica.
 */

package net.sf.saxon.regex;

import net.sf.saxon.z.*;

import java.io.PrintStream;
import java.io.Serializable;

/**
 * A class that holds compiled regular expressions.
 *
 * @see net.sf.saxon.regex.REMatcher
 * @see RECompiler
 *
 * @author Jonathan Locke
 * @version $Id: REProgram.java 518156 2007-03-14 14:31:26Z vgritsenko $
 */
public class REProgram implements Serializable
{
    // Bit set in optimizationFlags when the program contains a back-reference operation
    static final int OPT_HASBACKREFS = 1;
    // Bit set in optimizationFlags when the leading branch of the program starts with a BOL operation
    static final int OPT_HASBOL      = 2;

    // The compiled operations making up the program, as supplied to the constructor
    Operation[] instructions;
    // Flags supplied to the constructor; consulted (e.g. for case-independence) when the program is analysed
    REFlags flags;
    // Prefix string optimization: the atom that starts the program, or null if none was identified
    UnicodeString prefix;
    // Predicate taken from the program's first operation when that operation is a character class
    IntPredicate initialCharClass;
    // Optimization flags (REProgram.OPT_*), combined with bitwise OR
    int optimizationFlags;
    // Count of parens in the program; remains -1 until the constructor records the real value
    int maxParens = -1;
    // NOTE(review): never assigned in the code visible here; presumably records whether the
    // expression can match a zero-length string - confirm against the rest of the class
    boolean nullable = false;

    /**
     * Constructs a program object from an array of compiled operations.
     * The flags are stored before the instructions are installed, because
     * the analysis performed by <code>setInstructions</code> consults them.
     * @param instructions Array with RE opcode instructions in it. The "next"
     * pointers within the operations must already have been converted to absolute
     * offsets.
     * @param parens Count of parens in the program
     * @param flags The flags to associate with this program
     */
    public REProgram(Operation[] instructions, int parens, REFlags flags) {
        // Must be assigned first: setInstructions() reads this.flags
        this.flags = flags;
        setInstructions(instructions);
        this.maxParens = parens;
    }



    /**
     * Sets a new regular expression program to run.  It is this method which
     * performs any special compile-time search optimizations.  Currently only
     * two optimizations are in place - one which checks for backreferences
     * (so that they can be lazily allocated) and another which attempts to
     * find a prefix anchor string so that substantial amounts of input can
     * potentially be skipped without running the actual program.
     * @param instructions Program instruction buffer
     */
    private void setInstructions(Operation[] instructions) {
        // Save reference to instruction array
        this.instructions = instructions;

        // Initialize other program-related variables
        this.optimizationFlags = 0;
        this.prefix = null;

        // Try various compile-time optimizations if there's a program

        if (instructions != null && instructions.length != 0) {
            int first = 0;
            while (instructions[first] instanceof Operation.OpContinue) {
                first++;
            }
            if (instructions[first] instanceof Operation.OpAtom) {
                prefix = ((Operation.OpAtom)instructions[first]).atom;
            }
            if (instructions[first] instanceof Operation.OpCharClass) {
                initialCharClass = ((Operation.OpCharClass)instructions[first]).predicate;
            }
            // If the first node is a branch
            if (instructions[first] instanceof Operation.OpBranch) {
                // to the end node
                int next = instructions[first].next;
                if (instructions[next] instanceof Operation.OpEndProgram) {
                    final Operation nextOp = instructions[first+1];
                    // the branch starts with an atom
                    if (nextOp instanceof Operation.OpAtom) {
                        // then get that atom as an prefix because there's no other choice
                        this.prefix = ((Operation.OpAtom)nextOp).atom;
                    }
                    // the branch starts with a BOL
                    else if (nextOp instanceof Operation.OpBOL) {
                        // then set the flag indicating that BOL is present
                        this.optimizationFlags |= OPT_HASBOL;
                    }
                }
            }

            // Check for backreferences
            for (Operation op : instructions) {
                if (op instanceof Operation.OpBackReference) {
                    optimizationFlags |= OPT_HASBACKREFS;
                    break;
                }
            }

            // Check for deterministic quantifiers; the optimization causes constructs such as A* or [0-9]+ to
            // be evaluated using iteration rather than recursion if there is no ambiguity about the ending condition,
            // which means there will never be any need to backtrack.
            boolean caseBlind = flags.isCaseIndependent();
            for (int i=0; igetPrefix will return null.
     * @return The prefix of the current compiled RE program (the stored object itself, not a copy), or null if none was identified
     */
    public UnicodeString getPrefix() {
        // Hands back the stored field directly (no defensive copy is made);
        // the value is null when no prefix has been recorded for this program.
        final UnicodeString currentPrefix = this.prefix;
        return currentPrefix;
    }

    /**
     * Output a human-readable printout of the program
     */

    public void display(PrintStream out) {
        for (int i=0; i
            


© 2015 - 2024 Weber Informatics LLC | Privacy Policy