net.sf.saxon.regex.REProgram Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of saxon-he Show documentation
Show all versions of saxon-he Show documentation
An OSGi bundle for Saxon-HE
//////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // Copyright (c) 2013 Saxonica Limited. // This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0. //////////////////////////////////////////////////////////////////////////////////////////////////////////////////// /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * Originally part of Apache's Jakarta project (downloaded January 2012), * this file has been extensively modified for integration into Saxon by * Michael Kay, Saxonica. */ package net.sf.saxon.regex; import net.sf.saxon.z.*; import java.io.PrintStream; import java.io.Serializable; /** * A class that holds compiled regular expressions. 
* * @see net.sf.saxon.regex.REMatcher * @see RECompiler * * @author Jonathan Locke * @version $Id: REProgram.java 518156 2007-03-14 14:31:26Z vgritsenko $ */ public class REProgram implements Serializable { static final int OPT_HASBACKREFS = 1; static final int OPT_HASBOL = 2; Operation[] instructions; REFlags flags; UnicodeString prefix; // Prefix string optimization IntPredicate initialCharClass; int optimizationFlags; // Optimization flags (REProgram.OPT_*) int maxParens = -1; boolean nullable = false; /** * Constructs a program object from an array of compiled operations. The flags are stored first, * then the instructions are installed via setInstructions, and finally the paren count is recorded. * @param instructions Array with RE opcode instructions in it. The "next" * pointers within the operations must already have been converted to absolute * offsets. * @param parens Count of parens in the program; stored in maxParens * @param flags Flags for the regular expression; assigned to the flags field before * setInstructions is called */ public REProgram(Operation[] instructions, int parens, REFlags flags) { this.flags = flags; setInstructions(instructions); this.maxParens = parens; } /** * Sets a new regular expression program to run. It is this method which * performs any special compile-time search optimizations. Currently only * two optimizations are in place - one which checks for backreferences * (so that they can be lazily allocated) and another which attempts to * find a prefix anchor string so that substantial amounts of input can * potentially be skipped without running the actual program. 
* @param instructions Program instruction buffer */ private void setInstructions(Operation[] instructions) { // Save reference to instruction array this.instructions = instructions; // Initialize other program-related variables this.optimizationFlags = 0; this.prefix = null; // Try various compile-time optimizations if there's a program if (instructions != null && instructions.length != 0) { int first = 0; while (instructions[first] instanceof Operation.OpContinue) { first++; } if (instructions[first] instanceof Operation.OpAtom) { prefix = ((Operation.OpAtom)instructions[first]).atom; } if (instructions[first] instanceof Operation.OpCharClass) { initialCharClass = ((Operation.OpCharClass)instructions[first]).predicate; } // If the first node is a branch if (instructions[first] instanceof Operation.OpBranch) { // to the end node int next = instructions[first].next; if (instructions[next] instanceof Operation.OpEndProgram) { final Operation nextOp = instructions[first+1]; // the branch starts with an atom if (nextOp instanceof Operation.OpAtom) { // then get that atom as an prefix because there's no other choice this.prefix = ((Operation.OpAtom)nextOp).atom; } // the branch starts with a BOL else if (nextOp instanceof Operation.OpBOL) { // then set the flag indicating that BOL is present this.optimizationFlags |= OPT_HASBOL; } } } // Check for backreferences for (Operation op : instructions) { if (op instanceof Operation.OpBackReference) { optimizationFlags |= OPT_HASBACKREFS; break; } } // Check for deterministic quantifiers; the optimization causes constructs such as A* or [0-9]+ to // be evaluated using iteration rather than recursion if there is no ambiguity about the ending condition, // which means there will never be any need to backtrack. boolean caseBlind = flags.isCaseIndependent(); for (int i=0; i
will return null. * @return A copy of the prefix of current compiled RE program */ public UnicodeString getPrefix() { return prefix; } /** * Output a human-readable printout of the program */ public void display(PrintStream out) { for (int i=0; igetPrefix