All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.regex.ARegularExpression Maven / Gradle / Ivy

There is a newer version: 10.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2013 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.regex;

import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.value.StringValue;

import java.util.List;

/**
 * Glue class to interface the Jakarta regex engine to Saxon
 * (The prefix 'A' indicates an Apache regular expression, as distinct from
 * a JDK regular expression).
 */

public class ARegularExpression implements RegularExpression {

    UnicodeString rawPattern;
    String rawFlags;
    REProgram regex;

    /**
     * Create and compile a regular expression
     * @param pattern the regular expression
     * @param flags the flags (ixsmq)
     * @param hostLanguage one of "XP20", "XP30", "XSD10", "XSD11". Also allow combinations, e.g. "XP20/XSD11".
     * @param warnings a list to be populated with any warnings arising during compilation of the regex
     * @throws XPathException if the regular expression is invalid
     */

    public ARegularExpression(CharSequence pattern, String flags, String hostLanguage, List warnings) throws XPathException {
        rawFlags = flags;
        REFlags reFlags;
        try {
            reFlags = new REFlags(flags, hostLanguage);
        } catch (RESyntaxException err) {
            throw new XPathException(err.getMessage(),  "FORX0001");
        }
        try {
            rawPattern = UnicodeString.makeUnicodeString(pattern);
            RECompiler comp2 = new RECompiler();
            comp2.setFlags(reFlags);
            regex = comp2.compile(rawPattern);
            if (warnings != null) {
                for (String s : comp2.getWarnings()) {
                    warnings.add(s);
                }
            }
        } catch (RESyntaxException err) {
            throw new XPathException(err.getMessage(), "FORX0002");
        }
    }

    /**
     * Determine whether the regular expression matches a given string in its entirety
     *
     * @param input the string to match
     * @return true if the string matches, false otherwise
     */
    public boolean matches(CharSequence input) {
        if (input.length() == 0) {
            return regex.isNullable();
        }
        REMatcher matcher = new REMatcher(regex);
        return matcher.anchoredMatch(UnicodeString.makeUnicodeString(input));
    }

    /**
     * Determine whether the regular expression contains a match of a given string
     *
     * @param input the string to match
     * @return true if the string matches, false otherwise
     */
    public boolean containsMatch(CharSequence input) {
        REMatcher matcher = new REMatcher(regex);
        return matcher.match(UnicodeString.makeUnicodeString(input), 0);
    }

    /**
     * Use this regular expression to tokenize an input string.
     *
     * @param input the string to be tokenized
     * @return a SequenceIterator containing the resulting tokens, as objects of type StringValue
     */
    public SequenceIterator tokenize(CharSequence input) {
        return new ATokenIterator(UnicodeString.makeUnicodeString(input), new REMatcher(regex));
    }

    /**
     * Use this regular expression to analyze an input string, in support of the XSLT
     * analyze-string instruction. The resulting RegexIterator provides both the matching and
     * non-matching substrings, and allows them to be distinguished. It also provides access
     * to matched subgroups.
     *
     * @param input the character string to be analyzed using the regular expression
     * @return an iterator over matched and unmatched substrings
     */
    public RegexIterator analyze(CharSequence input) {
        return new ARegexIterator(UnicodeString.makeUnicodeString(input), rawPattern, new REMatcher(regex));
    }

    /**
     * Replace all substrings of a supplied input string that match the regular expression
     * with a replacement string.
     *
     * @param input       the input string on which replacements are to be performed
     * @param replacement the replacement string in the format of the XPath replace() function
     * @return the result of performing the replacement
     * @throws net.sf.saxon.trans.XPathException
     *          if the replacement string is invalid
     */
    public CharSequence replace(CharSequence input, CharSequence replacement) throws XPathException {
        REMatcher matcher = new REMatcher(regex);
        UnicodeString in = UnicodeString.makeUnicodeString(input);
        UnicodeString rep = UnicodeString.makeUnicodeString(replacement);
        try {
            return matcher.subst(in, rep);
        } catch (RESyntaxException err) {
            throw new XPathException(err.getMessage(), "FORX0004");
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy