All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.expr.instruct.AnalyzeString Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.expr.instruct;

import net.sf.saxon.Configuration;
import net.sf.saxon.event.Outputter;
import net.sf.saxon.event.PipelineConfiguration;
import net.sf.saxon.expr.*;
import net.sf.saxon.expr.parser.*;
import net.sf.saxon.om.*;
import net.sf.saxon.regex.RegexIterator;
import net.sf.saxon.regex.RegularExpression;
import net.sf.saxon.str.StringView;
import net.sf.saxon.trace.ExpressionPresenter;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.tree.iter.EmptyIterator;
import net.sf.saxon.type.*;
import net.sf.saxon.value.SequenceType;
import net.sf.saxon.value.StringValue;

import java.util.ArrayList;
import java.util.List;

/**
 * An xsl:analyze-string element in the stylesheet. New at XSLT 2.0
 */

public class AnalyzeString extends Instruction implements ContextOriginator {

    private final Operand selectOp;
    private final Operand regexOp;
    private final Operand flagsOp;
    private Operand matchingOp;
    private Operand nonMatchingOp;

    private final static OperandRole ACTION =
            new OperandRole(OperandRole.USES_NEW_FOCUS | OperandRole.HIGHER_ORDER, OperandUsage.NAVIGATION);
    private final static OperandRole SELECT =
            new OperandRole(OperandRole.SETS_NEW_FOCUS, OperandUsage.ABSORPTION, SequenceType.SINGLE_STRING);

    private RegularExpression pattern;

    /**
     * Construct an AnalyzeString instruction
     *
     * @param select      the expression containing the input string
     * @param regex       the regular expression
     * @param flags       the flags parameter
     * @param matching    actions to be applied to a matching substring. May be null.
     * @param nonMatching actions to be applied to a non-matching substring. May be null.
     * @param pattern     the compiled regular expression, if it was known statically
     */
    public AnalyzeString(Expression select,
                         Expression regex,
                         Expression flags,
                         Expression matching,
                         Expression nonMatching,
                         RegularExpression pattern) {
        selectOp = new Operand(this, select, SELECT);
        regexOp = new Operand(this, regex, OperandRole.SINGLE_ATOMIC);
        flagsOp = new Operand(this, flags, OperandRole.SINGLE_ATOMIC);
        if (matching != null) {
            matchingOp = new Operand(this, matching, ACTION);
        }
        if (nonMatching != null) {
            nonMatchingOp = new Operand(this, nonMatching, ACTION);
        }
        this.pattern = pattern;
    }

    public Expression getSelect() {
        return selectOp.getChildExpression();
    }

    public void setSelect(Expression select) {
        selectOp.setChildExpression(select);
    }

    public Expression getRegex() {
        return regexOp.getChildExpression();
    }

    public void setRegex(Expression regex) {
        regexOp.setChildExpression(regex);
    }

    public Expression getFlags() {
        return flagsOp.getChildExpression();
    }

    public void setFlags(Expression flags) {
        flagsOp.setChildExpression(flags);
    }

    public Expression getMatching() {
        return matchingOp == null ? null : matchingOp.getChildExpression();
    }

    public void setMatching(Expression matching) {
        if (matchingOp != null) {
            matchingOp.setChildExpression(matching);
        } else {
            matchingOp = new Operand(this, matching, ACTION);
        }
    }

    public Expression getNonMatching() {
        return nonMatchingOp == null ? null : nonMatchingOp.getChildExpression();
    }

    public void setNonMatching(Expression nonMatching) {
        if (nonMatchingOp != null) {
            nonMatchingOp.setChildExpression(nonMatching);
        } else {
            nonMatchingOp = new Operand(this, nonMatching, ACTION);
        }
    }

    @Override
    public int getInstructionNameCode() {
        return StandardNames.XSL_ANALYZE_STRING;
    }

    @Override
    public Iterable operands() {
        return operandSparseList(selectOp, regexOp, flagsOp, matchingOp, nonMatchingOp);
    }

    /**
     * An implementation of Expression must provide at least one of the methods evaluateItem(), iterate(), or process().
     * This method indicates which of these methods is prefered.
     */

    @Override
    public int getImplementationMethod() {
        return Expression.PROCESS_METHOD | Expression.ITERATE_METHOD;
    }

    /**
     * @return the compiled regular expression, if it was known statically
     */
    public RegularExpression getPatternExpression() {
        return pattern;
    }

    /**
     * Ask whether common subexpressions found in the operands of this expression can
     * be extracted and evaluated outside the expression itself. The result is irrelevant
     * in the case of operands evaluated with a different focus, which will never be
     * extracted in this way, even if they have no focus dependency.
     *
     * @return false for this kind of expression
     */
    @Override
    public boolean allowExtractingCommonSubexpressions() {
        return false;
    }

    /*@NotNull*/
    @Override
    public Expression typeCheck(ExpressionVisitor visitor, ContextItemStaticInfo contextInfo) throws XPathException {

        Configuration config = visitor.getConfiguration();
        selectOp.typeCheck(visitor, contextInfo);
        regexOp.typeCheck(visitor, contextInfo);
        flagsOp.typeCheck(visitor, contextInfo);

        if (matchingOp != null) {
            matchingOp.typeCheck(visitor, config.makeContextItemStaticInfo(BuiltInAtomicType.STRING, false));
        }
        if (nonMatchingOp != null) {
            nonMatchingOp.typeCheck(visitor, config.makeContextItemStaticInfo(BuiltInAtomicType.STRING, false));
        }

        TypeChecker tc = visitor.getConfiguration().getTypeChecker(false);

        RoleDiagnostic role = new RoleDiagnostic(RoleDiagnostic.INSTRUCTION, "analyze-string/select", 0);
        SequenceType required = SequenceType.OPTIONAL_STRING;
        // see bug 7976
        setSelect(tc.staticTypeCheck(getSelect(), required, role, visitor));

        role = new RoleDiagnostic(RoleDiagnostic.INSTRUCTION, "analyze-string/regex", 0);
        setRegex(tc.staticTypeCheck(getRegex(), SequenceType.SINGLE_STRING, role, visitor));

        role = new RoleDiagnostic(RoleDiagnostic.INSTRUCTION, "analyze-string/flags", 0);
        setFlags(tc.staticTypeCheck(getFlags(), SequenceType.SINGLE_STRING, role, visitor));

        return this;
    }


    /*@NotNull*/
    @Override
    public Expression optimize(ExpressionVisitor visitor, ContextItemStaticInfo contextInfo) throws XPathException {

        Configuration config = visitor.getConfiguration();
        selectOp.optimize(visitor, contextInfo);
        regexOp.optimize(visitor, contextInfo);
        flagsOp.optimize(visitor, contextInfo);

        if (matchingOp != null) {
            matchingOp.optimize(visitor, config.makeContextItemStaticInfo(BuiltInAtomicType.STRING, false));
        }
        if (nonMatchingOp != null) {
            nonMatchingOp.optimize(visitor, config.makeContextItemStaticInfo(BuiltInAtomicType.STRING, false));
        }

        List warnings = new ArrayList<>();
        precomputeRegex(config, warnings);
        for (String w : warnings) {
            visitor.getStaticContext().issueWarning(w, getLocation());
        }

        return this;
    }

    public void precomputeRegex(Configuration config, List warnings) throws XPathException {
        if (pattern == null && getRegex() instanceof StringLiteral && getFlags() instanceof StringLiteral) {
            try {
                final String regex = ((StringLiteral) this.getRegex()).stringify();
                final String flagstr = ((StringLiteral) getFlags()).stringify();

                String hostLang = "XP30";
                pattern = config.compileRegularExpression(StringView.tidy(regex), flagstr, hostLang, warnings);

            } catch (XPathException err) {
                if ("XTDE1150".equals(err.getErrorCodeLocalPart())) {
                    throw err;
                }
                if ("FORX0001".equals(err.getErrorCodeLocalPart())) {
                    invalidRegex("Error in regular expression flags: " + err, "FORX0001");
                } else {
                    invalidRegex("Error in regular expression: " + err, err.getErrorCodeLocalPart());
                }
            }
        }
    }

    private void invalidRegex(String message, String errorCode) throws XPathException {
        pattern = null;
        XPathException err = new XPathException(message, errorCode);
        err.setLocation(getLocation());
        throw err;
    }

    /**
     * Copy an expression. This makes a deep copy.
     *
     * @return the copy of the original expression
     * @param rm the rebinding map
     */

    /*@NotNull*/
    @Override
    public Expression copy(RebindingMap rm) {
        AnalyzeString a2 = new AnalyzeString(
                copy(getSelect(), rm), copy(getRegex(), rm), copy(getFlags(), rm), copy(getMatching(), rm), copy(getNonMatching(), rm), pattern);
        ExpressionTool.copyLocationInfo(this, a2);
        return a2;
    }

    private Expression copy(Expression exp, RebindingMap rebindings) {
        return exp == null ? null : exp.copy(rebindings);
    }


    /**
     * Check that any elements and attributes constructed or returned by this expression are acceptable
     * in the content model of a given complex type. It's always OK to say yes, since the check will be
     * repeated at run-time. The process of checking element and attribute constructors against the content
     * model of a complex type also registers the type of content expected of those constructors, so the
     * static validation can continue recursively.
     */

    @Override
    public void checkPermittedContents(SchemaType parentType, boolean whole) throws XPathException {
        if (getMatching() != null) {
            getMatching().checkPermittedContents(parentType, false);
        }
        if (getNonMatching() != null) {
            getNonMatching().checkPermittedContents(parentType, false);
        }
    }

    /**
     * Get the item type of the items returned by evaluating this instruction
     *
     * @return the static item type of the instruction
     */

    /*@NotNull*/
    @Override
    public ItemType getItemType() {
        if (getMatching() != null) {
            if (getNonMatching() != null) {
                TypeHierarchy th = getConfiguration().getTypeHierarchy();
                return Type.getCommonSuperType(getMatching().getItemType(), getNonMatching().getItemType(), th);
            } else {
                return getMatching().getItemType();
            }
        } else {
            if (getNonMatching() != null) {
                return getNonMatching().getItemType();
            } else {
                return ErrorType.getInstance();
            }
        }
    }

    /**
     * Compute the dependencies of an expression, as the union of the
     * dependencies of its subexpressions. (This is overridden for path expressions
     * and filter expressions, where the dependencies of a subexpression are not all
     * propogated). This method should be called only once, to compute the dependencies;
     * after that, getDependencies should be used.
     *
     * @return the depencies, as a bit-mask
     */

    @Override
    public int computeDependencies() {
        // some of the dependencies in the "action" part and in the grouping and sort keys aren't relevant,
        // because they don't depend on values set outside the for-each-group expression
        int dependencies = 0;
        dependencies |= getSelect().getDependencies();
        dependencies |= getRegex().getDependencies();
        dependencies |= getFlags().getDependencies();
        if (getMatching() != null) {
            dependencies |= getMatching().getDependencies() & ~
                    (StaticProperty.DEPENDS_ON_FOCUS | StaticProperty.DEPENDS_ON_REGEX_GROUP);
        }
        if (getNonMatching() != null) {
            dependencies |= getNonMatching().getDependencies() & ~
                    (StaticProperty.DEPENDS_ON_FOCUS | StaticProperty.DEPENDS_ON_REGEX_GROUP);
        }
        return dependencies;
    }


    /**
     * ProcessLeavingTail: called to do the real work of this instruction. This method
     * must be implemented in each subclass. The results of the instruction are written
     * to the current Receiver, which can be obtained via the Controller.
     *
     *
     * @param output the destination for the result
     * @param context The dynamic context of the transformation, giving access to the current node,
     *                the current variables, etc.
     * @return null if the instruction has completed execution; or a TailCall indicating
     *         a function call or template call that is delegated to the caller, to be made after the stack has
     *         been unwound so as to save stack space.
     */

    @Override
    public TailCall processLeavingTail(Outputter output, XPathContext context) throws XPathException {
        RegexIterator iter = getRegexIterator(context);
        XPathContextMajor c2 = context.newContext();
        c2.setOrigin(this);
        FocusIterator focusIter = c2.trackFocus(iter);
        c2.setCurrentRegexIterator(iter);

        PipelineConfiguration pipe = output.getPipelineConfiguration();
        pipe.setXPathContext(c2);

        Item it;
        while ((it = focusIter.next()) != null) {
            if (iter.isMatching()) {
                if (getMatching() != null) {
                    getMatching().process(output, c2);
                }
            } else {
                if (getNonMatching() != null) {
                    getNonMatching().process(output, c2);
                }
            }
        }

        pipe.setXPathContext(context);
        return null;

    }

    /**
     * Get an iterator over the substrings defined by the regular expression
     *
     * @param context the evaluation context
     * @return an iterator that returns matching and nonmatching substrings
     * @throws XPathException if evaluation fails with a dynamic error
     */

    private RegexIterator getRegexIterator(XPathContext context) throws XPathException {
        StringValue input = (StringValue)getSelect().evaluateItem(context);
        if (input == null) {
            input = StringValue.EMPTY_STRING;
        }
        RegularExpression re = pattern;
        if (re == null) {
            String flagstr = getFlags().evaluateAsString(context).toString();
            StringValue regexString = (StringValue)getRegex().evaluateItem(context);
            re = context.getConfiguration().compileRegularExpression(
                    regexString.getUnicodeStringValue(), flagstr, "XP30", null);
        }

        return re.analyze(input.getUnicodeStringValue());
    }

    /**
     * Return an Iterator to iterate over the values of a sequence. The value of every
     * expression can be regarded as a sequence, so this method is supported for all
     * expressions. This default implementation handles iteration for expressions that
     * return singleton values: for non-singleton expressions, the subclass must
     * provide its own implementation.
     *
     * @param context supplies the context for evaluation
     * @return a SequenceIterator that can be used to iterate over the result
     *         of the expression
     * @throws net.sf.saxon.trans.XPathException
     *          if any dynamic error occurs evaluating the
     *          expression
     */

    /*@NotNull*/
    @Override
    public SequenceIterator iterate(XPathContext context) throws XPathException {
        RegexIterator iter = getRegexIterator(context);
        XPathContextMajor c2 = context.newContext();
        c2.setOrigin(this);
        c2.trackFocus(iter);
        c2.setCurrentRegexIterator(iter);

        Expression matchingExpr = getMatching();
        Expression nonMatchExpr = getNonMatching();
        return new ContextMappingIterator(cxt -> {
            if (iter.isMatching()) {
                if (matchingExpr != null) {
                    return matchingExpr.iterate(c2);
                }
            } else {
                if (nonMatchExpr != null) {
                    return nonMatchExpr.iterate(c2);
                }
            }
            return EmptyIterator.getInstance();
        }, c2);
    }

    /**
     * Get a name identifying the kind of expression, in terms meaningful to a user.
     *
     * @return a name identifying the kind of expression, in terms meaningful to a user.
     * The name will always be in the form of a lexical XML QName, and should match the name used
     * in explain() output displaying the expression.
     */
    @Override
    public String getExpressionName() {
        return "analyzeString";
    }

    /**
     * Diagnostic print of expression structure. The abstract expression tree
     * is written to the supplied output destination.
     */

    @Override
    public void export(ExpressionPresenter out) throws XPathException {
        out.startElement("analyzeString", this);
        out.setChildRole("select");
        getSelect().export(out);
        out.setChildRole("regex");
        getRegex().export(out);
        out.setChildRole("flags");
        getFlags().export(out);
        if (getMatching() != null) {
            out.setChildRole("matching");
            getMatching().export(out);
        }
        if (getNonMatching() != null) {
            out.setChildRole("nonMatching");
            getNonMatching().export(out);
        }
        out.endElement();
    }


}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy