// net.sf.saxon.value.AnyURIValue Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of Saxon-HE Show documentation
// The XSLT and XQuery Processor
// There is a newer version: 12.5
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.value;

import net.sf.saxon.str.*;
import net.sf.saxon.transpile.CSharpReplaceBody;
import net.sf.saxon.type.AtomicType;
import net.sf.saxon.type.BuiltInAtomicType;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;


/**
 * An XPath value of type xs:anyURI.
 * <p>This is implemented as a subtype of StringValue even though xs:anyURI is not a subtype of
 * xs:string in the XPath type hierarchy. This enables type promotion from URI to String to happen
 * automatically in most cases where it is appropriate.</p>
 * <p>This implementation of xs:anyURI allows any string to be contained in the value space. To check that
 * the URI is valid according to some set of syntax rules, the caller should invoke a
 * {@link net.sf.saxon.lib.StandardURIChecker} before constructing the AnyURIValue.</p>
 */

public final class AnyURIValue extends StringValue {

    /**
     * An xs:anyURI value constructed from the empty UnicodeString
     */

    /*@NotNull*/ public static final AnyURIValue EMPTY_URI = new AnyURIValue(EmptyUnicodeString.getInstance());


    /**
     * Constructor
     *
     * @param value the String value. Null is taken as equivalent to "". This constructor
     *              does not check that the value is a valid anyURI instance. It does however
     *              perform whitespace normalization.
     */

    public AnyURIValue(UnicodeString value) {
        super(value == null ? (UnicodeString) EmptyUnicodeString.getInstance() : Whitespace.collapseWhitespace(value));
        typeLabel = BuiltInAtomicType.ANY_URI;
    }

    /**
     * Constructor taking a Java String. The string is converted using {@code StringView.tidy}
     * and then handled exactly as by {@link #AnyURIValue(UnicodeString)}: whitespace is collapsed
     * and no check is made that the value is a valid anyURI instance.
     *
     * @param value the String value
     */

    public AnyURIValue(String value) {
        this(StringView.tidy(value));
    }

    /**
     * Constructor for a user-defined subtype of anyURI
     *
     * @param value the String value. Null is taken as equivalent to "". Whitespace in the
     *              value is collapsed before it is stored.
     * @param type  a user-defined subtype of anyURI. It is the caller's responsibility
     *              to ensure that this is actually a subtype of anyURI, and that the value conforms
     *              to the definition of this type.
     */

    public AnyURIValue(UnicodeString value, AtomicType type) {
        super(value == null ? "" : Whitespace.collapseWhitespace(value).toString(), type);
    }


    /**
     * Create a copy of this atomic value, with a different type label
     *
     * @param typeLabel the type label of the new copy. The caller is responsible for checking that
     *                  the value actually conforms to this type.
     * @return a new AnyURIValue with the same string content and the supplied type label
     */

    /*@NotNull*/
    @Override
    public AnyURIValue copyAsSubType(AtomicType typeLabel) {
        AnyURIValue v = new AnyURIValue(this.getUnicodeStringValue());
        v.typeLabel = typeLabel;
        return v;
    }

    /**
     * Convert this value to an xs:string
     *
     * @return a new StringValue, labelled as xs:string, holding the same content as this URI
     */

    public StringValue convertToString() {
        return new StringValue(getContent(), BuiltInAtomicType.STRING);
    }

    /**
     * Get the primitive type of this value
     *
     * @return always {@link BuiltInAtomicType#ANY_URI}
     */

    /*@NotNull*/
    @Override
    public BuiltInAtomicType getPrimitiveType() {
        return BuiltInAtomicType.ANY_URI;
    }


    /**
     * Evaluate all percent-escapes in a string, applying UTF-8 decoding to each run of
     * consecutive escaped octets.
     * <ul>
     * <li>If a sequence of escaped octets is not valid UTF-8 then the erroneous octets
     * are replaced with '\uFFFD'.</li>
     * <li>Any "%" found between "[" and "]" is left alone: it is taken to belong to an IPv6
     * literal with a scope_id.</li>
     * <li>A "%" that is not followed by two hexadecimal digits (including a "%" too close to
     * the end of the string) is not an escape; it is copied to the result unchanged.</li>
     * </ul>
     *
     * @param s the string to be decoded; may be null
     * @return the decoded string. The supplied string is returned as is when it is null or
     * contains no "%" character.
     */

    /*@Nullable*/
    @CSharpReplaceBody(code="return System.Uri.UnescapeDataString(s);")
    public static String decode(/*@Nullable*/ String s) {
        if (s == null || s.indexOf('%') < 0) {
            // Nothing to decode; this also covers the zero-length string
            return s;
        }
        final int n = s.length();

        StringBuilder sb = new StringBuilder(n);
        // An escape occupies three characters, so a run can never hold more than n/3 octets
        ByteBuffer bb = ByteBuffer.allocate(n / 3);
        boolean betweenBrackets = false;

        int i = 0;
        while (i < n) {
            char c = s.charAt(i);
            if (c == '[') {
                betweenBrackets = true;
            } else if (betweenBrackets && c == ']') {
                betweenBrackets = false;
            }
            if (c != '%' || betweenBrackets || !isEscapeAt(s, i)) {
                // Ordinary character, bracketed "%", or malformed escape: copy unchanged
                sb.append(c);
                i++;
                continue;
            }
            // Collect the whole run of adjacent escapes before decoding, because a single
            // character may be represented by several UTF-8 octets
            bb.clear();
            do {
                bb.put(hex(s.charAt(i + 1), s.charAt(i + 2)));
                i += 3;
            } while (isEscapeAt(s, i));
            bb.flip();
            // Charset.decode() substitutes the replacement character for malformed input
            sb.append(StandardCharsets.UTF_8.decode(bb));
        }

        return sb.toString();
    }

    /**
     * Test whether a well-formed escape (a "%" followed by two hexadecimal digits)
     * starts at a given position
     *
     * @param s the string being decoded
     * @param i the position to test; may be at or beyond the end of the string
     * @return true if the three characters starting at position i form an escape "%XX"
     */

    private static boolean isEscapeAt(String s, int i) {
        return i + 2 < s.length()
                && s.charAt(i) == '%'
                && hexToDec(s.charAt(i + 1)) >= 0
                && hexToDec(s.charAt(i + 2)) >= 0;
    }

    /**
     * Combine two hexadecimal digits into the octet they represent
     *
     * @param high the digit supplying the four most significant bits
     * @param low  the digit supplying the four least significant bits
     * @return the octet value. Both characters must be hexadecimal digits; the caller
     * is expected to have checked this.
     */

    private static byte hex(char high, char low) {
        return (byte) ((hexToDec(high) << 4) | hexToDec(low));
    }

    /**
     * Get the numeric value of an ASCII hexadecimal digit
     *
     * @param c the character to be converted
     * @return the value in the range 0 to 15 if the character is one of 0-9, a-f, or A-F;
     * otherwise -1
     */

    private static int hexToDec(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        } else if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        } else if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        } else {
            return -1;
        }
    }


}