All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.value.AnyURIValue Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2022 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.value;

import net.sf.saxon.str.*;
import net.sf.saxon.transpile.CSharpReplaceBody;
import net.sf.saxon.type.AtomicType;
import net.sf.saxon.type.BuiltInAtomicType;

import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;


/**
 * An XPath value of type xs:anyURI.
 * 

This is implemented as a subtype of StringValue even though xs:anyURI is not a subtype of * xs:string in the XPath type hierarchy. This enables type promotion from URI to String to happen * automatically in most cases where it is appropriate.

*

This implementation of xs:anyURI allows any string to be contained in the value space. To check that * the URI is valid according to some set of syntax rules, the caller should invoke a {@link net.sf.saxon.lib.StandardURIChecker} * before constructing the AnyURIValue.

*/ public final class AnyURIValue extends StringValue { /*@NotNull*/ public static final AnyURIValue EMPTY_URI = new AnyURIValue(EmptyUnicodeString.getInstance()); /** * Constructor * * @param value the String value. Null is taken as equivalent to "". This constructor * does not check that the value is a valid anyURI instance. It does however * perform whitespace normalization. */ public AnyURIValue(UnicodeString value) { super(value == null ? (UnicodeString) EmptyUnicodeString.getInstance() : Whitespace.collapseWhitespace(value)); typeLabel = BuiltInAtomicType.ANY_URI; } public AnyURIValue(String value) { this(StringView.tidy(value)); } /** * Constructor for a user-defined subtype of anyURI * * @param value the String value. Null is taken as equivalent to "". * @param type a user-defined subtype of anyURI. It is the caller's responsibility * to ensure that this is actually a subtype of anyURI, and that the value conforms * to the definition of this type. */ public AnyURIValue(UnicodeString value, AtomicType type) { super(value == null ? "" : Whitespace.collapseWhitespace(value).toString(), type); } /** * Create a copy of this atomic value, with a different type label * * @param typeLabel the type label of the new copy. The caller is responsible for checking that * the value actually conforms to this type. */ /*@NotNull*/ @Override public AnyURIValue copyAsSubType(AtomicType typeLabel) { AnyURIValue v = new AnyURIValue(this.getUnicodeStringValue()); v.typeLabel = typeLabel; return v; } public StringValue convertToString() { return new StringValue(getContent(), BuiltInAtomicType.STRING); } /*@NotNull*/ @Override public BuiltInAtomicType getPrimitiveType() { return BuiltInAtomicType.ANY_URI; } /*@Nullable*/ @CSharpReplaceBody(code="return System.Uri.UnescapeDataString(s);") public static String decode(/*@Nullable*/ String s) { // Evaluates all escapes in s, applying UTF-8 decoding if needed. Assumes // that escapes are well-formed syntactically, i.e., of the form %XX. If a // sequence of escaped octets is not valid UTF-8 then the erroneous octets // are replaced with '\uFFFD'. // Exception: any "%" found between "[]" is left alone. It is an IPv6 literal // with a scope_id // if (s == null) { return s; } int n = s.length(); if (n == 0) { return s; } if (s.indexOf('%') < 0) { return s; } StringBuilder sb = new StringBuilder(n); ByteBuffer bb = ByteBuffer.allocate(n); Charset utf8 = StandardCharsets.UTF_8; // This is not horribly efficient, but it will do for now char c = s.charAt(0); boolean betweenBrackets = false; for (int i = 0; i < n; ) { assert c == s.charAt(i); // Loop invariant if (c == '[') { betweenBrackets = true; } else if (betweenBrackets && c == ']') { betweenBrackets = false; } if (c != '%' || betweenBrackets) { sb.append(c); if (++i >= n) { break; } c = s.charAt(i); continue; } bb.clear(); for (; ; ) { assert n - i >= 2; bb.put(hex(s.charAt(++i), s.charAt(++i))); if (++i >= n) { break; } c = s.charAt(i); if (c != '%') { break; } } bb.flip(); sb.append(utf8.decode(bb)); } return sb.toString(); } private static byte hex(char high, char low) { return (byte) ((hexToDec(high) << 4) | hexToDec(low)); } private static int hexToDec(char c) { if (c >= '0' && c <= '9') { return c - '0'; } else if (c >= 'a' && c <= 'f') { return c - 'a' + 10; } else if (c >= 'A' && c <= 'F') { return c - 'A' + 10; } else { return 0; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy