org.apache.xerces.impl.dv.xs.AnyURIDV Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cookcc Show documentation
Show all versions of cookcc Show documentation
CookCC - a Lexer / Parser (LALR(1)) Project
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xerces.impl.dv.xs;
import org.apache.xerces.impl.dv.InvalidDatatypeValueException;
import org.apache.xerces.impl.dv.ValidationContext;
import org.apache.xerces.util.URI;
/**
* Represent the schema type "anyURI"
*
* @xerces.internal
*
* @author Neeraj Bajaj, Sun Microsystems, inc.
* @author Sandy Gao, IBM
*
* @version $Id: AnyURIDV.java 699892 2008-09-28 21:08:27Z mrglavas $
*/
public class AnyURIDV extends TypeValidator {

    /**
     * Absolute URI that candidate values are resolved against, so that
     * relative references can be checked as well as absolute ones.
     */
    private static final URI BASE_URI;
    static {
        URI uri = null;
        try {
            uri = new URI("abc://def.ghi.jkl");
        } catch (URI.MalformedURIException ex) {
            // Deliberately ignored: not expected for this constant literal.
            // If it ever did happen, BASE_URI would simply stay null.
        }
        BASE_URI = uri;
    }

    /**
     * Returns the bit mask of the facets that may be applied to this type:
     * length, minLength, maxLength, pattern, enumeration and whiteSpace.
     */
    public short getAllowedFacets(){
        return (XSSimpleTypeDecl.FACET_LENGTH | XSSimpleTypeDecl.FACET_MINLENGTH | XSSimpleTypeDecl.FACET_MAXLENGTH | XSSimpleTypeDecl.FACET_PATTERN | XSSimpleTypeDecl.FACET_ENUMERATION | XSSimpleTypeDecl.FACET_WHITESPACE );
    }

    /**
     * Checks that <code>content</code> is a legal anyURI and returns it.
     * <p>
     * An empty string is accepted without further checks. Any other value is
     * first escaped by <code>encode</code> and then parsed relative to
     * <code>BASE_URI</code>; the parsed URI itself is discarded.
     * For some types (string and derived) the validator just returns the
     * string itself, and this one does the same once the check has passed.
     *
     * @param content the lexical value to check
     * @param context the validation context; not used by this method
     * @return <code>content</code> itself, unchanged
     * @throws InvalidDatatypeValueException with key
     *         <code>cvc-datatype-valid.1.2.1</code> if the escaped value
     *         cannot be parsed as a URI
     */
    public Object getActualValue(String content, ValidationContext context) throws InvalidDatatypeValueException {
        // check 3.2.17.c0 must: URI (rfc 2396/2723)
        try {
            if( content.length() != 0 ) {
                // encode special characters using XLink 5.4 algorithm
                final String encoded = encode(content);
                // Support for relative URLs: resolve against a fixed base so
                // that a relative reference parses as well as an absolute one.
                new URI(BASE_URI, encoded );
            }
        } catch (URI.MalformedURIException ex) {
            throw new InvalidDatatypeValueException("cvc-datatype-valid.1.2.1", new Object[]{content, "anyURI"});
        }
        // REVISIT: do we need to return the new URI object?
        return content;
    }

    // which ASCII characters need to be escaped
    private static final boolean gNeedEscaping[] = new boolean[128];
    // the first hex character if a character needs to be escaped
    private static final char gAfterEscaping1[] = new char[128];
    // the second hex character if a character needs to be escaped
    private static final char gAfterEscaping2[] = new char[128];
    // hex digits, indexed by nibble value
    private static final char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
                                           '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

    // initialize the three escaping tables above
    static {
        // control characters 0x00-0x1F
        for (int i = 0; i <= 0x1f; i++) {
            gNeedEscaping[i] = true;
            gAfterEscaping1[i] = gHexChs[i >> 4];
            gAfterEscaping2[i] = gHexChs[i & 0xf];
        }
        // DEL
        gNeedEscaping[0x7f] = true;
        gAfterEscaping1[0x7f] = '7';
        gAfterEscaping2[0x7f] = 'F';
        // remaining printable ASCII characters that get escaped
        char[] escChs = {' ', '<', '>', '"', '{', '}',
                         '|', '\\', '^', '~', '`'};
        int len = escChs.length;
        char ch;
        for (int i = 0; i < len; i++) {
            ch = escChs[i];
            gNeedEscaping[ch] = true;
            gAfterEscaping1[ch] = gHexChs[ch >> 4];
            gAfterEscaping2[ch] = gHexChs[ch & 0xf];
        }
    }

    /**
     * Appends one ASCII character to <code>buffer</code>, written as %HH if
     * the escaping table marks it, or as itself otherwise.
     *
     * @param buffer the buffer to append to
     * @param ch     the character value; must be in the range 0..127
     */
    private static void appendAscii(StringBuffer buffer, int ch) {
        if (gNeedEscaping[ch]) {
            buffer.append('%');
            buffer.append(gAfterEscaping1[ch]);
            buffer.append(gAfterEscaping2[ch]);
        }
        else {
            buffer.append((char)ch);
        }
    }

    /**
     * Encodes special characters in an anyURI value, using %HH to represent
     * special ASCII characters (0x00~0x1F, 0x7F, ' ', '&lt;', '&gt;', etc.)
     * and every byte of the UTF-8 form of non-ASCII characters (whose value
     * is &gt;= 128).
     *
     * @param anyURI the value to encode
     * @return the encoded string, or <code>anyURI</code> itself when nothing
     *         had to be changed
     */
    private static String encode(String anyURI){
        final int inputLength = anyURI.length();
        StringBuffer buffer = new StringBuffer(inputLength*3);

        // Handle the leading run of ASCII characters one char at a time and
        // stop at the first non-ASCII character, if there is one.
        int i = 0;
        for (; i < inputLength; i++) {
            int ch = anyURI.charAt(i);
            if (ch >= 128)
                break;
            appendAscii(buffer, ch);
        }

        if (i == inputLength) {
            // Pure ASCII input. Each escape turns one character into three,
            // so an unchanged length means nothing was escaped and the
            // original string can be handed back as is.
            if (buffer.length() != inputLength) {
                return buffer.toString();
            }
            return anyURI;
        }

        // we saw some non-ascii character:
        // get UTF-8 bytes for the remaining sub-string
        byte[] bytes = null;
        try {
            bytes = anyURI.substring(i).getBytes("UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // should never happen
            return anyURI;
        }

        for (int j = 0; j < bytes.length; j++) {
            byte b = bytes[j];
            if (b < 0) {
                // non-ascii byte: make it positive, then escape
                int unsigned = b + 256;
                buffer.append('%');
                buffer.append(gHexChs[unsigned >> 4]);
                buffer.append(gHexChs[unsigned & 0xf]);
            }
            else {
                appendAscii(buffer, b);
            }
        }

        // Everything appended to the buffer is ASCII while the input holds at
        // least one character >= 128, so the two always differ here. Return
        // the built string unconditionally rather than comparing lengths
        // (the UTF-8 byte count is not the input length).
        return buffer.toString();
    }
} // class AnyURIDV