com.caucho.xml.XmlChar Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of resin Show documentation
Show all versions of resin Show documentation
Resin Java Application Server
/*
* Copyright (c) 1998-2018 Caucho Technology -- all rights reserved
*
* This file is part of Resin(R) Open Source
*
* Each copy or derived work must preserve the copyright notice and this
* notice unmodified.
*
* Resin Open Source is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Resin Open Source is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
* of NON-INFRINGEMENT. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License
* along with Resin Open Source; if not, write to the
* Free SoftwareFoundation, Inc.
* 59 Temple Place, Suite 330
* Boston, MA 02111-1307 USA
*
* @author Scott Ferguson
*/
package com.caucho.xml;
/**
 * XmlChar contains the XML character classes.
 *
 * <p>The predicates follow the character productions of the XML 1.0
 * specification: {@code Char}, {@code S} (whitespace), and the
 * {@code BaseChar}, {@code Ideographic}, {@code CombiningChar},
 * {@code Digit} and {@code Extender} classes used to build names.
 *
 * <p>All methods take the character as an {@code int}.  The class is a
 * static utility and cannot be instantiated.
 */
public class XmlChar {
  /**
   * Lookup table for name characters below 128, indexed by character
   * value.  Filled in by the static initializer at the end of the class.
   */
  static boolean isAsciiNameChar[];

  private XmlChar() {}

  /**
   * Returns true if the character is XML whitespace: space, tab,
   * line feed or carriage return.
   *
   * @param ch the character to test
   * @return true for 0x20, 0x9, 0xa and 0xd
   */
  public static boolean isWhitespace(int ch)
  {
    // The leading range test rejects most characters with one comparison.
    return ch <= 0x20 && (ch == 0x20 || ch == 0x9 || ch == 0xa || ch == 0xd);
  }

  /**
   * Returns true if the character is a legal XML character, i.e. it
   * matches the XML 1.0 {@code Char} production:
   * {@code #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
   * [#x10000-#x10FFFF]}.
   *
   * @param ch the character (code point) to test
   * @return true if the character may appear in an XML document
   */
  public static boolean isChar(int ch)
  {
    return (ch >= 0x20 && ch <= 0xd7ff ||
            ch == 0x9 ||
            ch == 0xa ||
            ch == 0xd ||
            // Upper bound is 0xfffd: only 0xfffe and 0xffff are excluded.
            ch >= 0xe000 && ch <= 0xfffd ||
            // Supplementary-plane code points are legal as well.
            ch >= 0x10000 && ch <= 0x10ffff);
  }

  /**
   * Returns true if the character can start an XML name: a letter
   * (base character or ideograph), '_' or ':'.
   *
   * @param ch the character to test
   * @return true if the character is a valid first character of a name
   */
  public static boolean isNameStart(int ch)
  {
    // ASCII letters are tested inline; the tables are only consulted
    // for characters above 0x7f.
    return (ch >= 0x41 && ch <= 0x5a ||
            ch >= 0x61 && ch <= 0x7a ||
            ch == '_' || ch == ':' ||
            ch > 0x7f && (isBaseChar(ch) ||
                          isIdeographic(ch)));
  }

  /**
   * Returns a boolean array testing for ascii name characters.
   *
   * <p>The returned array is the shared internal table (128 entries,
   * indexed by character value), not a copy; callers must not modify it.
   *
   * @return the internal lookup table for characters below 128
   */
  public static boolean []getAsciiNameCharArray()
  {
    return isAsciiNameChar;
  }

  /**
   * Returns true if the character is an XML name character.
   *
   * @param ch the character to test
   * @return true if the character may appear inside a name
   */
  public static boolean isNameChar(int ch)
  {
    if (ch < 0x20)
      return false;             // control characters and negative values
    else if (ch < 128)
      return isAsciiNameChar[ch];
    else
      return (isBaseChar(ch) ||
              isIdeographic(ch) ||
              isCombiningChar(ch) ||
              isExtender(ch) ||
              isDigit(ch));
  }

  /**
   * Returns true if the character is in the XML 1.0 {@code BaseChar}
   * class.
   *
   * <p>The ranges are grouped by an upper-bound guard
   * ({@code ch <= 0x..}) so that a whole group is skipped with a single
   * comparison when the character lies above it.
   */
  private static boolean isBaseChar(int ch)
  {
    return (ch <= 0xff && (ch >= 0x0041 && ch <= 0x005A ||
                           ch >= 0x0061 && ch <= 0x007A ||
                           ch >= 0x00C0 && ch <= 0x00D6 ||
                           ch >= 0x00D8 && ch <= 0x00F6 ||
                           ch >= 0x00F8 && ch <= 0x00FF) ||
            ch <= 0x1f5 && (ch >= 0x0100 && ch <= 0x0131 ||
                            ch >= 0x0134 && ch <= 0x013E ||
                            ch >= 0x0141 && ch <= 0x0148 ||
                            ch >= 0x014A && ch <= 0x017E ||
                            ch >= 0x0180 && ch <= 0x01C3 ||
                            ch >= 0x01CD && ch <= 0x01F0 ||
                            ch >= 0x01F4 && ch <= 0x01F5) ||
            ch <= 0x2ff && (ch >= 0x01FA && ch <= 0x0217 ||
                            ch >= 0x0250 && ch <= 0x02A8 ||
                            ch >= 0x02BB && ch <= 0x02C1) ||
            ch <= 0x3ff && (ch == 0x0386 ||
                            ch >= 0x0388 && ch <= 0x038A ||
                            ch == 0x038C ||
                            ch >= 0x038E && ch <= 0x03A1 ||
                            ch >= 0x03A3 && ch <= 0x03CE ||
                            ch >= 0x03D0 && ch <= 0x03D6 ||
                            ch == 0x03DA ||
                            ch == 0x03DC ||
                            ch == 0x03DE ||
                            ch == 0x03E0 ||
                            ch >= 0x03E2 && ch <= 0x03F3) ||
            ch <= 0x4ff && (ch >= 0x0401 && ch <= 0x040C ||
                            ch >= 0x040E && ch <= 0x044F ||
                            ch >= 0x0451 && ch <= 0x045C ||
                            ch >= 0x045E && ch <= 0x0481 ||
                            ch >= 0x0490 && ch <= 0x04C4 ||
                            ch >= 0x04C7 && ch <= 0x04C8 ||
                            ch >= 0x04CB && ch <= 0x04CC ||
                            ch >= 0x04D0 && ch <= 0x04EB ||
                            ch >= 0x04EE && ch <= 0x04F5 ||
                            ch >= 0x04F8 && ch <= 0x04F9) ||
            ch <= 0x5ff && (ch >= 0x0531 && ch <= 0x0556 ||
                            ch == 0x0559 ||
                            ch >= 0x0561 && ch <= 0x0586 ||
                            ch >= 0x05D0 && ch <= 0x05EA ||
                            ch >= 0x05F0 && ch <= 0x05F2) ||
            ch <= 0x6ff && (ch >= 0x0621 && ch <= 0x063A ||
                            ch >= 0x0641 && ch <= 0x064A ||
                            ch >= 0x0671 && ch <= 0x06B7 ||
                            ch >= 0x06BA && ch <= 0x06BE ||
                            ch >= 0x06C0 && ch <= 0x06CE ||
                            ch >= 0x06D0 && ch <= 0x06D3 ||
                            ch == 0x06D5 ||
                            ch >= 0x06E5 && ch <= 0x06E6) ||
            ch <= 0x9ff && (ch >= 0x0905 && ch <= 0x0939 ||
                            ch == 0x093D ||
                            ch >= 0x0958 && ch <= 0x0961 ||
                            ch >= 0x0985 && ch <= 0x098C ||
                            ch >= 0x098F && ch <= 0x0990 ||
                            ch >= 0x0993 && ch <= 0x09A8 ||
                            ch >= 0x09AA && ch <= 0x09B0 ||
                            ch == 0x09B2 ||
                            ch >= 0x09B6 && ch <= 0x09B9 ||
                            ch >= 0x09DC && ch <= 0x09DD ||
                            ch >= 0x09DF && ch <= 0x09E1 ||
                            ch >= 0x09F0 && ch <= 0x09F1) ||
            ch <= 0xaff && (ch >= 0x0A05 && ch <= 0x0A0A ||
                            ch >= 0x0A0F && ch <= 0x0A10 ||
                            ch >= 0x0A13 && ch <= 0x0A28 ||
                            ch >= 0x0A2A && ch <= 0x0A30 ||
                            ch >= 0x0A32 && ch <= 0x0A33 ||
                            ch >= 0x0A35 && ch <= 0x0A36 ||
                            ch >= 0x0A38 && ch <= 0x0A39 ||
                            ch >= 0x0A59 && ch <= 0x0A5C ||
                            ch == 0x0A5E ||
                            ch >= 0x0A72 && ch <= 0x0A74 ||
                            ch >= 0x0A85 && ch <= 0x0A8B ||
                            ch == 0x0A8D ||
                            ch >= 0x0A8F && ch <= 0x0A91 ||
                            ch >= 0x0A93 && ch <= 0x0AA8 ||
                            ch >= 0x0AAA && ch <= 0x0AB0 ||
                            ch >= 0x0AB2 && ch <= 0x0AB3 ||
                            ch >= 0x0AB5 && ch <= 0x0AB9 ||
                            ch == 0x0ABD ||
                            ch == 0x0AE0) ||
            ch <= 0xbff && (ch >= 0x0B05 && ch <= 0x0B0C ||
                            ch >= 0x0B0F && ch <= 0x0B10 ||
                            ch >= 0x0B13 && ch <= 0x0B28 ||
                            ch >= 0x0B2A && ch <= 0x0B30 ||
                            ch >= 0x0B32 && ch <= 0x0B33 ||
                            ch >= 0x0B36 && ch <= 0x0B39 ||
                            ch == 0x0B3D ||
                            ch >= 0x0B5C && ch <= 0x0B5D ||
                            ch >= 0x0B5F && ch <= 0x0B61 ||
                            ch >= 0x0B85 && ch <= 0x0B8A ||
                            ch >= 0x0B8E && ch <= 0x0B90 ||
                            ch >= 0x0B92 && ch <= 0x0B95 ||
                            ch >= 0x0B99 && ch <= 0x0B9A ||
                            ch == 0x0B9C ||
                            ch >= 0x0B9E && ch <= 0x0B9F ||
                            ch >= 0x0BA3 && ch <= 0x0BA4 ||
                            ch >= 0x0BA8 && ch <= 0x0BAA ||
                            ch >= 0x0BAE && ch <= 0x0BB5 ||
                            ch >= 0x0BB7 && ch <= 0x0BB9) ||
            ch <= 0xcff && (ch >= 0x0C05 && ch <= 0x0C0C ||
                            ch >= 0x0C0E && ch <= 0x0C10 ||
                            ch >= 0x0C12 && ch <= 0x0C28 ||
                            ch >= 0x0C2A && ch <= 0x0C33 ||
                            ch >= 0x0C35 && ch <= 0x0C39 ||
                            ch >= 0x0C60 && ch <= 0x0C61 ||
                            ch >= 0x0C85 && ch <= 0x0C8C ||
                            ch >= 0x0C8E && ch <= 0x0C90 ||
                            ch >= 0x0C92 && ch <= 0x0CA8 ||
                            ch >= 0x0CAA && ch <= 0x0CB3 ||
                            ch >= 0x0CB5 && ch <= 0x0CB9 ||
                            ch == 0x0CDE ||
                            ch >= 0x0CE0 && ch <= 0x0CE1) ||
            ch <= 0xdff && (ch >= 0x0D05 && ch <= 0x0D0C ||
                            ch >= 0x0D0E && ch <= 0x0D10 ||
                            ch >= 0x0D12 && ch <= 0x0D28 ||
                            ch >= 0x0D2A && ch <= 0x0D39 ||
                            ch >= 0x0D60 && ch <= 0x0D61) ||
            ch <= 0xfff && (ch >= 0x0E01 && ch <= 0x0E2E ||
                            ch == 0x0E30 ||
                            ch >= 0x0E32 && ch <= 0x0E33 ||
                            ch >= 0x0E40 && ch <= 0x0E45 ||
                            ch >= 0x0E81 && ch <= 0x0E82 ||
                            ch == 0x0E84 ||
                            ch >= 0x0E87 && ch <= 0x0E88 ||
                            ch == 0x0E8A ||
                            ch == 0x0E8D ||
                            ch >= 0x0E94 && ch <= 0x0E97 ||
                            ch >= 0x0E99 && ch <= 0x0E9F ||
                            ch >= 0x0EA1 && ch <= 0x0EA3 ||
                            ch == 0x0EA5 ||
                            ch == 0x0EA7 ||
                            ch >= 0x0EAA && ch <= 0x0EAB ||
                            ch >= 0x0EAD && ch <= 0x0EAE ||
                            ch == 0x0EB0 ||
                            ch >= 0x0EB2 && ch <= 0x0EB3 ||
                            ch == 0x0EBD ||
                            ch >= 0x0EC0 && ch <= 0x0EC4 ||
                            ch >= 0x0F40 && ch <= 0x0F47 ||
                            ch >= 0x0F49 && ch <= 0x0F69) ||
            ch <= 0x10ff && (ch >= 0x10A0 && ch <= 0x10C5 ||
                             ch >= 0x10D0 && ch <= 0x10F6) ||
            ch <= 0x11ff && (ch == 0x1100 ||
                             ch >= 0x1102 && ch <= 0x1103 ||
                             ch >= 0x1105 && ch <= 0x1107 ||
                             ch == 0x1109 ||
                             ch >= 0x110B && ch <= 0x110C ||
                             ch >= 0x110E && ch <= 0x1112 ||
                             ch == 0x113C ||
                             ch == 0x113E ||
                             ch == 0x1140 ||
                             ch == 0x114C ||
                             ch == 0x114E ||
                             ch == 0x1150 ||
                             ch >= 0x1154 && ch <= 0x1155 ||
                             ch == 0x1159 ||
                             ch >= 0x115F && ch <= 0x1161 ||
                             ch == 0x1163 ||
                             ch == 0x1165 ||
                             ch == 0x1167 ||
                             ch == 0x1169 ||
                             ch >= 0x116D && ch <= 0x116E ||
                             ch >= 0x1172 && ch <= 0x1173 ||
                             ch == 0x1175 ||
                             ch == 0x119E ||
                             ch == 0x11A8 ||
                             ch == 0x11AB ||
                             ch >= 0x11AE && ch <= 0x11AF ||
                             ch >= 0x11B7 && ch <= 0x11B8 ||
                             ch == 0x11BA ||
                             ch >= 0x11BC && ch <= 0x11C2 ||
                             ch == 0x11EB ||
                             ch == 0x11F0 ||
                             ch == 0x11F9) ||
            ch <= 0x1fff && (ch >= 0x1E00 && ch <= 0x1E9B ||
                             ch >= 0x1EA0 && ch <= 0x1EF9 ||
                             ch >= 0x1F00 && ch <= 0x1F15 ||
                             ch >= 0x1F18 && ch <= 0x1F1D ||
                             ch >= 0x1F20 && ch <= 0x1F45 ||
                             ch >= 0x1F48 && ch <= 0x1F4D ||
                             ch >= 0x1F50 && ch <= 0x1F57 ||
                             ch == 0x1F59 ||
                             ch == 0x1F5B ||
                             ch == 0x1F5D ||
                             ch >= 0x1F5F && ch <= 0x1F7D ||
                             ch >= 0x1F80 && ch <= 0x1FB4 ||
                             ch >= 0x1FB6 && ch <= 0x1FBC ||
                             ch == 0x1FBE ||
                             ch >= 0x1FC2 && ch <= 0x1FC4 ||
                             ch >= 0x1FC6 && ch <= 0x1FCC ||
                             ch >= 0x1FD0 && ch <= 0x1FD3 ||
                             ch >= 0x1FD6 && ch <= 0x1FDB ||
                             ch >= 0x1FE0 && ch <= 0x1FEC ||
                             ch >= 0x1FF2 && ch <= 0x1FF4 ||
                             ch >= 0x1FF6 && ch <= 0x1FFC) ||
            ch == 0x2126 ||
            ch >= 0x212A && ch <= 0x212B ||
            ch == 0x212E ||
            ch >= 0x2180 && ch <= 0x2182 ||
            ch >= 0x3041 && ch <= 0x3094 ||
            ch >= 0x30A1 && ch <= 0x30FA ||
            ch >= 0x3105 && ch <= 0x312C ||
            ch >= 0xAC00 && ch <= 0xD7A3);
  }

  /**
   * Returns true if the character is in the XML 1.0 {@code Ideographic}
   * class.
   */
  private static boolean isIdeographic(int ch)
  {
    return (ch >= 0x4e00 && ch <= 0x9fa5 || ch == 0x3007 ||
            ch >= 0x3021 && ch <= 0x3029);
  }

  /**
   * Returns true if the character is in the XML 1.0
   * {@code CombiningChar} class.
   *
   * <p>As in {@link #isBaseChar}, each group of ranges is preceded by an
   * upper-bound guard that must cover every range inside the group.
   */
  private static boolean isCombiningChar(int ch)
  {
    // No combining character lies below 0x300.
    if (ch < 0x300)
      return false;

    return (ch <= 0x6ff && (ch >= 0x0300 && ch <= 0x0345 ||
                            ch >= 0x0360 && ch <= 0x0361 ||
                            ch >= 0x0483 && ch <= 0x0486 ||
                            ch >= 0x0591 && ch <= 0x05A1 ||
                            ch >= 0x05A3 && ch <= 0x05B9 ||
                            ch >= 0x05BB && ch <= 0x05BD ||
                            ch == 0x05BF ||
                            ch >= 0x05C1 && ch <= 0x05C2 ||
                            ch == 0x05C4 ||
                            ch >= 0x064B && ch <= 0x0652 ||
                            ch == 0x0670 ||
                            ch >= 0x06D6 && ch <= 0x06DC ||
                            ch >= 0x06DD && ch <= 0x06DF ||
                            ch >= 0x06E0 && ch <= 0x06E4 ||
                            ch >= 0x06E7 && ch <= 0x06E8 ||
                            ch >= 0x06EA && ch <= 0x06ED) ||
            ch <= 0x9ff && (ch >= 0x0901 && ch <= 0x0903 ||
                            ch == 0x093C ||
                            ch >= 0x093E && ch <= 0x094C ||
                            ch == 0x094D ||
                            ch >= 0x0951 && ch <= 0x0954 ||
                            ch >= 0x0962 && ch <= 0x0963 ||
                            ch >= 0x0981 && ch <= 0x0983 ||
                            ch == 0x09BC ||
                            ch == 0x09BE ||
                            ch == 0x09BF ||
                            ch >= 0x09C0 && ch <= 0x09C4 ||
                            ch >= 0x09C7 && ch <= 0x09C8 ||
                            ch >= 0x09CB && ch <= 0x09CD ||
                            ch == 0x09D7 ||
                            ch >= 0x09E2 && ch <= 0x09E3) ||
            ch <= 0xaff && (ch == 0x0A02 ||
                            ch == 0x0A3C ||
                            ch == 0x0A3E ||
                            ch == 0x0A3F ||
                            ch >= 0x0A40 && ch <= 0x0A42 ||
                            ch >= 0x0A47 && ch <= 0x0A48 ||
                            ch >= 0x0A4B && ch <= 0x0A4D ||
                            ch >= 0x0A70 && ch <= 0x0A71 ||
                            ch >= 0x0A81 && ch <= 0x0A83 ||
                            ch == 0x0ABC ||
                            ch >= 0x0ABE && ch <= 0x0AC5 ||
                            ch >= 0x0AC7 && ch <= 0x0AC9 ||
                            ch >= 0x0ACB && ch <= 0x0ACD) ||
            ch <= 0xbff && (ch >= 0x0B01 && ch <= 0x0B03 ||
                            ch == 0x0B3C ||
                            ch >= 0x0B3E && ch <= 0x0B43 ||
                            ch >= 0x0B47 && ch <= 0x0B48 ||
                            ch >= 0x0B4B && ch <= 0x0B4D ||
                            ch >= 0x0B56 && ch <= 0x0B57 ||
                            ch >= 0x0B82 && ch <= 0x0B83 ||
                            ch >= 0x0BBE && ch <= 0x0BC2 ||
                            ch >= 0x0BC6 && ch <= 0x0BC8 ||
                            ch >= 0x0BCA && ch <= 0x0BCD ||
                            ch == 0x0BD7) ||
            // The guard must be 0xcff: with 0xc00 none of the ranges in
            // this group (0x0C01-0x0CD6) could ever match.
            ch <= 0xcff && (ch >= 0x0C01 && ch <= 0x0C03 ||
                            ch >= 0x0C3E && ch <= 0x0C44 ||
                            ch >= 0x0C46 && ch <= 0x0C48 ||
                            ch >= 0x0C4A && ch <= 0x0C4D ||
                            ch >= 0x0C55 && ch <= 0x0C56 ||
                            ch >= 0x0C82 && ch <= 0x0C83 ||
                            ch >= 0x0CBE && ch <= 0x0CC4 ||
                            ch >= 0x0CC6 && ch <= 0x0CC8 ||
                            ch >= 0x0CCA && ch <= 0x0CCD ||
                            ch >= 0x0CD5 && ch <= 0x0CD6) ||
            ch <= 0xeff && (ch >= 0x0D02 && ch <= 0x0D03 ||
                            ch >= 0x0D3E && ch <= 0x0D43 ||
                            ch >= 0x0D46 && ch <= 0x0D48 ||
                            ch >= 0x0D4A && ch <= 0x0D4D ||
                            ch == 0x0D57 ||
                            ch == 0x0E31 ||
                            ch >= 0x0E34 && ch <= 0x0E3A ||
                            ch >= 0x0E47 && ch <= 0x0E4E ||
                            ch == 0x0EB1 ||
                            ch >= 0x0EB4 && ch <= 0x0EB9 ||
                            ch >= 0x0EBB && ch <= 0x0EBC ||
                            ch >= 0x0EC8 && ch <= 0x0ECD) ||
            ch <= 0xfff && (ch >= 0x0F18 && ch <= 0x0F19 ||
                            ch == 0x0F35 ||
                            ch == 0x0F37 ||
                            ch == 0x0F39 ||
                            ch == 0x0F3E ||
                            ch == 0x0F3F ||
                            ch >= 0x0F71 && ch <= 0x0F84 ||
                            ch >= 0x0F86 && ch <= 0x0F8B ||
                            ch >= 0x0F90 && ch <= 0x0F95 ||
                            ch == 0x0F97 ||
                            ch >= 0x0F99 && ch <= 0x0FAD ||
                            ch >= 0x0FB1 && ch <= 0x0FB7 ||
                            ch == 0x0FB9) ||
            ch >= 0x20D0 && ch <= 0x20DC ||
            ch == 0x20E1 ||
            ch >= 0x302A && ch <= 0x302F ||
            ch == 0x3099 ||
            ch == 0x309A);
  }

  /**
   * Returns true if the character is in the XML 1.0 {@code Digit} class.
   */
  private static boolean isDigit(int ch)
  {
    return (ch >= 0x0030 && ch <= 0x0039 ||
            ch >= 0x0660 && ch <= 0x0669 ||
            ch >= 0x06F0 && ch <= 0x06F9 ||
            ch >= 0x0966 && ch <= 0x096F ||
            ch >= 0x09E6 && ch <= 0x09EF ||
            ch >= 0x0A66 && ch <= 0x0A6F ||
            ch >= 0x0AE6 && ch <= 0x0AEF ||
            ch >= 0x0B66 && ch <= 0x0B6F ||
            ch >= 0x0BE7 && ch <= 0x0BEF ||
            ch >= 0x0C66 && ch <= 0x0C6F ||
            ch >= 0x0CE6 && ch <= 0x0CEF ||
            ch >= 0x0D66 && ch <= 0x0D6F ||
            ch >= 0x0E50 && ch <= 0x0E59 ||
            ch >= 0x0ED0 && ch <= 0x0ED9 ||
            ch >= 0x0F20 && ch <= 0x0F29);
  }

  /**
   * Returns true if the character is in the XML 1.0 {@code Extender}
   * class.
   */
  private static boolean isExtender(int ch)
  {
    return (ch == 0x00B7 ||
            ch == 0x02D0 ||
            ch == 0x02D1 ||
            ch == 0x0387 ||
            ch == 0x0640 ||
            ch == 0x0E46 ||
            ch == 0x0EC6 ||
            ch == 0x3005 ||
            ch >= 0x3031 && ch <= 0x3035 ||
            ch >= 0x309D && ch <= 0x309E ||
            ch >= 0x30FC && ch <= 0x30FE);
  }

  // Builds the ASCII name-character table: digits, upper- and lower-case
  // letters, plus '_', ':', '.' and '-'.
  static {
    isAsciiNameChar = new boolean[128];

    for (int i = 0x30; i <= 0x39; i++)
      isAsciiNameChar[i] = true;

    for (int i = 0x41; i <= 0x5a; i++)
      isAsciiNameChar[i] = true;

    for (int i = 0x61; i <= 0x7a; i++)
      isAsciiNameChar[i] = true;

    isAsciiNameChar['_'] = true;
    isAsciiNameChar[':'] = true;
    isAsciiNameChar['.'] = true;
    isAsciiNameChar['-'] = true;
  }
}