org.opendaylight.yangtools.xsd.regex.Token Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opendaylight.yangtools.xsd.regex;
import java.util.Hashtable;
import java.util.Vector;
/**
* This class represents a node in parse tree.
*
* @xerces.internal
*
* @version $Id: Token.java 1638344 2014-11-11 20:15:46Z mrglavas $
*/
class Token implements java.io.Serializable {
private static final long serialVersionUID = 8484976002585487481L;
static final boolean COUNTTOKENS = true;
static int tokens = 0;
static final int CHAR = 0; // Literal char
static final int DOT = 11; // .
static final int CONCAT = 1; // XY
static final int UNION = 2; // X|Y|Z
static final int CLOSURE = 3; // X*
static final int RANGE = 4; // [a-zA-Z] etc.
static final int NRANGE = 5; // [^a-zA-Z] etc.
static final int PAREN = 6; // (X) or (?:X)
static final int EMPTY = 7; //
static final int ANCHOR = 8; // ^ $ \b \B \< \> \A \Z \z
static final int NONGREEDYCLOSURE = 9; // *? +?
static final int STRING = 10; // strings
static final int BACKREFERENCE = 12; // back references
static final int LOOKAHEAD = 20; // (?=...)
static final int NEGATIVELOOKAHEAD = 21; // (?!...)
static final int LOOKBEHIND = 22; // (?<=...)
static final int NEGATIVELOOKBEHIND = 23; // (?...)
static final int MODIFIERGROUP = 25; // (?ims-ims:...)
static final int CONDITION = 26; // (?(...)yes|no)
static final int UTF16_MAX = 0x10ffff;
final int type;
static final Token token_dot;
static final Token token_0to9;
static final Token token_wordchars;
static final Token token_not_0to9;
static final Token token_not_wordchars;
static final Token token_spaces;
static final Token token_not_spaces;
static final Token token_empty;
static final Token token_linebeginning;
static final Token token_linebeginning2;
static final Token token_lineend;
static final Token token_stringbeginning;
static final Token token_stringend;
static final Token token_stringend2;
static final Token token_wordedge;
static final Token token_not_wordedge;
static final Token token_wordbeginning;
static final Token token_wordend;
static {
token_empty = new Token(Token.EMPTY);
token_linebeginning = Token.createAnchor('^');
token_linebeginning2 = Token.createAnchor('@');
token_lineend = Token.createAnchor('$');
token_stringbeginning = Token.createAnchor('A');
token_stringend = Token.createAnchor('z');
token_stringend2 = Token.createAnchor('Z');
token_wordedge = Token.createAnchor('b');
token_not_wordedge = Token.createAnchor('B');
token_wordbeginning = Token.createAnchor('<');
token_wordend = Token.createAnchor('>');
token_dot = new Token(Token.DOT);
token_0to9 = Token.createRange();
token_0to9.addRange('0', '9');
token_wordchars = Token.createRange();
token_wordchars.addRange('0', '9');
token_wordchars.addRange('A', 'Z');
token_wordchars.addRange('_', '_');
token_wordchars.addRange('a', 'z');
token_spaces = Token.createRange();
token_spaces.addRange('\t', '\t');
token_spaces.addRange('\n', '\n');
token_spaces.addRange('\f', '\f');
token_spaces.addRange('\r', '\r');
token_spaces.addRange(' ', ' ');
token_not_0to9 = Token.complementRanges(Token.token_0to9);
token_not_wordchars = Token.complementRanges(Token.token_wordchars);
token_not_spaces = Token.complementRanges(Token.token_spaces);
}
static Token.ParenToken createLook(int type, Token child) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.ParenToken(type, child, 0);
}
static Token.ParenToken createParen(Token child, int pnumber) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.ParenToken(Token.PAREN, child, pnumber);
}
static Token.ClosureToken createClosure(Token tok) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.ClosureToken(Token.CLOSURE, tok);
}
static Token.ClosureToken createNGClosure(Token tok) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.ClosureToken(Token.NONGREEDYCLOSURE, tok);
}
static Token.ConcatToken createConcat(Token tok1, Token tok2) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.ConcatToken(tok1, tok2);
}
static Token.UnionToken createConcat() {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.UnionToken(Token.CONCAT); // *** It is not a bug.
}
static Token.UnionToken createUnion() {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.UnionToken(Token.UNION);
}
static Token createEmpty() {
return Token.token_empty;
}
static RangeToken createRange() {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new RangeToken(Token.RANGE);
}
static RangeToken createNRange() {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new RangeToken(Token.NRANGE);
}
static Token.CharToken createChar(int ch) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.CharToken(Token.CHAR, ch);
}
static private Token.CharToken createAnchor(int ch) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.CharToken(Token.ANCHOR, ch);
}
static Token.StringToken createBackReference(int refno) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.StringToken(Token.BACKREFERENCE, null, refno);
}
static Token.StringToken createString(String str) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.StringToken(Token.STRING, str, 0);
}
static Token.ModifierToken createModifierGroup(Token child, int add, int mask) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.ModifierToken(child, add, mask);
}
static Token.ConditionToken createCondition(int refno, Token condition,
Token yespat, Token nopat) {
if (COUNTTOKENS) {
Token.tokens ++;
}
return new Token.ConditionToken(refno, condition, yespat, nopat);
}
protected Token(int type) {
this.type = type;
}
/**
* A number of children.
*/
int size() {
return 0;
}
Token getChild(int index) {
return null;
}
void addChild(Token tok) {
throw new RuntimeException("Not supported.");
}
// for RANGE or NRANGE
protected void addRange(int start, int end) {
throw new RuntimeException("Not supported.");
}
protected void sortRanges() {
throw new RuntimeException("Not supported.");
}
protected void compactRanges() {
throw new RuntimeException("Not supported.");
}
protected void mergeRanges(Token tok) {
throw new RuntimeException("Not supported.");
}
protected void subtractRanges(Token tok) {
throw new RuntimeException("Not supported.");
}
protected void intersectRanges(Token tok) {
throw new RuntimeException("Not supported.");
}
static Token complementRanges(Token tok) {
return RangeToken.complementRanges(tok);
}
void setMin(int min) { // for CLOSURE
}
void setMax(int max) { // for CLOSURE
}
int getMin() { // for CLOSURE
return -1;
}
int getMax() { // for CLOSURE
return -1;
}
int getReferenceNumber() { // for STRING
return 0;
}
String getString() { // for STRING
return null;
}
int getParenNumber() {
return 0;
}
int getChar() {
return -1;
}
@Override
public String toString() {
return this.toString(0);
}
public String toString(int options) {
return this.type == Token.DOT ? "." : "";
}
/**
* How many characters are needed?
*/
final int getMinLength() {
switch (this.type) {
case CONCAT:
int sum = 0;
for (int i = 0; i < this.size(); i ++) {
sum += this.getChild(i).getMinLength();
}
return sum;
case CONDITION:
case UNION:
if (this.size() == 0) {
return 0;
}
int ret = this.getChild(0).getMinLength();
for (int i = 1; i < this.size(); i ++) {
int min = this.getChild(i).getMinLength();
if (min < ret) {
ret = min;
}
}
return ret;
case CLOSURE:
case NONGREEDYCLOSURE:
if (this.getMin() >= 0) {
return this.getMin() * this.getChild(0).getMinLength();
}
return 0;
case EMPTY:
case ANCHOR:
return 0;
case DOT:
case CHAR:
case RANGE:
case NRANGE:
return 1;
case INDEPENDENT:
case PAREN:
case MODIFIERGROUP:
return this.getChild(0).getMinLength();
case BACKREFERENCE:
return 0; // *******
case STRING:
return this.getString().length();
case LOOKAHEAD:
case NEGATIVELOOKAHEAD:
case LOOKBEHIND:
case NEGATIVELOOKBEHIND:
return 0; // ***** Really?
default:
throw new RuntimeException("Token#getMinLength(): Invalid Type: "+this.type);
}
}
final int getMaxLength() {
switch (this.type) {
case CONCAT:
int sum = 0;
for (int i = 0; i < this.size(); i ++) {
int d = this.getChild(i).getMaxLength();
if (d < 0) {
return -1;
}
sum += d;
}
return sum;
case CONDITION:
case UNION:
if (this.size() == 0) {
return 0;
}
int ret = this.getChild(0).getMaxLength();
for (int i = 1; ret >= 0 && i < this.size(); i ++) {
int max = this.getChild(i).getMaxLength();
if (max < 0) { // infinity
ret = -1;
break;
}
if (max > ret) {
ret = max;
}
}
return ret;
case CLOSURE:
case NONGREEDYCLOSURE:
if (this.getMax() >= 0) {
// When this.child.getMaxLength() < 0,
// this returns minus value
return this.getMax() * this.getChild(0).getMaxLength();
}
return -1;
case EMPTY:
case ANCHOR:
return 0;
case CHAR:
return 1;
case DOT:
case RANGE:
case NRANGE:
return 2;
case INDEPENDENT:
case PAREN:
case MODIFIERGROUP:
return this.getChild(0).getMaxLength();
case BACKREFERENCE:
return -1; // ******
case STRING:
return this.getString().length();
case LOOKAHEAD:
case NEGATIVELOOKAHEAD:
case LOOKBEHIND:
case NEGATIVELOOKBEHIND:
return 0; // ***** Really?
default:
throw new RuntimeException("Token#getMaxLength(): Invalid Type: "+this.type);
}
}
static final int FC_CONTINUE = 0;
static final int FC_TERMINAL = 1;
static final int FC_ANY = 2;
private static final boolean isSet(int options, int flag) {
return (options & flag) == flag;
}
final int analyzeFirstCharacter(RangeToken result, int options) {
switch (this.type) {
case CONCAT:
int ret = FC_CONTINUE;
for (int i = 0; i < this.size(); i ++) {
if ((ret = this.getChild(i).analyzeFirstCharacter(result, options)) != FC_CONTINUE) {
break;
}
}
return ret;
case UNION:
if (this.size() == 0) {
return FC_CONTINUE;
}
/*
* a|b|c -> FC_TERMINAL
* a|.|c -> FC_ANY
* a|b| -> FC_CONTINUE
*/
int ret2 = FC_CONTINUE;
boolean hasEmpty = false;
for (int i = 0; i < this.size(); i ++) {
ret2 = this.getChild(i).analyzeFirstCharacter(result, options);
if (ret2 == FC_ANY) {
break;
} else if (ret2 == FC_CONTINUE) {
hasEmpty = true;
}
}
return hasEmpty ? FC_CONTINUE : ret2;
case CONDITION:
int ret3 = this.getChild(0).analyzeFirstCharacter(result, options);
if (this.size() == 1) {
return FC_CONTINUE;
}
if (ret3 == FC_ANY) {
return ret3;
}
int ret4 = this.getChild(1).analyzeFirstCharacter(result, options);
if (ret4 == FC_ANY) {
return ret4;
}
return ret3 == FC_CONTINUE || ret4 == FC_CONTINUE ? FC_CONTINUE : FC_TERMINAL;
case CLOSURE:
case NONGREEDYCLOSURE:
this.getChild(0).analyzeFirstCharacter(result, options);
return FC_CONTINUE;
case EMPTY:
case ANCHOR:
return FC_CONTINUE;
case CHAR:
int ch = this.getChar();
result.addRange(ch, ch);
if (ch < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
ch = Character.toUpperCase((char)ch);
result.addRange(ch, ch);
ch = Character.toLowerCase((char)ch);
result.addRange(ch, ch);
}
return FC_TERMINAL;
case DOT:
return FC_ANY;
case RANGE:
result.mergeRanges(this);
return FC_TERMINAL;
case NRANGE: // ****
result.mergeRanges(Token.complementRanges(this));
return FC_TERMINAL;
case INDEPENDENT:
case PAREN:
return this.getChild(0).analyzeFirstCharacter(result, options);
case MODIFIERGROUP:
options |= ((ModifierToken)this).getOptions();
options &= ~((ModifierToken)this).getOptionsMask();
return this.getChild(0).analyzeFirstCharacter(result, options);
case BACKREFERENCE:
result.addRange(0, UTF16_MAX); // **** We can not optimize.
return FC_ANY;
case STRING:
int cha = this.getString().charAt(0);
int ch2;
if (REUtil.isHighSurrogate(cha)
&& this.getString().length() >= 2
&& REUtil.isLowSurrogate((ch2 = this.getString().charAt(1)))) {
cha = REUtil.composeFromSurrogates(cha, ch2);
}
result.addRange(cha, cha);
if (cha < 0x10000 && isSet(options, RegularExpression.IGNORE_CASE)) {
cha = Character.toUpperCase((char)cha);
result.addRange(cha, cha);
cha = Character.toLowerCase((char)cha);
result.addRange(cha, cha);
}
return FC_TERMINAL;
case LOOKAHEAD:
case NEGATIVELOOKAHEAD:
case LOOKBEHIND:
case NEGATIVELOOKBEHIND:
return FC_CONTINUE;
default:
throw new RuntimeException("Token#analyzeHeadCharacter(): Invalid Type: "+this.type);
}
}
private final boolean isShorterThan(Token tok) {
if (tok == null) {
return false;
}
/*
int mylength;
if (this.type == STRING) mylength = this.getString().length();
else if (this.type == CHAR) mylength = this.getChar() >= 0x10000 ? 2 : 1;
else throw new RuntimeException("Internal Error: Illegal type: "+this.type);
int otherlength;
if (tok.type == STRING) otherlength = tok.getString().length();
else if (tok.type == CHAR) otherlength = tok.getChar() >= 0x10000 ? 2 : 1;
else throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
*/
int mylength;
if (this.type == STRING) {
mylength = this.getString().length();
} else {
throw new RuntimeException("Internal Error: Illegal type: "+this.type);
}
int otherlength;
if (tok.type == STRING) {
otherlength = tok.getString().length();
} else {
throw new RuntimeException("Internal Error: Illegal type: "+tok.type);
}
return mylength < otherlength;
}
static class FixedStringContainer {
Token token = null;
int options = 0;
FixedStringContainer() {
}
}
final void findFixedString(FixedStringContainer container, int options) {
switch (this.type) {
case CONCAT:
Token prevToken = null;
int prevOptions = 0;
for (int i = 0; i < this.size(); i ++) {
this.getChild(i).findFixedString(container, options);
if (prevToken == null || prevToken.isShorterThan(container.token)) {
prevToken = container.token;
prevOptions = container.options;
}
}
container.token = prevToken;
container.options = prevOptions;
return;
case UNION:
case CLOSURE:
case NONGREEDYCLOSURE:
case EMPTY:
case ANCHOR:
case RANGE:
case DOT:
case NRANGE:
case BACKREFERENCE:
case LOOKAHEAD:
case NEGATIVELOOKAHEAD:
case LOOKBEHIND:
case NEGATIVELOOKBEHIND:
case CONDITION:
container.token = null;
return;
case CHAR: // Ignore CHAR tokens.
container.token = null; // **
return; // **
case STRING:
container.token = this;
container.options = options;
return;
case INDEPENDENT:
case PAREN:
this.getChild(0).findFixedString(container, options);
return;
case MODIFIERGROUP:
options |= ((ModifierToken)this).getOptions();
options &= ~((ModifierToken)this).getOptionsMask();
this.getChild(0).findFixedString(container, options);
return;
default:
throw new RuntimeException("Token#findFixedString(): Invalid Type: "+this.type);
}
}
boolean match(int ch) {
throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
}
// ------------------------------------------------------
private final static Hashtable categories = new Hashtable<>();
private final static Hashtable categories2 = new Hashtable<>();
private static final String[] categoryNames = {
"Cn", "Lu", "Ll", "Lt", "Lm", "Lo", "Mn", "Me", "Mc", "Nd",
"Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", null, "Co", "Cs",
"Pd", "Ps", "Pe", "Pc", "Po", "Sm", "Sc", "Sk", "So", // 28
"Pi", "Pf", // 29, 30
"L", "M", "N", "Z", "C", "P", "S", // 31-37
};
// Schema Rec. {Datatypes} - Punctuation
static final int CHAR_INIT_QUOTE = 29; // Pi - initial quote
static final int CHAR_FINAL_QUOTE = 30; // Pf - final quote
static final int CHAR_LETTER = 31;
static final int CHAR_MARK = 32;
static final int CHAR_NUMBER = 33;
static final int CHAR_SEPARATOR = 34;
static final int CHAR_OTHER = 35;
static final int CHAR_PUNCTUATION = 36;
static final int CHAR_SYMBOL = 37;
//blockNames in UNICODE 3.1 that supported by XML Schema REC
private static final String[] blockNames = {
/*0000..007F;*/ "Basic Latin",
/*0080..00FF;*/ "Latin-1 Supplement",
/*0100..017F;*/ "Latin Extended-A",
/*0180..024F;*/ "Latin Extended-B",
/*0250..02AF;*/ "IPA Extensions",
/*02B0..02FF;*/ "Spacing Modifier Letters",
/*0300..036F;*/ "Combining Diacritical Marks",
/*0370..03FF;*/ "Greek",
/*0400..04FF;*/ "Cyrillic",
/*0530..058F;*/ "Armenian",
/*0590..05FF;*/ "Hebrew",
/*0600..06FF;*/ "Arabic",
/*0700..074F;*/ "Syriac",
/*0780..07BF;*/ "Thaana",
/*0900..097F;*/ "Devanagari",
/*0980..09FF;*/ "Bengali",
/*0A00..0A7F;*/ "Gurmukhi",
/*0A80..0AFF;*/ "Gujarati",
/*0B00..0B7F;*/ "Oriya",
/*0B80..0BFF;*/ "Tamil",
/*0C00..0C7F;*/ "Telugu",
/*0C80..0CFF;*/ "Kannada",
/*0D00..0D7F;*/ "Malayalam",
/*0D80..0DFF;*/ "Sinhala",
/*0E00..0E7F;*/ "Thai",
/*0E80..0EFF;*/ "Lao",
/*0F00..0FFF;*/ "Tibetan",
/*1000..109F;*/ "Myanmar",
/*10A0..10FF;*/ "Georgian",
/*1100..11FF;*/ "Hangul Jamo",
/*1200..137F;*/ "Ethiopic",
/*13A0..13FF;*/ "Cherokee",
/*1400..167F;*/ "Unified Canadian Aboriginal Syllabics",
/*1680..169F;*/ "Ogham",
/*16A0..16FF;*/ "Runic",
/*1780..17FF;*/ "Khmer",
/*1800..18AF;*/ "Mongolian",
/*1E00..1EFF;*/ "Latin Extended Additional",
/*1F00..1FFF;*/ "Greek Extended",
/*2000..206F;*/ "General Punctuation",
/*2070..209F;*/ "Superscripts and Subscripts",
/*20A0..20CF;*/ "Currency Symbols",
/*20D0..20FF;*/ "Combining Marks for Symbols",
/*2100..214F;*/ "Letterlike Symbols",
/*2150..218F;*/ "Number Forms",
/*2190..21FF;*/ "Arrows",
/*2200..22FF;*/ "Mathematical Operators",
/*2300..23FF;*/ "Miscellaneous Technical",
/*2400..243F;*/ "Control Pictures",
/*2440..245F;*/ "Optical Character Recognition",
/*2460..24FF;*/ "Enclosed Alphanumerics",
/*2500..257F;*/ "Box Drawing",
/*2580..259F;*/ "Block Elements",
/*25A0..25FF;*/ "Geometric Shapes",
/*2600..26FF;*/ "Miscellaneous Symbols",
/*2700..27BF;*/ "Dingbats",
/*2800..28FF;*/ "Braille Patterns",
/*2E80..2EFF;*/ "CJK Radicals Supplement",
/*2F00..2FDF;*/ "Kangxi Radicals",
/*2FF0..2FFF;*/ "Ideographic Description Characters",
/*3000..303F;*/ "CJK Symbols and Punctuation",
/*3040..309F;*/ "Hiragana",
/*30A0..30FF;*/ "Katakana",
/*3100..312F;*/ "Bopomofo",
/*3130..318F;*/ "Hangul Compatibility Jamo",
/*3190..319F;*/ "Kanbun",
/*31A0..31BF;*/ "Bopomofo Extended",
/*3200..32FF;*/ "Enclosed CJK Letters and Months",
/*3300..33FF;*/ "CJK Compatibility",
/*3400..4DB5;*/ "CJK Unified Ideographs Extension A",
/*4E00..9FFF;*/ "CJK Unified Ideographs",
/*A000..A48F;*/ "Yi Syllables",
/*A490..A4CF;*/ "Yi Radicals",
/*AC00..D7A3;*/ "Hangul Syllables",
/*E000..F8FF;*/ "Private Use",
/*F900..FAFF;*/ "CJK Compatibility Ideographs",
/*FB00..FB4F;*/ "Alphabetic Presentation Forms",
/*FB50..FDFF;*/ "Arabic Presentation Forms-A",
/*FE20..FE2F;*/ "Combining Half Marks",
/*FE30..FE4F;*/ "CJK Compatibility Forms",
/*FE50..FE6F;*/ "Small Form Variants",
/*FE70..FEFE;*/ "Arabic Presentation Forms-B",
/*FEFF..FEFF;*/ "Specials",
/*FF00..FFEF;*/ "Halfwidth and Fullwidth Forms",
//missing Specials add manually
/*10300..1032F;*/ "Old Italic", // 84
/*10330..1034F;*/ "Gothic",
/*10400..1044F;*/ "Deseret",
/*1D000..1D0FF;*/ "Byzantine Musical Symbols",
/*1D100..1D1FF;*/ "Musical Symbols",
/*1D400..1D7FF;*/ "Mathematical Alphanumeric Symbols",
/*20000..2A6D6;*/ "CJK Unified Ideographs Extension B",
/*2F800..2FA1F;*/ "CJK Compatibility Ideographs Supplement",
/*E0000..E007F;*/ "Tags",
//missing 2 private use add manually
};
//ADD THOSE MANUALLY
//F0000..FFFFD; "Private Use",
//100000..10FFFD; "Private Use"
//FFF0..FFFD; "Specials",
static final String blockRanges =
"\u0000\u007F\u0080\u00FF\u0100\u017F\u0180\u024F\u0250\u02AF\u02B0\u02FF\u0300\u036F"
+"\u0370\u03FF\u0400\u04FF\u0530\u058F\u0590\u05FF\u0600\u06FF\u0700\u074F\u0780\u07BF"
+"\u0900\u097F\u0980\u09FF\u0A00\u0A7F\u0A80\u0AFF\u0B00\u0B7F\u0B80\u0BFF\u0C00\u0C7F\u0C80\u0CFF"
+"\u0D00\u0D7F\u0D80\u0DFF\u0E00\u0E7F\u0E80\u0EFF\u0F00\u0FFF\u1000\u109F\u10A0\u10FF\u1100\u11FF"
+"\u1200\u137F\u13A0\u13FF\u1400\u167F\u1680\u169F\u16A0\u16FF\u1780\u17FF\u1800\u18AF\u1E00\u1EFF"
+"\u1F00\u1FFF\u2000\u206F\u2070\u209F\u20A0\u20CF\u20D0\u20FF\u2100\u214F\u2150\u218F\u2190\u21FF\u2200\u22FF"
+"\u2300\u23FF\u2400\u243F\u2440\u245F\u2460\u24FF\u2500\u257F\u2580\u259F\u25A0\u25FF\u2600\u26FF\u2700\u27BF"
+"\u2800\u28FF\u2E80\u2EFF\u2F00\u2FDF\u2FF0\u2FFF\u3000\u303F\u3040\u309F\u30A0\u30FF\u3100\u312F\u3130\u318F"
+"\u3190\u319F\u31A0\u31BF\u3200\u32FF\u3300\u33FF\u3400\u4DB5\u4E00\u9FFF\uA000\uA48F\uA490\uA4CF"
+"\uAC00\uD7A3\uE000\uF8FF\uF900\uFAFF\uFB00\uFB4F\uFB50\uFDFF"
+"\uFE20\uFE2F\uFE30\uFE4F\uFE50\uFE6F\uFE70\uFEFE\uFEFF\uFEFF\uFF00\uFFEF";
static final int[] nonBMPBlockRanges = {
0x10300, 0x1032F, // 84
0x10330, 0x1034F,
0x10400, 0x1044F,
0x1D000, 0x1D0FF,
0x1D100, 0x1D1FF,
0x1D400, 0x1D7FF,
0x20000, 0x2A6D6,
0x2F800, 0x2FA1F,
0xE0000, 0xE007F
};
private static final int NONBMP_BLOCK_START = 84;
static protected RangeToken getRange(String name, boolean positive) {
if (Token.categories.size() == 0) {
synchronized (Token.categories) {
Token[] ranges = new Token[Token.categoryNames.length];
for (int i = 0; i < ranges.length; i ++) {
ranges[i] = Token.createRange();
}
int type;
for (int i = 0; i < 0x10000; i ++) {
type = Character.getType((char)i);
if (type == Character.START_PUNCTUATION ||
type == Character.END_PUNCTUATION) {
//build table of Pi values
if (i == 0x00AB || i == 0x2018 || i == 0x201B || i == 0x201C ||
i == 0x201F || i == 0x2039) {
type = CHAR_INIT_QUOTE;
}
//build table of Pf values
if (i == 0x00BB || i == 0x2019 || i == 0x201D || i == 0x203A ) {
type = CHAR_FINAL_QUOTE;
}
}
ranges[type].addRange(i, i);
switch (type) {
case Character.UPPERCASE_LETTER:
case Character.LOWERCASE_LETTER:
case Character.TITLECASE_LETTER:
case Character.MODIFIER_LETTER:
case Character.OTHER_LETTER:
type = CHAR_LETTER;
break;
case Character.NON_SPACING_MARK:
case Character.COMBINING_SPACING_MARK:
case Character.ENCLOSING_MARK:
type = CHAR_MARK;
break;
case Character.DECIMAL_DIGIT_NUMBER:
case Character.LETTER_NUMBER:
case Character.OTHER_NUMBER:
type = CHAR_NUMBER;
break;
case Character.SPACE_SEPARATOR:
case Character.LINE_SEPARATOR:
case Character.PARAGRAPH_SEPARATOR:
type = CHAR_SEPARATOR;
break;
case Character.CONTROL:
case Character.FORMAT:
case Character.SURROGATE:
case Character.PRIVATE_USE:
case Character.UNASSIGNED:
type = CHAR_OTHER;
break;
case Character.CONNECTOR_PUNCTUATION:
case Character.DASH_PUNCTUATION:
case Character.START_PUNCTUATION:
case Character.END_PUNCTUATION:
case CHAR_INIT_QUOTE:
case CHAR_FINAL_QUOTE:
case Character.OTHER_PUNCTUATION:
type = CHAR_PUNCTUATION;
break;
case Character.MATH_SYMBOL:
case Character.CURRENCY_SYMBOL:
case Character.MODIFIER_SYMBOL:
case Character.OTHER_SYMBOL:
type = CHAR_SYMBOL;
break;
default:
throw new RuntimeException("org.apache.xerces.utils.regex.Token#getRange(): Unknown Unicode category: "+type);
}
ranges[type].addRange(i, i);
} // for all characters
ranges[Character.UNASSIGNED].addRange(0x10000, Token.UTF16_MAX);
ranges[CHAR_OTHER].addRange(0x10000, Token.UTF16_MAX);
for (int i = 0; i < ranges.length; i ++) {
if (Token.categoryNames[i] != null) {
if (i == Character.UNASSIGNED) { // Unassigned
ranges[i].addRange(0x10000, Token.UTF16_MAX);
}
Token.categories.put(Token.categoryNames[i], ranges[i]);
Token.categories2.put(Token.categoryNames[i],
Token.complementRanges(ranges[i]));
}
}
//REVISIT: do we really need to support block names as in Unicode 3.1
// or we can just create all the names in IsBLOCKNAME format (XML Schema REC)?
//
StringBuffer buffer = new StringBuffer(50);
for (int i = 0; i < Token.blockNames.length; i ++) {
Token r1 = Token.createRange();
int location;
if (i < NONBMP_BLOCK_START) {
location = i*2;
int rstart = Token.blockRanges.charAt(location);
int rend = Token.blockRanges.charAt(location+1);
//DEBUGING
//System.out.println(n+" " +Integer.toHexString(rstart)
// +"-"+ Integer.toHexString(rend));
r1.addRange(rstart, rend);
} else {
location = (i - NONBMP_BLOCK_START) * 2;
r1.addRange(Token.nonBMPBlockRanges[location],
Token.nonBMPBlockRanges[location + 1]);
}
String n = Token.blockNames[i];
if (n.equals("Specials")) {
r1.addRange(0xfff0, 0xfffd);
}
if (n.equals("Private Use")) {
r1.addRange(0xF0000,0xFFFFD);
r1.addRange(0x100000,0x10FFFD);
}
Token.categories.put(n, r1);
Token.categories2.put(n, Token.complementRanges(r1));
buffer.setLength(0);
buffer.append("Is");
if (n.indexOf(' ') >= 0) {
for (int ci = 0; ci < n.length(); ci ++) {
if (n.charAt(ci) != ' ') {
buffer.append(n.charAt(ci));
}
}
}
else {
buffer.append(n);
}
Token.setAlias(buffer.toString(), n, true);
}
// TR#18 1.2
Token.setAlias("ASSIGNED", "Cn", false);
Token.setAlias("UNASSIGNED", "Cn", true);
Token all = Token.createRange();
all.addRange(0, Token.UTF16_MAX);
Token.categories.put("ALL", all);
Token.categories2.put("ALL", Token.complementRanges(all));
Token.registerNonXS("ASSIGNED");
Token.registerNonXS("UNASSIGNED");
Token.registerNonXS("ALL");
Token isalpha = Token.createRange();
isalpha.mergeRanges(ranges[Character.UPPERCASE_LETTER]); // Lu
isalpha.mergeRanges(ranges[Character.LOWERCASE_LETTER]); // Ll
isalpha.mergeRanges(ranges[Character.OTHER_LETTER]); // Lo
Token.categories.put("IsAlpha", isalpha);
Token.categories2.put("IsAlpha", Token.complementRanges(isalpha));
Token.registerNonXS("IsAlpha");
Token isalnum = Token.createRange();
isalnum.mergeRanges(isalpha); // Lu Ll Lo
isalnum.mergeRanges(ranges[Character.DECIMAL_DIGIT_NUMBER]); // Nd
Token.categories.put("IsAlnum", isalnum);
Token.categories2.put("IsAlnum", Token.complementRanges(isalnum));
Token.registerNonXS("IsAlnum");
Token isspace = Token.createRange();
isspace.mergeRanges(Token.token_spaces);
isspace.mergeRanges(ranges[CHAR_SEPARATOR]); // Z
Token.categories.put("IsSpace", isspace);
Token.categories2.put("IsSpace", Token.complementRanges(isspace));
Token.registerNonXS("IsSpace");
Token isword = Token.createRange();
isword.mergeRanges(isalnum); // Lu Ll Lo Nd
isword.addRange('_', '_');
Token.categories.put("IsWord", isword);
Token.categories2.put("IsWord", Token.complementRanges(isword));
Token.registerNonXS("IsWord");
Token isascii = Token.createRange();
isascii.addRange(0, 127);
Token.categories.put("IsASCII", isascii);
Token.categories2.put("IsASCII", Token.complementRanges(isascii));
Token.registerNonXS("IsASCII");
Token isnotgraph = Token.createRange();
isnotgraph.mergeRanges(ranges[CHAR_OTHER]);
isnotgraph.addRange(' ', ' ');
Token.categories.put("IsGraph", Token.complementRanges(isnotgraph));
Token.categories2.put("IsGraph", isnotgraph);
Token.registerNonXS("IsGraph");
Token isxdigit = Token.createRange();
isxdigit.addRange('0', '9');
isxdigit.addRange('A', 'F');
isxdigit.addRange('a', 'f');
Token.categories.put("IsXDigit", Token.complementRanges(isxdigit));
Token.categories2.put("IsXDigit", isxdigit);
Token.registerNonXS("IsXDigit");
Token.setAlias("IsDigit", "Nd", true);
Token.setAlias("IsUpper", "Lu", true);
Token.setAlias("IsLower", "Ll", true);
Token.setAlias("IsCntrl", "C", true);
Token.setAlias("IsPrint", "C", false);
Token.setAlias("IsPunct", "P", true);
Token.registerNonXS("IsDigit");
Token.registerNonXS("IsUpper");
Token.registerNonXS("IsLower");
Token.registerNonXS("IsCntrl");
Token.registerNonXS("IsPrint");
Token.registerNonXS("IsPunct");
Token.setAlias("alpha", "IsAlpha", true);
Token.setAlias("alnum", "IsAlnum", true);
Token.setAlias("ascii", "IsASCII", true);
Token.setAlias("cntrl", "IsCntrl", true);
Token.setAlias("digit", "IsDigit", true);
Token.setAlias("graph", "IsGraph", true);
Token.setAlias("lower", "IsLower", true);
Token.setAlias("print", "IsPrint", true);
Token.setAlias("punct", "IsPunct", true);
Token.setAlias("space", "IsSpace", true);
Token.setAlias("upper", "IsUpper", true);
Token.setAlias("word", "IsWord", true); // Perl extension
Token.setAlias("xdigit", "IsXDigit", true);
Token.registerNonXS("alpha");
Token.registerNonXS("alnum");
Token.registerNonXS("ascii");
Token.registerNonXS("cntrl");
Token.registerNonXS("digit");
Token.registerNonXS("graph");
Token.registerNonXS("lower");
Token.registerNonXS("print");
Token.registerNonXS("punct");
Token.registerNonXS("space");
Token.registerNonXS("upper");
Token.registerNonXS("word");
Token.registerNonXS("xdigit");
} // synchronized
} // if null
RangeToken tok = positive ? (RangeToken)Token.categories.get(name)
: (RangeToken)Token.categories2.get(name);
//if (tok == null) System.out.println(name);
return tok;
}
static protected RangeToken getRange(String name, boolean positive, boolean xs) {
RangeToken range = Token.getRange(name, positive);
if (xs && range != null && Token.isRegisterNonXS(name)) {
range = null;
}
return range;
}
static Hashtable nonxs = null;
/**
* This method is called by only getRange().
* So this method need not MT-safe.
*/
static protected void registerNonXS(String name) {
if (Token.nonxs == null) {
Token.nonxs = new Hashtable<>();
}
Token.nonxs.put(name, name);
}
static protected boolean isRegisterNonXS(String name) {
if (Token.nonxs == null) {
return false;
}
//DEBUG
//System.err.println("isRegisterNonXS: "+name);
return Token.nonxs.containsKey(name);
}
private static void setAlias(String newName, String name, boolean positive) {
Token t1 = Token.categories.get(name);
Token t2 = Token.categories2.get(name);
if (positive) {
Token.categories.put(newName, t1);
Token.categories2.put(newName, t2);
} else {
Token.categories2.put(newName, t1);
Token.categories.put(newName, t2);
}
}
// ------------------------------------------------------
static final String viramaString =
"\u094D"// ;DEVANAGARI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u09CD"//;BENGALI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0A4D"//;GURMUKHI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0ACD"//;GUJARATI SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0B4D"//;ORIYA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0BCD"//;TAMIL SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0C4D"//;TELUGU SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0CCD"//;KANNADA SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0D4D"//;MALAYALAM SIGN VIRAMA;Mn;9;ON;;;;;N;;;;;
+"\u0E3A"//;THAI CHARACTER PHINTHU;Mn;9;ON;;;;;N;THAI VOWEL SIGN PHINTHU;;;;
+"\u0F84";//;TIBETAN MARK HALANTA;Mn;9;ON;;;;;N;TIBETAN VIRAMA;;;;
static private Token token_grapheme = null;
static synchronized Token getGraphemePattern() {
if (Token.token_grapheme != null) {
return Token.token_grapheme;
}
Token base_char = Token.createRange(); // [{ASSIGNED}]-[{M},{C}]
base_char.mergeRanges(Token.getRange("ASSIGNED", true));
base_char.subtractRanges(Token.getRange("M", true));
base_char.subtractRanges(Token.getRange("C", true));
Token virama = Token.createRange();
for (int i = 0; i < Token.viramaString.length(); i++) {
virama.addRange(i, i);
}
Token combiner_wo_virama = Token.createRange();
combiner_wo_virama.mergeRanges(Token.getRange("M", true));
combiner_wo_virama.addRange(0x1160, 0x11ff); // hangul_medial and hangul_final
combiner_wo_virama.addRange(0xff9e, 0xff9f); // extras
Token left = Token.createUnion(); // base_char?
left.addChild(base_char);
left.addChild(Token.token_empty);
Token foo = Token.createUnion();
foo.addChild(Token.createConcat(virama, Token.getRange("L", true)));
foo.addChild(combiner_wo_virama);
foo = Token.createClosure(foo);
foo = Token.createConcat(left, foo);
Token.token_grapheme = foo;
return Token.token_grapheme;
}
/**
* Combing Character Sequence in Perl 5.6.
*/
static private Token token_ccs = null;
static synchronized Token getCombiningCharacterSequence() {
if (Token.token_ccs != null) {
return Token.token_ccs;
}
Token foo = Token.createClosure(Token.getRange("M", true)); // \pM*
foo = Token.createConcat(Token.getRange("M", false), foo); // \PM + \pM*
Token.token_ccs = foo;
return Token.token_ccs;
}
// ------------------------------------------------------
// ------------------------------------------------------
/**
* This class represents a node in parse tree.
*/
static class StringToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -4614366944218504172L;
String string;
final int refNumber;
StringToken(int type, String str, int n) {
super(type);
this.string = str;
this.refNumber = n;
}
@Override
int getReferenceNumber() { // for STRING
return this.refNumber;
}
@Override
String getString() { // for STRING
return this.string;
}
@Override
public String toString(int options) {
if (this.type == BACKREFERENCE) {
return "\\"+this.refNumber;
} else {
return REUtil.quoteMeta(this.string);
}
}
}
/**
* This class represents a node in parse tree.
*/
static class ConcatToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = 8717321425541346381L;
final Token child;
final Token child2;
ConcatToken(Token t1, Token t2) {
super(Token.CONCAT);
this.child = t1;
this.child2 = t2;
}
@Override
int size() {
return 2;
}
@Override
Token getChild(int index) {
return index == 0 ? this.child : this.child2;
}
@Override
public String toString(int options) {
String ret;
if (this.child2.type == CLOSURE && this.child2.getChild(0) == this.child) {
ret = this.child.toString(options)+"+";
} else if (this.child2.type == NONGREEDYCLOSURE && this.child2.getChild(0) == this.child) {
ret = this.child.toString(options)+"+?";
} else {
ret = this.child.toString(options)+this.child2.toString(options);
}
return ret;
}
}
/**
* This class represents a node in parse tree.
*/
static class CharToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -4394272816279496989L;
final int chardata;
CharToken(int type, int ch) {
super(type);
this.chardata = ch;
}
@Override
int getChar() {
return this.chardata;
}
@Override
public String toString(int options) {
String ret;
switch (this.type) {
case CHAR:
switch (this.chardata) {
case '|': case '*': case '+': case '?':
case '(': case ')': case '.': case '[':
case '{': case '\\':
ret = "\\"+(char)this.chardata;
break;
case '\f': ret = "\\f"; break;
case '\n': ret = "\\n"; break;
case '\r': ret = "\\r"; break;
case '\t': ret = "\\t"; break;
case 0x1b: ret = "\\e"; break;
//case 0x0b: ret = "\\v"; break;
default:
if (this.chardata >= 0x10000) {
String pre = "0"+Integer.toHexString(this.chardata);
ret = "\\v"+pre.substring(pre.length()-6, pre.length());
} else {
ret = ""+(char)this.chardata;
}
}
break;
case ANCHOR:
if (this == Token.token_linebeginning || this == Token.token_lineend) {
ret = ""+(char)this.chardata;
} else {
ret = "\\"+(char)this.chardata;
}
break;
default:
ret = null;
}
return ret;
}
@Override
boolean match(int ch) {
if (this.type == CHAR) {
return ch == this.chardata;
} else {
throw new RuntimeException("NFAArrow#match(): Internal error: "+this.type);
}
}
}
/**
* This class represents a node in parse tree.
*/
static class ClosureToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = 1308971930673997452L;
int min;
int max;
final Token child;
ClosureToken(int type, Token tok) {
super(type);
this.child = tok;
this.setMin(-1);
this.setMax(-1);
}
@Override
int size() {
return 1;
}
@Override
Token getChild(int index) {
return this.child;
}
@Override
final void setMin(int min) {
this.min = min;
}
@Override
final void setMax(int max) {
this.max = max;
}
@Override
final int getMin() {
return this.min;
}
@Override
final int getMax() {
return this.max;
}
@Override
public String toString(int options) {
String ret;
if (this.type == CLOSURE) {
if (this.getMin() < 0 && this.getMax() < 0) {
ret = this.child.toString(options)+"*";
} else if (this.getMin() == this.getMax()) {
ret = this.child.toString(options)+"{"+this.getMin()+"}";
} else if (this.getMin() >= 0 && this.getMax() >= 0) {
ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}";
} else if (this.getMin() >= 0 && this.getMax() < 0) {
ret = this.child.toString(options)+"{"+this.getMin()+",}";
} else {
throw new RuntimeException("Token#toString(): CLOSURE "
+this.getMin()+", "+this.getMax());
}
} else {
if (this.getMin() < 0 && this.getMax() < 0) {
ret = this.child.toString(options)+"*?";
} else if (this.getMin() == this.getMax()) {
ret = this.child.toString(options)+"{"+this.getMin()+"}?";
} else if (this.getMin() >= 0 && this.getMax() >= 0) {
ret = this.child.toString(options)+"{"+this.getMin()+","+this.getMax()+"}?";
} else if (this.getMin() >= 0 && this.getMax() < 0) {
ret = this.child.toString(options)+"{"+this.getMin()+",}?";
} else {
throw new RuntimeException("Token#toString(): NONGREEDYCLOSURE "
+this.getMin()+", "+this.getMax());
}
}
return ret;
}
}
/**
* This class represents a node in parse tree.
*/
static class ParenToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -5938014719827987704L;
final Token child;
final int parennumber;
ParenToken(int type, Token tok, int paren) {
super(type);
this.child = tok;
this.parennumber = paren;
}
@Override
int size() {
return 1;
}
@Override
Token getChild(int index) {
return this.child;
}
@Override
int getParenNumber() {
return this.parennumber;
}
@Override
public String toString(int options) {
String ret = null;
switch (this.type) {
case PAREN:
if (this.parennumber == 0) {
ret = "(?:"+this.child.toString(options)+")";
} else {
ret = "("+this.child.toString(options)+")";
}
break;
case LOOKAHEAD:
ret = "(?="+this.child.toString(options)+")";
break;
case NEGATIVELOOKAHEAD:
ret = "(?!"+this.child.toString(options)+")";
break;
case LOOKBEHIND:
ret = "(?<="+this.child.toString(options)+")";
break;
case NEGATIVELOOKBEHIND:
ret = "(?"+this.child.toString(options)+")";
break;
}
return ret;
}
}
/**
* (?(condition)yes-pattern|no-pattern)
*/
static class ConditionToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = 4353765277910594411L;
final int refNumber;
final Token condition;
final Token yes;
final Token no;
ConditionToken(int refno, Token cond, Token yespat, Token nopat) {
super(Token.CONDITION);
this.refNumber = refno;
this.condition = cond;
this.yes = yespat;
this.no = nopat;
}
@Override
int size() {
return this.no == null ? 1 : 2;
}
@Override
Token getChild(int index) {
if (index == 0) {
return this.yes;
}
if (index == 1) {
return this.no;
}
throw new RuntimeException("Internal Error: "+index);
}
@Override
public String toString(int options) {
String ret;
if (refNumber > 0) {
ret = "(?("+refNumber+")";
} else if (this.condition.type == Token.ANCHOR) {
ret = "(?("+this.condition+")";
} else {
ret = "(?"+this.condition;
}
if (this.no == null) {
ret += this.yes+")";
} else {
ret += this.yes+"|"+this.no+")";
}
return ret;
}
}
/**
* (ims-ims: .... )
*/
static class ModifierToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -9114536559696480356L;
final Token child;
final int add;
final int mask;
ModifierToken(Token tok, int add, int mask) {
super(Token.MODIFIERGROUP);
this.child = tok;
this.add = add;
this.mask = mask;
}
@Override
int size() {
return 1;
}
@Override
Token getChild(int index) {
return this.child;
}
int getOptions() {
return this.add;
}
int getOptionsMask() {
return this.mask;
}
@Override
public String toString(int options) {
return "(?"
+(this.add == 0 ? "" : REUtil.createOptionString(this.add))
+(this.mask == 0 ? "" : REUtil.createOptionString(this.mask))
+":"
+this.child.toString(options)
+")";
}
}
/**
* This class represents a node in parse tree.
* for UNION or CONCAT.
*/
static class UnionToken extends Token implements java.io.Serializable {
private static final long serialVersionUID = -2568843945989489861L;
Vector children;
UnionToken(int type) {
super(type);
}
@Override
void addChild(Token tok) {
if (tok == null) {
return;
}
if (this.children == null) {
this.children = new Vector<>();
}
if (this.type == UNION) {
this.children.addElement(tok);
return;
}
// This is CONCAT, and new child is CONCAT.
if (tok.type == CONCAT) {
for (int i = 0; i < tok.size(); i ++)
{
this.addChild(tok.getChild(i)); // Recursion
}
return;
}
int size = this.children.size();
if (size == 0) {
this.children.addElement(tok);
return;
}
Token previous = this.children.elementAt(size-1);
if (!((previous.type == CHAR || previous.type == STRING)
&& (tok.type == CHAR || tok.type == STRING))) {
this.children.addElement(tok);
return;
}
//System.err.println("Merge '"+previous+"' and '"+tok+"'.");
StringBuffer buffer;
int nextMaxLength = (tok.type == CHAR ? 2 : tok.getString().length());
if (previous.type == CHAR) { // Replace previous token by STRING
buffer = new StringBuffer(2 + nextMaxLength);
int ch = previous.getChar();
if (ch >= 0x10000) {
buffer.append(REUtil.decomposeToSurrogates(ch));
} else {
buffer.append((char)ch);
}
previous = Token.createString(null);
this.children.setElementAt(previous, size-1);
} else { // STRING
buffer = new StringBuffer(previous.getString().length() + nextMaxLength);
buffer.append(previous.getString());
}
if (tok.type == CHAR) {
int ch = tok.getChar();
if (ch >= 0x10000) {
buffer.append(REUtil.decomposeToSurrogates(ch));
} else {
buffer.append((char)ch);
}
} else {
buffer.append(tok.getString());
}
((StringToken)previous).string = new String(buffer);
}
@Override
int size() {
return this.children == null ? 0 : this.children.size();
}
@Override
Token getChild(int index) {
return this.children.elementAt(index);
}
@Override
public String toString(int options) {
String ret;
if (this.type == CONCAT) {
if (this.children.size() == 2) {
Token ch = this.getChild(0);
Token ch2 = this.getChild(1);
if (ch2.type == CLOSURE && ch2.getChild(0) == ch) {
ret = ch.toString(options)+"+";
} else if (ch2.type == NONGREEDYCLOSURE && ch2.getChild(0) == ch) {
ret = ch.toString(options)+"+?";
} else {
ret = ch.toString(options)+ch2.toString(options);
}
} else {
StringBuffer sb = new StringBuffer();
for (int i = 0; i < this.children.size(); i ++) {
sb.append(this.children.elementAt(i).toString(options));
}
ret = new String(sb);
}
return ret;
}
if (this.children.size() == 2 && this.getChild(1).type == EMPTY) {
ret = this.getChild(0).toString(options)+"?";
} else if (this.children.size() == 2
&& this.getChild(0).type == EMPTY) {
ret = this.getChild(1).toString(options)+"??";
} else {
StringBuffer sb = new StringBuffer();
sb.append(this.children.elementAt(0).toString(options));
for (int i = 1; i < this.children.size(); i ++) {
sb.append('|');
sb.append(this.children.elementAt(i).toString(options));
}
ret = new String(sb);
}
return ret;
}
}
}