All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ximpleware.extended.VTDGenHuge Maven / Gradle / Ivy

Go to download

XimpleWare's VTD-XML is, far and away, the industry's most advanced and powerful XML processing model for SOA and Cloud Computing

There is a newer version: 2.13.4
Show newest version
/* 
 * Copyright (C) 2002-2015 XimpleWare, [email protected]
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
/*VTD-XML is protected by US patent 7133857, 7260652, and 7761459*/
package com.ximpleware.extended;
import java.io.*;


import com.ximpleware.extended.parser.*;

//import java.io.*;
/**
 * VTDGenHuge implementation supporting extended VTD (256GB file size).
 * Currently only the built-in entities are supported.
 * It parses the DTD, but does not resolve declared entities.
 */
public class VTDGenHuge {
	// internal parser state
	// Integer identifiers for the parser's internal states. The code that
	// switches on them is outside this part of the file, so the per-state
	// notes below restate only what the names and original comments say.

	private final static int STATE_LT_SEEN = 0; // encounter the first <
	private final static int STATE_START_TAG = 1;
	private final static int STATE_END_TAG = 2;
	private final static int STATE_ATTR_NAME = 3;
	private final static int STATE_ATTR_VAL = 4;
	private final static int STATE_TEXT = 5;
	private final static int STATE_DOC_START = 6; // beginning of document
	private final static int STATE_DOC_END = 7; // end of document
	private final static int STATE_PI_TAG =8;
	private final static int STATE_PI_VAL = 9;
	private final static int STATE_DEC_ATTR_NAME = 10;
	private final static int STATE_COMMENT = 11;
	private final static int STATE_CDATA = 12;
	private final static int STATE_DOCTYPE = 13;
	private final static int STATE_END_COMMENT = 14;
	// STATE_END_COMMENT: a comment that appears after the last ending tag
	// NOTE(review): STATE_END_PI presumably is the analogous state for a
	// processing instruction after the last ending tag -- confirm against the parser loop.
	private final static int STATE_END_PI = 15;
	//private final static int STATE_END_PI_VAL = 17;

	// NOTE(review): presumably selects how the document bytes are held
	// (fully in memory vs. memory-mapped); the consumer of these two values
	// is not visible here -- confirm.
	public final static int IN_MEMORY = 0;
	public final static int MEM_MAPPED = 1;
	
	// token type
	// Public integer codes naming the kinds of token; the code that emits
	// them is outside this part of the file.
	public final static int TOKEN_STARTING_TAG = 0;
	public final static int TOKEN_ENDING_TAG = 1;
	public final static int TOKEN_ATTR_NAME = 2;
	public final static int TOKEN_ATTR_NS = 3;
	public final static int TOKEN_ATTR_VAL = 4;
	public final static int TOKEN_CHARACTER_DATA = 5;
	public final static int TOKEN_COMMENT = 6;
	public final static int TOKEN_PI_NAME = 7;
	public final static int TOKEN_PI_VAL = 8;
	public final static int TOKEN_DEC_ATTR_NAME = 9;
	public final static int TOKEN_DEC_ATTR_VAL = 10;
	public final static int TOKEN_CDATA_VAL = 11;
	public final static int TOKEN_DTD_VAL = 12;
	public final static int TOKEN_DOCUMENT = 13;

	// encoding format
	// Values stored in the 'encoding' field. The numeric ordering matters:
	// formatLineNumber() treats every value below FORMAT_UTF_16BE as an
	// encoding whose newline is a single '\n' byte, so the two UTF-16
	// constants must stay the largest values.
	public final static int FORMAT_UTF8 = 2;
	public final static int FORMAT_ASCII = 0;

	// ISO-8859 family
	// NOTE(review): no reader class for FORMAT_ISO_8859_12 or
	// FORMAT_ISO_8859_16 is defined in the visible part of this file -- confirm
	// whether those encodings are actually supported.
	public final static int FORMAT_ISO_8859_1 = 1;
	public final static int FORMAT_ISO_8859_2 = 3;
	public final static int FORMAT_ISO_8859_3 = 4;
	public final static int FORMAT_ISO_8859_4 = 5;
	public final static int FORMAT_ISO_8859_5 = 6;
	public final static int FORMAT_ISO_8859_6 = 7;
	public final static int FORMAT_ISO_8859_7 = 8;
	public final static int FORMAT_ISO_8859_8 = 9;
	public final static int FORMAT_ISO_8859_9 = 10;
	public final static int FORMAT_ISO_8859_10 = 11;
	public final static int FORMAT_ISO_8859_11 = 12;
	public final static int FORMAT_ISO_8859_12 = 13;
	public final static int FORMAT_ISO_8859_13 = 14;
	public final static int FORMAT_ISO_8859_14 = 15;
	public final static int FORMAT_ISO_8859_15 = 16;
	public final static int FORMAT_ISO_8859_16 = 17;
	
	// Windows code pages
	public final static int FORMAT_WIN_1250 = 18;
	public final static int FORMAT_WIN_1251 = 19;
	public final static int FORMAT_WIN_1252 = 20;
	public final static int FORMAT_WIN_1253 = 21;
	public final static int FORMAT_WIN_1254 = 22;
	public final static int FORMAT_WIN_1255 = 23;
	public final static int FORMAT_WIN_1256 = 24;
	public final static int FORMAT_WIN_1257 = 25;
	public final static int FORMAT_WIN_1258 = 26;
	// UTF-16, little- and big-endian
	public final static int FORMAT_UTF_16LE = 64;
	public final static int FORMAT_UTF_16BE = 63;
	//namespace aware flag
	protected boolean ns;
	protected int VTDDepth; // Maximum Depth of VTDs
	// one of the FORMAT_* constants
	protected int encoding;
	// Bookkeeping for the location-cache buffers; finishUp() reads
	// last_depth, last_l1_index and last_l2_index.
	private int last_depth;
	private int last_l1_index;
	private int last_l2_index;
	private int last_l3_index;
	private int increment; // reset to 1 by clear()
	private boolean BOM_detected;
	private boolean must_utf_8;
	private int ch;
	private int ch_temp;
	// Current read position; every IReader implementation below reads from
	// and advances this field.
	protected long offset;	// this is byte offset, not char offset as encoded in VTD
	private long temp_offset;
	protected int depth; // reset to -1 by clear()


	protected long prev_offset;
	protected int rootIndex;
	// byte source of the document being parsed; closed and dropped by clear()
	protected IByteBuffer xb;
	// Buffers handed to VTDNavHuge by getNav().
	protected FastLongBuffer VTDBuffer;
	protected FastLongBuffer l1Buffer;
	protected FastLongBuffer l2Buffer;
	protected FastIntBuffer l3Buffer;
	// When true, clear() keeps the four buffers above instead of nulling them.
	protected boolean br; //buffer reuse


	protected long docLen;
	// again, in terms of byte, not char as encoded in VTD
	// Readers throw EOFExceptionHuge once offset >= endOffset.
	protected long endOffset;
	protected long[] tag_stack;
	public long[] attr_name_array;
	public final static int MAX_DEPTH = 30; // maximum depth value
	// byte position where the document starts; formatLineNumber() scans from here
	protected long docOffset;

	// attr_name_array size
	private final static int ATTR_NAME_ARRAY_SIZE = 16;
	// tag_stack size
	private final static int TAG_STACK_SIZE = 32;
	// max prefix length
	public final static int MAX_PREFIX_LENGTH = (1<<8) -1;
	// max Qname length
	public final static int MAX_QNAME_LENGTH = (1<<11) -1;
	// max Token length
	public final static int MAX_TOKEN_LENGTH = (1<<17) -1;


	
	/**
	 * Reader that decodes UTF-8 from {@code xb}, starting at the enclosing
	 * generator's {@code offset}. Single-byte characters are handled inline;
	 * multi-byte sequences (2 to 6 bytes) go through the private helpers.
	 */
	class UTF8Reader implements IReader {
		public UTF8Reader() {
		}

		/**
		 * Decodes the character at {@code offset} and moves {@code offset}
		 * past it.
		 *
		 * @return the decoded character value
		 * @throws EOFExceptionHuge if {@code offset} has reached {@code endOffset}
		 * @throws ParseExceptionHuge if the byte sequence is malformed
		 */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset >= endOffset)
				throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
			int lead = xb.byteAt(offset);
			if (lead < 0) {
				// high bit set: start of a multi-byte sequence
				return handleUTF8(lead);
			}
			offset++;
			return lead;
		}

		/**
		 * Consumes the character at {@code offset} only if it equals
		 * {@code ch}; otherwise {@code offset} is left untouched.
		 *
		 * @return true when the character matched and was consumed
		 */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, EncodingExceptionHuge, ParseExceptionHuge {
			int lead = xb.byteAt(offset);
			if (lead < 0) {
				return skipUTF8(lead, ch);
			}
			if (ch != lead) {
				return false;
			}
			offset++;
			return true;
		}

		/** Decodes the multi-byte sequence led by {@code temp} and consumes it. */
		private int handleUTF8(int temp) throws EncodingExceptionHuge, ParseExceptionHuge{
			int lead = temp & 0xff;
			int trailing = trailingCount(lead);
			int val = decodeSequence(lead, trailing);
			offset += trailing + 1;
			return val;
		}

		/** Decodes the multi-byte sequence led by {@code temp}; consumes it only if it equals {@code ch}. */
		private boolean skipUTF8(int temp, int ch) throws EncodingExceptionHuge, ParseExceptionHuge{
			int lead = temp & 0xff;
			int trailing = trailingCount(lead);
			if (decodeSequence(lead, trailing) != ch) {
				return false;
			}
			offset += trailing + 1;
			return true;
		}

		/** Number of continuation bytes that follow {@code lead}; rejects lengths outside 2..6. */
		private int trailingCount(int lead) throws ParseExceptionHuge {
			int total = UTF8Char.byteCount(lead);
			if (total < 2 || total > 6)
				throw new ParseExceptionHuge(
						"UTF 8 encoding error: should never happen");
			return total - 1;
		}

		/**
		 * Assembles the value of the sequence at {@code offset} without
		 * moving {@code offset}. The lead byte contributes its low
		 * (6 - trailing) bits; each continuation byte contributes six bits.
		 */
		private int decodeSequence(int lead, int trailing) throws ParseExceptionHuge {
			// 0x7f >> (trailing + 1) yields 0x1f, 0x0f, 0x07, 0x03, 0x01
			int val = (lead & (0x7f >> (trailing + 1))) << (6 * trailing);
			for (int k = 1; k <= trailing; k++) {
				int next = xb.byteAt(offset + k);
				if ((next & 0xc0) != 0x80)
					throw new ParseExceptionHuge(
							"UTF 8 encoding error: should never happen");
				val |= (next & 0x3f) << (6 * (trailing - k));
			}
			return val;
		}

	}
	/**
	 * Reader that decodes big-endian UTF-16 from {@code xb}, combining
	 * surrogate pairs into a single value.
	 */
	class UTF16BEReader implements IReader {
		public UTF16BEReader() {
		}

		/**
		 * Decodes the character at {@code offset} and advances {@code offset}
		 * by two bytes, or four for a surrogate pair.
		 *
		 * @throws EOFExceptionHuge if {@code offset} has reached {@code endOffset}
		 * @throws EncodingExceptionHuge if the surrogates are not a valid high/low pair
		 */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset >= endOffset)
				throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
			int unit = unitAt(offset);
			if (unit < 0xd800 || unit > 0xdfff) {
				// not a surrogate: the unit is the character
				offset += 2;
				return unit;
			}
			int combined = combine(unit);
			offset += 4;
			return combined;
		}

		/**
		 * Consumes the character at {@code offset} only if it equals
		 * {@code ch}.
		 *
		 * @return true when the character matched and was consumed
		 */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			int unit = unitAt(offset);
			boolean single = unit < 0xd800 || unit > 0xdfff;
			int value = single ? unit : combine(unit);
			if (value != ch) {
				return false;
			}
			offset += single ? 2 : 4;
			return true;
		}

		/** Reads the 16-bit unit whose high byte is at {@code pos}. */
		private int unitAt(long pos) {
			return (xb.byteAt(pos)&0xff) << 8 | (xb.byteAt(pos+1)&0xff);
		}

		/**
		 * Combines the surrogate {@code high} found at {@code offset} with
		 * the unit that follows it; does not move {@code offset}.
		 */
		private int combine(int high) throws EncodingExceptionHuge {
			if (high < 0xd800 || high > 0xdbff)
				throw new EncodingExceptionHuge("UTF 16 BE encoding error: should never happen");
			int low = unitAt(offset + 2);
			if (low < 0xdc00 || low > 0xdfff) {
				// the second unit has to be a low surrogate
				throw new EncodingExceptionHuge("UTF 16 BE encoding error: should never happen");
			}
			return ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
		}
	}
	/**
	 * Reader that decodes little-endian UTF-16 from {@code xb}, combining
	 * surrogate pairs into a single value.
	 */
	class UTF16LEReader implements IReader {

		public UTF16LEReader() {
		}

		/**
		 * Decodes the character at {@code offset} and advances {@code offset}
		 * by two bytes, or four for a surrogate pair.
		 *
		 * @throws EOFExceptionHuge if {@code offset} has reached {@code endOffset}
		 * @throws EncodingExceptionHuge if the surrogates are not a valid high/low pair
		 */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset >= endOffset)
				throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
			int unit = unitAt(offset);
			if (unit < 0xd800 || unit > 0xdfff) {
				// not a surrogate: the unit is the character
				offset += 2;
				return unit;
			}
			int combined = combine(unit);
			offset += 4;
			return combined;
		}

		/**
		 * Consumes the character at {@code offset} only if it equals
		 * {@code ch}.
		 *
		 * @return true when the character matched and was consumed
		 */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, EncodingExceptionHuge, ParseExceptionHuge {
			int unit = unitAt(offset);
			boolean single = unit < 0xd800 || unit > 0xdfff;
			int value = single ? unit : combine(unit);
			if (value != ch) {
				return false;
			}
			offset += single ? 2 : 4;
			return true;
		}

		/** Reads the 16-bit unit whose low byte is at {@code pos}. */
		private int unitAt(long pos) {
			return (xb.byteAt(pos+1)&0xff) << 8 | (xb.byteAt(pos)&0xff);
		}

		/**
		 * Combines the surrogate {@code high} found at {@code offset} with
		 * the unit that follows it; does not move {@code offset}.
		 */
		private int combine(int high) throws EncodingExceptionHuge {
			if (high < 0xd800 || high > 0xdbff)
				throw new EncodingExceptionHuge("UTF 16 LE encoding error: should never happen");
			int low = unitAt(offset + 2);
			if (low < 0xdc00 || low > 0xdfff) {
				// the second unit has to be a low surrogate
				throw new EncodingExceptionHuge("UTF 16 LE encoding error: should never happen");
			}
			return ((high - 0xd800) << 10) + (low - 0xdc00) + 0x10000;
		}
	}

	class ASCIIReader implements IReader {
		public ASCIIReader() {
		}
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			int a;
			if (offset >= endOffset)
				throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
			a= xb.byteAt(offset++);
			if (a<0)
				throw new ParseExceptionHuge(
				"ASCII encoding error: invalid ASCII Char");
			return a&0x7f;
		}
		public boolean skipChar(int ch)
			throws ParseExceptionHuge, EOFExceptionHuge, EncodingExceptionHuge {

			if (ch == xb.byteAt(offset)) {
				offset++;
				return true;
			} else {
				return false;
			}
		}
		
	}
	/**
	 * Reader for ISO-8859-1 input, where each byte maps directly to the
	 * character with the same unsigned value (0x00-0xFF).
	 */
	class ISO8859_1Reader implements IReader {
		public ISO8859_1Reader() {
		}

		/**
		 * Returns the unsigned value of the byte at {@code offset} and
		 * advances {@code offset} by one.
		 *
		 * @throws EOFExceptionHuge if {@code offset} has reached {@code endOffset}
		 */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {

			if (offset >= endOffset)
				throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
			return xb.byteAt(offset++) & 0xff;
		}

		/**
		 * Advances past the current byte only when it equals {@code ch}.
		 *
		 * @return true when the byte matched and was consumed
		 */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			// Mask to the unsigned value, as getChar() does. byteAt() can
			// return negative values, so an unmasked comparison could never
			// match a character in the 0x80-0xFF range.
			if (ch == (xb.byteAt(offset) & 0xff)) {
				offset++;
				return true;
			} else {
				return false;
			}
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_2.decode}.
	 */
	class ISO8859_2Reader implements IReader {
		public ISO8859_2Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_2.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_2.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_3.decode}.
	 */
	class ISO8859_3Reader implements IReader {
		public ISO8859_3Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_3.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_3.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_4.decode}.
	 */
	class ISO8859_4Reader implements IReader {
		public ISO8859_4Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_4.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_4.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_5.decode}.
	 */
	class ISO8859_5Reader implements IReader {
		public ISO8859_5Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_5.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_5.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_6.decode}.
	 */
	class ISO8859_6Reader implements IReader {
		public ISO8859_6Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_6.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_6.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_7.decode}.
	 */
	class ISO8859_7Reader implements IReader {
		public ISO8859_7Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_7.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_7.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_8.decode}.
	 */
	class ISO8859_8Reader implements IReader {
		public ISO8859_8Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_8.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_8.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}

	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_9.decode}.
	 */
	class ISO8859_9Reader implements IReader {
		public ISO8859_9Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_9.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_9.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}

	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_10.decode}.
	 */
	class ISO8859_10Reader implements IReader {
		public ISO8859_10Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_10.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_10.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_11.decode}.
	 */
	class ISO8859_11Reader implements IReader {
		public ISO8859_11Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_11.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_11.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_13.decode}.
	 */
	class ISO8859_13Reader implements IReader {
		public ISO8859_13Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_13.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_13.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_14.decode}.
	 */
	class ISO8859_14Reader implements IReader {
		public ISO8859_14Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_14.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_14.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code ISO8859_15.decode}.
	 */
	class ISO8859_15Reader implements IReader {
		public ISO8859_15Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return ISO8859_15.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == ISO8859_15.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1250.decode}.
	 */
	class WIN1250Reader implements IReader {
		public WIN1250Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1250.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1250.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1251.decode}.
	 */
	class WIN1251Reader implements IReader {
		public WIN1251Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1251.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1251.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	
	
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1252.decode}.
	 */
	class WIN1252Reader implements IReader {
		public WIN1252Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1252.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1252.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1253.decode}.
	 */
	class WIN1253Reader implements IReader {
		public WIN1253Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1253.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1253.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1254.decode}.
	 */
	class WIN1254Reader implements IReader {
		public WIN1254Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1254.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1254.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1255.decode}.
	 */
	class WIN1255Reader implements IReader {
		public WIN1255Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1255.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1255.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1256.decode}.
	 */
	class WIN1256Reader implements IReader {
		public WIN1256Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1256.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1256.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1257.decode}.
	 */
	class WIN1257Reader implements IReader {
		public WIN1257Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1257.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1257.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	
	/**
	 * Single-byte reader that maps each byte of {@code xb} through
	 * {@code WIN1258.decode}.
	 */
	class WIN1258Reader implements IReader {
		public WIN1258Reader() {
		}

		/** Decodes the byte at {@code offset} and advances {@code offset} by one. */
		public int getChar()
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			if (offset < endOffset) {
				return WIN1258.decode(xb.byteAt(offset++));
			}
			throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
		}

		/** Advances past the current byte only when it decodes to {@code ch}. */
		public boolean skipChar(int ch)
			throws EOFExceptionHuge, ParseExceptionHuge, EncodingExceptionHuge {
			boolean matched = (ch == WIN1258.decode(xb.byteAt(offset)));
			if (matched) {
				offset++;
			}
			return matched;
		}
	}
	// Character reader used by the parsing helpers (entityIdentifier,
	// getCharAfterS, getCharAfterSe); the constructor installs a UTF8Reader.
	protected IReader r;
	
	/**
	 * Creates a generator with freshly allocated tag and attribute-name
	 * scratch arrays, a UTF-8 reader installed, and buffer reuse turned off.
	 */
	public VTDGenHuge() {
		tag_stack = new long[TAG_STACK_SIZE];
		attr_name_array = new long[ATTR_NAME_ARRAY_SIZE];
		r = new UTF8Reader();
		VTDDepth = 0;
		br = false;
	}
	/**
	 * Resets the internal parsing state so this generator can process the
	 * next file. The VTD and location-cache buffers are dropped unless
	 * buffer reuse ({@code br}) is enabled; the byte source, if any, is
	 * closed and released.
	 */
	public void clear() {
		if (!br) {
			VTDBuffer = null;
			l1Buffer = null;
			l2Buffer = null;
			l3Buffer = null;
		}
		if (xb != null) {
			xb.close();
		}
		xb = null;

		// read positions
		offset = 0;
		temp_offset = 0;

		// location-cache bookkeeping
		last_depth = 0;
		last_l1_index = 0;
		last_l2_index = 0;

		rootIndex = 0;
		depth = -1;
		increment = 1;
		BOM_detected = false;
		must_utf_8 = false;
		ch = 0;
		ch_temp = 0;
	}
	/**
	 * Reads the body of an entity or character reference (the text after
	 * the {@code '&'} up to and including the {@code ';'}) through the
	 * current reader and returns the character it denotes. The reader's
	 * offset is advanced past everything consumed.
	 *
	 * Supported forms are the numeric references {@code #ddd;} and
	 * {@code #xhhh;} and the five built-in entities {@code amp},
	 * {@code apos}, {@code quot}, {@code lt} and {@code gt}.
	 *
	 * Hexadecimal and decimal references are validated the same way: the
	 * value must satisfy {@code XMLChar.isValidChar}, and a value that grows
	 * beyond 0x10FFFF is rejected at once so the accumulator cannot
	 * overflow and wrap around to a seemingly valid character.
	 *
	 * @return int the character value the reference stands for
	 * @throws com.ximpleware.extended.EntityExceptionHuge the reference is malformed or denotes an invalid XML char.
	 * @throws com.ximpleware.extended.ParseExceptionHuge Super class for any exception during parsing.
	 * @throws com.ximpleware.extended.EncodingExceptionHuge UTF/native encoding exception.
	 * @throws com.ximpleware.extended.EOFExceptionHuge End of file exception.
	 */
	private int entityIdentifier() throws EntityExceptionHuge, EncodingExceptionHuge,EOFExceptionHuge, ParseExceptionHuge {
		int ch = r.getChar();
		int val = 0;

		switch (ch) {
		case '#':
			ch = r.getChar();
			if (ch == 'x') {
				// hexadecimal character reference: &#xhhh;
				while (true) {
					int digit;
					ch = r.getChar();
					if (ch >= '0' && ch <= '9') {
						digit = ch - '0';
					} else if (ch >= 'a' && ch <= 'f') {
						digit = ch - 'a' + 10;
					} else if (ch >= 'A' && ch <= 'F') {
						digit = ch - 'A' + 10;
					} else if (ch == ';') {
						// fall through to the validity check shared with
						// the decimal form instead of returning unchecked
						break;
					} else
						throw new EntityExceptionHuge("Errors in char reference: Illegal char following &#x.");
					val = (val << 4) + digit;
					if (val > 0x10FFFF) {
						// beyond the Unicode range; stop before the int wraps
						throw new EntityExceptionHuge("Errors in entity reference: Invalid XML char.");
					}
				}
			} else {
				// decimal character reference: &#ddd; (ch already holds the first digit)
				while (true) {
					if (ch >= '0' && ch <= '9') {
						val = val * 10 + (ch - '0');
					} else if (ch == ';') {
						break;
					} else
						throw new EntityExceptionHuge("Errors in char reference: Illegal char following &#.");
					if (val > 0x10FFFF) {
						// beyond the Unicode range; stop before the int wraps
						throw new EntityExceptionHuge("Errors in entity reference: Invalid XML char.");
					}
					ch = r.getChar();
				}
			}
			if (!XMLChar.isValidChar(val)) {
				throw new EntityExceptionHuge("Errors in entity reference: Invalid XML char.");
			}
			return val;

			case 'a' :
				// either &amp; or &apos;
				ch = r.getChar();
				if (ch == 'm') {
					if (r.getChar() == 'p' && r.getChar() == ';') {
						return '&';
					} else
						throw new EntityExceptionHuge("Errors in Entity: Illegal builtin reference");
				} else if (ch == 'p') {
					if (r.getChar() == 'o'
						&& r.getChar() == 's'
						&& r.getChar() == ';') {
						return '\'';
					} else
						throw new EntityExceptionHuge("Errors in Entity: Illegal builtin reference");
				} else
					throw new EntityExceptionHuge("Errors in Entity: Illegal builtin reference");

			case 'q' :
				// &quot;
				if (r.getChar() == 'u'
					&& r.getChar() == 'o'
					&& r.getChar() == 't'
					&& r.getChar() == ';') {
					return '"';
				} else
					throw new EntityExceptionHuge("Errors in Entity: Illegal builtin reference");
			case 'l' :
				// &lt;
				if (r.getChar() == 't' && r.getChar() == ';') {
					return '<';
				} else
					throw new EntityExceptionHuge("Errors in Entity: Illegal builtin reference");
			case 'g' :
				// &gt;
				if (r.getChar() == 't' && r.getChar() == ';') {
					return '>';
				} else
					throw new EntityExceptionHuge("Errors in Entity: Illegal builtin reference");
			default :
				throw new EntityExceptionHuge("Errors in Entity: Illegal entity char");
		}
	}
	/**
	 * Builds the position suffix appended to error messages by scanning the
	 * document from {@code docOffset} up to the current {@code offset},
	 * counting newlines and remembering where the last one was.
	 * For the UTF-16 encodings the scan steps two bytes at a time and the
	 * distance from the last newline is halved.
	 * @return java.lang.String of the form "\nLine Number: N Offset: M"
	 */
	private String formatLineNumber() {
		int breaks = 0;
		long lastBreak = 0;
		long column;

		if (encoding < FORMAT_UTF_16BE) {
			// every non-UTF-16 encoding: newline is the single byte '\n'
			for (long pos = docOffset; pos <= offset - 1; pos++) {
				if (xb.byteAt(pos) == '\n') {
					breaks++;
					lastBreak = pos;
				}
			}
			column = offset - lastBreak;
		} else {
			final boolean bigEndian = (encoding == FORMAT_UTF_16BE);
			for (long pos = docOffset; pos <= offset - 2; pos += 2) {
				// newline is the byte pair 00 0A (big-endian) or 0A 00 (little-endian)
				boolean newline = bigEndian
					? (xb.byteAt(pos + 1) == '\n' && xb.byteAt(pos) == 0)
					: (xb.byteAt(pos) == '\n' && xb.byteAt(pos + 1) == 0);
				if (newline) {
					breaks++;
					lastBreak = pos;
				}
			}
			column = (offset - lastBreak) >> 1;
		}
		return "\nLine Number: " + (breaks + 1) + " Offset: " + (column - 1);
	}
	/**
	 * Writes the remaining location-cache entry: depending on
	 * {@code last_depth}, appends to the level-1 or level-2 buffer a long
	 * whose upper 32 bits hold the last index and whose lower 32 bits are
	 * all ones. Any other depth appends nothing.
	 */
	private void finishUp(){
		switch (last_depth) {
		case 1:
			l1Buffer.append(((long) last_l1_index << 32) | 0xffffffffL);
			break;
		case 2:
			l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			break;
		default:
			break;
		}
	}
	
	
	/**
	 * Reads characters until one that is not a space, tab, line feed or
	 * carriage return, and returns it. Entity references are not
	 * interpreted by this version.
	 * @return int the first non-whitespace character read
	 * @throws ParseExceptionHuge 
	 * @throws EncodingExceptionHuge 
	 * @throws com.ximpleware.extended.EOFExceptionHuge 
	 */
	private int getCharAfterS()
		throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge {
		int c;
		do {
			c = r.getChar();
		} while (c == ' ' || c == '\t' || c == '\n' || c == '\r');
		return c;
	}
	/**
	 * The entity-aware version of getCharAfterS: whitespace written as an
	 * entity or character reference is skipped as well. When a reference
	 * resolves to a non-whitespace character, the offset is rewound to just
	 * after the {@code '&'} and {@code '&'} itself is returned.
	 * @return int the first non-whitespace character read
	 * @throws ParseExceptionHuge Super class for any exception during parsing.
	 * @throws EncodingExceptionHuge UTF/native encoding exception.
	 * @throws com.ximpleware.extended.EOFExceptionHuge End of file exception.
	 */
	private int getCharAfterSe()
		throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge {
		for (;;) {
			int c = r.getChar();
			if (XMLChar.isSpaceChar(c)) {
				continue;
			}
			if (c != '&') {
				return c;
			}
			long mark = offset; // position just after the '&'
			if (!XMLChar.isSpaceChar(entityIdentifier())) {
				offset = mark; // rewind
				return '&';
			}
		}
	}
	/**
	 * Returns a VTDNavHuge object built from the results of parsing. After the
	 * navigator has been constructed, clear() is called so that this
	 * VTDGenHuge can process the next file.
	 * @return com.ximpleware.extended.VTDNavHuge
	 */
	public VTDNavHuge getNav() {
		// The navigator must be constructed before clear() is invoked.
		final VTDNavHuge nav = new VTDNavHuge(
				rootIndex, encoding, ns, VTDDepth,
				xb, VTDBuffer,
				l1Buffer, l2Buffer, l3Buffer,
				docOffset, docLen);
		clear();
		return nav;
	}
	/**
	 * Get the offset value of the previous character, i.e. the byte offset at
	 * which the character that ends right before the current offset starts.
	 * <ul>
	 * <li>UTF-8: steps backwards over continuation bytes (10xxxxxx).</li>
	 * <li>Single-byte encodings: offset - 1.</li>
	 * <li>UTF-16: offset - 2, or offset - 4 when the 16-bit unit that ends at
	 * the current offset is a low surrogate (second half of a surrogate
	 * pair).</li>
	 * </ul>
	 * @return long the starting byte offset of the previous character
	 * @throws ParseExceptionHuge if the current encoding is not recognised.
	 */
	private long getPrevOffset() throws ParseExceptionHuge {
		long prevOffset = offset;
		int unit;
		switch (encoding) {
			case FORMAT_UTF8 :
				// back up until a byte that is not a continuation byte
				do {
					prevOffset--;
				} while (xb.byteAt(prevOffset) <0 && 
				        (xb.byteAt(prevOffset) & (byte)0xc0) == (byte)0x80);
				
				return prevOffset;
			case FORMAT_ASCII :
			case FORMAT_ISO_8859_1:
			case FORMAT_ISO_8859_2:
			case FORMAT_ISO_8859_3:
			case FORMAT_ISO_8859_4:
			case FORMAT_ISO_8859_5:
			case FORMAT_ISO_8859_6:
			case FORMAT_ISO_8859_7:
			case FORMAT_ISO_8859_8:
			case FORMAT_ISO_8859_9:
			case FORMAT_ISO_8859_10:
			case FORMAT_WIN_1250:
			case FORMAT_WIN_1251:
			case FORMAT_WIN_1252:
			case FORMAT_WIN_1253:
			case FORMAT_WIN_1254:
			case FORMAT_WIN_1255:
			case FORMAT_WIN_1256:
			case FORMAT_WIN_1257:
			case FORMAT_WIN_1258:
				return offset - 1;
			case FORMAT_UTF_16LE :
				// The previous 16-bit unit occupies offset-2 (low byte) and
				// offset-1 (high byte). Looking at the unit AT offset would
				// inspect the next, not yet consumed, character.
				unit = (xb.byteAt(offset - 1) & 0xff) << 8 | (xb.byteAt(offset - 2) & 0xff);
				if (unit >= 0xdc00 && unit <= 0xdfff) {
					return offset - 4; // low surrogate: the character spans two units
				}
				return offset - 2;
			case FORMAT_UTF_16BE :
				// big endian: high byte at offset-2, low byte at offset-1
				unit = (xb.byteAt(offset - 2) & 0xff) << 8 | (xb.byteAt(offset - 1) & 0xff);
				if (unit >= 0xdc00 && unit <= 0xdfff) {
					return offset - 4; // low surrogate: the character spans two units
				}
				return offset - 2;
			default :
				// Encodings below FORMAT_UTF_16BE are treated as byte oriented
				// throughout this class; the ones without an explicit label
				// above (e.g. the ISO-8859 variants above 10) are single-byte
				// and must not be rejected here.
				if (encoding < FORMAT_UTF_16BE) {
					return offset - 1;
				}
				throw new ParseExceptionHuge("Other Error: Should never happen");
		}
	}

	/**
	 * A private method that inspects the first bytes of the document (a byte
	 * order mark, or the byte pattern of a leading "&lt;?") to decide the
	 * document encoding, then checks the document against the maximum
	 * supported size.
	 * @throws EncodingExceptionHuge if the document is empty or the leading
	 *         bytes form an unrecognised pattern
	 * @throws ParseExceptionHuge if the document is too large
	 */
	private void decide_encoding() throws EncodingExceptionHuge,ParseExceptionHuge {
		if (xb.length() == 0)
			throw new EncodingExceptionHuge("Document is zero sized ");

		final int lead = xb.byteAt(offset);
		switch (lead) {
			case -2 : // 0xFE, expected to be followed by 0xFF
				increment = 2;
				if (xb.byteAt(offset + 1) != -1)
					throw new EncodingExceptionHuge("Unknown Character encoding: should be 0xff 0xfe");
				offset += 2;
				encoding = FORMAT_UTF_16BE;
				BOM_detected = true;
				r = new UTF16BEReader();
				break;

			case -1 : // 0xFF, expected to be followed by 0xFE
				increment = 2;
				if (xb.byteAt(offset + 1) != -2)
					throw new EncodingExceptionHuge("Unknown Character encoding: not UTF-16LE");
				offset += 2;
				encoding = FORMAT_UTF_16LE;
				BOM_detected = true;
				r = new UTF16LEReader();
				break;

			case -17 : // 0xEF, expected to be followed by 0xBB 0xBF
				if (xb.byteAt(offset + 1) != -69 || xb.byteAt(offset + 2) != -65)
					throw new EncodingExceptionHuge("Unknown Character encoding: not UTF-8");
				offset += 3;
				must_utf_8 = true;
				break;

			case 0 : // 00 3C 00 3F: "<?" as big-endian 16-bit units, no BOM
				if (xb.byteAt(offset + 1) != 0x3c
						|| xb.byteAt(offset + 2) != 0
						|| xb.byteAt(offset + 3) != 0x3f)
					throw new EncodingExceptionHuge("Unknown Character encoding: not UTF-16BE");
				encoding = FORMAT_UTF_16BE;
				increment = 2;
				r = new UTF16BEReader();
				break;

			case 0x3c : // 3C 00 3F 00: "<?" as little-endian 16-bit units, no BOM
				if (xb.byteAt(offset + 1) == 0
						&& xb.byteAt(offset + 2) == 0x3f
						&& xb.byteAt(offset + 3) == 0) {
					increment = 2;
					encoding = FORMAT_UTF_16LE;
					r = new UTF16LEReader();
				}
				// otherwise the encoding chosen by the caller is kept
				break;

			default :
				break;
		}

		// check for max file size exception
		final boolean tooBig = (offset + (long) docLen) >= (1L << 37);
		if (tooBig) {
			if (encoding < FORMAT_UTF_16BE)
				throw new ParseExceptionHuge(
						"Other error: file size too big >=128GB ");
			throw new ParseExceptionHuge(
					"Other error: file size too large >= 128GB");
		}
	}
	
	/**
	 * This method parses the XML file and returns a boolean indicating 
	 * if it is successful or not. An I/O or parsing failure is not propagated;
	 * it is reported as a false return value.
	 * @param fileName name of the file to read
	 * @param ns  namespace aware or not
	 * @param mode indicates whether the file is loaded in memory (IN_MEMORY)
	 *             or memory mapped (MEM_MAPPED); any other value yields false
	 * @return boolean indicating whether the parseFile is a success
	 *
	 */
	public boolean parseFile(String fileName, boolean ns, int mode){
		try {
			if (mode == IN_MEMORY) {
				XMLBuffer inMemoryDoc = new XMLBuffer();
				inMemoryDoc.readFile(fileName);
				this.setDoc(inMemoryDoc);
			} else if (mode == MEM_MAPPED) {
				XMLMemMappedBuffer mappedDoc = new XMLMemMappedBuffer();
				mappedDoc.readFile(fileName);
				this.setDoc(mappedDoc);
			} else {
				// unknown mode
				return false;
			}
			this.parse(ns); // namespace awareness as requested by the caller
			return true;
		} catch (java.io.IOException ignored) {
			// deliberately not rethrown: failure is signalled by the return value
		} catch (ParseExceptionHuge ignored) {
			// deliberately not rethrown: failure is signalled by the return value
		}
		return false;
	}
	
	
	/**
	 * parseFile with default mode set to IN_MEMORY; delegates to
	 * parseFile(fileName, ns, IN_MEMORY).
	 * @param fileName name of the file to read
	 * @param ns namespace aware or not
	 * @return boolean indicating whether the parseFile is a success
	 *
	 */
	public boolean parseFile(String fileName, boolean ns){
		final boolean parsed = this.parseFile(fileName, ns, IN_MEMORY);
		return parsed;
	}
	
	
	
	/**
	 * Generating VTD tokens and Location cache info.
	 * Runs the parser's finite state machine over the document held in xb:
	 * after decide_encoding() has selected the reader, each iteration of the
	 * main loop handles one parser state and emits tokens through writeVTD().
	 * The loop has no normal exit; it ends when an exception is thrown. An
	 * EOFExceptionHuge raised while the machine is in STATE_DOC_END is taken
	 * as the regular end of the document (finishUp() is called and the method
	 * returns); in any other state it is rethrown.
	 * @param NS boolean Enable namespace or not
	 * @throws ParseExceptionHuge Super class for any exceptions during parsing.     
	 * @throws EOFExceptionHuge End of file exception.    
	 * @throws EntityExceptionHuge Entity resolution exception.
	 * @throws EncodingExceptionHuge UTF/native encoding exception.
	 */
	public void parse(boolean NS)
		throws EncodingExceptionHuge, EOFExceptionHuge, EntityExceptionHuge, ParseExceptionHuge {

		// define internal variables	
		ns = NS;
		// length1: byte length of the current token / qualified name;
		// length2: byte length of the prefix part (set when ':' is seen)
		long length1 = 0, length2 = 0;
		// number of attribute names recorded for the current start tag
		int attr_count = 0 /*, ch = 0, ch_temp = 0*/;
		int parser_state = STATE_DOC_START;
		//boolean has_amp = false; 
		// set when the attribute name being scanned is xmlns or xmlns:...
		boolean is_ns = false;
		// UTF-8 is assumed unless decide_encoding() detects otherwise
		encoding = FORMAT_UTF8;
		// true when the start tag just closed was NOT self-closing ("/>")
		boolean helper=false;
		//boolean docEnd = false;

		// first check first several bytes to figure out the encoding
		decide_encoding();

		// enter the main finite state machine
		try {
			writeVTD(0,0,TOKEN_DOCUMENT,depth);
			while (true) {
				switch (parser_state) {
					// a '<' has been consumed; the next char selects the markup kind
					case STATE_LT_SEEN : //if (depth < -1)
						//    throw new ParseExceptionHuge("Other Errors: Invalid depth");
						temp_offset = offset;
						ch = r.getChar();
						if (XMLChar.isNameStartChar(ch)) {
							depth++;
							parser_state = STATE_START_TAG;
						} else {
							switch (ch) {
								case '/' :
									parser_state = STATE_END_TAG;
									break;
								case '?' :
									parser_state = process_qm_seen();
									break;
								case '!' : // three possibility (comment, CDATA, DOCTYPE)
									parser_state = process_ex_seen();
									break;
								default :
									throw new ParseExceptionHuge(
										"Other Error: Invalid char after <"
											+ formatLineNumber());
							}
						}
						break;

					// scan the element name; temp_offset marks where it starts
					case STATE_START_TAG : //name space is handled by
						while (true) {
							ch = r.getChar();
							if (XMLChar.isNameChar(ch)) {
								if (ch == ':') {
									// length of the prefix (bytes before the ':')
									length2 = offset - temp_offset - increment;
								}
							} else
								break;
						}
						// length of the whole name, excluding the terminating char
						length1 = offset - temp_offset - increment;
						if (depth > MAX_DEPTH) {
							throw new ParseExceptionHuge(
								"Other Error: Depth exceeds MAX_DEPTH"
									+ formatLineNumber());
						}
						//writeVTD(offset, TOKEN_STARTING_TAG, length2:length1, depth)
						// remember name length (bits 38 and up) and name offset (low
						// bits) so that STATE_END_TAG can compare the closing name
						long x = ((long) length1 << 38) |temp_offset;
						tag_stack[depth] = x;
						
						// System.out.println(
						//     " " + (temp_offset) + " " + length2 + ":" + length1 + " startingTag " + depth);
						if (depth > VTDDepth)
							VTDDepth = depth;
						if (encoding < FORMAT_UTF_16BE){
							if (length2>MAX_PREFIX_LENGTH
									|| length1 > MAX_QNAME_LENGTH)
								throw new ParseExceptionHuge(
										"Token Length Error: Starting tag prefix or qname length too long"					
										+ formatLineNumber());
							writeVTD(
								(temp_offset),
								(length2 << 10) | length1,
								TOKEN_STARTING_TAG,
								depth);
							}
						else{
							// 16-bit encodings: byte offsets and lengths are halved
							// before being written
							if (length2>(MAX_PREFIX_LENGTH <<1)
									|| length1 > (MAX_QNAME_LENGTH<<1))
								throw new ParseExceptionHuge(
										"Token Length Error: Starting tag prefix or qname length too long"
										+formatLineNumber());
							writeVTD(
								(temp_offset) >> 1,
								(length2 << 9) | (length1 >> 1),
								TOKEN_STARTING_TAG,
								depth);
						}
						//offset += length1;
						length2 = 0;
						if (XMLChar.isSpaceChar(ch)) {
							ch = getCharAfterS();
							if (XMLChar.isNameStartChar(ch)) {
								// seen an attribute here
								temp_offset = getPrevOffset();
								parser_state = STATE_ATTR_NAME;
								break;
							}
						}
						helper = true;
						if (ch == '/') {
							// empty-element tag: the element closes immediately
							depth--;
							helper = false;
							ch = r.getChar();
						}
						if (ch == '>') {
						if (depth != -1) {
							temp_offset = offset;
							ch = getCharAfterSe(); // consume WSs
							if (ch == '<') {
								parser_state = STATE_LT_SEEN;
								if (r.skipChar('/')) {
									// "</" directly after the start tag: the skipped
									// white space, if any, is recorded as char data
									if (helper == true) {
										length1 = offset - temp_offset
												- (increment << 1);
										if (length1 > 0) {
											if (encoding < FORMAT_UTF_16BE)
												writeVTD((temp_offset),
														length1,
														TOKEN_CHARACTER_DATA,
														depth);
											else
												writeVTD((temp_offset) >> 1,
														(length1 >> 1),
														TOKEN_CHARACTER_DATA,
														depth);
										}
									}
									parser_state = STATE_END_TAG;
									break;
								}
							} else if (XMLChar.isContentChar(ch)) {
								//temp_offset = offset;
								parser_state = STATE_TEXT;
							} else if (ch == '&') {
								//has_amp = true;
								//temp_offset = offset;
								entityIdentifier();
								parser_state = STATE_TEXT;
							} else if (ch == ']') {
								// reject the sequence "]]>" inside text content
								if (r.skipChar(']')) {
									while (r.skipChar(']')) {
									}
									if (r.skipChar('>'))
										throw new ParseExceptionHuge(
												"Error in text content: ]]> in text content"
														+ formatLineNumber());
								}
								parser_state = STATE_TEXT;
							} else
								throw new ParseExceptionHuge(
										"Error in text content: Invalid char"
												+ formatLineNumber());
						} else {
							// the root element was an empty-element tag
							parser_state = STATE_DOC_END;
						}
						break;
					}
					throw new ParseExceptionHuge(
							"Starting tag Error: Invalid char in starting tag"
									+ formatLineNumber());

					// "</" has been consumed; the name must match the open element
					case STATE_END_TAG :
						temp_offset = offset;
						// unpack what STATE_START_TAG stored for this depth
						long sos =  tag_stack[depth] & 0x1fffffffffL;
						int sl = (int) (tag_stack[depth] >> 38);
						
						// jump past the expected name, then verify it byte by byte
						offset = temp_offset+sl;
						
						if (offset>= endOffset)
							throw new EOFExceptionHuge("permature EOF reached, XML document incomplete");
						for (int i = 0; i < sl; i++) {
							if (xb.byteAt(sos + i) != xb.byteAt(temp_offset + i))
								throw new ParseExceptionHuge(
									"Ending tag error: Start/ending tag mismatch"
									+ formatLineNumber());
						}
						depth--;
						ch = getCharAfterS();
						if(ch != '>')
							throw new ParseExceptionHuge(
								"Ending tag error: Invalid char in ending tag "
								+ formatLineNumber()); 
						
						if (depth != -1) {
							temp_offset = offset;
							ch = getCharAfterS();
							if (ch == '<')
								parser_state = STATE_LT_SEEN;
							else if (XMLChar.isContentChar(ch)) {
								parser_state = STATE_TEXT;
							} 
							else if (ch == '&') {
								//has_amp = true;
								entityIdentifier();
								parser_state = STATE_TEXT;
							} 
							else if (ch == ']') {
								if (r.skipChar(']')) {
									while (r.skipChar(']')) {
									}
									if (r.skipChar('>'))
										throw new ParseExceptionHuge(
										"Error in text content: ]]> in text content"
										+ formatLineNumber());
								}
									parser_state = STATE_TEXT;
							}else
								throw new ParseExceptionHuge(
									"Other Error: Invalid char in xml"
									+ formatLineNumber());
						} else
							// the root element has been closed
							parser_state = STATE_DOC_END;
						break;
						
					// ch holds the first char of an attribute name starting at temp_offset
					case STATE_ATTR_NAME :

						// detect "xmlns" followed by '=', white space or ':'
						if (ch == 'x') {
							if (r.skipChar('m')
								&& r.skipChar('l')
								&& r.skipChar('n')
								&& r.skipChar('s')) {
								ch = r.getChar();
								if (ch == '='
									|| XMLChar.isSpaceChar(ch)
									|| ch == ':') {
									is_ns = true; //break;
								}
							}
						}
						while (true) {
							if (XMLChar.isNameChar(ch)) {
								if (ch == ':') {
									length2 = offset - temp_offset - increment;
								}
								ch = r.getChar();
							} else
								break;
						}
						length1 = getPrevOffset() - temp_offset;
						// check for uniqueness here
						// (byte-wise comparison against every name already recorded
						// for this start tag)
						boolean unique = true;
						boolean unequal;
						for (int i = 0; i < attr_count; i++) {
							unequal = false;
							// each entry packs (offset << 17) | length
							int prevLen = (int) attr_name_array[i] & 0x0001ffff;
							if (length1 == prevLen) {
								long prevOffset =
									 (attr_name_array[i] >> 17);
								for (int j = 0; j < prevLen; j++) {
									if (xb.byteAt(prevOffset + j)
										!= xb.byteAt(temp_offset + j)) {
										unequal = true;
										break;
									}
								}
							} else
								unequal = true;
							unique = unique && unequal;
						}
						if (!unique && attr_count != 0)
							throw new ParseExceptionHuge(
								"Error in attr: Attr name not unique"
									+ formatLineNumber());
						unique = true;
						if (attr_count < attr_name_array.length) {
							attr_name_array[attr_count] =
								( (temp_offset) << 17) | length1;
							attr_count++;
						} else // grow the attr_name_array by 16
							{
							long[] temp_array = attr_name_array;
							/*System.out.println(
								"size increase from "
									+ temp_array.length
									+ "  to "
									+ (attr_count + 16));*/
							attr_name_array =
								new long[attr_count + ATTR_NAME_ARRAY_SIZE];
							for (int i = 0; i < attr_count; i++) {
								attr_name_array[i] = temp_array[i];
							}
							attr_name_array[attr_count] =
								((temp_offset) << 17) | length1;
							attr_count++;
						}

						// after checking, write VTD
						if (is_ns) {
							if (encoding < FORMAT_UTF_16BE){
								if (length2>MAX_PREFIX_LENGTH
										|| length1 > MAX_QNAME_LENGTH)
									throw new ParseExceptionHuge(
											"Token length overflow error: Attr NS tag prefix or qname length too long"
											+formatLineNumber());
								writeVTD(
									temp_offset,
									(length2 << 10) | length1,
									TOKEN_ATTR_NS,
									depth);
							}
							else{
								if (length2>(MAX_PREFIX_LENGTH << 1)
										|| length1 > (MAX_QNAME_LENGTH <<1))
									throw new ParseExceptionHuge(
											"Token length overflow error: Attr NS prefix or qname length too long"
											+ formatLineNumber());
								writeVTD(
									temp_offset >> 1,
									(length2 << 9) | (length1 >> 1),
									TOKEN_ATTR_NS,
									depth);
							}
							is_ns = false;
						} else {
							if (encoding < FORMAT_UTF_16BE){
								if (length2>MAX_PREFIX_LENGTH
										|| length1 > MAX_QNAME_LENGTH)
									throw new ParseExceptionHuge(
											"Token Length Error: Attr name prefix or qname length too long"
											+ formatLineNumber());
								writeVTD(
									temp_offset,
									(length2 << 10) | length1,
									TOKEN_ATTR_NAME,
									depth);
							}
							else{
								if (length2>(MAX_PREFIX_LENGTH<<1)
										|| length1 > (MAX_QNAME_LENGTH<<1))
									throw new ParseExceptionHuge(
											"Token Length overflow error: Attr name prefix or qname length too long" 
											+ formatLineNumber());
								writeVTD(
									temp_offset >> 1,
									(length2 << 9) | (length1 >> 1),
									TOKEN_ATTR_NAME,
									depth);
							}
						}
						/*System.out.println(
						    " " + temp_offset + " " + length2 + ":" + length1 + " attr name " + depth);*/
						length2 = 0;
						if (XMLChar.isSpaceChar(ch)) {
							ch = getCharAfterS();
						}
						if (ch != '=')
							throw new ParseExceptionHuge(
								"Error in attr: invalid char"
									+ formatLineNumber());
						// ch_temp keeps the opening quote so the value can be
						// terminated by the same character
						ch_temp = getCharAfterS();
						if (ch_temp != '"' && ch_temp != '\'')
							throw new ParseExceptionHuge(
								"Error in attr: invalid char (should be ' or \" )"
									+ formatLineNumber());
						temp_offset = offset;
						parser_state = STATE_ATTR_VAL;
						break;
						
					// scan an attribute value up to the matching quote
					case STATE_ATTR_VAL :
						while (true) {
							ch = r.getChar();
							if (XMLChar.isValidChar(ch) && ch != '<') {
								if (ch == ch_temp)
									break;
								if (ch == '&') {
									// as in vtd spec, we mark attr val with entities
									if (!XMLChar
										.isValidChar(entityIdentifier())) {
										throw new ParseExceptionHuge(
											"Error in attr: Invalid XML char"
												+ formatLineNumber());
									}
								}

							} else
								throw new ParseExceptionHuge(
									"Error in attr: Invalid XML char"
										+ formatLineNumber());
						}

						// value length, excluding the closing quote
						length1 = offset - temp_offset - increment;
						if (encoding < FORMAT_UTF_16BE){
							if (length1 > MAX_TOKEN_LENGTH)
								  throw new ParseExceptionHuge("Token Length Error:"
											  +" Attr val too long (>0xfffff)"
											  + formatLineNumber());
							writeVTD(
								temp_offset,
								length1,
								TOKEN_ATTR_VAL,
								depth);
						}
						else{
							if (length1 > (MAX_TOKEN_LENGTH <<1))
								  throw new ParseExceptionHuge("Token Length Error:"
											  +" Attr val too long (>0xfffff)"
											  + formatLineNumber());
							writeVTD(
								temp_offset >> 1,
								length1 >> 1,
								TOKEN_ATTR_VAL,
								depth);
						}
						ch = r.getChar();
						if (XMLChar.isSpaceChar(ch)) {
							ch = getCharAfterS();
							if (XMLChar.isNameStartChar(ch)) {
								// another attribute follows
								temp_offset = offset - increment;
								parser_state = STATE_ATTR_NAME;
								break;
							}
						}
						helper = true;
						if (ch == '/') {
							// empty-element tag
							depth--;
							helper = false;
							ch = r.getChar();
						}

						if (ch == '>') {
							// end of the start tag: reset the per-tag attribute list
							attr_count = 0;
							if (depth != -1) {
								temp_offset = offset;
								ch = getCharAfterSe();
								if (ch == '<') {
									parser_state = STATE_LT_SEEN;
									if (r.skipChar('/')) {
										if (helper == true) {
											length1 = offset - temp_offset
													- (increment << 1);
											// NOTE(review): unlike the STATE_START_TAG
											// path, the length1 > 0 guard is commented
											// out here, so a zero-length character-data
											// token is written too -- confirm intended
											//if (length1 > 0) {
												if (encoding < FORMAT_UTF_16BE)
													writeVTD((temp_offset),
															length1,
															TOKEN_CHARACTER_DATA,
															depth);
												else
													writeVTD((temp_offset) >> 1,
															(length1 >> 1),
															TOKEN_CHARACTER_DATA,
															depth);
											//}
										}
										parser_state = STATE_END_TAG;
										break;
									}
								} else if (XMLChar.isContentChar(ch)) {
									//temp_offset = offset;
									parser_state = STATE_TEXT;
								} else if (ch == '&') {
									//has_amp = true;
									//temp_offset = offset;
									entityIdentifier();
									parser_state = STATE_TEXT;
								} else if (ch == ']') {
									if (r.skipChar(']')) {
										while (r.skipChar(']')) {
										}
										if (r.skipChar('>'))
											throw new ParseExceptionHuge(
												"Error in text content: ]]> in text content"
													+ formatLineNumber());
									}
									parser_state = STATE_TEXT;
								}else
									throw new ParseExceptionHuge(
										"Error in text content: Invalid char"
											+ formatLineNumber());
							} else {
								parser_state = STATE_DOC_END;
							}
							break;
						}

						throw new ParseExceptionHuge(
							"Starting tag Error: Invalid char in starting tag"
								+ formatLineNumber());
						
					// character data; temp_offset was set where the text begins
					case STATE_TEXT :
						if (depth == -1)
							throw new ParseExceptionHuge(
								"Error in text content: Char data at the wrong place"
									+ formatLineNumber());
						while (true) {
							ch = r.getChar();
							if (XMLChar.isContentChar(ch)) {
							} else if (ch == '&') {
								//has_amp = true;
								if (!XMLChar.isValidChar(entityIdentifier()))
									throw new ParseExceptionHuge(
										"Error in text content: Invalid char in text content "
											+ formatLineNumber());
								//parser_state = STATE_TEXT;
							} else if (ch == '<') {
								break;
							} else if (ch == ']') {
								if (r.skipChar(']')) {
									while (r.skipChar(']')) {
									}
									if (r.skipChar('>'))
										throw new ParseExceptionHuge(
											"Error in text content: ]]> in text content"
												+ formatLineNumber());
								}
							} else
								throw new ParseExceptionHuge(
									"Error in text content: Invalid char in text content "
										+ formatLineNumber());
						}
						// text length, excluding the '<' that ended it
						length1 = offset - increment - temp_offset;

						if (encoding < FORMAT_UTF_16BE)
							writeVTD(
								temp_offset,
								length1,
								TOKEN_CHARACTER_DATA,
								depth);
						else
							writeVTD(
								temp_offset >> 1,
								length1 >> 1,
								TOKEN_CHARACTER_DATA,
								depth);

						//has_amp = true;
						parser_state = STATE_LT_SEEN;
						break;
					// the remaining states are delegated to helper methods, each of
					// which returns the next parser state
					case STATE_DOC_START :
						parser_state = process_start_doc();
						break;
					case STATE_DOC_END :
						//docEnd = true;
						parser_state = process_end_doc();
						break;
					case STATE_PI_TAG :
						parser_state = process_pi_tag();
						break;
						//throw new ParseExceptionHuge("Error in PI: Invalid char");
					case STATE_PI_VAL :
						parser_state = process_pi_val();
						break;

					case STATE_DEC_ATTR_NAME :
						parser_state = process_dec_attr();
						break;
						
					case STATE_COMMENT :
						parser_state = process_comment();
						break;
						
					case STATE_CDATA :
						parser_state = process_cdata();
						break;
						
					case STATE_DOCTYPE :
						parser_state = process_doc_type();
						break;
						
					case STATE_END_COMMENT :
						parser_state = process_end_comment();
						break;

					case STATE_END_PI :
						parser_state = process_end_pi();
						break;
						
					default :
						throw new ParseExceptionHuge(
							"Other error: invalid parser state"
								+formatLineNumber());
				}
			}
		} catch (EOFExceptionHuge e) {
			// running out of input is only acceptable once the root element
			// has been closed
			if (parser_state != STATE_DOC_END)
				throw e;
			finishUp();
		}
	}
	/**
	 * Matches the remainder of a "cp125x" encoding name (x = 0..8) followed by
	 * the closing quote held in ch_temp, switches the encoding and reader to
	 * the corresponding WIN-125x variant and writes a TOKEN_DEC_ATTR_VAL token
	 * of length 6 starting at temp_offset.
	 * Presumably the leading 'c' has already been consumed by the caller --
	 * verify against the call site.
	 * @throws ParseExceptionHuge if the name is not recognised, the closing
	 *         quote is missing, or the encoding may not be switched
	 */
	private void matchCPEncoding()throws ParseExceptionHuge{
		final boolean prefixMatched =
				(r.skipChar('p') || r.skipChar('P'))
				&& r.skipChar('1')
				&& r.skipChar('2')
				&& r.skipChar('5');
		if (!prefixMatched)
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());

		if (encoding > FORMAT_UTF_16LE)
			throw new ParseExceptionHuge(
					"XML decl error: Can't switch encoding to ISO-8859"
							+ formatLineNumber());

		if (must_utf_8)
			throw new EncodingExceptionHuge(
					"Can't switch from UTF-8"
							+ formatLineNumber());

		// the last digit selects the code page
		if (r.skipChar('0')) {
			encoding = FORMAT_WIN_1250;
			r = new WIN1250Reader();
		} else if (r.skipChar('1')) {
			encoding = FORMAT_WIN_1251;
			r = new WIN1251Reader();
		} else if (r.skipChar('2')) {
			encoding = FORMAT_WIN_1252;
			r = new WIN1252Reader();
		} else if (r.skipChar('3')) {
			encoding = FORMAT_WIN_1253;
			r = new WIN1253Reader();
		} else if (r.skipChar('4')) {
			encoding = FORMAT_WIN_1254;
			r = new WIN1254Reader();
		} else if (r.skipChar('5')) {
			encoding = FORMAT_WIN_1255;
			r = new WIN1255Reader();
		} else if (r.skipChar('6')) {
			encoding = FORMAT_WIN_1256;
			r = new WIN1256Reader();
		} else if (r.skipChar('7')) {
			encoding = FORMAT_WIN_1257;
			r = new WIN1257Reader();
		} else if (r.skipChar('8')) {
			encoding = FORMAT_WIN_1258;
			r = new WIN1258Reader();
		} else {
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());
		}

		// every recognised variant records the same six-character token
		writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);

		// the name must be terminated by the quote that opened the value
		if (!r.skipChar(ch_temp))
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());
	}
	
	/**
	 * Matches the remainder of a "windows-125x" encoding name (x = 0..8)
	 * followed by the closing quote held in ch_temp, switches the encoding and
	 * reader to the corresponding WIN-125x variant and writes a
	 * TOKEN_DEC_ATTR_VAL token of length 12 starting at temp_offset.
	 * Presumably the leading 'w' has already been consumed by the caller --
	 * verify against the call site.
	 * @throws ParseExceptionHuge if the name is not recognised, the closing
	 *         quote is missing, or the encoding may not be switched
	 */
	private void matchWindowsEncoding()throws ParseExceptionHuge{
		final boolean prefixMatched =
				(r.skipChar('i') || r.skipChar('I'))
				&& (r.skipChar('n') || r.skipChar('N'))
				&& (r.skipChar('d') || r.skipChar('D'))
				&& (r.skipChar('o') || r.skipChar('O'))
				&& (r.skipChar('w') || r.skipChar('W'))
				&& (r.skipChar('s') || r.skipChar('S'))
				&& r.skipChar('-')
				&& r.skipChar('1')
				&& r.skipChar('2')
				&& r.skipChar('5');
		if (!prefixMatched)
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());

		if (encoding > FORMAT_UTF_16LE)
			throw new ParseExceptionHuge(
					"XML decl error: Can't switch encoding to ISO-8859"
							+ formatLineNumber());

		if (must_utf_8)
			throw new EncodingExceptionHuge(
					"Can't switch from UTF-8"
							+ formatLineNumber());

		// the last digit selects the code page
		if (r.skipChar('0')) {
			encoding = FORMAT_WIN_1250;
			r = new WIN1250Reader();
		} else if (r.skipChar('1')) {
			encoding = FORMAT_WIN_1251;
			r = new WIN1251Reader();
		} else if (r.skipChar('2')) {
			encoding = FORMAT_WIN_1252;
			r = new WIN1252Reader();
		} else if (r.skipChar('3')) {
			encoding = FORMAT_WIN_1253;
			r = new WIN1253Reader();
		} else if (r.skipChar('4')) {
			encoding = FORMAT_WIN_1254;
			r = new WIN1254Reader();
		} else if (r.skipChar('5')) {
			encoding = FORMAT_WIN_1255;
			r = new WIN1255Reader();
		} else if (r.skipChar('6')) {
			encoding = FORMAT_WIN_1256;
			r = new WIN1256Reader();
		} else if (r.skipChar('7')) {
			encoding = FORMAT_WIN_1257;
			r = new WIN1257Reader();
		} else if (r.skipChar('8')) {
			encoding = FORMAT_WIN_1258;
			r = new WIN1258Reader();
		} else {
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());
		}

		// every recognised variant records the same twelve-character token
		writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);

		// the name must be terminated by the quote that opened the value
		if (!r.skipChar(ch_temp))
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());
	}
	/**
	 * Matches the remainder of an encoding name beginning with 'u'/'U'
	 * ("US-ASCII", "UTF-8", "UTF-16", "UTF-16LE", "UTF-16BE"), followed by the
	 * closing quote held in ch_temp, and writes the matching
	 * TOKEN_DEC_ATTR_VAL token. "US-ASCII" also switches the encoding and
	 * reader to ASCII.
	 * Presumably the leading 'u' has already been consumed by the caller --
	 * verify against the call site.
	 * <p>
	 * Every path either returns after a successful match or throws; a name
	 * that matches none of the supported encodings is rejected here instead
	 * of silently returning with characters already consumed.
	 * @throws ParseExceptionHuge if the name is not recognised or the declared
	 *         encoding conflicts with the one already detected
	 */
	private void matchUTFEncoding() throws ParseExceptionHuge{
		// ---- "US-ASCII" ----
		if (r.skipChar('s') || r.skipChar('S')) {
			if (r.skipChar('-')
					&& (r.skipChar('a') || r.skipChar('A'))
					&& (r.skipChar('s') || r.skipChar('S'))
					&& (r.skipChar('c') || r.skipChar('C'))
					&& (r.skipChar('i') || r.skipChar('I'))
					&& (r.skipChar('i') || r.skipChar('I'))
					&& r.skipChar(ch_temp)) {
				if (encoding != FORMAT_UTF_16LE
						&& encoding != FORMAT_UTF_16BE) {
					if (must_utf_8)
						throw new EncodingExceptionHuge(
								"Can't switch from UTF-8"
										+ formatLineNumber());
					encoding = FORMAT_ASCII;
					r = new ASCIIReader();
					writeVTD(temp_offset, 8,
							TOKEN_DEC_ATTR_VAL,
							depth);
					return;
				} else {
					throw new ParseExceptionHuge(
							"XML decl error: Can't switch encoding to US-ASCII"
									+ formatLineNumber());
				}
			} else {
				throw new ParseExceptionHuge(
						"XML decl error: Invalid Encoding"
								+ formatLineNumber());
			}
		}

		// ---- "UTF-8", "UTF-16", "UTF-16LE", "UTF-16BE" ----
		if ((r.skipChar('t') || r.skipChar('T'))
				&& (r.skipChar('f') || r.skipChar('F'))
				&& r.skipChar('-')) {
			if (r.skipChar('8') && r.skipChar(ch_temp)) {
				if (encoding != FORMAT_UTF_16LE
						&& encoding != FORMAT_UTF_16BE) {
					//encoding = FORMAT_UTF8;
					writeVTD(temp_offset, 5,
							TOKEN_DEC_ATTR_VAL,
							depth);
					return;
				} else {
					throw new ParseExceptionHuge(
							"XML decl error: Can't switch encoding to UTF-8"
									+ formatLineNumber());
				}
			}
			if (r.skipChar('1') && r.skipChar('6')) {
				if (r.skipChar(ch_temp)) {
					// plain "UTF-16": accepted only when a BOM was detected
					if (encoding == FORMAT_UTF_16LE
							|| encoding == FORMAT_UTF_16BE) {
						if (!BOM_detected)
							throw new EncodingExceptionHuge(
									"BOM not detected for UTF-16"
											+ formatLineNumber());
						writeVTD(
								temp_offset >> 1,
								6,
								TOKEN_DEC_ATTR_VAL,
								depth);
						return;
					}
					throw new ParseExceptionHuge(
							"XML decl error: Can't switch encoding to UTF-16"
									+ formatLineNumber());
				} else if ((r.skipChar('l') || r.skipChar('L'))
						&& (r.skipChar('e') || r.skipChar('E'))
						&& r.skipChar(ch_temp)) {
					if (encoding == FORMAT_UTF_16LE) {
						r = new UTF16LEReader();
						writeVTD(
								temp_offset >> 1,
								8,
								TOKEN_DEC_ATTR_VAL,
								depth);
						return;
					}
					throw new ParseExceptionHuge(
							"XML del error: Can't switch encoding to UTF-16LE"
									+ formatLineNumber());
				} else if ((r.skipChar('b') || r.skipChar('B'))
						&& (r.skipChar('e') || r.skipChar('E'))
						&& r.skipChar(ch_temp)) {
					if (encoding == FORMAT_UTF_16BE) {
						writeVTD(
								temp_offset >> 1,
								8,
								TOKEN_DEC_ATTR_VAL,
								depth);
						return;
					}
					throw new ParseExceptionHuge(
							"XML del error: Can't swtich encoding to UTF-16BE"
									+ formatLineNumber());
				}

				throw new ParseExceptionHuge(
						"XML decl error: Invalid encoding"
								+ formatLineNumber());
			}
		}

		// Nothing matched (e.g. "UTF-7", "UTF-8x", "UTx"): reject the name
		// explicitly, as the other encoding matchers in this class do.
		throw new ParseExceptionHuge(
				"XML decl error: Invalid Encoding"
						+ formatLineNumber());
	}
	
	/**
	 * Matches the rest of an "ISO-8859-n" encoding name inside the XML
	 * declaration (the caller has already consumed the leading 'i' or 'I'),
	 * switches the encoding field and the reader to the selected variant and
	 * records the encoding value as a TOKEN_DEC_ATTR_VAL token.
	 * Recognised parts are 1 to 11 and 13 to 15; the name must be terminated
	 * by the quote character held in ch_temp.
	 * @throws ParseExceptionHuge if the name is not a recognised ISO-8859
	 *         variant, is not terminated by the opening quote character, or the
	 *         current encoding does not allow switching to ISO-8859
	 */
	private void matchISOEncoding() throws ParseExceptionHuge {
		// "so-8859-" must follow, the first two letters case-insensitively
		final boolean prefixMatched = (r.skipChar('s') || r.skipChar('S'))
				&& (r.skipChar('o') || r.skipChar('O'))
				&& r.skipChar('-') && r.skipChar('8')
				&& r.skipChar('8') && r.skipChar('5')
				&& r.skipChar('9') && r.skipChar('-');
		if (!prefixMatched)
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());
		if (encoding > FORMAT_UTF_16LE)
			throw new ParseExceptionHuge(
					"XML decl error: Can't switch encoding to ISO-8859"
							+ formatLineNumber());
		if (must_utf_8)
			throw new EncodingExceptionHuge(
					"Can't switch from UTF-8"
							+ formatLineNumber());

		// length of the encoding name to record; 0 means nothing was recognised
		int valLen = 0;
		if (r.skipChar('1')) {
			if (r.skipChar(ch_temp)) {
				// plain ISO-8859-1: the closing quote is already consumed
				encoding = FORMAT_ISO_8859_1;
				r = new ISO8859_1Reader();
				writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				return;
			}
			// two-digit part numbers 10, 11, 13, 14 and 15
			if (r.skipChar('0')) {
				encoding = FORMAT_ISO_8859_10;
				r = new ISO8859_10Reader();
				valLen = 11;
			} else if (r.skipChar('1')) {
				encoding = FORMAT_ISO_8859_11;
				r = new ISO8859_11Reader();
				valLen = 11;
			} else if (r.skipChar('3')) {
				encoding = FORMAT_ISO_8859_13;
				r = new ISO8859_13Reader();
				valLen = 11;
			} else if (r.skipChar('4')) {
				encoding = FORMAT_ISO_8859_14;
				r = new ISO8859_14Reader();
				valLen = 11;
			} else if (r.skipChar('5')) {
				encoding = FORMAT_ISO_8859_15;
				r = new ISO8859_15Reader();
				valLen = 11;
			}
		} else if (r.skipChar('2')) {
			encoding = FORMAT_ISO_8859_2;
			r = new ISO8859_2Reader();
			valLen = 10;
		} else if (r.skipChar('3')) {
			r = new ISO8859_3Reader();
			encoding = FORMAT_ISO_8859_3;
			valLen = 10;
		} else if (r.skipChar('4')) {
			r = new ISO8859_4Reader();
			encoding = FORMAT_ISO_8859_4;
			valLen = 10;
		} else if (r.skipChar('5')) {
			encoding = FORMAT_ISO_8859_5;
			r = new ISO8859_5Reader();
			valLen = 10;
		} else if (r.skipChar('6')) {
			encoding = FORMAT_ISO_8859_6;
			r = new ISO8859_6Reader();
			valLen = 10;
		} else if (r.skipChar('7')) {
			encoding = FORMAT_ISO_8859_7;
			r = new ISO8859_7Reader();
			valLen = 10;
		} else if (r.skipChar('8')) {
			encoding = FORMAT_ISO_8859_8;
			r = new ISO8859_8Reader();
			valLen = 10;
		} else if (r.skipChar('9')) {
			encoding = FORMAT_ISO_8859_9;
			r = new ISO8859_9Reader();
			valLen = 10;
		} else {
			throw new ParseExceptionHuge(
					"XML decl error: Invalid Encoding"
							+ formatLineNumber());
		}

		if (valLen != 0)
			writeVTD(temp_offset, valLen, TOKEN_DEC_ATTR_VAL, depth);
		// the closing quote is read with the reader selected above
		if (r.skipChar(ch_temp))
			return;
		throw new ParseExceptionHuge(
				"XML decl error: Invalid Encoding"
						+ formatLineNumber());
	}
	/**
	 * Parses the pseudo-attributes of the XML declaration: the mandatory
	 * <code>version</code> (only 1.0 and 1.1 are accepted), then the optional
	 * <code>encoding</code> and <code>standalone</code>, and finally the
	 * closing <code>?&gt;</code>. A VTD record is written for every attribute
	 * name and value that is recognised, and the reader/encoding may be
	 * switched when an encoding declaration is found.
	 * On entry <code>ch</code> must already hold the first character of the
	 * attribute name (it is compared against 'v' straight away).
	 * @return the parser state after which the parser loop jumps to; the only
	 *         value returned is STATE_LT_SEEN, once the '&lt;' following the
	 *         declaration has been read
	 * @throws ParseExceptionHuge if the declaration is malformed
	 * @throws EncodingExceptionHuge if the declared encoding conflicts with the
	 *         encoding already in effect
	 * @throws EOFExceptionHuge
	 */
	private int process_dec_attr() throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge{
		//int length1;
		int parser_state;
		// --- version attribute name ---
		if (ch == 'v'
			&& r.skipChar('e')
			&& r.skipChar('r')
			&& r.skipChar('s')
			&& r.skipChar('i')
			&& r.skipChar('o')
			&& r.skipChar('n')) {
			ch = getCharAfterS();
			if (ch == '=') {
				// record the 7-character name "version"; for encodings at or above
				// FORMAT_UTF_16BE the offset is halved (presumably converting a byte
				// offset into 16-bit units -- TODO confirm)
				if (encoding < FORMAT_UTF_16BE)
					writeVTD(
						temp_offset - 1,
						7,
						TOKEN_DEC_ATTR_NAME,
						depth);
				else
					writeVTD(
						(temp_offset -2) >> 1,
						7,
						TOKEN_DEC_ATTR_NAME,
						depth);
			} else
				throw new ParseExceptionHuge(
					"XML decl error: Invalid char"
						+ formatLineNumber());
		} else
			throw new ParseExceptionHuge(
				"XML decl error: should be version"
					+ formatLineNumber());
		// --- version attribute value ---
		// ch_temp remembers the opening quote so the same character must close the value
		ch_temp = getCharAfterS();
		if (ch_temp != '\'' && ch_temp != '"')
			throw new ParseExceptionHuge(
				"XML decl error: Invalid char to start attr name"
					+ formatLineNumber());
		temp_offset = offset;
		// support 1.0 or 1.1
		if (r.skipChar('1')
			&& r.skipChar('.')
			&& (r.skipChar('0') || r.skipChar('1'))) {
			// record the 3-character version value
			if (encoding < FORMAT_UTF_16BE)
				writeVTD(
					temp_offset,
					3,
					TOKEN_DEC_ATTR_VAL,
					depth);
			else
				writeVTD(
					temp_offset >> 1,
					3,
					TOKEN_DEC_ATTR_VAL,
					depth);
		} else
			throw new ParseExceptionHuge(
				"XML decl error: Invalid version(other than 1.0 or 1.1) detected"
					+ formatLineNumber());
		if (!r.skipChar(ch_temp))
			throw new ParseExceptionHuge(
				"XML decl error: version not terminated properly"
					+ formatLineNumber());
		ch = r.getChar();
		// after the version value: either white space (more attributes may follow)
		// or directly the '?' of the closing "?>"
		if (XMLChar.isSpaceChar(ch)) {
			ch = getCharAfterS();
			// start of the character just read, i.e. of a possible attribute name
			temp_offset = offset - increment;
			// --- optional encoding attribute ---
			if (ch == 'e') {
				if (r.skipChar('n')
					&& r.skipChar('c')
					&& r.skipChar('o')
					&& r.skipChar('d')
					&& r.skipChar('i')
					&& r.skipChar('n')
					&& r.skipChar('g')) {
					ch = r.getChar();
					if (XMLChar.isSpaceChar(ch))
						ch = getCharAfterS();
					if (ch == '=') {
						// record the 8-character name "encoding"
						if (encoding < FORMAT_UTF_16BE)
							writeVTD(
								temp_offset,
								8,
								TOKEN_DEC_ATTR_NAME,
								depth);
						else
							writeVTD(
								temp_offset >> 1,
								8,
								TOKEN_DEC_ATTR_NAME,
								depth);
					} else
						throw new ParseExceptionHuge(
							"XML decl error: Invalid char"
								+ formatLineNumber());
					ch_temp = getCharAfterS();
					if (ch_temp != '"' && ch_temp != '\'')
						throw new ParseExceptionHuge(
							"XML decl error: Invalid char to start attr name"
								+ formatLineNumber());
					temp_offset = offset;
					// dispatch on the first letter of the encoding name; each branch
					// consumes the rest of the name including the closing quote
					ch = r.getChar();
					switch (ch) {
						case 'a' :
						case 'A' :
							// "ascii", matched case-insensitively
							if ((r.skipChar('s')
								|| r.skipChar('S'))
								&& (r.skipChar('c')
									|| r.skipChar('C'))
								&& (r.skipChar('i')
									|| r.skipChar('I'))
								&& (r.skipChar('i')
									|| r.skipChar('I'))
								&& r.skipChar(ch_temp)) {												
								// ASCII cannot be declared while a UTF-16 encoding is in effect
								if (encoding != FORMAT_UTF_16LE
									&& encoding
										!= FORMAT_UTF_16BE) {
									if (must_utf_8)
										throw new EncodingExceptionHuge("Can't switch from UTF-8"
												+ formatLineNumber());
									encoding = FORMAT_ASCII;
									r = new ASCIIReader();
									// record the 5-character value "ascii"
									
										writeVTD(
											temp_offset,
											5,
											TOKEN_DEC_ATTR_VAL,
											depth);
									
									break;
								} else
									throw new ParseExceptionHuge(
										"XML decl error: Can't switch encoding to ASCII"
											+ formatLineNumber());
							}
							throw new ParseExceptionHuge(
								"XML decl error: Invalid Encoding"
									+ formatLineNumber());
						case 'c':
						case 'C':
						    matchCPEncoding();
						    break;
						case 'i' :
						case 'I' :
						    matchISOEncoding();
						    break;
						case 'u' :
						case 'U' :
						    matchUTFEncoding();
						    break;
							// now deal with windows encoding
						case 'w' :
						case 'W' :
						    matchWindowsEncoding();
						    break;
						default :
							throw new ParseExceptionHuge(
								"XML decl Error: invalid encoding"
									+ formatLineNumber());
					}
					// read past the encoding value; white space is skipped if present
					ch = r.getChar();
					if (XMLChar.isSpaceChar(ch))
						ch = getCharAfterS();
					temp_offset = offset - increment;
				} else
					throw new ParseExceptionHuge(
						"XML decl Error: Invalid char"
							+ formatLineNumber());
			}

			// --- optional standalone attribute ---
			if (ch == 's') {
				if (r.skipChar('t')
					&& r.skipChar('a')
					&& r.skipChar('n')
					&& r.skipChar('d')
					&& r.skipChar('a')
					&& r.skipChar('l')
					&& r.skipChar('o')
					&& r.skipChar('n')
					&& r.skipChar('e')) {

					ch = getCharAfterS();
					if (ch != '=')
						throw new ParseExceptionHuge(
							"XML decl error: Invalid char"
								+ formatLineNumber());
					// record the 10-character name "standalone"
					if (encoding < FORMAT_UTF_16BE)
						writeVTD(
							temp_offset,
							10,
							TOKEN_DEC_ATTR_NAME,
							depth);
					else
						writeVTD(
							temp_offset >> 1,
							10,
							TOKEN_DEC_ATTR_NAME,
							depth);
					ch_temp = getCharAfterS();
					temp_offset = offset;
					if (ch_temp != '"' && ch_temp != '\'')
						throw new ParseExceptionHuge(
							"XML decl error: Invalid char to start attr name"
								+ formatLineNumber());
					// the value must be exactly "yes" or "no", closed by the opening quote
					ch = r.getChar();
					if (ch == 'y') {
						if (r.skipChar('e')
							&& r.skipChar('s')
							&& r.skipChar(ch_temp)) {
							// record the 3-character value "yes"
							if (encoding < FORMAT_UTF_16BE)
								writeVTD(
									temp_offset,
									3,
									TOKEN_DEC_ATTR_VAL,
									depth);
							else
								writeVTD(
									temp_offset >> 1,
									3,
									TOKEN_DEC_ATTR_VAL,
									depth);
						} else
							throw new ParseExceptionHuge(
								"XML decl error: invalid val for standalone"
									+ formatLineNumber());
					} else if (ch == 'n') {
						if (r.skipChar('o')
							&& r.skipChar(ch_temp)) {
							// record the 2-character value "no"
							if (encoding < FORMAT_UTF_16BE)
								writeVTD(
									temp_offset,
									2,
									TOKEN_DEC_ATTR_VAL,
									depth);
							else
								writeVTD(
									temp_offset >> 1,
									2,
									TOKEN_DEC_ATTR_VAL,
									depth);
						} else
							throw new ParseExceptionHuge(
								"XML decl error: invalid val for standalone"
									+ formatLineNumber());
					} else
						throw new ParseExceptionHuge(
							"XML decl error: invalid val for standalone"
								+ formatLineNumber());
				} else
					throw new ParseExceptionHuge(
						"XML decl error" + formatLineNumber());
				ch = r.getChar();
				if (XMLChar.isSpaceChar(ch))
					ch = getCharAfterS();
			}
		}

		// --- closing "?>" and the '<' that must follow the declaration ---
		if (ch == '?' && r.skipChar('>')) {
			temp_offset = offset;
			ch = getCharAfterS();
			if (ch == '<') {
				parser_state = STATE_LT_SEEN;
			} else
				throw new ParseExceptionHuge(
					"Other Error: Invalid Char in XML"
						+ formatLineNumber());
		} else
			throw new ParseExceptionHuge(
				"XML decl Error: Invalid termination sequence"
					+ formatLineNumber());
		return parser_state;
	}
	/**
	 * Scans the target name of a processing instruction and records it as a
	 * TOKEN_PI_NAME token. If the target is immediately closed by
	 * <code>?&gt;</code> the character following the PI decides the next state;
	 * otherwise the PI value still has to be parsed.
	 * @return STATE_PI_VAL when the PI has content left to parse, otherwise
	 *         STATE_LT_SEEN or STATE_TEXT depending on what follows the PI
	 * @throws ParseExceptionHuge if the name is too long, the PI is not
	 *         terminated properly, or an illegal character follows it
	 * @throws EncodingExceptionHuge
	 * @throws EOFExceptionHuge
	 */
	private int process_pi_tag() throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge{
		// consume the rest of the target name; ch ends up on the first non-name char
		do {
			ch = r.getChar();
		} while (XMLChar.isNameChar(ch));

		// the character just read is not part of the name, hence "- increment"
		final long nameLen = offset - temp_offset - increment;
		if (encoding >= FORMAT_UTF_16BE) {
			// offset and length are halved for encodings at or above FORMAT_UTF_16BE
			if (nameLen > (MAX_TOKEN_LENGTH << 1))
				throw new ParseExceptionHuge("Token Length Error:"
						+ " PI name too long (>0xfffff)"
						+ formatLineNumber());
			writeVTD(temp_offset >> 1, nameLen >> 1, TOKEN_PI_NAME, depth);
		} else {
			if (nameLen > MAX_TOKEN_LENGTH)
				throw new ParseExceptionHuge("Token Length Error:"
						+ " PI name too long (>0xfffff)"
						+ formatLineNumber());
			writeVTD(temp_offset, nameLen, TOKEN_PI_NAME, depth);
		}

		// anything other than '?' means a PI value follows
		if (ch != '?')
			return STATE_PI_VAL;

		if (!r.skipChar('>'))
			throw new ParseExceptionHuge(
				"Error in PI: invalid termination sequence"
					+ formatLineNumber());

		// PI without a value: classify whatever comes after "?>"
		temp_offset = offset;
		ch = getCharAfterSe();
		if (ch == '<')
			return STATE_LT_SEEN;
		if (XMLChar.isContentChar(ch))
			return STATE_TEXT;
		if (ch == '&') {
			entityIdentifier();
			return STATE_TEXT;
		}
		if (ch == ']') {
			// "]]>" must not appear in text content
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseExceptionHuge(
						"Error in text content: ]]> in text content"
							+ formatLineNumber());
			}
			return STATE_TEXT;
		}
		throw new ParseExceptionHuge(
			"Error in text content: Invalid char"
				+ formatLineNumber());
	}
	/**
	 * Scans the content of a processing instruction up to the closing
	 * <code>?&gt;</code>, records it as a TOKEN_PI_VAL token and then decides
	 * the next parser state from the character that follows the PI.
	 * On entry <code>ch</code> must be the white space separating the target
	 * from the content.
	 * @return STATE_LT_SEEN or STATE_TEXT depending on what follows the PI
	 * @throws ParseExceptionHuge if the separator is missing, an invalid
	 *         character occurs, the value is too long, or an illegal character
	 *         follows the PI
	 * @throws EncodingExceptionHuge
	 * @throws EOFExceptionHuge
	 */
	private int process_pi_val() throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge{
		if (!XMLChar.isSpaceChar(ch)) 
			throw new ParseExceptionHuge(
					"Error in PI: invalid termination sequence"
						+ formatLineNumber());
		temp_offset = offset;

		// read until "?>"; a lone '?' is ordinary content
		for (ch = r.getChar(); ; ch = r.getChar()) {
			if (!XMLChar.isValidChar(ch))
				throw new ParseExceptionHuge(
					"Errors in PI: Invalid char in PI val"
						+ formatLineNumber());
			if (ch == '?' && r.skipChar('>'))
				break;
		}

		// the two characters of "?>" are not part of the value
		final long valLen = offset - temp_offset - (increment << 1);
		if (encoding >= FORMAT_UTF_16BE) {
			// offset and length are halved for encodings at or above FORMAT_UTF_16BE
			if (valLen > (MAX_TOKEN_LENGTH << 1))
				throw new ParseExceptionHuge("Token Length Error:"
						+ "PI VAL too long (>0xfffff)"
						+ formatLineNumber());
			writeVTD(temp_offset >> 1, valLen >> 1, TOKEN_PI_VAL, depth);
		} else {
			if (valLen > MAX_TOKEN_LENGTH)
				throw new ParseExceptionHuge("Token Length Error:"
						+ "PI VAL too long (>0xfffff)"
						+ formatLineNumber());
			writeVTD(temp_offset, valLen, TOKEN_PI_VAL, depth);
		}

		// classify whatever comes after the PI
		temp_offset = offset;
		ch = getCharAfterSe();
		if (ch == '<')
			return STATE_LT_SEEN;
		if (XMLChar.isContentChar(ch))
			return STATE_TEXT;
		if (ch == '&') {
			entityIdentifier();
			return STATE_TEXT;
		}
		if (ch == ']') {
			// "]]>" must not appear in text content
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseExceptionHuge(
						"Error in text content: ]]> in text content"
							+ formatLineNumber());
			}
			return STATE_TEXT;
		}
		throw new ParseExceptionHuge(
			"Error in text content: Invalid char"
				+ formatLineNumber());
	}
	/**
	 * Scans a comment up to its closing <code>--&gt;</code>, records the
	 * comment text as a TOKEN_COMMENT token and decides the next parser state
	 * from the character that follows the comment.
	 * @return STATE_LT_SEEN or STATE_TEXT depending on what follows the comment
	 * @throws ParseExceptionHuge if an invalid character occurs, "--" is not
	 *         followed by '&gt;', or an illegal character follows the comment
	 * @throws EncodingExceptionHuge
	 * @throws EOFExceptionHuge
	 */
	private int process_comment() throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge{
		// read until the first "--"
		do {
			ch = r.getChar();
			if (!XMLChar.isValidChar(ch))
				throw new ParseExceptionHuge(
					"Error in comment: Invalid Char"
						+ formatLineNumber());
		} while (!(ch == '-' && r.skipChar('-')));

		// the two dashes just consumed are not part of the comment text
		final long commentLen = offset - temp_offset - (increment << 1);

		// "--" inside a comment is only legal as part of the terminator
		if (r.getChar() != '>')
			throw new ParseExceptionHuge(
				"Error in comment: Invalid terminating sequence"
					+ formatLineNumber());

		if (encoding >= FORMAT_UTF_16BE)
			writeVTD(temp_offset >> 1, commentLen >> 1, TOKEN_COMMENT, depth);
		else
			writeVTD(temp_offset, commentLen, TOKEN_COMMENT, depth);

		// classify whatever comes after the comment
		temp_offset = offset;
		ch = getCharAfterSe();
		if (ch == '<')
			return STATE_LT_SEEN;
		if (XMLChar.isContentChar(ch))
			return STATE_TEXT;
		if (ch == '&') {
			entityIdentifier();
			return STATE_TEXT;
		}
		if (ch == ']') {
			// "]]>" must not appear in text content
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseExceptionHuge(
						"Error in text content: ]]> in text content"
							+ formatLineNumber());
			}
			return STATE_TEXT;
		}
		throw new ParseExceptionHuge(
			"Error in text content: Invalid char"
				+ formatLineNumber());
	}
	/**
	 * Handles input that follows the end tag of the root element. Only a
	 * processing instruction or a comment may start there; anything else is an
	 * error.
	 * @return STATE_END_PI when <code>&lt;?</code> was read, STATE_END_COMMENT
	 *         when <code>&lt;!--</code> was read
	 * @throws ParseExceptionHuge if anything else follows the root element
	 * @throws EncodingExceptionHuge
	 * @throws EOFExceptionHuge
	 */
	private int process_end_doc() throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge {
		/* eof exception should be thrown here for premature ending */
		ch = getCharAfterS();
		if (ch == '<') {
			// "<?" opens a trailing PI, "<!--" a trailing comment
			final boolean trailingPi = r.skipChar('?');
			if (trailingPi
					|| (r.skipChar('!') && r.skipChar('-') && r.skipChar('-'))) {
				temp_offset = offset;
				return trailingPi ? STATE_END_PI : STATE_END_COMMENT;
			}
		}
		throw new ParseExceptionHuge(
			"Other Error: XML not terminated properly"
				+ formatLineNumber());
	}
	private int process_qm_seen()throws ParseExceptionHuge, EncodingExceptionHuge, EOFExceptionHuge {
	    temp_offset = offset;
		ch = r.getChar();
		if (XMLChar.isNameStartChar(ch)) {
			//temp_offset = offset;
			if ((ch == 'x' || ch == 'X')
				&& (r.skipChar('m')	|| r.skipChar('M'))
				&& (r.skipChar('l')	|| r.skipChar('L'))) {
				ch = r.getChar();
				if (ch == '?'
					|| XMLChar.isSpaceChar(ch))
					throw new ParseExceptionHuge(
						"Error in PI: [xX][mM][lL] not a valid PI targetname"
							+ formatLineNumber());
				offset = getPrevOffset();
			}
			return STATE_PI_TAG;
		}
		throw new ParseExceptionHuge(
			"Other Error: First char after ')) {
						break;
					} /*else
						throw new ParseExceptionHuge(
							"Error in CDATA: Invalid termination sequence"
								+ formatLineNumber());*/
				}
			} else
				throw new ParseExceptionHuge(
					"Error in CDATA: Invalid Char"
						+ formatLineNumber());
		}
		length1 = offset - temp_offset -  (increment<<1) - increment;
		if (encoding < FORMAT_UTF_16BE){
			
			writeVTD(
				temp_offset,
				length1,
				TOKEN_CDATA_VAL,
				depth);
		}
		else {
			
			writeVTD(
				temp_offset >> 1,
				length1 >> 1,
				TOKEN_CDATA_VAL,
				depth);
		}
		//System.out.println(" " + (temp_offset) + " " + length1 + " CDATA " + depth);
		ch = getCharAfterSe();
		if (ch == '<') {
			parser_state = STATE_LT_SEEN;
		} else if (XMLChar.isContentChar(ch)) {
			temp_offset = offset-1;
			parser_state = STATE_TEXT;
		} else if (ch == '&') {
			//has_amp = true;
			temp_offset = offset-1;
			entityIdentifier();
			parser_state = STATE_TEXT;
			//temp_offset = offset;
		} else if (ch == ']') {
		    temp_offset = offset-1;
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseExceptionHuge(
						"Error in text content: ]]> in text content"
							+ formatLineNumber());
			}
			parser_state = STATE_TEXT;
		}else
			throw new ParseExceptionHuge(
				"Other Error: Invalid char in xml"
					+ formatLineNumber());
		return parser_state;
	}
	
	/**
	 * Skips over a DOCTYPE declaration by balancing '&lt;' and '&gt;'
	 * characters, records the skipped text as a TOKEN_DTD_VAL token and
	 * expects the next non-space character to be '&lt;'.
	 * @return STATE_LT_SEEN
	 * @throws ParseExceptionHuge if an invalid character occurs, the DTD text
	 *         is too long, or something other than '&lt;' follows it
	 * @throws EncodingExceptionHuge
	 * @throws EOFExceptionHuge
	 */
	private int process_doc_type() throws ParseExceptionHuge,EncodingExceptionHuge, EOFExceptionHuge{
		// starts at 1 for the '<' that opened the declaration
		int openBrackets = 1;
		do {
			ch = r.getChar();
			if (!XMLChar.isValidChar(ch))
				throw new ParseExceptionHuge(
					"Error in DOCTYPE: Invalid char"
						+ formatLineNumber());
			if (ch == '>')
				openBrackets--;
			else if (ch == '<')
				openBrackets++;
		} while (openBrackets != 0);

		// the closing '>' is not part of the recorded text
		final long dtdLen = offset - temp_offset - increment;
		if (encoding >= FORMAT_UTF_16BE) {
			// offset and length are halved for encodings at or above FORMAT_UTF_16BE
			if (dtdLen > (MAX_TOKEN_LENGTH << 1))
				throw new ParseExceptionHuge("Token Length Error:"
						+ " DTD val too long (>0xfffff)"
						+ formatLineNumber());
			writeVTD(temp_offset >> 1, dtdLen >> 1, TOKEN_DTD_VAL, depth);
		} else {
			if (dtdLen > MAX_TOKEN_LENGTH)
				throw new ParseExceptionHuge("Token Length Error:"
						+ " DTD val too long (>0xfffff)"
						+ formatLineNumber());
			writeVTD(temp_offset, dtdLen, TOKEN_DTD_VAL, depth);
		}

		ch = getCharAfterS();
		if (ch != '<')
			throw new ParseExceptionHuge(
				"Other Error: Invalid char in xml"
					+ formatLineNumber());
		return STATE_LT_SEEN;
	}
	
	/**
	 * Processes a processing instruction that appears after the end tag of
	 * the root element. The target name is recorded as a TOKEN_PI_NAME token
	 * and, when the target is followed by white space, the content up to the
	 * closing <code>?&gt;</code> is recorded as a TOKEN_PI_VAL token.
	 * The reserved target <code>xml</code> (in any letter case) is rejected.
	 * @return STATE_DOC_END once the PI has been consumed
	 * @throws ParseExceptionHuge if the target is invalid or reserved, a token
	 *         is too long, an invalid character occurs, or the PI is not
	 *         terminated by <code>?&gt;</code>
	 * @throws EncodingExceptionHuge
	 * @throws EOFExceptionHuge
	 */
	private int process_end_pi() throws ParseExceptionHuge,EncodingExceptionHuge, EOFExceptionHuge{
		long length1;int parser_state;
		ch = r.getChar();
		if (!XMLChar.isNameStartChar(ch))
			throw new ParseExceptionHuge("Error in PI: invalid char in PI target"
					+formatLineNumber());

		// Reject the reserved target [xX][mM][lL]. The third letter is matched
		// with '||' (either case); requiring both 'l' and 'L' would let "xml"
		// through and wrongly reject a target such as "xmlL".
		if ((ch == 'x' || ch == 'X')
			&& (r.skipChar('m') || r.skipChar('M'))
			&& (r.skipChar('l') || r.skipChar('L'))) {
			ch = r.getChar();
			if (XMLChar.isSpaceChar(ch) || ch == '?')
				throw new ParseExceptionHuge(
					"Error in PI: [xX][mM][lL] not a valid PI target"
						+ formatLineNumber());
		}

		// consume the rest of the target name; ch ends up on the first non-name char
		while (XMLChar.isNameChar(ch)) {
			ch = r.getChar();
		}

		// the character just read is not part of the name, hence "- increment"
		length1 = offset - temp_offset - increment;
		if (encoding < FORMAT_UTF_16BE){
			if (length1 > MAX_TOKEN_LENGTH)
				  throw new ParseExceptionHuge("Token Length Error:"
							  +"PI name too long (>0xfffff)"
							  + formatLineNumber());
			writeVTD(
				temp_offset,
				length1,
				TOKEN_PI_NAME,
				depth);
		}
		else{
			// offset and length are halved for encodings at or above FORMAT_UTF_16BE
			if (length1 > (MAX_TOKEN_LENGTH<<1))
			  throw new ParseExceptionHuge("Token Length Error:"
					  +"PI name too long (>0xfffff)"
					  + formatLineNumber());
			writeVTD(
				temp_offset >> 1,
				length1 >> 1,
				TOKEN_PI_NAME,
				depth);
		}

		temp_offset = offset;
		if (XMLChar.isSpaceChar(ch)) {
			ch = getCharAfterS();

			// Read the PI content up to "?>". A '?' that is not followed by
			// '>' is ordinary content, as in process_pi_val.
			while (true) {
				if (!XMLChar.isValidChar(ch))
					throw new ParseExceptionHuge(
						"Error in PI: Invalid char in PI val"
							+ formatLineNumber());
				if (ch == '?' && r.skipChar('>')) {
					parser_state = STATE_DOC_END;
					break;
				}
				ch = r.getChar();
			}
			// the two characters of "?>" are not part of the value
			length1 = offset - temp_offset - (increment<<1);
			if (encoding < FORMAT_UTF_16BE){
				if (length1 > MAX_TOKEN_LENGTH)
					  throw new ParseExceptionHuge("Token Length Error:"
								  +"PI val too long (>0xfffff)"
								  + formatLineNumber());
				writeVTD(
					temp_offset,
					length1,
					TOKEN_PI_VAL,
					depth);
			}
			else{
				if (length1 > (MAX_TOKEN_LENGTH<<1))
					  throw new ParseExceptionHuge("Token Length Error:"
								  +"PI val too long (>0xfffff)"
								  + formatLineNumber());
				writeVTD(
					temp_offset >> 1,
					length1 >> 1,
					TOKEN_PI_VAL,
					depth);
			}
		} else {
			// no content: the target must be closed by "?>" right away
			if ((ch == '?') && r.skipChar('>')) {
				parser_state = STATE_DOC_END;
			} else
				throw new ParseExceptionHuge(
					"Error in PI: invalid termination sequence"
						+ formatLineNumber());
		}
		return parser_state;
	}
	/**
	 * Scans a comment located after the end tag of the root element up to its
	 * closing <code>--&gt;</code> and records the comment text as a
	 * TOKEN_COMMENT token.
	 * @return STATE_DOC_END once the comment has been consumed
	 * @throws ParseExceptionHuge if an invalid character occurs or "--" is not
	 *         followed by '&gt;'
	 */
	private int process_end_comment()throws ParseExceptionHuge {
		// read until the first "--"
		do {
			ch = r.getChar();
			if (!XMLChar.isValidChar(ch))
				throw new ParseExceptionHuge(
					"Error in comment: Invalid Char"
						+ formatLineNumber());
		} while (!(ch == '-' && r.skipChar('-')));

		// the two dashes just consumed are not part of the comment text
		final long commentLen = offset - temp_offset - (increment << 1);

		if (r.getChar() != '>')
			throw new ParseExceptionHuge(
				"Error in comment: '-->' expected"
					+ formatLineNumber());

		if (encoding >= FORMAT_UTF_16BE)
			writeVTD(temp_offset >> 1, commentLen >> 1, TOKEN_COMMENT, depth);
		else
			writeVTD(temp_offset, commentLen, TOKEN_COMMENT, depth);
		return STATE_DOC_END;
	}
	/**
	 * Sets the buffer holding the XML document, resets the parser's
	 * bookkeeping fields and allocates fresh VTD and location-cache buffers
	 * whose sizing parameters are chosen from the document length.
	 * @param xb1 the byte buffer containing the XML document to parse
	 */
	public void setDoc(IByteBuffer xb1) {
		xb = xb1;

		// reset parser state left over from a previous document
		br = false;
		depth = -1;
		increment = 1;
		BOM_detected = false;
		must_utf_8 = false;
		ch_temp = 0;
		ch = 0;
		temp_offset = 0;
		offset = 0;
		docOffset = 0;
		docLen = xb.length();
		endOffset = xb.length();
		last_depth = 0;
		last_l3_index = 0;
		last_l2_index = 0;
		last_l1_index = 0;

		// sizing parameters handed to the buffer constructors below;
		// larger documents get larger values
		int vtdBits;
		int l1Bits = 7;
		int l2Bits = 9;
		int l3Bits = 11;
		if (docLen <= 1024) {
			vtdBits = 6;
			l1Bits = l2Bits = l3Bits = 5;
		} else if (docLen <= 4096) {
			vtdBits = 7;
			l1Bits = l2Bits = l3Bits = 6;
		} else if (docLen <= 1024 * 16) {
			vtdBits = 8;
			l1Bits = l2Bits = l3Bits = 7;
		} else if (docLen <= 1024 * 16 * 4) {
			vtdBits = 11;
		} else if (docLen <= 1024 * 256) {
			vtdBits = 12;
		} else if (docLen <= (1 << 26)) {
			vtdBits = 15;
			l1Bits = l2Bits = l3Bits = 12;
		} else if (docLen <= (1 << 30)) {
			vtdBits = 19;
			l1Bits = l2Bits = l3Bits = 13;
		} else {
			vtdBits = 23;
			l1Bits = l2Bits = l3Bits = 16;
		}

		VTDBuffer = new FastLongBuffer(vtdBits, (int) (xb.length() >> (vtdBits + 1)));
		l1Buffer = new FastLongBuffer(l1Bits);
		l2Buffer = new FastLongBuffer(l2Bits);
		l3Buffer = new FastIntBuffer(l3Bits);
	}
	/**
	 * Appends a VTD record to VTDBuffer and, for starting tags at depth 0 to
	 * 3, updates the root index and the location-cache buffers.
	 * A record is packed as: token type shifted to bit 60, depth (masked to
	 * 5 bits) at bit 55, length at bit 38, offset in the low bits.
	 * Character data, CDATA and comment tokens longer than MAX_TOKEN_LENGTH
	 * are split into several consecutive records.
	 * @param offset long starting offset of the token
	 * @param length long length of the token
	 * @param token_type int one of the TOKEN_* constants
	 * @param depth int nesting depth of the token
	 */
	private void writeVTD(long offset, long length, int token_type, int depth) {
		// type and depth bits, positioned relative to the length field
		final long typeAndDepth = (token_type << 22) | ((depth & 0x1f) << 17);
		final boolean splittable = token_type == TOKEN_CHARACTER_DATA
				|| token_type == TOKEN_CDATA_VAL
				|| token_type == TOKEN_COMMENT;

		if (splittable && length > MAX_TOKEN_LENGTH) {
			// emit full-size records until the remainder fits into one record
			long remaining = length;
			long chunkOffset = offset;
			while (remaining > MAX_TOKEN_LENGTH) {
				VTDBuffer.append(((typeAndDepth | MAX_TOKEN_LENGTH) << 38) | chunkOffset);
				chunkOffset += MAX_TOKEN_LENGTH;
				remaining -= MAX_TOKEN_LENGTH;
			}
			VTDBuffer.append(((typeAndDepth | remaining) << 38) | chunkOffset);
		} else {
			VTDBuffer.append(((typeAndDepth | length) << 38) | offset);
		}

		// only starting tags take part in the location cache
		if (token_type != TOKEN_STARTING_TAG)
			return;

		// remember VTD depth start from zero
		if (depth == 0) {
			rootIndex = VTDBuffer.size() - 1;
		} else if (depth == 1) {
			// close the pending entry of the previous element; 0xffffffff in
			// the low word is written when that element got no child entries
			if (last_depth == 1)
				l1Buffer.append(((long) last_l1_index << 32) | 0xffffffffL);
			else if (last_depth == 2)
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			last_l1_index = VTDBuffer.size() - 1;
			last_depth = 1;
		} else if (depth == 2) {
			if (last_depth == 1)
				// first depth-2 child: the low word is the current l2Buffer size
				l1Buffer.append(((long) last_l1_index << 32)
						+ l2Buffer.size());
			else if (last_depth == 2)
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			last_l2_index = VTDBuffer.size() - 1;
			last_depth = 2;
		} else if (depth == 3) {
			l3Buffer.append(VTDBuffer.size() - 1);
			if (last_depth == 2)
				// first depth-3 child: the low word is the index just appended to l3Buffer
				l2Buffer.append(((long) last_l2_index << 32)
						+ l3Buffer.size() - 1);
			last_depth = 3;
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy