All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ximpleware.CustomVTDGen Maven / Gradle / Ivy

The newest version!
/**
 * Copyright 2011-2014 eBusiness Information, Groupe Excilys (www.excilys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 */
package com.ximpleware;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.MissingResourceException;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipFile;

import io.gatling.commons.util.HtmlHelper;

import com.ximpleware.parser.XMLChar;

public class CustomVTDGen extends VTDGen {

	// attr_name_array size
	private final static int ATTR_NAME_ARRAY_SIZE = 16;

	// max Token length
	private final static int STATE_ATTR_NAME = 3;
	private final static int STATE_ATTR_VAL = 4;
	private final static int STATE_CDATA = 12;
	private final static int STATE_COMMENT = 11;
	private final static int STATE_DEC_ATTR_NAME = 10;
	private final static int STATE_DOC_END = 7; // end of document
	private final static int STATE_DOC_START = 6; // beginning of document
	private final static int STATE_DOCTYPE = 13;
	private final static int STATE_END_COMMENT = 14;
	// comment appear after the last ending tag
	private final static int STATE_END_PI = 15;
	private final static int STATE_END_TAG = 2;
	// internal parser state

	private final static int STATE_LT_SEEN = 0; // encounter the first <
	private final static int STATE_PI_TAG = 8;
	private final static int STATE_PI_VAL = 9;
	private final static int STATE_START_TAG = 1;
	private final static int STATE_TEXT = 5;

	// tag_stack size
	private final static int TAG_STACK_SIZE = 256;

	// token type
	private long[] attr_name_array;
	private int attr_count;
	private long[] prefixed_attr_name_array;
	private int[] prefix_URL_array;
	private int prefixed_attr_count;
	private boolean BOM_detected;
	private int ch;
	private int ch_temp;
	private int length1, length2;
	private int increment;

	private int last_depth;
	private int last_l1_index;
	private int last_l2_index;
	private int last_l3_index;
	private int last_l4_index;
	private boolean must_utf_8;

	private int temp_offset;

	/**
	 * VTDGen constructor method.
	 */
	public CustomVTDGen() {
		attr_name_array = new long[ATTR_NAME_ARRAY_SIZE];
		prefixed_attr_name_array = new long[ATTR_NAME_ARRAY_SIZE];
		prefix_URL_array = new int[ATTR_NAME_ARRAY_SIZE];
		tag_stack = new long[TAG_STACK_SIZE];
		// scratch_buffer = new int[10];
		VTDDepth = 0;
		LcDepth = 3;

		br = false;
		e = new EOFException("permature EOF reached, XML document incomplete");
		ws = false;
		nsBuffer1 = new FastIntBuffer(4);
		nsBuffer2 = new FastLongBuffer(4);
		nsBuffer3 = new FastLongBuffer(4);
		currentElementRecord = 0;
		singleByteEncoding = true;
		shallowDepth = true;
		// offset_adj = 1;
	}

	/**
	 * Clear internal states so VTDGEn can process the next file.
	 */
	public void clear() {
		if (br == false) {
			VTDBuffer = null;
			l1Buffer = null;
			l2Buffer = null;
			l3Buffer = null;
			_l3Buffer = null;
			_l4Buffer = null;
			_l5Buffer = null;
		}
		XMLDoc = null;
		offset = temp_offset = 0;
		last_depth = last_l1_index = last_l2_index = last_l3_index = last_l4_index = 0;
		rootIndex = 0;
		depth = -1;
		increment = 1;
		BOM_detected = false;
		must_utf_8 = false;
		ch = ch_temp = 0;
		nsBuffer1.size = 0;
		nsBuffer2.size = 0;
		nsBuffer3.size = 0;
		currentElementRecord = 0;
	}

	/**
	 * Write white space records that are ignored by default
	 */
	private void addWhiteSpaceRecord() {
		if (depth > -1) {
			int length1 = offset - increment - temp_offset;
			if (length1 != 0)
				if (singleByteEncoding)// if (encoding < FORMAT_UTF_16BE)
					writeVTDText(temp_offset, length1, TOKEN_CHARACTER_DATA, depth);
				else
					writeVTDText(temp_offset >> 1, length1 >> 1, TOKEN_CHARACTER_DATA, depth);
		}
	}

	/**
	 * A private method that detects the BOM and decides document encoding
	 * 
	 * @throws EncodingException
	 * @throws ParseException
	 */
	private void decide_encoding() throws EncodingException, ParseException {
		if (XMLDoc.length == 0)
			throw new EncodingException("Document is zero sized ");
		if (XMLDoc[offset] == -2) {
			increment = 2;
			if (XMLDoc[offset + 1] == -1) {
				offset += 2;
				encoding = FORMAT_UTF_16BE;
				BOM_detected = true;
				r = new UTF16BEReader();
			} else
				throw new EncodingException("Unknown Character encoding: should be 0xff 0xfe");
		} else if (XMLDoc[offset] == -1) {
			increment = 2;
			if (XMLDoc[offset + 1] == -2) {
				offset += 2;
				encoding = FORMAT_UTF_16LE;
				BOM_detected = true;
				r = new UTF16LEReader();
			} else
				throw new EncodingException("Unknown Character encoding: not UTF-16LE");
		} else if (XMLDoc[offset] == -17) {
			if (XMLDoc[offset + 1] == -69 && XMLDoc[offset + 2] == -65) {
				offset += 3;
				must_utf_8 = true;
			} else
				throw new EncodingException("Unknown Character encoding: not UTF-8");
		} else if (XMLDoc[offset] == 0) {
			if (XMLDoc[offset + 1] == 0x3c && XMLDoc[offset + 2] == 0 && XMLDoc[offset + 3] == 0x3f) {
				encoding = FORMAT_UTF_16BE;
				increment = 2;
				r = new UTF16BEReader();
			} else
				throw new EncodingException("Unknown Character encoding: not UTF-16BE");
		} else if (XMLDoc[offset] == 0x3c) {
			if (XMLDoc[offset + 1] == 0 && XMLDoc[offset + 2] == 0x3f && XMLDoc[offset + 3] == 0) {
				increment = 2;
				encoding = FORMAT_UTF_16LE;
				r = new UTF16LEReader();
			}
		}
		// check for max file size exception
		if (encoding < FORMAT_UTF_16BE) {
			if (ns) {
				if ((offset + (long) docLen) >= 1L << 30)
					throw new ParseException("Other error: file size too big >=1GB ");
			} else {
				if ((offset + (long) docLen) >= 1L << 31)
					throw new ParseException("Other error: file size too big >=2GB ");
			}
		} else {
			// offset_adj = 2;
			if ((offset + (long) docLen) >= 1L << 31)
				throw new ParseException("Other error: file size too large >= 2GB");
		}
		if (encoding >= FORMAT_UTF_16BE)
			singleByteEncoding = false;
	}

	/**
	 * This method will detect whether the entity is valid or not and increment
	 * offset.
	 * 
	 * @return int
	 * @throws com.ximpleware.ParseException
	 *             Super class for any exception during parsing.
	 * @throws com.ximpleware.EncodingException
	 *             UTF/native encoding exception.
	 * @throws com.ximpleware.EOFException
	 *             End of file exception.
	 */
	private int entityIdentifier() throws EntityException, EncodingException, EOFException, ParseException {
		int ch = r.getChar();
		int val = 0;

		switch (ch) {
		case '#':
			ch = r.getChar();
			if (ch == 'x') {
				while (true) {
					ch = r.getChar();
					if (ch >= '0' && ch <= '9') {
						val = (val << 4) + (ch - '0');
					} else if (ch >= 'a' && ch <= 'f') {
						val = (val << 4) + (ch - 'a' + 10);
					} else if (ch >= 'A' && ch <= 'F') {
						val = (val << 4) + (ch - 'A' + 10);
					} else if (ch == ';') {
						return val;
					} else
						throw new EntityException("Errors in char reference: Illegal char following &#x.");
				}
			} else {
				while (true) {
					if (ch >= '0' && ch <= '9') {
						val = val * 10 + (ch - '0');
					} else if (ch == ';') {
						break;
					} else
						throw new EntityException("Errors in char reference: Illegal char following &#.");
					ch = r.getChar();
				}
			}
			if (!XMLChar.isValidChar(val)) {
				throw new EntityException("Errors in entity reference: Invalid XML char.");
			}
			return val;
			// break;

		default:
			int loop = 0;
			int maxLoop = 10;
			StringBuilder builder = new StringBuilder();
			while (loop < maxLoop) {
				loop++;
				if (ch == ';') {
					break;
				} else if (loop == maxLoop) {
					throw new EntityException("Errors in char reference: Illegal char following &.");
				}
				builder.append((char) ch);
				ch = r.getChar();
			}

			try {
				return HtmlHelper.htmlEntityToInt(builder.toString(), 0);

			} catch (MissingResourceException e) {
				throw new EntityException("unknown entity" + builder.toString());
			}
		}
		// return val;
	}

	/**
	 * Write the remaining portion of LC info
	 * 
	 */
	private void finishUp() {
		if (shallowDepth) {
			if (last_depth == 1) {
				l1Buffer.append(((long) last_l1_index << 32) | 0xffffffffL);
			} else if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			}
		} else {
			if (last_depth == 1) {
				l1Buffer.append(((long) last_l1_index << 32) | 0xffffffffL);
			} else if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			} else if (last_depth == 3) {
				_l3Buffer.append(((long) last_l3_index << 32) | 0xffffffffL);
			} else if (last_depth == 4) {
				_l4Buffer.append(((long) last_l4_index << 32) | 0xffffffffL);
			}
		}
	}

	/**
	 * Format the string indicating the position (line number:offset)of the
	 * offset if there is an exception.
	 * 
	 * @return java.lang.String indicating the line number and offset of the
	 *         exception
	 */
	private String formatLineNumber() {
		return formatLineNumber(offset);
	}

	private String formatLineNumber(int os) {
		int so = docOffset;
		int lineNumber = 0;
		int lineOffset = 0;

		if (encoding < FORMAT_UTF_16BE) {
			while (so <= os - 1) {
				if (XMLDoc[so] == '\n') {
					lineNumber++;
					lineOffset = so;
				}
				// lineOffset++;
				so++;
			}
			lineOffset = os - lineOffset;
		} else if (encoding == FORMAT_UTF_16BE) {
			while (so <= os - 2) {
				if (XMLDoc[so + 1] == '\n' && XMLDoc[so] == 0) {
					lineNumber++;
					lineOffset = so;
				}
				so += 2;
			}
			lineOffset = (os - lineOffset) >> 1;
		} else {
			while (so <= os - 2) {
				if (XMLDoc[so] == '\n' && XMLDoc[so + 1] == 0) {
					lineNumber++;
					lineOffset = so;
				}
				so += 2;
			}
			lineOffset = (os - lineOffset) >> 1;
		}
		return "\nLine Number: " + (lineNumber + 1) + " Offset: " + (lineOffset - 1);
	}

	/**
	 * The entity ignorant version of getCharAfterS.
	 * 
	 * @return int
	 * @throws ParseException
	 * @throws EncodingException
	 * @throws com.ximpleware.EOFException
	 */
	final private int getCharAfterS() throws ParseException, EncodingException, EOFException {
		int n;

		do {
			n = r.getChar();
			if ((n == ' ' || n == '\n' || n == '\t' || n == '\r')) {
				// if (XMLChar.isSpaceChar(n) ) {
			} else
				return n;
			n = r.getChar();
			if ((n == ' ' || n == '\n' || n == '\t' || n == '\r')) {
			} else
				return n;
			/*
			 * if (n == ' ' || n == '\n' || n =='\t'|| n == '\r' ) { } else
			 * return n;
			 */
		} while (true);
		// throw new EOFException("should never come here");
	}

	/**
	 * The entity aware version of getCharAfterS
	 * 
	 * @return int
	 * @throws ParseException
	 *             Super class for any exception during parsing.
	 * @throws EncodingException
	 *             UTF/native encoding exception.
	 * @throws com.ximpleware.EOFException
	 *             End of file exception.
	 */
	// private int getCharAfterSe()
	// throws ParseException, EncodingException, EOFException {
	// int n = 0;
	// int temp; //offset saver
	// while (true) {
	// n = r.getChar();
	// if (!XMLChar.isSpaceChar(n)) {
	// if (n != '&')
	// return n;
	// else {
	// temp = offset;
	// if (!XMLChar.isSpaceChar(entityIdentifier())) {
	// offset = temp; // rewind
	// return '&';
	// }
	// }
	// }
	// n = r.getChar();
	// if (!XMLChar.isSpaceChar(n)) {
	// if (n != '&')
	// return n;
	// else {
	// temp = offset;
	// if (!XMLChar.isSpaceChar(entityIdentifier())) {
	// offset = temp; // rewind
	// return '&';
	// }
	// }
	// }
	// }
	// }

	/**
	 * Pre-compute the size of VTD+XML index
	 * 
	 * @return size of the index
	 * 
	 */

	public long getIndexSize() {
		int size;
		if ((docLen & 7) == 0)
			size = docLen;
		else
			size = ((docLen >> 3) + 1) << 3;

		size += (VTDBuffer.size << 3) + (l1Buffer.size << 3) + (l2Buffer.size << 3);

		if ((l3Buffer.size & 1) == 0) { // even
			size += l3Buffer.size << 2;
		} else {
			size += (l3Buffer.size + 1) << 2; // odd
		}
		return size + 64;
	}

	/**
	 * This method returns the VTDNav object after parsing, it also cleans
	 * internal state so VTDGen can process the next file.
	 * 
	 * @return com.ximpleware.VTDNav
	 */
	public VTDNav getNav() {
		// call VTDNav constructor
		VTDNav vn;
		if (shallowDepth)
			vn = new VTDNav(rootIndex, encoding, ns, VTDDepth, new UniByteBuffer(XMLDoc), VTDBuffer, l1Buffer, l2Buffer, l3Buffer, docOffset, docLen);
		else
			vn = new VTDNav_L5(rootIndex, encoding, ns, VTDDepth, new UniByteBuffer(XMLDoc), VTDBuffer, l1Buffer, l2Buffer, _l3Buffer, _l4Buffer, _l5Buffer, docOffset, docLen);
		clear();
		r = new UTF8Reader();
		return vn;
	}

	/**
	 * Get the offset value of previous character.
	 * 
	 * @return int
	 * @throws ParseException
	 *             Super class for exceptions during parsing.
	 */
	private int getPrevOffset() throws ParseException {
		int prevOffset = offset;
		int temp;
		switch (encoding) {
		case FORMAT_UTF8:
			do {
				prevOffset--;
			} while (XMLDoc[prevOffset] < 0 && ((XMLDoc[prevOffset] & (byte) 0xc0) == (byte) 0x80));
			return prevOffset;
		case FORMAT_ASCII:
		case FORMAT_ISO_8859_1:
		case FORMAT_ISO_8859_2:
		case FORMAT_ISO_8859_3:
		case FORMAT_ISO_8859_4:
		case FORMAT_ISO_8859_5:
		case FORMAT_ISO_8859_6:
		case FORMAT_ISO_8859_7:
		case FORMAT_ISO_8859_8:
		case FORMAT_ISO_8859_9:
		case FORMAT_ISO_8859_10:
		case FORMAT_ISO_8859_11:
		case FORMAT_ISO_8859_13:
		case FORMAT_ISO_8859_14:
		case FORMAT_ISO_8859_15:
		case FORMAT_WIN_1250:
		case FORMAT_WIN_1251:
		case FORMAT_WIN_1252:
		case FORMAT_WIN_1253:
		case FORMAT_WIN_1254:
		case FORMAT_WIN_1255:
		case FORMAT_WIN_1256:
		case FORMAT_WIN_1257:
		case FORMAT_WIN_1258:
			return offset - 1;
		case FORMAT_UTF_16LE:
			temp = (XMLDoc[offset] & 0xff) << 8 | (XMLDoc[offset + 1] & 0xff);
			if (temp < 0xd800 || temp > 0xdfff) {
				return offset - 2;
			} else
				return offset - 4;
		case FORMAT_UTF_16BE:
			temp = (XMLDoc[offset] & 0xff) << 8 | (XMLDoc[offset + 1] & 0xff);
			if (temp < 0xd800 || temp > 0xdfff) {
				return offset - 2;
			} else
				return offset - 4;
		default:
			throw new ParseException("Other Error: Should never happen");
		}
	}

	/**
	 * This method loads the VTD+XML from a byte array
	 * 
	 * @return VTDNav
	 * @param ba
	 * @throws IOException
	 * @throws IndexReadException
	 * 
	 */
	public VTDNav loadIndex(byte[] ba) throws IOException, IndexReadException {
		IndexHandler.readIndex(ba, this);
		return getNav();
	}

	/**
	 * This method loads the VTD+XML from an input stream
	 * 
	 * @return VTDNav
	 * @param is
	 * @throws IOException
	 * @throws IndexReadException
	 * 
	 */
	public VTDNav loadIndex(InputStream is) throws IOException, IndexReadException {
		IndexHandler.readIndex(is, this);
		return getNav();
	}

	/**
	 * This method loads the VTD+XML from a file
	 * 
	 * @return VTDNav
	 * @param fileName
	 * @throws IOException
	 * @throws IndexReadException
	 * 
	 */
	public VTDNav loadIndex(String fileName) throws IOException, IndexReadException {
		FileInputStream fis = null;
		try {
			fis = new FileInputStream(fileName);
			return loadIndex(fis);
		} finally {
			if (fis != null)
				fis.close();
		}
	}

	/**
	 * Load the separate VTD index and XmL file. Refer to persistence model of
	 * separate vtd index for more details
	 * 
	 * @param XMLFileName
	 *            name of xml file
	 * @param VTDIndexName
	 *            name of the vtd index file
	 * @return VTDNav object
	 * @throws IOException
	 * @throws IndexReadException
	 * 
	 */
	public VTDNav loadSeparateIndex(String XMLFileName, String VTDIndexName) throws IOException, IndexReadException {
		FileInputStream xfis = null;
		FileInputStream vfis = null;
		xfis = new FileInputStream(XMLFileName);
		int size = (int) (new File(XMLFileName)).length();
		vfis = new FileInputStream(VTDIndexName);
		IndexHandler.readSeparateIndex(vfis, xfis, size, this);
		return getNav();
	}

	private void matchCPEncoding() throws ParseException {
		if ((r.skipChar('p') || r.skipChar('P')) && r.skipChar('1') && r.skipChar('2') && r.skipChar('5')) {
			if (encoding <= FORMAT_UTF_16LE) {
				if (must_utf_8)
					throw new EncodingException("Can't switch from UTF-8" + formatLineNumber());
				if (r.skipChar('0')) {
					encoding = FORMAT_WIN_1250;
					r = new WIN1250Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('1')) {
					encoding = FORMAT_WIN_1251;
					r = new WIN1251Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('2')) {
					encoding = FORMAT_WIN_1252;
					r = new WIN1252Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('3')) {
					encoding = FORMAT_WIN_1253;
					r = new WIN1253Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('4')) {
					encoding = FORMAT_WIN_1254;
					r = new WIN1254Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('5')) {
					encoding = FORMAT_WIN_1255;
					r = new WIN1255Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('6')) {
					encoding = FORMAT_WIN_1256;
					r = new WIN1256Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('7')) {
					encoding = FORMAT_WIN_1257;
					r = new WIN1257Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('8')) {
					encoding = FORMAT_WIN_1258;
					r = new WIN1258Reader();
					_writeVTD(temp_offset, 6, TOKEN_DEC_ATTR_VAL, depth);
				} else
					throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
				if (r.skipChar(ch_temp))
					return;
			} else
				throw new ParseException("XML decl error: Can't switch encoding to ISO-8859" + formatLineNumber());

		}
		throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
	}

	private void matchISOEncoding() throws ParseException {
		if ((r.skipChar('s') || r.skipChar('S')) && (r.skipChar('o') || r.skipChar('O')) && r.skipChar('-') && r.skipChar('8') && r.skipChar('8') && r.skipChar('5')
				&& r.skipChar('9') && r.skipChar('-')) {
			if (encoding <= FORMAT_UTF_16LE) {
				if (must_utf_8)
					throw new EncodingException("Can't switch from UTF-8" + formatLineNumber());
				if (r.skipChar('1')) {
					if (r.skipChar(ch_temp)) {
						encoding = FORMAT_ISO_8859_1;
						r = new ISO8859_1Reader();
						_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
						return;
					} else if (r.skipChar('0')) {
						encoding = FORMAT_ISO_8859_10;
						r = new ISO8859_10Reader();
						_writeVTD(temp_offset, 11, TOKEN_DEC_ATTR_VAL, depth);
					} else if (r.skipChar('1')) {
						encoding = FORMAT_ISO_8859_11;
						r = new ISO8859_11Reader();
						_writeVTD(temp_offset, 11, TOKEN_DEC_ATTR_VAL, depth);
					} else if (r.skipChar('3')) {
						encoding = FORMAT_ISO_8859_13;
						r = new ISO8859_13Reader();
						_writeVTD(temp_offset, 11, TOKEN_DEC_ATTR_VAL, depth);
					} else if (r.skipChar('4')) {
						encoding = FORMAT_ISO_8859_14;
						r = new ISO8859_14Reader();
						_writeVTD(temp_offset, 11, TOKEN_DEC_ATTR_VAL, depth);
					} else if (r.skipChar('5')) {
						encoding = FORMAT_ISO_8859_15;
						r = new ISO8859_15Reader();
						_writeVTD(temp_offset, 15, TOKEN_DEC_ATTR_VAL, depth);
					} else
						throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
				} else if (r.skipChar('2')) {
					encoding = FORMAT_ISO_8859_2;
					r = new ISO8859_2Reader();
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('3')) {
					r = new ISO8859_3Reader();
					encoding = FORMAT_ISO_8859_3;
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('4')) {
					r = new ISO8859_4Reader();
					encoding = FORMAT_ISO_8859_4;
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('5')) {
					encoding = FORMAT_ISO_8859_5;
					r = new ISO8859_5Reader();
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('6')) {
					encoding = FORMAT_ISO_8859_6;
					r = new ISO8859_6Reader();
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('7')) {
					encoding = FORMAT_ISO_8859_7;
					r = new ISO8859_7Reader();
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('8')) {
					encoding = FORMAT_ISO_8859_8;
					r = new ISO8859_8Reader();
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('9')) {
					encoding = FORMAT_ISO_8859_9;
					r = new ISO8859_9Reader();
					_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_VAL, depth);
				} else
					throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
				if (r.skipChar(ch_temp))
					return;
			} else
				throw new ParseException("XML decl error: Can't switch encoding to ISO-8859" + formatLineNumber());
		}
		throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
	}

	private void matchUTFEncoding() throws ParseException {
		if ((r.skipChar('s') || r.skipChar('S')))
			if (r.skipChar('-') && (r.skipChar('a') || r.skipChar('A')) && (r.skipChar('s') || r.skipChar('S')) && (r.skipChar('c') || r.skipChar('C'))
					&& (r.skipChar('i') || r.skipChar('I')) && (r.skipChar('i') || r.skipChar('I')) && r.skipChar(ch_temp)) {
				if (singleByteEncoding) {
					if (must_utf_8)
						throw new EncodingException("Can't switch from UTF-8" + formatLineNumber());
					encoding = FORMAT_ASCII;
					r = new ASCIIReader();
					_writeVTD(temp_offset, 8, TOKEN_DEC_ATTR_VAL, depth);

					return;
				} else
					throw new ParseException("XML decl error: Can't switch encoding to US-ASCII" + formatLineNumber());
			} else
				throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());

		if ((r.skipChar('t') || r.skipChar('T')) && (r.skipChar('f') || r.skipChar('F')) && r.skipChar('-')) {
			if (r.skipChar('8') && r.skipChar(ch_temp)) {
				if (singleByteEncoding) {
					// encoding = FORMAT_UTF8;
					_writeVTD(temp_offset, 5, TOKEN_DEC_ATTR_VAL, depth);
					return;
				} else
					throw new ParseException("XML decl error: Can't switch encoding to UTF-8" + formatLineNumber());
			}
			if (r.skipChar('1') && r.skipChar('6')) {
				if (r.skipChar(ch_temp)) {
					if (!singleByteEncoding) {
						if (!BOM_detected)
							throw new EncodingException("BOM not detected for UTF-16" + formatLineNumber());
						_writeVTD(temp_offset >> 1, 6, TOKEN_DEC_ATTR_VAL, depth);
						return;
					}
					throw new ParseException("XML decl error: Can't switch encoding to UTF-16" + formatLineNumber());
				} else if ((r.skipChar('l') || r.skipChar('L')) && (r.skipChar('e') || r.skipChar('E')) && r.skipChar(ch_temp)) {
					if (encoding == FORMAT_UTF_16LE) {
						r = new UTF16LEReader();
						_writeVTD(temp_offset >> 1, 8, TOKEN_DEC_ATTR_VAL, depth);
						return;
					}
					throw new ParseException("XML del error: Can't switch encoding to UTF-16LE" + formatLineNumber());
				} else if ((r.skipChar('b') || r.skipChar('B')) && (r.skipChar('e') || r.skipChar('E')) && r.skipChar(ch_temp)) {
					if (encoding == FORMAT_UTF_16BE) {
						_writeVTD(temp_offset >> 1, 8, TOKEN_DEC_ATTR_VAL, depth);
						return;
					}
					throw new ParseException("XML del error: Can't swtich encoding to UTF-16BE" + formatLineNumber());
				}

				throw new ParseException("XML decl error: Invalid encoding" + formatLineNumber());
			}
		}
	}

	private void matchWindowsEncoding() throws ParseException {
		if ((r.skipChar('i') || r.skipChar('I')) && (r.skipChar('n') || r.skipChar('N')) && (r.skipChar('d') || r.skipChar('D')) && (r.skipChar('o') || r.skipChar('O'))
				&& (r.skipChar('w') || r.skipChar('W')) && (r.skipChar('s') || r.skipChar('S')) && r.skipChar('-') && r.skipChar('1') && r.skipChar('2') && r.skipChar('5')) {
			if (encoding <= FORMAT_UTF_16LE) {
				if (must_utf_8)
					throw new EncodingException("Can't switch from UTF-8" + formatLineNumber());
				if (r.skipChar('0')) {
					encoding = FORMAT_WIN_1250;
					r = new WIN1250Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('1')) {
					encoding = FORMAT_WIN_1251;
					r = new WIN1251Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('2')) {
					encoding = FORMAT_WIN_1252;
					r = new WIN1252Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('3')) {
					encoding = FORMAT_WIN_1253;
					r = new WIN1253Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('4')) {
					encoding = FORMAT_WIN_1254;
					r = new WIN1254Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('5')) {
					encoding = FORMAT_WIN_1255;
					r = new WIN1255Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('6')) {
					encoding = FORMAT_WIN_1256;
					r = new WIN1256Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('7')) {
					encoding = FORMAT_WIN_1257;
					r = new WIN1257Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else if (r.skipChar('8')) {
					encoding = FORMAT_WIN_1258;
					r = new WIN1258Reader();
					_writeVTD(temp_offset, 12, TOKEN_DEC_ATTR_VAL, depth);
				} else
					throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
				if (r.skipChar(ch_temp))
					return;

			} else
				throw new ParseException("XML decl error: Can't switch encoding to ISO-8859" + formatLineNumber());
		}
		throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
	}

	/**
	 * Generating VTD tokens and Location cache info. When set to true, VTDGen
	 * conforms to XML namespace 1.0 spec
	 * 
	 * @param NS
	 *            boolean Enable namespace or not
	 * @throws ParseException
	 *             Super class for any exceptions during parsing.
	 * @throws EOFException
	 *             End of file exception.
	 * @throws EntityException
	 *             Entity resolution exception.
	 * @throws EncodingException
	 *             UTF/native encoding exception.
	 */
	public void parse(boolean NS) throws EncodingException, EOFException, EntityException, ParseException {

		// define internal variables
		ns = NS;
		length1 = length2 = 0;
		attr_count = prefixed_attr_count = 0 /* , ch = 0, ch_temp = 0 */;
		int parser_state = STATE_DOC_START;
		// boolean has_amp = false;
		is_ns = false;
		encoding = FORMAT_UTF8;
		boolean helper = false;
		boolean default_ns = false; // true xmlns='abc'
		boolean isXML = false; // true only for xmlns:xml
		singleByteEncoding = true;
		// first check first several bytes to figure out the encoding
		decide_encoding();

		// enter the main finite state machine
		try {
			_writeVTD(0, 0, TOKEN_DOCUMENT, depth);
			while (true) {
				switch (parser_state) {
				case STATE_LT_SEEN: // if (depth < -1)
					// throw new ParseException("Other Errors: Invalid depth");
					temp_offset = offset;
					ch = r.getChar();
					if (XMLChar.isNameStartChar(ch)) {
						depth++;
						parser_state = STATE_START_TAG;
					} else {
						switch (ch) {
						case '/':
							parser_state = STATE_END_TAG;
							break;
						case '?':
							parser_state = process_qm_seen();
							break;
						case '!': // three possibility (comment, CDATA, DOCTYPE)
							parser_state = process_ex_seen();
							break;
						default:
							throw new ParseException("Other Error: Invalid char after <" + formatLineNumber());
						}
					}
					break;

				case STATE_START_TAG: // name space is handled by
					do {
						ch = r.getChar();
						if (XMLChar.isNameChar(ch)) {
							if (ch == ':') {
								length2 = offset - temp_offset - increment;
								if (ns && checkPrefix2(temp_offset, length2))
									throw new ParseException("xmlns can't be an element prefix " + formatLineNumber(offset));
							}
						} else
							break;
						ch = r.getChar();
						if (XMLChar.isNameChar(ch)) {
							if (ch == ':') {
								length2 = offset - temp_offset - increment;
								if (ns && checkPrefix2(temp_offset, length2))
									throw new ParseException("xmlns can't be an element prefix " + formatLineNumber(offset));
							}
						} else
							break;
					} while (true);
					length1 = offset - temp_offset - increment;
					if (depth > MAX_DEPTH) {
						throw new ParseException("Other Error: Depth exceeds MAX_DEPTH" + formatLineNumber());
					}
					// writeVTD(offset, TOKEN_STARTING_TAG, length2:length1,
					// depth)
					long x = ((long) length1 << 32) + temp_offset;
					tag_stack[depth] = x;

					// System.out.println(
					// " " + (temp_offset) + " " + length2 + ":" + length1 +
					// " startingTag " + depth);
					if (depth > VTDDepth)
						VTDDepth = depth;
					// if (encoding < FORMAT_UTF_16BE){
					if (singleByteEncoding) {
						if (length2 > MAX_PREFIX_LENGTH || length1 > MAX_QNAME_LENGTH)
							throw new ParseException("Token Length Error: Starting tag prefix or qname length too long" + formatLineNumber());
						if (this.shallowDepth)
							writeVTD((temp_offset), (length2 << 11) | length1, TOKEN_STARTING_TAG, depth);
						else
							writeVTD_L5((temp_offset), (length2 << 11) | length1, TOKEN_STARTING_TAG, depth);
					} else {
						if (length2 > (MAX_PREFIX_LENGTH << 1) || length1 > (MAX_QNAME_LENGTH << 1))
							throw new ParseException("Token Length Error: Starting tag prefix or qname length too long" + formatLineNumber());
						if (this.shallowDepth)
							writeVTD((temp_offset) >> 1, (length2 << 10) | (length1 >> 1), TOKEN_STARTING_TAG, depth);
						else
							writeVTD_L5((temp_offset) >> 1, (length2 << 10) | (length1 >> 1), TOKEN_STARTING_TAG, depth);
					}
					if (ns) {
						if (length2 != 0) {
							length2 += increment;
							currentElementRecord = (((long) ((length2 << 16) | length1)) << 32) | temp_offset;
						} else
							currentElementRecord = 0;

						if (depth <= nsBuffer1.size - 1) {
							nsBuffer1.size = depth;
							int t = nsBuffer1.intAt(depth - 1) + 1;
							nsBuffer2.size = t;
							nsBuffer3.size = t;
						}
					}
					// offset += length1;
					length2 = 0;
					if (XMLChar.isSpaceChar(ch)) {
						ch = getCharAfterS();
						if (XMLChar.isNameStartChar(ch)) {
							// seen an attribute here
							temp_offset = getPrevOffset();
							parser_state = STATE_ATTR_NAME;
							break;
						}
					}
					helper = true;
					if (ch == '/') {
						depth--;
						helper = false;
						ch = r.getChar();
					}
					if (ch == '>') {
						if (ns) {
							nsBuffer1.append(nsBuffer3.size - 1);
							if (currentElementRecord != 0)
								qualifyElement();
						}

						// parser_state = processElementTail(helper);
						if (depth != -1) {
							temp_offset = offset;
							// ch = getCharAfterSe(); // consume WSs
							ch = getCharAfterS(); // consume WSs
							if (ch == '<') {
								if (ws)
									addWhiteSpaceRecord();
								parser_state = STATE_LT_SEEN;
								if (r.skipChar('/')) {
									if (helper) {
										length1 = offset - temp_offset - (increment << 1);
										// if (length1 > 0) {
										// if (encoding < FORMAT_UTF_16BE)
										if (singleByteEncoding)
											writeVTDText((temp_offset), length1, TOKEN_CHARACTER_DATA, depth);
										else
											writeVTDText((temp_offset) >> 1, (length1 >> 1), TOKEN_CHARACTER_DATA, depth);
										// }
									}
									parser_state = STATE_END_TAG;
									break;
								}
							} else if (XMLChar.isContentChar(ch)) {
								// temp_offset = offset;
								parser_state = STATE_TEXT;
							} else {
								parser_state = STATE_TEXT;
								handleOtherTextChar2(ch);
							}
						} else {
							parser_state = STATE_DOC_END;
						}
						break;
					}
					throw new ParseException("Starting tag Error: Invalid char in starting tag" + formatLineNumber());

				case STATE_END_TAG:
					temp_offset = offset;
					int sos = (int) tag_stack[depth];
					int sl = (int) (tag_stack[depth] >> 32);

					offset = temp_offset + sl;

					if (offset >= endOffset)
						throw new EOFException("permature EOF reached, XML document incomplete");
					for (int i = 0; i < sl; i++) {
						if (XMLDoc[sos + i] != XMLDoc[temp_offset + i])
							throw new ParseException("Ending tag error: Start/ending tag mismatch" + formatLineNumber());
					}
					depth--;
					ch = getCharAfterS();
					if (ch != '>')
						throw new ParseException("Ending tag error: Invalid char in ending tag " + formatLineNumber());

					if (depth != -1) {
						temp_offset = offset;
						ch = getCharAfterS();
						if (ch == '<') {
							if (ws)
								addWhiteSpaceRecord();
							parser_state = STATE_LT_SEEN;
						} else if (XMLChar.isContentChar(ch)) {
							parser_state = STATE_TEXT;
						} else {
							handleOtherTextChar2(ch);
							parser_state = STATE_TEXT;
						}
					} else
						parser_state = STATE_DOC_END;
					break;

				case STATE_ATTR_NAME:

					if (ch == 'x') {
						if (r.skipChar('m') && r.skipChar('l') && r.skipChar('n') && r.skipChar('s')) {
							ch = r.getChar();
							if (ch == '=' || XMLChar.isSpaceChar(ch)) {
								is_ns = true;
								default_ns = true;
							} else if (ch == ':') {
								is_ns = true; // break;
								default_ns = false;
							}
						}
					}
					do {
						if (XMLChar.isNameChar(ch)) {
							if (ch == ':') {
								length2 = offset - temp_offset - increment;
							}
						} else
							break;
						ch = r.getChar();
					} while (true);
					length1 = getPrevOffset() - temp_offset;
					if (is_ns && ns) {
						// make sure postfix isn't xmlns
						if (!default_ns) {
							if (increment == 1 && (length1 - length2 - 1 == 5) || (increment == 2 && (length1 - length2 - 2 == 10)))
								disallow_xmlns(temp_offset + length2 + increment);

							// if the post fix is xml, signal it
							if (increment == 1 && (length1 - length2 - 1 == 3) || (increment == 2 && (length1 - length2 - 2 == 6)))
								isXML = matchXML(temp_offset + length2 + increment);
						}
					}
					// check for uniqueness here
					checkAttributeUniqueness();
					// boolean unique = true;
					// boolean unequal;
					// for (int i = 0; i < attr_count; i++) {
					// unequal = false;
					// int prevLen = (int) attr_name_array[i];
					// if (length1 == prevLen) {
					// int prevOffset =
					// (int) (attr_name_array[i] >> 32);
					// for (int j = 0; j < prevLen; j++) {
					// if (XMLDoc[prevOffset + j]
					// != XMLDoc[temp_offset + j]) {
					// unequal = true;
					// break;
					// }
					// }
					// } else
					// unequal = true;
					// unique = unique && unequal;
					// }
					// if (!unique && attr_count != 0)
					// throw new ParseException(
					// "Error in attr: Attr name not unique"
					// + formatLineNumber());
					// unique = true;
					// if (attr_count < attr_name_array.length) {
					// attr_name_array[attr_count] =
					// ((long) (temp_offset) << 32) + length1;
					// attr_count++;
					// } else // grow the attr_name_array by 16
					// {
					// long[] temp_array = attr_name_array;
					// /*System.out.println(
					// "size increase from "
					// + temp_array.length
					// + "  to "
					// + (attr_count + 16));*/
					// attr_name_array =
					// new long[attr_count + ATTR_NAME_ARRAY_SIZE];
					// System.arraycopy(temp_array, 0,attr_name_array, 0,
					// attr_count);
					// /*for (int i = 0; i < attr_count; i++) {
					// attr_name_array[i] = temp_array[i];
					// }*/
					// attr_name_array[attr_count] =
					// ((long) (temp_offset) << 32) + length1;
					// attr_count++;
					// }

					// after checking, write VTD
					if (is_ns) { // if the prefix is xmlns: or xmlns
						// if (encoding < FORMAT_UTF_16BE){
						if (singleByteEncoding) {
							if (length2 > MAX_PREFIX_LENGTH || length1 > MAX_QNAME_LENGTH)
								throw new ParseException("Token length overflow error: Attr NS tag prefix or qname length too long" + formatLineNumber());
							_writeVTD(temp_offset, (length2 << 11) | length1, TOKEN_ATTR_NS, depth);
						} else {
							if (length2 > (MAX_PREFIX_LENGTH << 1) || length1 > (MAX_QNAME_LENGTH << 1))
								throw new ParseException("Token length overflow error: Attr NS prefix or qname length too long" + formatLineNumber());
							_writeVTD(temp_offset >> 1, (length2 << 10) | (length1 >> 1), TOKEN_ATTR_NS, depth);
						}
						// append to nsBuffer2
						if (ns) {
							// unprefixed xmlns are not recorded
							if (length2 != 0 && !isXML) {
								// nsBuffer2.append(VTDBuffer.size() - 1);
								long l = ((long) ((length2 << 16) | length1)) << 32 | temp_offset;
								nsBuffer3.append(l); // byte offset and byte
								// length
							}
						}

					} else {
						// if (encoding < FORMAT_UTF_16BE){
						if (singleByteEncoding) {
							if (length2 > MAX_PREFIX_LENGTH || length1 > MAX_QNAME_LENGTH)
								throw new ParseException("Token Length Error: Attr name prefix or qname length too long" + formatLineNumber());
							_writeVTD(temp_offset, (length2 << 11) | length1, TOKEN_ATTR_NAME, depth);
						} else {
							if (length2 > (MAX_PREFIX_LENGTH << 1) || length1 > (MAX_QNAME_LENGTH << 1))
								throw new ParseException("Token Length overflow error: Attr name prefix or qname length too long" + formatLineNumber());
							_writeVTD(temp_offset >> 1, (length2 << 10) | (length1 >> 1), TOKEN_ATTR_NAME, depth);
						}
					}
					/*
					 * System.out.println( " " + temp_offset + " " + length2 +
					 * ":" + length1 + " attr name " + depth);
					 */
					length2 = 0;
					if (XMLChar.isSpaceChar(ch)) {
						ch = getCharAfterS();
					}
					if (ch != '=')
						throw new ParseException("Error in attr: invalid char" + formatLineNumber());
					ch_temp = getCharAfterS();
					if (ch_temp != '"' && ch_temp != '\'')
						throw new ParseException("Error in attr: invalid char (should be ' or \" )" + formatLineNumber());
					temp_offset = offset;
					parser_state = STATE_ATTR_VAL;
					break;

				case STATE_ATTR_VAL:
					do {
						ch = r.getChar();
						if (XMLChar.isValidChar(ch) && ch != '<') {
							if (ch == ch_temp)
								break;
							if (ch == '&') {
								// as in vtd spec, we mark attr val with
								// entities
								if (!XMLChar.isValidChar(entityIdentifier())) {
									throw new ParseException("Error in attr: Invalid XML char" + formatLineNumber());
								}
							}
						} else
							throw new ParseException("Error in attr: Invalid XML char" + formatLineNumber());
					} while (true);

					length1 = offset - temp_offset - increment;
					if (ns && is_ns) {
						if (!default_ns && length1 == 0) {
							throw new ParseException(" non-default ns URL can't be empty" + formatLineNumber());
						}
						// identify nsURL return 0,1,2
						int t = identifyNsURL(temp_offset, length1);
						if (isXML) {// xmlns:xml
							if (t != 1)
								// URL points to
								// "http://www.w3.org/XML/1998/namespace"
								throw new ParseException("xmlns:xml can only point to" + "\"http://www.w3.org/XML/1998/namespace\"" + formatLineNumber());

						} else {
							if (!default_ns)
								nsBuffer2.append(((long) temp_offset << 32) | length1);
							if (t != 0) {
								if (t == 1)
									throw new ParseException("namespace declaration can't point to" + " \"http://www.w3.org/XML/1998/namespace\"" + formatLineNumber());
								throw new ParseException("namespace declaration can't point to" + " \"http://www.w3.org/2000/xmlns/\"" + formatLineNumber());
							}
						}
						// no ns URL points to
						// "http://www.w3.org/2000/xmlns/"

						// no ns URL points to
						// "http://www.w3.org/XML/1998/namespace"
					}

					if (singleByteEncoding) {
						// if (encoding < FORMAT_UTF_16BE){
						if (length1 > MAX_TOKEN_LENGTH)
							throw new ParseException("Token Length Error:" + " Attr val too long (>0xfffff)" + formatLineNumber());
						_writeVTD(temp_offset, length1, TOKEN_ATTR_VAL, depth);
					} else {
						if (length1 > (MAX_TOKEN_LENGTH << 1))
							throw new ParseException("Token Length Error:" + " Attr val too long (>0xfffff)" + formatLineNumber());
						_writeVTD(temp_offset >> 1, length1 >> 1, TOKEN_ATTR_VAL, depth);
					}

					isXML = false;
					is_ns = false;

					ch = r.getChar();
					if (XMLChar.isSpaceChar(ch)) {
						ch = getCharAfterS();
						if (XMLChar.isNameStartChar(ch)) {
							temp_offset = offset - increment;
							parser_state = STATE_ATTR_NAME;
							break;
						}
					}

					helper = true;
					if (ch == '/') {
						depth--;
						helper = false;
						ch = r.getChar();
					}

					if (ch == '>') {
						if (ns) {
							nsBuffer1.append(nsBuffer3.size - 1);
							if (prefixed_attr_count > 0)
								qualifyAttributes();
							if (prefixed_attr_count > 1) {
								checkQualifiedAttributeUniqueness();
							}
							if (currentElementRecord != 0)
								qualifyElement();
							prefixed_attr_count = 0;
						}
						attr_count = 0;
						// parser_state = processElementTail(helper);
						if (depth != -1) {
							temp_offset = offset;
							// ch = getCharAfterSe();
							ch = getCharAfterS();

							if (ch == '<') {
								if (ws)
									addWhiteSpaceRecord();
								parser_state = STATE_LT_SEEN;
								if (r.skipChar('/')) {
									if (helper) {
										length1 = offset - temp_offset - (increment << 1);
										// if (length1 > 0) {
										if (singleByteEncoding)// if (encoding <
																// FORMAT_UTF_16BE)
											writeVTDText((temp_offset), length1, TOKEN_CHARACTER_DATA, depth);
										else
											writeVTDText((temp_offset) >> 1, (length1 >> 1), TOKEN_CHARACTER_DATA, depth);
										// }
									}
									parser_state = STATE_END_TAG;
									break;
								}
							} else if (XMLChar.isContentChar(ch)) {
								// temp_offset = offset;
								parser_state = STATE_TEXT;
							} else {
								handleOtherTextChar2(ch);
								parser_state = STATE_TEXT;
							}
						} else {
							parser_state = STATE_DOC_END;
						}
						break;
					}

					throw new ParseException("Starting tag Error: Invalid char in starting tag" + formatLineNumber());

				case STATE_TEXT:
					if (depth == -1)
						throw new ParseException("Error in text content: Char data at the wrong place" + formatLineNumber());
					do {
						ch = r.getChar();
						// System.out.println(""+(char)ch);
						if (XMLChar.isContentChar(ch)) {
						} else if (ch == '<') {
							break;
						} else
							handleOtherTextChar(ch);
						ch = r.getChar();
						if (XMLChar.isContentChar(ch)) {
						} else if (ch == '<') {
							break;
						} else
							handleOtherTextChar(ch);
					} while (true);

					length1 = offset - increment - temp_offset;

					if (singleByteEncoding) // if (encoding < FORMAT_UTF_16BE)
						writeVTDText(temp_offset, length1, TOKEN_CHARACTER_DATA, depth);
					else
						writeVTDText(temp_offset >> 1, length1 >> 1, TOKEN_CHARACTER_DATA, depth);

					// has_amp = true;
					parser_state = STATE_LT_SEEN;
					break;
				case STATE_DOC_START:
					parser_state = process_start_doc();
					break;
				case STATE_DOC_END:
					// docEnd = true;
					parser_state = process_end_doc();
					break;
				case STATE_PI_TAG:
					parser_state = process_pi_tag();
					break;
				// throw new ParseException("Error in PI: Invalid char");
				case STATE_PI_VAL:
					parser_state = process_pi_val();
					break;

				case STATE_DEC_ATTR_NAME:
					parser_state = process_dec_attr();
					break;

				case STATE_COMMENT:
					parser_state = process_comment();
					break;

				case STATE_CDATA:
					parser_state = process_cdata();
					break;

				case STATE_DOCTYPE:
					parser_state = process_doc_type();
					break;

				case STATE_END_COMMENT:
					parser_state = process_end_comment();
					break;

				case STATE_END_PI:
					parser_state = process_end_pi();
					break;

				default:
					throw new ParseException("Other error: invalid parser state" + formatLineNumber());
				}
			}
		} catch (EOFException e) {
			if (parser_state != STATE_DOC_END)
				throw e;
			finishUp();
		}
	}

	private void checkQualifiedAttributeUniqueness() throws ParseException {
		// TODO Auto-generated method stub
		int preLen1, os1, postLen1, URLLen1, URLOs1, preLen2, os2, postLen2, URLLen2, URLOs2, k;
		for (int i = 0; i < prefixed_attr_count; i++) {
			preLen1 = (int) ((prefixed_attr_name_array[i] & 0xffff0000L) >> 16);
			postLen1 = (int) ((prefixed_attr_name_array[i] & 0xffffL)) - preLen1 - increment;
			os1 = (int) (prefixed_attr_name_array[i] >> 32) + preLen1 + increment;
			URLLen1 = nsBuffer2.lower32At(prefix_URL_array[i]);
			URLOs1 = nsBuffer2.upper32At(prefix_URL_array[i]);
			for (int j = i + 1; j < prefixed_attr_count; j++) {
				// prefix of i matches that of j
				preLen2 = (int) ((prefixed_attr_name_array[j] & 0xffff0000L) >> 16);
				postLen2 = (int) ((prefixed_attr_name_array[j] & 0xffffL)) - preLen2 - increment;
				os2 = (int) (prefixed_attr_name_array[j] >> 32) + preLen2 + increment;
				// System.out.println(new String(XMLDoc,os1, postLen1)
				// +" "+ new String(XMLDoc, os2, postLen2));
				if (postLen1 == postLen2) {
					k = 0;
					for (; k < postLen1; k++) {
						// System.out.println(i+" "+(char)(XMLDoc[os+k])+"<===>"+(char)(XMLDoc[preOs+k]));
						if (XMLDoc[os1 + k] != XMLDoc[os2 + k])
							break;
					}
					if (k == postLen1) {
						// found the match
						URLLen2 = nsBuffer2.lower32At(prefix_URL_array[j]);
						URLOs2 = nsBuffer2.upper32At(prefix_URL_array[j]);
						// System.out.println(" URLOs1 ===>" + URLOs1);
						// System.out.println("nsBuffer2 ===>"+nsBuffer2.longAt(i)+" i==>"+i);
						// System.out.println("URLLen2 "+ URLLen2+" URLLen1 "+
						// URLLen1+" ");
						if (matchURL(URLOs1, URLLen1, URLOs2, URLLen2))
							throw new ParseException(" qualified attribute names collide " + formatLineNumber(os2));
					}
				}
			}
			// System.out.println("======");
		}
	}

	private void qualifyAttributes() throws ParseException {
		int i1 = nsBuffer3.size - 1;
		int j = 0, i = 0;
		// two cases:
		// 1. the current element has no prefix, look for xmlns
		// 2. the current element has prefix, look for xmlns:something
		while (j < prefixed_attr_count) {
			int preLen = (int) ((prefixed_attr_name_array[j] & 0xffff0000L) >> 16);
			int preOs = (int) (prefixed_attr_name_array[j] >> 32);
			// System.out.println(new String(XMLDoc, preOs, preLen)+"===");
			i = i1;
			while (i >= 0) {
				int t = nsBuffer3.upper32At(i);
				// with prefix, get full length and prefix length
				if ((t & 0xffff) - (t >> 16) == preLen + increment) {
					// doing byte comparison here
					int os = nsBuffer3.lower32At(i) + (t >> 16) + increment;
					// System.out.println(new String(XMLDoc, os, preLen)+"");
					int k = 0;
					for (; k < preLen; k++) {
						// System.out.println(i+" "+(char)(XMLDoc[os+k])+"<===>"+(char)(XMLDoc[preOs+k]));
						if (XMLDoc[os + k] != XMLDoc[preOs + k])
							break;
					}
					if (k == preLen) {
						break; // found the match
					}
				}
				/*
				 * if ( (nsBuffer3.upper32At(i) & 0xffff0000) == 0){ return; }
				 */
				i--;
			}
			if (i < 0)
				throw new ParseException("Name space qualification Exception: prefixed attribute not qualified\n" + formatLineNumber(preOs));
			else
				prefix_URL_array[j] = i;
			j++;
			// no need to check if xml is the prefix
		}
		// for (int h=0;h"+(char)l);
			if (URL2.charAt(i) != (int) l)
				return 0;
			os += (int) (l >> 32);
		}

		// store offset value
		t = os;

		for (i = 0; i < 11 && os < g; i++) {
			l = _getCharResolved(os);
			if (URL1.charAt(i) != (int) l)
				break;
			os += (int) (l >> 32);
		}
		if (os == g)
			return 2;

		// so far a match
		os = t;
		for (i = 18; i < 36 && os < g; i++) {
			l = _getCharResolved(os);
			if (URL2.charAt(i) != (int) l)
				return 0;
			os += (int) (l >> 32);
		}
		if (os == g)
			return 1;

		return 0;
	}

	private boolean matchXML(int byte_offset) {
		// TODO Auto-generated method stub
		if (encoding < FORMAT_UTF_16BE) {
			if (XMLDoc[byte_offset] == 'x' && XMLDoc[byte_offset + 1] == 'm' && XMLDoc[byte_offset + 2] == 'l')
				return true;
		} else {
			if (encoding == FORMAT_UTF_16LE) {
				if (XMLDoc[byte_offset] == 'x' && XMLDoc[byte_offset + 1] == 0 && XMLDoc[byte_offset + 2] == 'm' && XMLDoc[byte_offset + 3] == 0 && XMLDoc[byte_offset + 4] == 'l'
						&& XMLDoc[byte_offset + 5] == 0)
					return true;
			} else {
				if (XMLDoc[byte_offset] == 0 && XMLDoc[byte_offset + 1] == 'x' && XMLDoc[byte_offset + 2] == 0 && XMLDoc[byte_offset + 3] == 'm' && XMLDoc[byte_offset + 4] == 0
						&& XMLDoc[byte_offset + 5] == 'l')
					return true;
			}
		}
		return false;
	}

	private void disallow_xmlns(int byte_offset) throws ParseException {
		// TODO Auto-generated method stub
		if (encoding < FORMAT_UTF_16BE) {
			if (XMLDoc[byte_offset] == 'x' && XMLDoc[byte_offset + 1] == 'm' && XMLDoc[byte_offset + 2] == 'l' && XMLDoc[byte_offset + 3] == 'n' && XMLDoc[byte_offset + 4] == 's')
				throw new ParseException("xmlns as a ns prefix can't be re-declared" + formatLineNumber(byte_offset));

		} else {
			if (encoding == FORMAT_UTF_16LE) {
				if (XMLDoc[byte_offset] == 'x' && XMLDoc[byte_offset + 1] == 0 && XMLDoc[byte_offset + 2] == 'm' && XMLDoc[byte_offset + 3] == 0 && XMLDoc[byte_offset + 4] == 'l'
						&& XMLDoc[byte_offset + 5] == 0 && XMLDoc[byte_offset + 6] == 'n' && XMLDoc[byte_offset + 7] == 0 && XMLDoc[byte_offset + 8] == 's'
						&& XMLDoc[byte_offset + 9] == 0)
					throw new ParseException("xmlns as a ns prefix can't be re-declared" + formatLineNumber(byte_offset));
			} else {
				if (XMLDoc[byte_offset] == 0 && XMLDoc[byte_offset + 1] == 'x' && XMLDoc[byte_offset + 2] == 0 && XMLDoc[byte_offset + 3] == 'm' && XMLDoc[byte_offset + 4] == 0
						&& XMLDoc[byte_offset + 5] == 'l' && XMLDoc[byte_offset + 6] == 0 && XMLDoc[byte_offset + 7] == 'n' && XMLDoc[byte_offset + 8] == 0
						&& XMLDoc[byte_offset + 9] == 's')
					throw new ParseException("xmlns as a ns prefix can't be re-declared" + formatLineNumber(byte_offset));
			}
		}
	}

	/**
	 * This method parses the XML file and returns a boolean indicating if it is
	 * successful or not.When set to true, VTDGen conforms to XML namespace 1.0
	 * spec
	 * 
	 * @param fileName
	 * @param ns
	 *            namespace aware or not
	 * @return boolean indicating whether the parseFile is a success
	 * 
	 */
	public boolean parseFile(String fileName, boolean ns) {
		FileInputStream fis = null;
		File f = null;
		try {
			f = new File(fileName);
			fis = new FileInputStream(f);
			byte[] b = new byte[(int) f.length()];

			// fis.read(b);

			int offset = 0;
			int numRead = 0;
			int numOfBytes = 1048576;// I choose this value randomally,
			// any other (not too big) value also can be here.
			if (b.length - offset < numOfBytes) {
				numOfBytes = b.length - offset;
			}
			while (offset < b.length && (numRead = fis.read(b, offset, numOfBytes)) >= 0) {
				offset += numRead;
				if (b.length - offset < numOfBytes) {
					numOfBytes = b.length - offset;
				}
			}
			// fis.read(b);
			this.setDoc(b);
			this.parse(ns); // set namespace awareness to true
			return true;
		} catch (java.io.IOException e) {
			throw new RuntimeException(e);
		} catch (ParseException e) {
			throw new RuntimeException(e);
		} finally {
			if (fis != null) {
				try {
					fis.close();
				} catch (Exception e) {
				}
			}
		}
	}

	/**
	 * This method inflates then parses GZIP'ed XML file and returns a boolean
	 * indicating if it is successful or not.When set to true, VTDGen conforms
	 * to XML namespace 1.0 spec
	 * 
	 * @param fileName
	 * @param ns
	 * @return
	 */
	public boolean parseGZIPFile(String GZIPfileName, boolean ns) {
		FileInputStream fis = null;
		// File f = null;
		try {
			fis = new FileInputStream(GZIPfileName);
			InputStream in = new GZIPInputStream(fis);
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			byte[] ba = new byte[65536];
			int noRead;
			while ((noRead = in.read(ba)) != -1) {
				baos.write(ba, 0, noRead);
			}
			this.setDoc(baos.toByteArray());
			this.parse(ns);
			return true;
		} catch (FileNotFoundException e) {
			throw new RuntimeException(e);
		} catch (IOException e) {
			throw new RuntimeException(e);
		} catch (ParseException e) {
			throw new RuntimeException(e);
		} finally {
			if (fis != null) {
				try {
					fis.close();
				} catch (Exception e) {
				}
			}
		}
	}

	/**
	 * This method inflates then parses ZIP'ed XML file and returns a boolean
	 * indicating if it is successful or not.When set to true, VTDGen conforms
	 * to XML namespace 1.0 spec
	 * 
	 * @param ZIPfileName
	 * @param XMLName
	 * @param ns
	 * @return
	 */
	public boolean parseZIPFile(String ZIPfileName, String XMLName, boolean ns) {
		InputStream is = null;
		ZipFile zf = null;

		try {
			zf = new ZipFile(ZIPfileName);
			is = zf.getInputStream(zf.getEntry(XMLName));
			// InputStream in = new ZipInputStream(fis);
			ByteArrayOutputStream baos = new ByteArrayOutputStream();
			byte[] ba = new byte[65536];
			int noRead;
			while ((noRead = is.read(ba)) != -1) {
				baos.write(ba, 0, noRead);
			}
			this.setDoc(baos.toByteArray());
			this.parse(ns);
			return true;
		} catch (FileNotFoundException e) {
			throw new RuntimeException(e);
		} catch (IOException e) {
			throw new RuntimeException(e);
		} catch (ParseException e) {
			throw new RuntimeException(e);
		} finally {
			if (zf != null) {
				try {
					zf.close();
				} catch (Exception e) {
				}
			}
		}
	}

	/**
	 * This method retrieves an XML document from the net using HTTP request If
	 * the returned content type is "application xml" then it will proceed with
	 * the parsing. Also notice that the content size can't be zero or negative,
	 * it must be a positive integer matching the size of the document
	 * 
	 * no exception is thrown in the case of failure, this method will simply
	 * return false
	 * 
	 * @param url
	 * @return boolean (status of parsing the XML referenced by the HTTP
	 *         request)
	 * 
	 */
	public boolean parseHttpUrl(String url, boolean ns) {
		URL url1 = null;
		InputStream in = null;
		HttpURLConnection urlConnection = null;
		try {
			url1 = new URL(url);
			in = url1.openStream();
			urlConnection = (HttpURLConnection) url1.openConnection();
			if (urlConnection.getResponseCode() == HttpURLConnection.HTTP_OK) {
				int len = urlConnection.getContentLength();
				if (len > 0) {
					// System.out.println("len  ===> " + len + "  "
					// + urlConnection.getContentType());
					byte[] ba = new byte[len];
					int k = len, offset = 0;
					while (offset < len & k > 0) {
						k = in.read(ba, offset, len - offset);
						offset += k;
					}
					this.setDoc(ba);
					this.parse(ns);
					return true;
				} else {
					ByteArrayOutputStream baos = new ByteArrayOutputStream();
					byte[] ba = new byte[4096];
					int k = -1;
					while ((k = in.read(ba)) > 0) {
						baos.write(ba, 0, k);
					}
					this.setDoc(baos.toByteArray());
					this.parse(ns);
					return true;
					// baos.w
				}
			}
		} catch (IOException e) {

		} catch (ParseException e) {

		} finally {
			try {
				if (in != null)
					in.close();
				if (urlConnection != null)
					urlConnection.disconnect();
			} catch (Exception e) {
			}
		}
		return false;
	}

	// private

	/**
	 * This private method processes CDATA section
	 * 
	 * @return the parser state after which the parser loop jumps to
	 * @throws ParseException
	 * @throws EncodingException
	 * @throws EOFException
	 */
	private int process_cdata() throws ParseException, EncodingException, EOFException {
		int parser_state;
		while (true) {
			ch = r.getChar();
			if (XMLChar.isValidChar(ch)) {
				if (ch == ']' && r.skipChar(']')) {
					while (r.skipChar(']'))
						;
					if (r.skipChar('>')) {
						break;
					} /*
					 * else throw new ParseException(
					 * "Error in CDATA: Invalid termination sequence" +
					 * formatLineNumber());
					 */
				}
			} else
				throw new ParseException("Error in CDATA: Invalid Char" + formatLineNumber());
		}
		length1 = offset - temp_offset - (increment << 1) - increment;
		if (singleByteEncoding) {// if (encoding < FORMAT_UTF_16BE){

			writeVTDText(temp_offset, length1, TOKEN_CDATA_VAL, depth);
		} else {

			writeVTDText(temp_offset >> 1, length1 >> 1, TOKEN_CDATA_VAL, depth);
		}
		// System.out.println(" " + (temp_offset) + " " + length1 + " CDATA " +
		// depth);
		temp_offset = offset;
		// ch = getCharAfterSe();
		ch = getCharAfterS();

		if (ch == '<') {
			if (ws)
				addWhiteSpaceRecord();
			parser_state = STATE_LT_SEEN;
		} else if (XMLChar.isContentChar(ch)) {
			// temp_offset = offset-1;
			parser_state = STATE_TEXT;
		} else if (ch == '&') {
			// has_amp = true;
			// temp_offset = offset-1;
			entityIdentifier();
			parser_state = STATE_TEXT;
			// temp_offset = offset;
		} else if (ch == ']') {
			// temp_offset = offset-1;
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseException("Error in text content: ]]> in text content" + formatLineNumber());
			}
			parser_state = STATE_TEXT;
		} else
			throw new ParseException("Other Error: Invalid char in xml" + formatLineNumber());
		return parser_state;
	}

	/**
	 * This private method process comment
	 * 
	 * @return the parser state after which the parser loop jumps to
	 * @throws ParseException
	 * @throws EncodingException
	 * @throws EOFException
	 */
	private int process_comment() throws ParseException, EncodingException, EOFException {
		int parser_state;
		while (true) {
			ch = r.getChar();
			if (XMLChar.isValidChar(ch)) {
				if (ch == '-' && r.skipChar('-')) {
					length1 = offset - temp_offset - (increment << 1);
					break;
				}
			} else
				throw new ParseException("Error in comment: Invalid Char" + formatLineNumber());
		}
		if (r.getChar() == '>') {
			// System.out.println(" " + (temp_offset) + " " + length1 +
			// " comment " + depth);
			if (singleByteEncoding)// if (encoding < FORMAT_UTF_16BE)
				writeVTDText(temp_offset, length1, TOKEN_COMMENT, depth);
			else
				writeVTDText(temp_offset >> 1, length1 >> 1, TOKEN_COMMENT, depth);
			// length1 = 0;
			temp_offset = offset;
			// ch = getCharAfterSe();
			ch = getCharAfterS();
			if (ch == '<') {
				if (ws)
					addWhiteSpaceRecord();
				parser_state = STATE_LT_SEEN;
			} else if (XMLChar.isContentChar(ch)) {
				// temp_offset = offset;
				parser_state = STATE_TEXT;
			} else if (ch == '&') {
				// has_amp = true;
				// temp_offset = offset;
				entityIdentifier();
				parser_state = STATE_TEXT;
			} else if (ch == ']') {
				if (r.skipChar(']')) {
					while (r.skipChar(']')) {
					}
					if (r.skipChar('>'))
						throw new ParseException("Error in text content: ]]> in text content" + formatLineNumber());
				}
				parser_state = STATE_TEXT;
			} else
				throw new ParseException("Error in text content: Invalid char" + formatLineNumber());
			return parser_state;
		} else
			throw new ParseException("Error in comment: Invalid terminating sequence" + formatLineNumber());
	}

	/**
	 * This private method processes declaration attributes
	 * 
	 * @return the parser state after which the parser loop jumps to
	 * @throws ParseException
	 * @throws EncodingException
	 * @throws EOFException
	 */
	private int process_dec_attr() throws ParseException, EncodingException, EOFException {
		int parser_state;
		if (ch == 'v' && r.skipChar('e') && r.skipChar('r') && r.skipChar('s') && r.skipChar('i') && r.skipChar('o') && r.skipChar('n')) {
			ch = getCharAfterS();
			if (ch == '=') {
				/*
				 * System.out.println( " " + (temp_offset - 1) + " " + 7 +
				 * " dec attr name version " + depth);
				 */
				if (singleByteEncoding)
					_writeVTD(temp_offset - 1, 7, TOKEN_DEC_ATTR_NAME, depth);
				else
					_writeVTD((temp_offset - 2) >> 1, 7, TOKEN_DEC_ATTR_NAME, depth);
			} else
				throw new ParseException("XML decl error: Invalid char" + formatLineNumber());
		} else
			throw new ParseException("XML decl error: should be version" + formatLineNumber());
		ch_temp = getCharAfterS();
		if (ch_temp != '\'' && ch_temp != '"')
			throw new ParseException("XML decl error: Invalid char to start attr name" + formatLineNumber());
		temp_offset = offset;
		// support 1.0 or 1.1
		if (r.skipChar('1') && r.skipChar('.') && (r.skipChar('0') || r.skipChar('1'))) {
			/*
			 * System.out.println( " " + temp_offset + " " + 3 +
			 * " dec attr val (version)" + depth);
			 */
			if (singleByteEncoding)
				_writeVTD(temp_offset, 3, TOKEN_DEC_ATTR_VAL, depth);
			else
				_writeVTD(temp_offset >> 1, 3, TOKEN_DEC_ATTR_VAL, depth);
		} else
			throw new ParseException("XML decl error: Invalid version(other than 1.0 or 1.1) detected" + formatLineNumber());
		if (!r.skipChar(ch_temp))
			throw new ParseException("XML decl error: version not terminated properly" + formatLineNumber());
		ch = r.getChar();
		// ? space or e
		if (XMLChar.isSpaceChar(ch)) {
			ch = getCharAfterS();
			temp_offset = offset - increment;
			if (ch == 'e') {
				if (r.skipChar('n') && r.skipChar('c') && r.skipChar('o') && r.skipChar('d') && r.skipChar('i') && r.skipChar('n') && r.skipChar('g')) {
					ch = r.getChar();
					if (XMLChar.isSpaceChar(ch))
						ch = getCharAfterS();
					if (ch == '=') {
						/*
						 * System.out.println( " " + (temp_offset) + " " + 8 +
						 * " dec attr name (encoding) " + depth);
						 */
						if (singleByteEncoding)
							_writeVTD(temp_offset, 8, TOKEN_DEC_ATTR_NAME, depth);
						else
							_writeVTD(temp_offset >> 1, 8, TOKEN_DEC_ATTR_NAME, depth);
					} else
						throw new ParseException("XML decl error: Invalid char" + formatLineNumber());
					ch_temp = getCharAfterS();
					if (ch_temp != '"' && ch_temp != '\'')
						throw new ParseException("XML decl error: Invalid char to start attr name" + formatLineNumber());
					temp_offset = offset;
					ch = r.getChar();
					switch (ch) {
					case 'a':
					case 'A':
						if ((r.skipChar('s') || r.skipChar('S')) && (r.skipChar('c') || r.skipChar('C')) && (r.skipChar('i') || r.skipChar('I'))
								&& (r.skipChar('i') || r.skipChar('I')) && r.skipChar(ch_temp)) {
							if (encoding != FORMAT_UTF_16LE && encoding != FORMAT_UTF_16BE) {
								if (must_utf_8)
									throw new EncodingException("Can't switch from UTF-8" + formatLineNumber());
								encoding = FORMAT_ASCII;
								r = new ASCIIReader();
								/*
								 * System.out.println( " " + (temp_offset) + " "
								 * + 5 + " dec attr val (encoding) " + depth);
								 */

								_writeVTD(temp_offset, 5, TOKEN_DEC_ATTR_VAL, depth);

								break;
							} else
								throw new ParseException("XML decl error: Can't switch encoding to ASCII" + formatLineNumber());
						}
						throw new ParseException("XML decl error: Invalid Encoding" + formatLineNumber());
					case 'c':
					case 'C':
						matchCPEncoding();
						break;
					case 'i':
					case 'I':
						matchISOEncoding();
						break;
					case 'u':
					case 'U':
						matchUTFEncoding();
						break;
					// now deal with windows encoding
					case 'w':
					case 'W':
						matchWindowsEncoding();
						break;
					default:
						throw new ParseException("XML decl Error: invalid encoding" + formatLineNumber());
					}
					ch = r.getChar();
					if (XMLChar.isSpaceChar(ch))
						ch = getCharAfterS();
					temp_offset = offset - increment;
				} else
					throw new ParseException("XML decl Error: Invalid char" + formatLineNumber());
			}

			if (ch == 's') {
				if (r.skipChar('t') && r.skipChar('a') && r.skipChar('n') && r.skipChar('d') && r.skipChar('a') && r.skipChar('l') && r.skipChar('o') && r.skipChar('n')
						&& r.skipChar('e')) {

					ch = getCharAfterS();
					if (ch != '=')
						throw new ParseException("XML decl error: Invalid char" + formatLineNumber());
					/*
					 * System.out.println( " " + temp_offset + " " + 3 +
					 * " dec attr name (standalone) " + depth);
					 */
					if (singleByteEncoding)
						_writeVTD(temp_offset, 10, TOKEN_DEC_ATTR_NAME, depth);
					else
						_writeVTD(temp_offset >> 1, 10, TOKEN_DEC_ATTR_NAME, depth);
					ch_temp = getCharAfterS();
					temp_offset = offset;
					if (ch_temp != '"' && ch_temp != '\'')
						throw new ParseException("XML decl error: Invalid char to start attr name" + formatLineNumber());
					ch = r.getChar();
					if (ch == 'y') {
						if (r.skipChar('e') && r.skipChar('s') && r.skipChar(ch_temp)) {
							/*
							 * System.out.println( " " + (temp_offset) + " " + 3
							 * + " dec attr val (standalone) " + depth);
							 */
							if (singleByteEncoding)
								_writeVTD(temp_offset, 3, TOKEN_DEC_ATTR_VAL, depth);
							else
								_writeVTD(temp_offset >> 1, 3, TOKEN_DEC_ATTR_VAL, depth);
						} else
							throw new ParseException("XML decl error: invalid val for standalone" + formatLineNumber());
					} else if (ch == 'n') {
						if (r.skipChar('o') && r.skipChar(ch_temp)) {
							/*
							 * System.out.println( " " + (temp_offset) + " " + 2
							 * + " dec attr val (standalone)" + depth);
							 */
							if (singleByteEncoding)
								_writeVTD(temp_offset, 2, TOKEN_DEC_ATTR_VAL, depth);
							else
								_writeVTD(temp_offset >> 1, 2, TOKEN_DEC_ATTR_VAL, depth);
						} else
							throw new ParseException("XML decl error: invalid val for standalone" + formatLineNumber());
					} else
						throw new ParseException("XML decl error: invalid val for standalone" + formatLineNumber());
				} else
					throw new ParseException("XML decl error" + formatLineNumber());
				ch = r.getChar();
				if (XMLChar.isSpaceChar(ch))
					ch = getCharAfterS();
			}
		}

		if (ch == '?' && r.skipChar('>')) {
			temp_offset = offset;
			ch = getCharAfterS();
			if (ch == '<') {
				parser_state = STATE_LT_SEEN;
			} else
				throw new ParseException("Other Error: Invalid Char in XML" + formatLineNumber());
		} else
			throw new ParseException("XML decl Error: Invalid termination sequence" + formatLineNumber());
		return parser_state;
	}

	/**
	 * This private method process DTD
	 * 
	 * @return the parser state after which the parser loop jumps to
	 * @throws ParseException
	 * @throws EncodingException
	 * @throws EOFException
	 */
	private int process_doc_type() throws ParseException, EncodingException, EOFException {
		int z = 1, parser_state;
		while (true) {
			ch = r.getChar();
			if (XMLChar.isValidChar(ch)) {
				if (ch == '>')
					z--;
				else if (ch == '<')
					z++;
				if (z == 0)
					break;
			} else
				throw new ParseException("Error in DOCTYPE: Invalid char" + formatLineNumber());
		}
		length1 = offset - temp_offset - increment;
		/*
		 * System.out.println( " " + (temp_offset) + " " + length1 +
		 * " DOCTYPE val " + depth);
		 */
		if (singleByteEncoding) {// if (encoding < FORMAT_UTF_16BE){
			if (length1 > MAX_TOKEN_LENGTH)
				throw new ParseException("Token Length Error:" + " DTD val too long (>0xfffff)" + formatLineNumber());
			_writeVTD(temp_offset, length1, TOKEN_DTD_VAL, depth);
		} else {
			if (length1 > (MAX_TOKEN_LENGTH << 1))
				throw new ParseException("Token Length Error:" + " DTD val too long (>0xfffff)" + formatLineNumber());
			_writeVTD(temp_offset >> 1, length1 >> 1, TOKEN_DTD_VAL, depth);
		}
		ch = getCharAfterS();
		if (ch == '<') {
			parser_state = STATE_LT_SEEN;
		} else
			throw new ParseException("Other Error: Invalid char in xml" + formatLineNumber());
		return parser_state;
	}

	/**
	 * This private method process the comment after the root document
	 * 
	 * @return the parser state after which the parser loop jumps to
	 * @throws ParseException
	 */
	private int process_end_comment() throws ParseException {
		int parser_state;
		while (true) {
			ch = r.getChar();
			if (XMLChar.isValidChar(ch)) {
				if (ch == '-' && r.skipChar('-')) {
					length1 = offset - temp_offset - (increment << 1);
					break;
				}
			} else
				throw new ParseException("Error in comment: Invalid Char" + formatLineNumber());
		}
		if (r.getChar() == '>') {
			// System.out.println(" " + temp_offset + " " + length1 +
			// " comment " + depth);
			if (singleByteEncoding) // if (encoding < FORMAT_UTF_16BE)
				writeVTDText(temp_offset, length1, TOKEN_COMMENT, depth);
			else
				writeVTDText(temp_offset >> 1, length1 >> 1, TOKEN_COMMENT, depth);
			parser_state = STATE_DOC_END;
			return parser_state;
		}
		throw new ParseException("Error in comment: '-->' expected" + formatLineNumber());

	}

	private int process_end_doc() throws ParseException, EncodingException, EOFException {
		int parser_state;
		ch = getCharAfterS();
		/* eof exception should be thrown here for premature ending */
		if (ch == '<') {

			if (r.skipChar('?')) {
				/* processing instruction after end tag of root element */
				temp_offset = offset;
				parser_state = STATE_END_PI;
				return parser_state;
			} else if (r.skipChar('!') && r.skipChar('-') && r.skipChar('-')) {
				// comments allowed after the end tag of the root element
				temp_offset = offset;
				parser_state = STATE_END_COMMENT;
				return parser_state;
			}
		}
		throw new ParseException("Other Error: XML not terminated properly" + formatLineNumber());
	}

	/**
	 * This private method processes PI after root document
	 * 
	 * @return the parser state after which the parser loop jumps to
	 * @throws ParseException
	 * @throws EncodingException
	 * @throws EOFException
	 */
	private int process_end_pi() throws ParseException, EncodingException, EOFException {
		int parser_state;
		ch = r.getChar();
		if (XMLChar.isNameStartChar(ch)) {
			if ((ch == 'x' || ch == 'X') && (r.skipChar('m') || r.skipChar('M')) && (r.skipChar('l') && r.skipChar('L'))) {
				// temp_offset = offset;
				ch = r.getChar();
				if (XMLChar.isSpaceChar(ch) || ch == '?')
					throw new ParseException("Error in PI: [xX][mM][lL] not a valid PI target" + formatLineNumber());
				// offset = temp_offset;
			}

			while (true) {
				// ch = getChar();
				if (!XMLChar.isNameChar(ch)) {
					break;
				}
				ch = r.getChar();
			}

			length1 = offset - temp_offset - increment;
			/*
			 * System.out.println( "" + (char) XMLDoc[temp_offset] + " " +
			 * (temp_offset) + " " + length1 + " PI Target " + depth);
			 */
			if (singleByteEncoding) {// if (encoding < FORMAT_UTF_16BE){
				if (length1 > MAX_TOKEN_LENGTH)
					throw new ParseException("Token Length Error:" + "PI name too long (>0xfffff)" + formatLineNumber());
				_writeVTD(temp_offset, length1, TOKEN_PI_NAME, depth);
			} else {
				if (length1 > (MAX_TOKEN_LENGTH << 1))
					throw new ParseException("Token Length Error:" + "PI name too long (>0xfffff)" + formatLineNumber());
				_writeVTD(temp_offset >> 1, length1 >> 1, TOKEN_PI_NAME, depth);
			}
			// length1 = 0;
			temp_offset = offset;
			if (XMLChar.isSpaceChar(ch)) {
				ch = getCharAfterS();

				while (true) {
					if (XMLChar.isValidChar(ch)) {
						if (ch == '?') {
							if (r.skipChar('>')) {
								parser_state = STATE_DOC_END;
								break;
							} else
								throw new ParseException("Error in PI: invalid termination sequence" + formatLineNumber());
						}
					} else
						throw new ParseException("Error in PI: Invalid char in PI val" + formatLineNumber());
					ch = r.getChar();
				}
				length1 = offset - temp_offset - (increment << 1);
				if (singleByteEncoding) {
					if (length1 > MAX_TOKEN_LENGTH)
						throw new ParseException("Token Length Error:" + "PI val too long (>0xfffff)" + formatLineNumber());
					_writeVTD(temp_offset, length1, TOKEN_PI_VAL, depth);
				} else {
					if (length1 > (MAX_TOKEN_LENGTH << 1))
						throw new ParseException("Token Length Error:" + "PI val too long (>0xfffff)" + formatLineNumber());
					_writeVTD(temp_offset >> 1, length1 >> 1, TOKEN_PI_VAL, depth);
				}
				// System.out.println(" " + temp_offset + " " + length1 +
				// " PI val " + depth);
			} else {
				if (singleByteEncoding) {
					_writeVTD((temp_offset), 0, TOKEN_PI_VAL, depth);
				} else {
					_writeVTD((temp_offset) >> 1, 0, TOKEN_PI_VAL, depth);
				}
				if ((ch == '?') && r.skipChar('>')) {
					parser_state = STATE_DOC_END;
				} else
					throw new ParseException("Error in PI: invalid termination sequence" + formatLineNumber());
			}
			// parser_state = STATE_DOC_END;
		} else
			throw new ParseException("Error in PI: invalid char in PI target" + formatLineNumber());
		return parser_state;
	}

	private int process_ex_seen() throws ParseException, EncodingException, EOFException {
		int parser_state;
		boolean hasDTD = false;
		ch = r.getChar();
		switch (ch) {
		case '-':
			if (r.skipChar('-')) {
				temp_offset = offset;
				parser_state = STATE_COMMENT;
				break;
			} else
				throw new ParseException("Error in comment: Invalid char sequence to start a comment" + formatLineNumber());
		case '[':
			if (r.skipChar('C') && r.skipChar('D') && r.skipChar('A') && r.skipChar('T') && r.skipChar('A') && r.skipChar('[') && (depth != -1)) {
				temp_offset = offset;
				parser_state = STATE_CDATA;
				break;
			} else {
				if (depth == -1)
					throw new ParseException("Error in CDATA: Wrong place for CDATA" + formatLineNumber());
				throw new ParseException("Error in CDATA: Invalid char sequence for CDATA" + formatLineNumber());
			}

		case 'D':
			if (r.skipChar('O') && r.skipChar('C') && r.skipChar('T') && r.skipChar('Y') && r.skipChar('P') && r.skipChar('E') && (depth == -1) && !hasDTD) {
				hasDTD = true;
				temp_offset = offset;
				parser_state = STATE_DOCTYPE;
				break;
			} else {
				if (hasDTD == true)
					throw new ParseException("Error for DOCTYPE: Only DOCTYPE allowed" + formatLineNumber());
				if (depth != -1)
					throw new ParseException("Error for DOCTYPE: DTD at wrong place" + formatLineNumber());
				throw new ParseException("Error for DOCTYPE: Invalid char sequence for DOCTYPE" + formatLineNumber());
			}
		default:
			throw new ParseException("Other Error: Unrecognized char after  "+(char)ch);
		}

		length1 = offset - temp_offset - increment;
		/*
		 * System.out.println( ((char) XMLDoc[temp_offset]) + " " +
		 * (temp_offset) + " " + length1 + " PI Target " + depth);
		 */
		// if (encoding < FORMAT_UTF_16BE){
		if (singleByteEncoding) {
			if (length1 > MAX_TOKEN_LENGTH)
				throw new ParseException("Token Length Error:" + " PI name too long (>0xfffff)" + formatLineNumber());
			_writeVTD((temp_offset), length1, TOKEN_PI_NAME, depth);
		} else {
			if (length1 > (MAX_TOKEN_LENGTH << 1))
				throw new ParseException("Token Length Error:" + " PI name too long (>0xfffff)" + formatLineNumber());
			_writeVTD((temp_offset) >> 1, (length1 >> 1), TOKEN_PI_NAME, depth);
		}
		// length1 = 0;
		// temp_offset = offset;
		/*
		 * if (XMLChar.isSpaceChar(ch)) { ch = r.getChar(); }
		 */
		// ch = r.getChar();
		if (ch == '?') {
			// insert zero length pi name tag
			if (singleByteEncoding) {
				_writeVTD((temp_offset), 0, TOKEN_PI_VAL, depth);
			} else {
				_writeVTD((temp_offset) >> 1, (0), TOKEN_PI_VAL, depth);
			}
			if (r.skipChar('>')) {
				temp_offset = offset;
				// ch = getCharAfterSe();
				ch = getCharAfterS();
				if (ch == '<') {
					if (ws)
						addWhiteSpaceRecord();
					parser_state = STATE_LT_SEEN;
				} else if (XMLChar.isContentChar(ch)) {
					parser_state = STATE_TEXT;
				} else if (ch == '&') {
					// has_amp = true;
					entityIdentifier();
					parser_state = STATE_TEXT;
				} else if (ch == ']') {
					if (r.skipChar(']')) {
						while (r.skipChar(']')) {
						}
						if (r.skipChar('>'))
							throw new ParseException("Error in text content: ]]> in text content" + formatLineNumber());
					}
					parser_state = STATE_TEXT;
				} else
					throw new ParseException("Error in text content: Invalid char" + formatLineNumber());
				return parser_state;
			} else
				throw new ParseException("Error in PI: invalid termination sequence" + formatLineNumber());
		}
		parser_state = STATE_PI_VAL;
		return parser_state;
	}

	/**
	 * This private method processes PI val
	 * 
	 * @return the parser state after which the parser loop jumps to
	 * @throws ParseException
	 * @throws EncodingException
	 * @throws EOFException
	 */
	private int process_pi_val() throws ParseException, EncodingException, EOFException {
		int parser_state;
		if (!XMLChar.isSpaceChar(ch))
			throw new ParseException("Error in PI: invalid termination sequence" + formatLineNumber());
		temp_offset = offset;
		ch = r.getChar();
		while (true) {
			if (XMLChar.isValidChar(ch)) {
				// System.out.println(""+(char)ch);
				if (ch == '?')
					if (r.skipChar('>')) {
						break;
					} /*
					 * else throw new ParseException(
					 * "Error in PI: invalid termination sequence for PI" +
					 * formatLineNumber());
					 */
			} else
				throw new ParseException("Errors in PI: Invalid char in PI val" + formatLineNumber());
			ch = r.getChar();
		}
		length1 = offset - temp_offset - (increment << 1);
		/*
		 * System.out.println( ((char) XMLDoc[temp_offset]) + " " +
		 * (temp_offset) + " " + length1 + " PI val " + depth);
		 */
		// if (length1 != 0)
		if (singleByteEncoding) {// if (encoding < FORMAT_UTF_16BE){
			if (length1 > MAX_TOKEN_LENGTH)
				throw new ParseException("Token Length Error:" + "PI VAL too long (>0xfffff)" + formatLineNumber());
			_writeVTD(temp_offset, length1, TOKEN_PI_VAL, depth);
		} else {
			if (length1 > (MAX_TOKEN_LENGTH << 1))
				throw new ParseException("Token Length Error:" + "PI VAL too long (>0xfffff)" + formatLineNumber());
			_writeVTD(temp_offset >> 1, length1 >> 1, TOKEN_PI_VAL, depth);
		}
		// length1 = 0;
		temp_offset = offset;
		// ch = getCharAfterSe();
		ch = getCharAfterS();
		if (ch == '<') {
			if (ws)
				addWhiteSpaceRecord();
			parser_state = STATE_LT_SEEN;
		} else if (XMLChar.isContentChar(ch)) {
			// temp_offset = offset;
			parser_state = STATE_TEXT;
		} else if (ch == '&') {
			// has_amp = true;
			// temp_offset = offset;
			entityIdentifier();
			parser_state = STATE_TEXT;
		} else if (ch == ']') {
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseException("Error in text content: ]]> in text content" + formatLineNumber());

			}
			parser_state = STATE_TEXT;
		} else
			throw new ParseException("Error in text content: Invalid char" + formatLineNumber());
		return parser_state;

	}

	private int process_qm_seen() throws ParseException, EncodingException, EOFException {
		temp_offset = offset;
		ch = r.getChar();
		if (XMLChar.isNameStartChar(ch)) {
			// temp_offset = offset;
			if ((ch == 'x' || ch == 'X') && (r.skipChar('m') || r.skipChar('M')) && (r.skipChar('l') || r.skipChar('L'))) {
				ch = r.getChar();
				if (ch == '?' || XMLChar.isSpaceChar(ch))
					throw new ParseException("Error in PI: [xX][mM][lL] not a valid PI targetname" + formatLineNumber());
				offset = getPrevOffset();
			}
			return STATE_PI_TAG;
		}
		throw new ParseException("Other Error: First char after > (a + 1));
			l1Buffer = new FastLongBuffer(i1);
			l2Buffer = new FastLongBuffer(i2);
			l3Buffer = new FastIntBuffer(i3);
		} else {

			int i1 = 7, i2 = 9, i3 = 11, i4 = 11, i5 = 11;
			if (docLen <= 1024) {
				// a = 1024; //set the floor
				a = 6;
				i1 = 5;
				i2 = 5;
				i3 = 5;
				i4 = 5;
				i5 = 5;
			} else if (docLen <= 4096) {
				a = 7;
				i1 = 6;
				i2 = 6;
				i3 = 6;
				i4 = 6;
				i5 = 6;
			} else if (docLen <= 1024 * 16) {
				a = 8;
				i1 = 7;
				i2 = 7;
				i3 = 7;
				i4 = 7;
				i5 = 7;
			} else if (docLen <= 1024 * 16 * 4) {
				// a = 2048;
				a = 11;
				i2 = 8;
				i3 = 8;
				i4 = 8;
				i5 = 8;
			} else if (docLen <= 1024 * 256) {
				// a = 1024 * 4;
				a = 12;
				i1 = 8;
				i2 = 9;
				i3 = 9;
				i4 = 9;
				i5 = 9;
			} else {
				// a = 1 << 15;
				a = 15;
			}

			VTDBuffer = new FastLongBuffer(a, len >> (a + 1));
			l1Buffer = new FastLongBuffer(i1);
			l2Buffer = new FastLongBuffer(i2);
			_l3Buffer = new FastLongBuffer(i3);
			_l4Buffer = new FastLongBuffer(i4);
			_l5Buffer = new FastIntBuffer(i5);
		}
	}

	/**
	 * The buffer-reuse version of setDoc The concept is to reuse LC and VTD
	 * buffer for XML parsing, instead of allocating every time
	 * 
	 * @param ba
	 * 
	 */
	public void setDoc_BR(byte[] ba) {
		setDoc_BR(ba, 0, ba.length);
	}

	/**
	 * The buffer-reuse version of setDoc The concept is to reuse LC and VTD
	 * buffer for XML parsing, instead of allocating every time
	 * 
	 * @param ba
	 *            byte[]
	 * @param os
	 *            int (in byte)
	 * @param len
	 *            int (in byte)
	 * 
	 */
	public void setDoc_BR(byte[] ba, int os, int len) {
		if (ba == null || os < 0 || len == 0 || ba.length < os + len) {
			throw new IllegalArgumentException("Illegal argument for setDoc_BR");
		}
		int a;
		br = true;
		depth = -1;
		increment = 1;
		BOM_detected = false;
		must_utf_8 = false;
		ch = ch_temp = 0;
		temp_offset = 0;
		XMLDoc = ba;
		docOffset = offset = os;
		docLen = len;
		endOffset = os + len;
		last_l1_index = last_l2_index = last_depth = last_l3_index = last_l4_index = 0;
		currentElementRecord = 0;
		nsBuffer1.size = 0;
		nsBuffer2.size = 0;
		nsBuffer3.size = 0;
		r = new UTF8Reader();
		if (shallowDepth) {
			int i1 = 8, i2 = 9, i3 = 11;
			if (docLen <= 1024) {
				// a = 1024; //set the floor
				a = 6;
				i1 = 5;
				i2 = 5;
				i3 = 5;
			} else if (docLen <= 4096) {
				a = 7;
				i1 = 6;
				i2 = 6;
				i3 = 6;
			} else if (docLen <= 1024 * 16) {
				a = 8;
				i1 = 7;
				i2 = 7;
				i3 = 7;
			} else if (docLen <= 1024 * 16 * 4) {
				// a = 2048;
				a = 11;
				i2 = 8;
				i3 = 8;
			} else if (docLen <= 1024 * 256) {
				// a = 1024 * 4;
				a = 12;
			} else {
				// a = 1 << 15;
				a = 15;
			}
			if (VTDBuffer == null) {
				VTDBuffer = new FastLongBuffer(a, len >> (a + 1));
				l1Buffer = new FastLongBuffer(i1);
				l2Buffer = new FastLongBuffer(i2);
				l3Buffer = new FastIntBuffer(i3);
			} else {
				VTDBuffer.size = 0;
				l1Buffer.size = 0;
				l2Buffer.size = 0;
				l3Buffer.size = 0;
			}
		} else {
			int i1 = 8, i2 = 9, i3 = 11, i4 = 11, i5 = 11;
			if (docLen <= 1024) {
				// a = 1024; //set the floor
				a = 6;
				i1 = 5;
				i2 = 5;
				i3 = 5;
				i4 = 5;
				i5 = 5;
			} else if (docLen <= 4096) {
				a = 7;
				i1 = 6;
				i2 = 6;
				i3 = 6;
				i4 = 6;
				i5 = 6;
			} else if (docLen <= 1024 * 16) {
				a = 8;
				i1 = 7;
				i2 = 7;
				i3 = 7;
			} else if (docLen <= 1024 * 16 * 4) {
				// a = 2048;
				a = 11;
				i2 = 8;
				i3 = 8;
				i4 = 8;
				i5 = 8;
			} else if (docLen <= 1024 * 256) {
				// a = 1024 * 4;
				a = 12;
				i1 = 8;
				i2 = 9;
				i3 = 9;
				i4 = 9;
				i5 = 9;
			} else if (docLen <= 1024 * 1024) {
				// a = 1024 * 4;
				a = 12;
				i1 = 8;
				i3 = 10;
				i4 = 10;
				i5 = 10;
			} else {
				// a = 1 << 15;
				a = 15;
				i1 = 8;
			}
			if (VTDBuffer == null) {
				VTDBuffer = new FastLongBuffer(a, len >> (a + 1));
				l1Buffer = new FastLongBuffer(i1);
				l2Buffer = new FastLongBuffer(i2);
				_l3Buffer = new FastLongBuffer(i3);
				_l4Buffer = new FastLongBuffer(i4);
				_l5Buffer = new FastIntBuffer(i5);
			} else {
				VTDBuffer.size = 0;
				l1Buffer.size = 0;
				l2Buffer.size = 0;
				_l3Buffer.size = 0;
				_l4Buffer.size = 0;
				_l5Buffer.size = 0;
			}
		}
	}

	/**
	 * This method writes the VTD+XML into an outputStream
	 * 
	 * @param os
	 * @throws IOException
	 * @throws IndexWriteException
	 * 
	 */
	public void writeIndex(OutputStream os) throws IOException, IndexWriteException {
		if (shallowDepth)
			IndexHandler.writeIndex_L3((byte) 1, this.encoding, this.ns, true, this.VTDDepth, 3, this.rootIndex, this.XMLDoc, this.docOffset, this.docLen, this.VTDBuffer,
					this.l1Buffer, this.l2Buffer, this.l3Buffer, os);
		else
			IndexHandler.writeIndex_L5((byte) 1, this.encoding, this.ns, true, this.VTDDepth, 5, this.rootIndex, this.XMLDoc, this.docOffset, this.docLen, this.VTDBuffer,
					this.l1Buffer, this.l2Buffer, this._l3Buffer, this._l4Buffer, this._l5Buffer, os);
	}

	/**
	 * This method writes the VTDs and LCs into an outputStream
	 * 
	 * @param os
	 * @throws IOException
	 * @throws IndexWriteException
	 * 
	 */
	public void writeSeparateIndex(OutputStream os) throws IOException, IndexWriteException {
		if (shallowDepth)
			IndexHandler.writeSeparateIndex_L3((byte) 2, this.encoding, this.ns, true, this.VTDDepth, 3, this.rootIndex,
			// this.XMLDoc,
					this.docOffset, this.docLen, this.VTDBuffer, this.l1Buffer, this.l2Buffer, this.l3Buffer, os);
		else
			IndexHandler.writeSeparateIndex_L5((byte) 2, this.encoding, this.ns, true, this.VTDDepth, 5, this.rootIndex,
			// this.XMLDoc,
					this.docOffset, this.docLen, this.VTDBuffer, this.l1Buffer, this.l2Buffer, this._l3Buffer, this._l4Buffer, this._l5Buffer, os);
	}

	/**
	 * This method writes the VTD+XML file into a file of the given name
	 * 
	 * @param fileName
	 * @throws IOException
	 * @throws IndexWriteException
	 * 
	 */
	public void writeIndex(String fileName) throws IOException, IndexWriteException {
		FileOutputStream fos = new FileOutputStream(fileName);
		writeIndex(fos);
		fos.close();
	}

	/**
	 * This method writes the VTDs and LCs into a file of the given name XML is
	 * not part of the index please refer to VTD-XML web site for the spec and
	 * explanation
	 * 
	 * @param fileName
	 * @throws IOException
	 * @throws IndexWriteException
	 * 
	 */
	public void writeSeparateIndex(String fileName) throws IOException, IndexWriteException {
		FileOutputStream fos = new FileOutputStream(fileName);
		writeSeparateIndex(fos);
		fos.close();
	}

	/**
	 * Write the VTD and LC into their storage container for where LC depth is
	 * 5.
	 * 
	 * @param offset
	 *            int
	 * @param length
	 *            int
	 * @param token_type
	 *            int
	 * @param depth
	 *            int
	 */
	private void writeVTD(int offset, int length, int token_type, int depth) {

		VTDBuffer.append(((long) ((token_type << 28) | ((depth & 0xff) << 20) | length) << 32) | offset);

		switch (depth) {
		case 0:
			rootIndex = VTDBuffer.size - 1;
			break;
		case 1:
			if (last_depth == 1) {
				l1Buffer.append(((long) last_l1_index << 32) | 0xffffffffL);
			} else if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			}
			last_l1_index = VTDBuffer.size - 1;
			last_depth = 1;
			break;
		case 2:
			if (last_depth == 1) {
				l1Buffer.append(((long) last_l1_index << 32) + l2Buffer.size);
			} else if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			}
			last_l2_index = VTDBuffer.size - 1;
			last_depth = 2;
			break;

		case 3:
			l3Buffer.append(VTDBuffer.size - 1);
			if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) + l3Buffer.size - 1);
			}
			last_depth = 3;
			break;
		default:
			// rootIndex = VTDBuffer.size() - 1;
		}
	}

	private void _writeVTD(int offset, int length, int token_type, int depth) {
		VTDBuffer.append(((long) ((token_type << 28) | ((depth & 0xff) << 20) | length) << 32) | offset);
	}

	private void writeVTDText(int offset, int length, int token_type, int depth) {
		if (length > MAX_TOKEN_LENGTH) {
			int k;
			int r_offset = offset;
			for (k = length; k > MAX_TOKEN_LENGTH; k = k - MAX_TOKEN_LENGTH) {
				VTDBuffer.append(((long) ((token_type << 28) | ((depth & 0xff) << 20) | MAX_TOKEN_LENGTH) << 32) | r_offset);
				r_offset += MAX_TOKEN_LENGTH;
			}
			VTDBuffer.append(((long) ((token_type << 28) | ((depth & 0xff) << 20) | k) << 32) | r_offset);
		} else {
			VTDBuffer.append(((long) ((token_type << 28) | ((depth & 0xff) << 20) | length) << 32) | offset);
		}
	}

	/**
	 * Write the VTD and LC into their storage container.
	 * 
	 * @param offset
	 *            int
	 * @param length
	 *            int
	 * @param token_type
	 *            int
	 * @param depth
	 *            int
	 */
	private void writeVTD_L5(int offset, int length, int token_type, int depth) {

		VTDBuffer.append(((long) ((token_type << 28) | ((depth & 0xff) << 20) | length) << 32) | offset);

		switch (depth) {
		case 0:
			rootIndex = VTDBuffer.size - 1;
			break;
		case 1:
			if (last_depth == 1) {
				l1Buffer.append(((long) last_l1_index << 32) | 0xffffffffL);
			} else if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			} else if (last_depth == 3) {
				_l3Buffer.append(((long) last_l3_index << 32) | 0xffffffffL);
			} else if (last_depth == 4) {
				_l4Buffer.append(((long) last_l4_index << 32) | 0xffffffffL);
			}
			last_l1_index = VTDBuffer.size - 1;
			last_depth = 1;
			break;
		case 2:
			if (last_depth == 1) {
				l1Buffer.append(((long) last_l1_index << 32) + l2Buffer.size);
			} else if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL);
			} else if (last_depth == 3) {
				_l3Buffer.append(((long) last_l3_index << 32) | 0xffffffffL);
			} else if (last_depth == 4) {
				_l4Buffer.append(((long) last_l4_index << 32) | 0xffffffffL);
			}
			last_l2_index = VTDBuffer.size - 1;
			last_depth = 2;
			break;

		case 3:
			/*
			 * if (last_depth == 1) { l1Buffer.append(((long) last_l1_index <<
			 * 32) + l2Buffer.size); } else
			 */
			if (last_depth == 2) {
				l2Buffer.append(((long) last_l2_index << 32) + _l3Buffer.size);
			} else if (last_depth == 3) {
				_l3Buffer.append(((long) last_l3_index << 32) | 0xffffffffL);
			} else if (last_depth == 4) {
				_l4Buffer.append(((long) last_l4_index << 32) | 0xffffffffL);
			}
			last_l3_index = VTDBuffer.size - 1;
			last_depth = 3;
			break;

		case 4:
			/*
			 * if (last_depth == 1) { l1Buffer.append(((long) last_l1_index <<
			 * 32) + l2Buffer.size); } else if (last_depth == 2) {
			 * l2Buffer.append(((long) last_l2_index << 32) | 0xffffffffL); }
			 * else
			 */
			if (last_depth == 3) {
				_l3Buffer.append(((long) last_l3_index << 32) + _l4Buffer.size);
			} else if (last_depth == 4) {
				_l4Buffer.append(((long) last_l4_index << 32) | 0xffffffffL);
			}
			last_l4_index = VTDBuffer.size - 1;
			last_depth = 4;
			break;
		case 5:
			_l5Buffer.append(VTDBuffer.size - 1);
			if (last_depth == 4) {
				_l4Buffer.append(((long) last_l4_index << 32) + _l5Buffer.size - 1);
			}
			last_depth = 5;
			break;

		// default:
		// rootIndex = VTDBuffer.size() - 1;
		}
	}

	/**
	 * 
	 * @throws ParseException
	 */
	private void qualifyElement() throws ParseException {
		int i = nsBuffer3.size - 1;
		// two cases:
		// 1. the current element has no prefix, look for xmlns
		// 2. the current element has prefix, look for xmlns:something

		int preLen = (int) ((currentElementRecord & 0xffff000000000000L) >> 48);
		int preOs = (int) currentElementRecord;
		while (i >= 0) {
			int t = nsBuffer3.upper32At(i);
			// with prefix, get full length and prefix length
			if ((t & 0xffff) - (t >> 16) == preLen) {
				// doing byte comparison here
				int os = nsBuffer3.lower32At(i) + (t >> 16) + increment;
				int k = 0;
				for (; k < preLen - increment; k++) {
					if (XMLDoc[os + k] != XMLDoc[preOs + k])
						break;
				}
				if (k == preLen - increment)
					return; // found the match
			}
			/*
			 * if ( (nsBuffer3.upper32At(i) & 0xffff0000) == 0){ return; }
			 */
			i--;
		}
		// no need to check if xml is the prefix
		if (checkPrefix(preOs, preLen))
			return;

		// print line # column# and full element name
		throw new ParseException("Name space qualification Exception: Element not qualified\n" + formatLineNumber((int) currentElementRecord));
	}

	private boolean checkPrefix(int os, int len) {
		// int i=0;
		if (encoding < FORMAT_UTF_16BE) {
			if (len == 4 && XMLDoc[os] == 'x' && XMLDoc[os + 1] == 'm' && XMLDoc[os + 2] == 'l') {
				return true;
			}
		} else if (encoding == FORMAT_UTF_16BE) {
			if (len == 8 && XMLDoc[os] == 0 && XMLDoc[os + 1] == 'x' && XMLDoc[os + 2] == 0 && XMLDoc[os + 3] == 'm' && XMLDoc[os + 4] == 0 && XMLDoc[os + 5] == 'l') {
				return true;
			}
		} else {
			if (len == 8 && XMLDoc[os] == 'x' && XMLDoc[os + 1] == 0 && XMLDoc[os + 2] == 'm' && XMLDoc[os + 3] == 0 && XMLDoc[os + 4] == 'l' && XMLDoc[os + 5] == 0) {
				return true;
			}
		}
		return false;
	}

	private boolean checkPrefix2(int os, int len) {
		// int i=0;
		if (encoding < FORMAT_UTF_16BE) {
			if (len == 5 && XMLDoc[os] == 'x' && XMLDoc[os + 1] == 'm' && XMLDoc[os + 2] == 'l' && XMLDoc[os + 3] == 'n' && XMLDoc[os + 4] == 's') {
				return true;
			}
		} else if (encoding == FORMAT_UTF_16BE) {
			if (len == 10 && XMLDoc[os] == 0 && XMLDoc[os + 1] == 'x' && XMLDoc[os + 2] == 0 && XMLDoc[os + 3] == 'm' && XMLDoc[os + 4] == 0 && XMLDoc[os + 5] == 'l'
					&& XMLDoc[os + 6] == 0 && XMLDoc[os + 7] == 'n' && XMLDoc[os + 8] == 0 && XMLDoc[os + 9] == 's') {
				return true;
			}
		} else {
			if (len == 10 && XMLDoc[os] == 'x' && XMLDoc[os + 1] == 0 && XMLDoc[os + 2] == 'm' && XMLDoc[os + 3] == 0 && XMLDoc[os + 4] == 'l' && XMLDoc[os + 5] == 0
					&& XMLDoc[os + 6] == 'n' && XMLDoc[os + 3] == 0 && XMLDoc[os + 8] == 's' && XMLDoc[os + 5] == 0) {
				return true;
			}
		}
		return false;
	}

	private long _getCharResolved(int byte_offset) {

		int ch = 0;
		int val = 0;
		long inc = 2 << (increment - 1);
		long l = r._getChar(byte_offset);

		ch = (int) l;

		if (ch != '&')
			return l;

		// let us handle references here
		// currentOffset++;
		byte_offset += increment;
		ch = getCharUnit(byte_offset);
		byte_offset += increment;
		switch (ch) {
		case '#':

			ch = getCharUnit(byte_offset);

			if (ch == 'x') {
				while (true) {
					byte_offset += increment;
					inc += increment;
					ch = getCharUnit(byte_offset);

					if (ch >= '0' && ch <= '9') {
						val = (val << 4) + (ch - '0');
					} else if (ch >= 'a' && ch <= 'f') {
						val = (val << 4) + (ch - 'a' + 10);
					} else if (ch >= 'A' && ch <= 'F') {
						val = (val << 4) + (ch - 'A' + 10);
					} else if (ch == ';') {
						inc += increment;
						break;
					}
				}
			} else {
				while (true) {
					ch = getCharUnit(byte_offset);
					byte_offset += increment;
					inc += increment;
					if (ch >= '0' && ch <= '9') {
						val = val * 10 + (ch - '0');
					} else if (ch == ';') {
						break;
					}
				}
			}
			break;

		case 'a':
			ch = getCharUnit(byte_offset);
			if (encoding < FORMAT_UTF_16BE) {
				if (ch == 'm') {
					if (getCharUnit(byte_offset + 1) == 'p' && getCharUnit(byte_offset + 2) == ';') {
						inc = 5;
						val = '&';
					}
				} else if (ch == 'p') {
					if (getCharUnit(byte_offset + 1) == 'o' && getCharUnit(byte_offset + 2) == 's' && getCharUnit(byte_offset + 3) == ';') {
						inc = 6;
						val = '\'';
					}
				}
			} else {
				if (ch == 'm') {
					if (getCharUnit(byte_offset + 2) == 'p' && getCharUnit(byte_offset + 4) == ';') {
						inc = 10;
						val = '&';
					}
				} else if (ch == 'p') {
					if (getCharUnit(byte_offset + 2) == 'o' && getCharUnit(byte_offset + 4) == 's' && getCharUnit(byte_offset + 6) == ';') {
						inc = 12;
						val = '\'';
					}
				}
			}
			break;

		case 'q':

			if (encoding < FORMAT_UTF_16BE) {
				if (getCharUnit(byte_offset) == 'u' && getCharUnit(byte_offset + 1) == 'o' && getCharUnit(byte_offset + 2) == 't' && getCharUnit(byte_offset + 3) == ';') {
					inc = 6;
					val = '\"';
				}
			} else {
				if (getCharUnit(byte_offset) == 'u' && getCharUnit(byte_offset + 2) == 'o' && getCharUnit(byte_offset + 4) == 't' && getCharUnit(byte_offset + 6) == ';') {
					inc = 12;
					val = '\"';
				}
			}
			break;
		case 'l':
			if (encoding < FORMAT_UTF_16BE) {
				if (getCharUnit(byte_offset) == 't' && getCharUnit(byte_offset + 1) == ';') {
					// offset += 2;
					inc = 4;
					val = '<';
				}
			} else {
				if (getCharUnit(byte_offset) == 't' && getCharUnit(byte_offset + 2) == ';') {
					// offset += 2;
					inc = 8;
					val = '<';
				}
			}
			break;
		case 'g':
			if (encoding < FORMAT_UTF_16BE) {
				if (getCharUnit(byte_offset) == 't' && getCharUnit(byte_offset + 1) == ';') {
					inc = 4;
					val = '>';
				}
			} else {
				if (getCharUnit(byte_offset) == 't' && getCharUnit(byte_offset + 2) == ';') {
					inc = 8;
					val = '>';
				}
			}
			break;
		}

		// currentOffset++;
		return val | (inc << 32);
	}

	// return 0;

	private int getCharUnit(int byte_offset) {
		return (encoding <= 2) ? XMLDoc[byte_offset] & 0xff : (encoding < FORMAT_UTF_16BE) ? r.decode(byte_offset)
				: (encoding == FORMAT_UTF_16BE) ? (((int) XMLDoc[byte_offset]) << 8 | XMLDoc[byte_offset + 1]) : (((int) XMLDoc[byte_offset + 1]) << 8 | XMLDoc[byte_offset]);
	}

	private boolean matchURL(int bos1, int len1, int bos2, int len2) {
		long l1, l2;
		int i1 = bos1, i2 = bos2, i3 = bos1 + len1, i4 = bos2 + len2;
		// System.out.println("--->"+new String(XMLDoc, bos1, len1)+" "+new
		// String(XMLDoc,bos2,len2));
		while (i1 < i3 && i2 < i4) {
			l1 = _getCharResolved(i1);
			l2 = _getCharResolved(i2);
			if ((int) l1 != (int) l2)
				return false;
			i1 += (int) (l1 >> 32);
			i2 += (int) (l2 >> 32);
		}
		if (i1 == i3 && i2 == i4)
			return true;
		return false;
	}

	private void checkAttributeUniqueness() throws ParseException {
		boolean unique = true;
		boolean unequal;
		for (int i = 0; i < attr_count; i++) {
			unequal = false;
			int prevLen = (int) attr_name_array[i];
			if (length1 == prevLen) {
				int prevOffset = (int) (attr_name_array[i] >> 32);
				for (int j = 0; j < prevLen; j++) {
					if (XMLDoc[prevOffset + j] != XMLDoc[temp_offset + j]) {
						unequal = true;
						break;
					}
				}
			} else
				unequal = true;
			unique = unique && unequal;
		}
		if (!unique && attr_count != 0)
			throw new ParseException("Error in attr: Attr name not unique" + formatLineNumber());
		unique = true;
		if (attr_count < attr_name_array.length) {
			attr_name_array[attr_count] = ((long) (temp_offset) << 32) | length1;
			attr_count++;
		} else // grow the attr_name_array by 16
		{
			long[] temp_array = attr_name_array;
			/*
			 * System.out.println( "size increase from " + temp_array.length +
			 * "  to " + (attr_count + 16));
			 */
			attr_name_array = new long[attr_count + ATTR_NAME_ARRAY_SIZE];
			System.arraycopy(temp_array, 0, attr_name_array, 0, attr_count);
			/*
			 * for (int i = 0; i < attr_count; i++) { attr_name_array[i] =
			 * temp_array[i]; }
			 */
			attr_name_array[attr_count] = ((long) (temp_offset) << 32) | length1;
			attr_count++;
		}
		// insert prefix attr node into the prefixed_attr_name array
		// xml:something will not be inserted
		// System.out.println(" prefixed attr count ===>"+prefixed_attr_count);
		// System.out.println(" length2 ===>"+length2);
		if (ns && !is_ns && length2 != 0) {
			if ((increment == 1 && length2 == 3 && matchXML(temp_offset)) || (increment == 2 && length2 == 6 && matchXML(temp_offset))) {
				return;
			} else if (prefixed_attr_count < prefixed_attr_name_array.length) {
				prefixed_attr_name_array[prefixed_attr_count] = ((long) (temp_offset) << 32) | (length2 << 16) | length1;
				prefixed_attr_count++;
			} else {
				long[] temp_array1 = prefixed_attr_name_array;
				prefixed_attr_name_array = new long[prefixed_attr_count + ATTR_NAME_ARRAY_SIZE];
				prefix_URL_array = new int[prefixed_attr_count + ATTR_NAME_ARRAY_SIZE];
				System.arraycopy(temp_array1, 0, prefixed_attr_name_array, 0, prefixed_attr_count);
				// System.arraycopy(temp_array1, 0, prefixed_attr_val_array, 0,
				// prefixed_attr_count)
				/*
				 * for (int i = 0; i < attr_count; i++) { attr_name_array[i] =
				 * temp_array[i]; }
				 */
				prefixed_attr_name_array[prefixed_attr_count] = ((long) (temp_offset) << 32) | (length2 << 16) | length1;
				prefixed_attr_count++;
			}
		}
	}

	private void handleOtherTextChar(int ch) throws ParseException {
		if (ch == '&') {
			// has_amp = true;
			if (!XMLChar.isValidChar(entityIdentifier()))
				throw new ParseException("Error in text content: Invalid char in text content " + formatLineNumber());
			// parser_state = STATE_TEXT;
		} else if (ch == ']') {
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseException("Error in text content: ]]> in text content" + formatLineNumber());
			}
		} else
			throw new ParseException("Error in text content: Invalid char in text content " + formatLineNumber());
	}

	private void handleOtherTextChar2(int ch) throws ParseException {
		if (ch == '&') {
			// has_amp = true;
			// temp_offset = offset;
			entityIdentifier();
			// parser_state = STATE_TEXT;
		} else if (ch == ']') {
			if (r.skipChar(']')) {
				while (r.skipChar(']')) {
				}
				if (r.skipChar('>'))
					throw new ParseException("Error in text content: ]]> in text content" + formatLineNumber());
			}
			// parser_state = STATE_TEXT;
		} else
			throw new ParseException("Error in text content: Invalid char" + formatLineNumber());
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy