com.sun.xml.parser.Parser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of greenpepper-remote-agent Show documentation
The newest version!
/*
 * $Id: Parser.java,v 1.13 1999/05/14 16:50:22 mode Exp $
 * 
 * Copyright (c) 1998-1999 Sun Microsystems, Inc. All Rights Reserved.
 * 
 * This software is the confidential and proprietary information of Sun
 * Microsystems, Inc. ("Confidential Information").  You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Sun.
 * 
 * SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF THE
 * SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
 * PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR ANY DAMAGES
 * SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR DISTRIBUTING
 * THIS SOFTWARE OR ITS DERIVATIVES.
 */


package com.sun.xml.parser;

import java.io.IOException;
import java.io.Reader;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Vector;

import org.xml.sax.*;

import com.sun.xml.util.MessageCatalog;
import com.sun.xml.util.XmlChars;


//
// NOTE:  when maintaining this code, take care to keep the message
// catalogue(s) up to date!!  It's important that the diagnostics
// be informative.
//


/**
 * This implements a fast non-validating SAX parser.  This one always 
 * processes external parsed entities, strictly adheres to the XML 1.0
 * specification, and provides useful diagnostics.  It supports an optimization
 * allowing faster processing of valid standalone XML documents.  For
 * multi-language applications (such as web servers using XML processing
 * to create dynamic content), a method supports choosing a locale for
 * parser diagnostics which is both understood by the message recipient
 * and supported by the parser.
 *
 *  This conforms to the XML 1.0 specification.  To configure an XML
 * processor which tests document conformance against XML Namespaces,
 * provide a DtdEventListener which examines declarations of
 * entities and notations, and have your document listener check other
 * constraints such as ensuring xmlns* attribute values properly
 * declare all namespace prefixes.  (Only element and attribute names may
 * contain colons, and even then the name prefix before the colon must be
 * properly declared.)
 *
 * <P> SAX parsers produce a stream of parse events, which applications
 * process to create an object model which is specific to their tasks.
 * Applications which do not want to process event streams in that way
 * should use an API producing a standardized object model, such as the
 * W3C's Document Object Model (DOM).  This parser supports
 * building fully conformant DOM Document objects, through
 * use of DtdEventListener extensions to SAX in conjunction with an
 * appropriate implementation of a SAX DocumentHandler.  In
 * addition, it supports some features (exposing comments, CDATA sections,
 * and entity references) which are allowed by DOM but not required to
 * be reported by conformant XML processors.  (As usual, the default
 * handler for parsing events other than fatal errors ignores them.)
 *
 * @see ValidatingParser
 *
 * @author David Brownell
 * @version $Revision: 1.13 $
 */
public class Parser implements org.xml.sax.Parser
{
    // stack of input entities being merged; parseInternal() installs the
    // document entity here and closes/clears it when parsing finishes
    private InputEntity		in;

    // temporaries reused during parsing; allocated by init() and dropped
    // again in the finally clause of parseInternal()
    private AttributeListImpl	attTmp;
    private StringBuffer	strTmp;
    private char		nameTmp [];
    private NameCache		nameCache;
    // read back by parseLiteral() right after surrogatesToCharTmp()
    private char		charTmp [] = new char [2];

    // NOTE:  odd heap behavior, at least with classic VM: if "strTmp" is
    // reused, LOTS of extra memory is consumed in some simple situations.
    // JVM bug filed; it's no longer a win to reuse it as much, in any case. 

    // parsing modes
    // (isValidating is assigned through setIsValidating(); turning it on
    // forces fastStandalone off; isInAttribute is raised by parseLiteral()
    // while an attribute value is being scanned)
    private boolean		isValidating = false;
    private boolean		fastStandalone = false;
    private boolean		isInAttribute = false;

    // temporary DTD parsing state, cleared by init()
    private boolean		inExternalPE;
    private boolean		doLexicalPE;
    private boolean		donePrologue;

    // info about the document
    private boolean		isStandalone;
    private String		rootElementName;

    // DTD state, used during parsing; the tables are emptied both by
    // init() and when parseInternal() finishes
    private boolean		ignoreDeclarations;
    private SimpleHashtable	elements = new SimpleHashtable (47);
    private SimpleHashtable	params = new SimpleHashtable (7);

    // exposed to package-private subclass
    Hashtable			notations = new Hashtable (7);
    SimpleHashtable		entities = new SimpleHashtable (17);

    // stuff associated with SAX; setHandlers() substitutes the shared
    // default handler for any handler that is still null
    private DocumentHandler     docHandler;
    private DTDHandler          dtdHandler;
    private EntityResolver      resolver;
    private ErrorHandler        errHandler;
    private Locale              locale;
    private Locator		locator;

    // extended parser API support; these follow the handlers given to
    // setDTDHandler() and setDocumentHandler() when those handlers also
    // implement the extended listener interfaces
    private DtdEventListener	dtdListener;
    private LexicalEventListener lexicalListener;


    // Compile time option:  disable validation support for a better
    // fit in memory-critical environments (P-Java etc).  Doing that
    // and removing the validating parser support saves (at this time)
    // about 15% in size.
    //
    // When this is false, setIsValidating() throws a RuntimeException
    // instead of recording the requested mode.

    private static final boolean	supportValidation = true;


    // string constants -- use these copies so "==" works
    // (i.e. identity comparison against these exact String objects)
    // package private
    static final String		strANY = "ANY";
    static final String		strEMPTY = "EMPTY";

    ////////////////////////////////////////////////////////////////
    //
    // PARSER methods
    //
    ////////////////////////////////////////////////////////////////

    /**
     * Creates a SAX parser.  A document locator is allocated, and every
     * handler that has not been assigned is pointed at the shared
     * default handler.
     */
    public Parser ()
    {
	this.locator = new DocLocator ();
	this.setHandlers ();
    }


    /**
     * SAX: Lets applications pick the locale used for diagnostics.
     *
     * @param l The locale to use, or null to use system defaults
     *	(which may include only message IDs).
     * @throws SAXException If the message catalog has no diagnostics
     *	for that locale; the current locale is left unchanged then.
     */
    public void setLocale (Locale l)
    throws SAXException
    {
	// null is always accepted; it selects the system defaults
	if (l == null) {
	    locale = null;
	    return;
	}

	if (!messages.isLocaleSupported (l.toString ())) {
	    // the complaint itself is rendered in the current locale
	    Object	args [] = { l };
	    throw new SAXException (
		    messages.getMessage (locale, "P-078", args));
	}
	locale = l;
    }

    /** Reports the locale currently used for diagnostics. */
    public Locale getLocale ()
    {
	return this.locale;
    }
    
    /**
     * Picks a client locale for diagnostics:  the message catalog is
     * asked to choose from the given list of languages, and when it
     * finds one, that locale is installed with <code>setLocale()</code>.
     * Such a list could be provided by a variety of user preference
     * mechanisms, including the HTTP <em>Accept-Language</em>
     * header field.
     *
     * @see com.sun.xml.util.MessageCatalog
     *
     * @param languages Array of language specifiers, ordered with the most
     *	preferable one at the front.  For example, "en-ca" then "fr-ca",
     *  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
     * @return The chosen locale, or null.
     */
    public Locale chooseLocale (String languages [])
    throws SAXException
    {
	Locale	chosen = messages.chooseLocale (languages);

	// nothing suitable:  leave the current locale alone
	if (chosen == null)
	    return null;

	setLocale (chosen);
	return chosen;
    }


    /** SAX: Assigns the object applications use to resolve entities. */
    public void setEntityResolver (EntityResolver r)
    {
	this.resolver = r;
    }
 
    /** Reports the object currently used to resolve entities. */
    public EntityResolver getEntityResolver ()
    {
	return this.resolver;
    }


    /**
     * SAX: Assigns the handler for the basic SAX DTD events (unparsed
     * entity information) and, at the same time, for the extended
     * "DtdEventListener" events.  A handler which also implements the
     * extended interface receives those events; for any other handler
     * they go to the default handler.  Passing null selects the default
     * handler for both.
     *
     * @see DtdEventListener
     */
    public void setDTDHandler (DTDHandler handler)
    {
	DTDHandler	target = (handler != null) ? handler : defaultHandler;

	dtdHandler = target;
	dtdListener = (target instanceof DtdEventListener)
		? (DtdEventListener) target
		: defaultHandler;
    }

    /** Reports the handler that receives unparsed entity information. */
    public DTDHandler getDTDHandler ()
    {
	return this.dtdHandler;
    }


    /**
     * SAX: The primary application hook into the parser.  This assigns
     * the handler for the basic SAX document events and, at the same
     * time, for the extended "lexical" events.  A handler which also
     * implements the extended interface receives those events; for any
     * other handler they go to the default handler.  Passing null
     * selects the default handler for both.
     *
     * @see LexicalEventListener
     */
    public void setDocumentHandler (DocumentHandler handler)
    {
	DocumentHandler	target = (handler != null) ? handler : defaultHandler;

	docHandler = target;
	lexicalListener = (target instanceof LexicalEventListener)
		? (LexicalEventListener) target
		: defaultHandler;
    }

    /** Reports the document handler being driven by the parser. */
    public DocumentHandler getDocumentHandler ()
    {
	return this.docHandler;
    }


    /**
     * SAX: Replaces the error handler; for example, to ensure that
     * validity errors abort parsing, or to report errors through the
     * correct channels.
     */
    public void setErrorHandler (ErrorHandler handler)
    {
	this.errHandler = handler;
    }

    /** Reports the object currently used for error handling. */
    public ErrorHandler getErrorHandler ()
    {
	return this.errHandler;
    }


    /**
     * SAX: Parses the document supplied as an input source.  The parser
     * is reset to its "before a document" state first.
     */
    public void parse (InputSource in)
    throws SAXException, IOException
    {
	this.init ();
	this.parseInternal (in);
    }

    /**
     * SAX: Parses the document identified by a URI.  The parser is
     * reset, the entity resolver is asked for the input source, and the
     * result is then parsed.
     */
    public void parse (String uri)
    throws SAXException, IOException
    {
	init ();

	InputSource	source = resolver.resolveEntity (null, uri);

	if (source == null) {
	    // the resolver punted, so the parser opens the URI itself
	    source = Resolver.createInputSource (
		    new java.net.URL (uri), false);

	} else if (source.getSystemId () == null) {
	    // the resolver built an input source without a system ID;
	    // warn to minimize later confusion, then patch it up enough
	    // that relative URIs work
	    warning ("P-065", null);
	    source.setSystemId (uri);
	}

	parseInternal (source);
    }

    /**
     * Setting this flag enables faster processing of valid standalone
     * documents: external DTD information is not processed, and no
     * attribute normalization or defaulting is done.  This optimization
     * is only permitted in non-validating parsers; for validating
     * parsers, this mode is silently disabled.
     *
     * <P> For documents which are declared as standalone, but which are
     * not valid, a fatal error may be reported for references to externally
     * defined entities.  That could happen in any nonvalidating parser which
     * did not read externally defined entities.  Also, if any attribute
     * values need normalization or defaulting, it will not be done.
     */
    public void setFastStandalone (boolean value)
    {
	// a validating parser never runs in this mode
	if (isValidating)
	    fastStandalone = false;
	else
	    fastStandalone = value;
    }

    /**
     * Tells whether processing of all external DTD information is
     * skipped for standalone documents.
     */
    public boolean isFastStandalone ()
    {
	return this.fastStandalone;
    }


    /**
     * In support of the HTML DOM model of client side
     * <em>&lt;xhtml:script&gt;</em> tag processing, this method permits
     * data to be spliced into the input stream.  This method would
     * normally be called from an <em>endElement</em> callback to put the
     * buffered result of calls such as DOM <em>HTMLDocument.write</em>
     * into the input stream.  A request for zero or fewer characters
     * is ignored.
     */
    public void pushInputBuffer (char buf [], int offset, int len)
    throws SAXException
    {
	if (len <= 0)
	    return;

	char	data [] = buf;

	// only a request covering the whole array is pushed without
	// copying; anything else is copied into an exactly sized array
	// (inelegant, but that's the worst penalty for now)
	if (!(offset == 0 && len == buf.length)) {
	    data = new char [len];
	    System.arraycopy (buf, offset, data, 0, len);
	}
	pushReader (data, null, false);
    }


    // package private
    // Records whether validation is requested.  Requesting it turns the
    // "fast standalone" mode off; a build compiled without validation
    // support rejects the call with a RuntimeException.
    void setIsValidating (boolean value)
    {
	if (!supportValidation)
	    throw new RuntimeException (messages.getMessage (locale, "V-000"));

	isValidating = value;
	if (value)
	    fastStandalone = false;
    }


    // puts the parser back into its "before a document" state
    private void init ()
    {
	// no input entity is active yet
	in = null;

	// per-document flags and document info
	isStandalone = false;
	isInAttribute = false;
	inExternalPE = false;
	doLexicalPE = false;
	donePrologue = false;
	ignoreDeclarations = false;
	rootElementName = null;

	// fresh temporaries for this parse
	attTmp = new AttributeListImpl ();
	strTmp = new StringBuffer ();
	nameTmp = new char [20];
	nameCache = new NameCache ();

	// drop declarations left over from an earlier document
	entities.clear ();
	notations.clear ();
	params.clear ();
	elements.clear ();

	// predefined references ... re-interpreted later
	String	predefined [][] = {
	    { "amp", "&" },
	    { "lt", "<" },
	    { "gt", ">" },
	    { "quot", "\"" },
	    { "apos", "'" }
	};
	for (int i = 0; i < predefined.length; i++)
	    builtin (predefined [i][0], predefined [i][1]);

	// defaults for whatever the application left unset
	if (locale == null)
	    locale = Locale.getDefault ();
	if (resolver == null)
	    resolver = new Resolver ();

	setHandlers ();
    }

    // shared handler standing in for any handler the application has
    // not provided
    private static final ListenerBase	defaultHandler = new ListenerBase ();

    // points every handler which is still null at the default handler;
    // handlers that are already set are left alone
    private void setHandlers ()
    {
	if (docHandler == null)
	    docHandler = defaultHandler;
	if (lexicalListener == null)
	    lexicalListener = defaultHandler;

	if (dtdHandler == null)
	    dtdHandler = defaultHandler;
	if (dtdListener == null)
	    dtdListener = defaultHandler;

	if (errHandler == null)
	    errHandler = defaultHandler;
    }

    // records an internal entity with the given name and replacement
    // text in the general entity table
    private void builtin (String entityName, String entityValue)
    {
	char	replacement [] = entityValue.toCharArray ();

	entities.put (entityName,
		new InternalEntity (entityName, replacement));
    }



    ////////////////////////////////////////////////////////////////
    //
    // parsing is by recursive descent, code roughly
    // following the BNF rules except tweaked for simple
    // lookahead.  rules are more or less in numeric order,
    // except where code sharing suggests other structures.
    //
    // a classic benefit of recursive descent parsers:  it's
    // relatively easy to get diagnostics that make sense.
    //
    ////////////////////////////////////////////////////////////////


    //
    // CHAPTER 2:  Documents
    //

    // Drives one complete parse:  sets up the document entity, reports
    // startDocument, walks prolog / root element / trailing Misc, reports
    // endDocument, and always releases per-document state afterwards.
    // Callers (the public parse methods) invoke init() first.
    private void parseInternal (InputSource input)
    throws SAXException, IOException
    {
	// NOTE(review): fatal() presumably always throws; the code below
	// relies on that (a null input would otherwise reach in.init)
	if (input == null)
	    fatal ("P-000");

	try {
	    in = InputEntity.getInputEntity (errHandler, locale);
	    in.init (input, null, null, false);

	    //
	    // doc handler sees the locator, lots of PIs, DTD info
	    // about external entities and notations, then the body.
	    //Need to initialize this after InputEntity cos locator uses
	    //InputEntity's systemid, publicid, line no. etc

	    docHandler.setDocumentLocator (locator);

	    docHandler.startDocument ();

	    // [1] document ::= prolog element Misc*
	    // [22] prolog ::= XMLDecl? Misc* (DoctypeDecl Misc *)?

	    maybeXmlDecl ();
	    maybeMisc (false);

	    // a missing DOCTYPE only earns a warning, and only when
	    // validating
	    if (!maybeDoctypeDecl ()) {
		if (supportValidation && isValidating)
		    warning ("V-001", null);
	    }
	    
	    maybeMisc (false);
	    donePrologue = true;

	    //
	    // One root element ... then basically PIs before EOF.
	    //
	    if (!in.peekc ('<') || !maybeElement (null))
		fatal ("P-067");
	    //Check subclass. Used for validation of id refs.
	    afterRoot ();
	    maybeMisc (true);
	    // anything left over is reported with the hex code of the
	    // next character
	    if (!in.isEOF ())
		fatal ("P-001", new Object []
			{ Integer.toHexString (((int)getc ())) } );
	    docHandler.endDocument ();

	} catch (EndOfInputException e) {
	    // input ran out:  P-002 names the entity when the current
	    // input is not the document entity, else P-003 is reported
	    if (!in.isDocument ()) {
		String name = in.getName ();
		do {	// force a relevant URI and line number  
		    in = in.pop ();
		} while (in.isInternal ());
		fatal ("P-002", new Object []
			{ name },
			e);
	    } else
		fatal ("P-003", null, e);

	} catch (RuntimeException e) {
	    // Don't discard location that triggered the exception
	    // (the message falls back to the exception's class name)
	    throw new SAXParseException (
		e.getMessage () != null
		    ? e.getMessage ()
		    : e.getClass ().getName (),
		locator.getPublicId (), locator.getSystemId (),
		locator.getLineNumber (), locator.getColumnNumber (),
		e);

	} finally {
	    // recycle temporary data used during parsing
	    strTmp = null;
	    attTmp = null;
	    nameTmp = null;
	    nameCache = null;

	    // ditto input sources etc
	    if (in != null) {
		in.close ();
		in = null;
	    }

	    // get rid of all DTD info ... some of it would be
	    // useful for editors etc, investigate later.

	    params.clear ();
	    entities.clear ();
	    notations.clear ();
	    elements.clear ();

	    // subclass hook; runs whether or not parsing succeeded
	    afterDocument ();
	}
    }

    // package private -- hook for the subclass; parseInternal() calls
    // it once the root element has been parsed.  Does nothing here.
    void afterRoot ()
    throws SAXException
    {
    }

    // package private -- hook for the subclass; parseInternal() calls
    // it last in its cleanup, after every parse.  Does nothing here.
    void afterDocument ()
    {
    }

    // [3] S ::= (#x20 | #x9 | #xd | #xa)+
    // Requires whitespace at this point.  "roleId" is the ID of a
    // message describing what the whitespace separates; it is only
    // used to build the P-004 diagnostic.
    private void whitespace (String roleId) throws IOException, SAXException
    {
	if (maybeWhitespace ())
	    return;

	String	role = messages.getMessage (locale, roleId);

	fatal ("P-004", new Object [] { role });
    }

   	// S?
    // Skips optional whitespace and returns true if any was seen.
    // Outside lexical PE processing in an external PE, the work is
    // delegated to the current input entity; otherwise characters are
    // pulled one at a time through this parser's own getc().
    private boolean maybeWhitespace () throws IOException, SAXException
    {
	if (!(inExternalPE && doLexicalPE))
	    return in.maybeWhitespace ();

	// see getc() for the PE logic -- this lets us splice
	// expansions of PEs in "anywhere".  getc() has smarts,
	// so for external PEs we don't bypass it.

	// XXX we can marginally speed PE handling, and certainly
	// be cleaner (hence potentially more correct), by using
	// the observations that expanded PEs only start and stop
	// where whitespace is allowed.  getc wouldn't need any
	// "lexical" PE expansion logic, and no other method needs
	// to handle termination of PEs.  (parsing of literals would
	// still need to pop entities, but not parsing of references
	// in content.)

	char c = getc();
	boolean saw = false;

	while (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
	    saw = true;

	    // this gracefully ends things when we stop playing
	    // with internal parameters.  caller should have a
	    // grammar rule allowing whitespace at end of entity.
	    if (in.isEOF () && !in.isInternal ())
		return saw;
	    c = getc ();
	}
	// the character that ended the run is not whitespace; give it back
	ungetc ();
	return saw;
    }

    // returns the Name found at the current position, or null when the
    // input does not start with a name
    private String maybeGetName ()
    throws IOException, SAXException
    {
	NameCacheEntry	cached = maybeGetNameCacheEntry ();

	if (cached != null)
	    return cached.name;
	return null;
    }

    // [5] Name ::= (Letter|'_'|':') (Namechar)*
    // Returns the cache entry for the name starting at the current
    // position; when the next character cannot start a name it is
    // pushed back and null is returned.
    private NameCacheEntry maybeGetNameCacheEntry ()
    throws IOException, SAXException
    {
	char		first = getc ();

	if (XmlChars.isLetter (first) || first == ':' || first == '_')
	    return nameCharString (first);

	ungetc ();
	return null;
    }

    // [7] Nmtoken ::= (Namechar)+
    // Used when parsing enumerations.  The first character must be a
    // name character, otherwise P-006 is reported with that character.
    private String getNmtoken ()
    throws SAXException, IOException
    {
	char	first = getc ();

	if (!XmlChars.isNameChar (first)) {
	    Object	args [] = { new Character (first) };
	    fatal ("P-006", args);
	}

	NameCacheEntry	token = nameCharString (first);
	return token.name;
    }

    // Collects the name characters following "c" (which the caller has
    // already consumed) and looks the result up in the name cache.
    //
    // n.b. this runs while attribute values are being parsed (for
    // internal references), so strTmp is off limits here; it is also a
    // CPU and memory hotspot (called at least once per element), hence
    // the reusable nameTmp buffer.

    private NameCacheEntry nameCharString (char c)
    throws IOException, SAXException
    {
	int	used = 1;
	char	next;

	nameTmp [0] = c;
	// the input entity hands back 0 once the name is over
	while ((next = in.getNameChar ()) != 0) {
	    if (used >= nameTmp.length) {
		// out of room:  grow the scratch buffer by ten chars
		char grown [] = new char [nameTmp.length + 10];
		System.arraycopy (nameTmp, 0, grown, 0, nameTmp.length);
		nameTmp = grown;
	    }
	    nameTmp [used++] = next;
	}
	return nameCache.lookupEntry (nameTmp, used);
    }

    //
    // much similarity between parsing entity values in DTD
    // and attribute values (in DTD or content) ... both follow
    // literal parsing rules, newline canonicalization, etc
    //
    // leaves value in 'strTmp' ... either a "replacement text" (4.5),
    // or else partially normalized attribute value (the first bit
    // of 3.3.3's spec, without the "if not CDATA" bits).
    //
    // isEntityValue selects the EntityValue rules (true) or the
    // AttValue rules (false).  The opening quote is read here; the
    // literal ends at the matching quote read from the same input
    // entity the literal started in.
    //
    private void parseLiteral (boolean isEntityValue)
    throws IOException, SAXException
    {
	// [9] EntityValue ::=
	//	'"' ([^"&%] | Reference | PEReference)* '"'
	//    |	"'" ([^'&%] | Reference | PEReference)* "'"
	// [10] AttValue ::=
	//	'"' ([^"&]  | Reference		     )* '"'
	//    |	"'" ([^'&]  | Reference		     )* "'"
	char		quote = getc ();
	char		c;
	// remembered so quotes inside expanded entities don't end the literal
	InputEntity	source = in;

	if (quote != '\'' && quote != '"')
	    fatal ("P-007");

	// don't report entity expansions within attributes,
	// they're reported "fully expanded" via SAX
	isInAttribute = !isEntityValue;

	// get value into strTmp
	strTmp = new StringBuffer ();

	// scan, allowing entity push/pop wherever ...
	// expanded entities can't terminate the literal!
	for (;;) {
	    if (in != source && in.isEOF ()) {
		// we don't report end of parsed entities
		// within attributes (no SAX hooks)
		in = in.pop ();
		continue;
	    }
	    if ((c = getc ()) == quote && in == source)
		break;

	    //
	    // Basically the "reference in attribute value"
	    // row of the chart in section 4.4 of the spec
	    //
	    if (c == '&') {
		String	entityName = maybeGetName ();

		if (entityName != null) {
		    nextChar (';', "F-020", entityName);

		    // 4.4 says:  bypass these here ... we'll catch
		    // forbidden refs to unparsed entities on use
		    // (the reference text is copied through unexpanded)
		    if (isEntityValue) {
			strTmp.append ('&');
			strTmp.append (entityName);
			strTmp.append (';');
			continue;
		    }
		    expandEntityInLiteral (entityName, entities, isEntityValue);


		// character references are always included immediately
		} else if ((c = getc ()) == '#') {
		    int tmp = parseCharNumber ();

		    // values beyond 16 bits go through charTmp; the
		    // second char is appended only when the helper
		    // reports two chars
		    if (tmp > 0xffff) {
			tmp = surrogatesToCharTmp (tmp);
			strTmp.append (charTmp [0]);
			if (tmp == 2)
			    strTmp.append (charTmp [1]);
		    } else
			strTmp.append ((char) tmp);
		} else
		    fatal ("P-009");
		continue;

	    }

	    // expand parameter entities only within entity value literals
	    if (c == '%' && isEntityValue) {
		String	entityName = maybeGetName ();

		if (entityName != null) {
		    nextChar (';', "F-021", entityName);
		    // PE references inside a literal are only expanded
		    // while in an external PE; otherwise P-010
		    if (inExternalPE)
			expandEntityInLiteral (entityName,
				params, isEntityValue);
		    else
			fatal ("P-010", new Object [] { entityName });
		    continue;
		} else
		    fatal ("P-011");
	    }

	    // For attribute values ...
	    if (!isEntityValue) {
		// 3.3.3 says whitespace normalizes to space...
		if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
		    strTmp.append (' ');
		    continue;
		}

		// "<" not legal in parsed literals ...
		if (c == '<')
		    fatal ("P-012");
	    }

	    strTmp.append (c);
	}

	isInAttribute = false;
    }

    // Performs ONE level of expansion of the named entity (the result
    // is often reparsed later) by pushing its replacement text onto
    // the input.  "table" is the table the name is looked up in.
    private void expandEntityInLiteral (
	String		name,
	SimpleHashtable	table,
	boolean		isEntityValue
    ) throws SAXException, IOException
    {
	Object	found = table.get (name);

	if (found == null) {
	    //
	    // Note:  much confusion about whether spec requires such
	    // errors to be fatal in many cases, but none about whether
	    // it allows "normal" errors to be unrecoverable!
	    //
	    String	messageId = (table == params) ? "V-022" : "P-014";

	    fatal (messageId, new Object [] { name });
	    return;
	}

	//
	// Note:  if entity is a PE (value.isPE) there is an XML
	// requirement that the content be "markupdecl", but that error
	// is ignored here (as permitted by the XML spec).
	//
	if (found instanceof InternalEntity) {
	    InternalEntity	internal = (InternalEntity) found;
	    boolean		declaredOutside = supportValidation
				    && isValidating
				    && isStandalone
				    && !internal.isFromInternalSubset;

	    // validity error only; the expansion still happens
	    if (declaredOutside)
		error ("V-002", new Object [] { name });
	    pushReader (internal.buf, name, !internal.isPE);

	} else if (found instanceof ExternalEntity) {
	    // must be a PE ...
	    if (!isEntityValue)
		fatal ("P-013", new Object [] { name });
	    // XXX if this returns false ...
	    pushReader ((ExternalEntity) found);
	}
    }

    // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
    // for PUBLIC and SYSTEM literals, also "'

    // NOTE:  XML spec should explicitly say that PE ref syntax is
    // ignored in PIs, comments, SystemLiterals, and Pubid Literal
    // values ... can't process the XML spec's own DTD without doing
    // that for comments.

    // Reads a quoted string and returns what lies between the quotes
    // (also left in strTmp).  "type" and "extra" are a message ID and
    // its argument; they are only used to word the P-015 diagnostic
    // reported when no opening quote is found.
    private String getQuotedString (String type, String extra)
    throws IOException, SAXException
    {
	// use in.getc to bypass PE processing
	char	delimiter = in.getc ();

	if (!(delimiter == '\'' || delimiter == '"')) {
	    String	what = messages.getMessage (locale, type,
				new Object [] { extra });

	    fatal ("P-015", new Object [] { what });
	}

	strTmp = new StringBuffer ();
	for (;;) {
	    char	next = in.getc ();

	    if (next == delimiter)
		break;
	    strTmp.append (next);
	}
	return strTmp.toString ();
    }


    // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
    // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
    // Reads a quoted public identifier, reports P-016 for each
    // character outside PubidChar, and returns the result of
    // normalize (false) applied to the literal.
    private String parsePublicId ()
    throws IOException, SAXException
    {
	String	literal = getQuotedString ("F-033", null);
	int	count = literal.length ();

	for (int pos = 0; pos < count; pos++) {
	    char	ch = literal.charAt (pos);
	    boolean	isAsciiLetter = (ch >= 'A' && ch <= 'Z')
				|| (ch >= 'a' && ch <= 'z');
	    boolean	isOtherPubidChar =
		    " \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf (ch) != -1;

	    if (!isOtherPubidChar && !isAsciiLetter)
		fatal ("P-016", new Object [] { new Character (ch) });
	}

	// normalize() works on strTmp, so hand it the literal there
	strTmp = new StringBuffer ();
	strTmp.append (literal);
	return normalize (false);
    }

	// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
	// handled by:  InputEntity.parsedContent()

    private boolean maybeComment (boolean skipStart)
    throws IOException, SAXException
    {
	// [15] Comment ::= ''
	if (!in.peek (skipStart ? "!--" : "