All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.xml.rpc.sp.Parser2 Maven / Gradle / Ivy

/*
 * Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package com.sun.xml.rpc.sp;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

//
// NOTE:  when maintaining this code, take care to keep the message
// catalogue(s) up to date!!  It's important that the diagnostics
// be informative.
//

/**
 * This implements a fast non-validating top down parser.  This one always 
 * processes external parsed entities, strictly adheres to the XML 1.0
 * specification, and provides useful diagnostics.  It supports an optimization
 * allowing faster processing of valid standalone XML documents.  For
 * multi-language applications (such as web servers using XML processing
 * to create dynamic content), a method supports choosing a locale for
 * parser diagnostics which is both understood by the message recipient
 * and supported by the parser.
 *
 * @author David Brownell
 * @author Zhenghua Li
 * @author JAX-RPC RI Development Team
 */
public final class Parser2 {

    /* 
      This class was created by starting with Parser and making a few changes:
      a) adding methods to get access to the internal attribute list and
         namespace support objects
      b) eliminating the ATTR event
      c) renumbering the remaining events to match the new XMLReader interface
      d) setting the URI of xmlns attributes to "http://www.w3.org/2000/xmlns/",
         per the XML Information Set specification
     */

    // these are the name and value of the most
    // recently parsed item (returned by getCurName() / getCurValue())
    private String curName = null;
    private String curValue = null;
    // namespace support (returned by getCurURI())
    private String curURI = null;

    // stack of input entities being merged; parseLiteral() pops it
    // via in.pop() when an expanded entity reaches EOF
    private InputEntity in;

    // temporaries reused during parsing; attTmp, strTmp, nameTmp and
    // nameCache are (re)allocated by init()
    private AttributesExImpl attTmp;
    private String[] parts = new String[3];
    // receives literal text being accumulated (see parseLiteral(),
    // getQuotedString())
    private StringBuffer strTmp;
    // scratch buffer for name characters; grown on demand by
    // nameCharString()
    private char nameTmp[];
    private NameCache nameCache;
    // parseLiteral() reads charTmp[0] (and charTmp[1] when two chars
    // were produced) after calling surrogatesToCharTmp()
    private char charTmp[] = new char[2];

    // namespace support: init() only creates/resets 'ns' when
    // 'namespace' is true
    private boolean namespace = false;
    private NamespaceSupport ns = null;

    // parsing modes
    // set by parseLiteral() while scanning an attribute value
    // (i.e. a literal that is not an entity value), cleared afterwards
    private boolean isInAttribute = false;

    // NOTE(review): not referenced in the visible part of the file;
    // presumably makes the parser refuse documents with a DTD -- confirm
    private boolean rejectDTDs = false;

    // temporary DTD parsing state (all reset to false by init())
    private boolean inExternalPE;
    private boolean doLexicalPE;
    private boolean donePrologue;
    private boolean doneEpilogue;
    private boolean doneContent;

    // current attribute list handed out by getAttributes(); init()
    // resets attr to null, attrIndex to 0 and startEmptyStack to true
    private AttributesExImpl attr = null;
    private int attrIndex = 0;
    private boolean startEmptyStack = true;

    // info about the document (reset by init())
    private boolean isStandalone;
    private String rootElementName;

    // DTD state, used during parsing
    private boolean ignoreDeclarations;
    private SimpleHashtable elements = new SimpleHashtable(47);
    // parameter entities: parseLiteral() resolves '%name;' against this
    // table
    private SimpleHashtable params = new SimpleHashtable(7);

    // exposed to package-private subclass
    Map notations = new HashMap(7);
    // general entities, including the predefined ones registered by
    // builtin()
    SimpleHashtable entities = new SimpleHashtable(17);

    // string constants -- use these copies so "==" works
    // package private
    static final String strANY = "ANY";
    static final String strEMPTY = "EMPTY";

    // diagnostic locale; init() falls back to Locale.getDefault() when
    // this is still null
    private Locale locale;
    // entity resolver; init() installs a new Resolver() when this is
    // still null
    private EntityResolver resolver;
    // source of the position/identifier data returned by
    // getLineNumber(), getColumnNumber(), getPublicId(), getSystemId()
    Locator locator;
    // see setFastStandalone() / isFastStandalone()
    private boolean fastStandalone = false;

    // namespace URI for xmlns attributes (see the class-level note
    // about the XML Information Set specification)
    private static final String XMLNS_NAMESPACE_URI =
        "http://www.w3.org/2000/xmlns/";

    ////////////////////////////////////////////////////////////////
    //
    // PARSER methods
    //
    ////////////////////////////////////////////////////////////////

    /**
     * Selects the locale in which parser diagnostics are reported.
     *
     * <p>A non-null locale is first checked against the message catalog;
     * when the catalog does not support it, a fatal error built from
     * message "P-078" is raised and the current locale is left unchanged.
     *
     * @param l The locale to use, or null to use system defaults
     *  (which may include only message IDs).
     * @throws ParseException If no diagnostic messages are available
     *  in that locale.
     */
    public void setLocale(Locale l) throws ParseException {
        final boolean unsupported =
            l != null && !messages.isLocaleSupported(l.toString());

        if (unsupported) {
            // the diagnostic itself is formatted using the locale
            // that is currently in effect, not the rejected one
            fatal(messages.getMessage(locale, "P-078", new Object[] { l }));
        }
        this.locale = l;
    }

    /**
     * Returns the locale currently used for diagnostics.
     *
     * @return the value last stored by {@code setLocale()}, which may be
     *  null if none has been set
     */
    public Locale getLocale() {
        return this.locale;
    }

    /**
     * Returns the name of the most recently parsed item.
     *
     * @return the current name, possibly null
     */
    public String getCurName() {
        return this.curName;
    }

    /**
     * Returns the URI recorded by the parser's namespace support for the
     * current item.
     *
     * @return the current URI, possibly null
     */
    public String getCurURI() {
        return this.curURI;
    }

    /**
     * Returns the value of the most recently parsed item.
     *
     * @return the current value, possibly null
     */
    public String getCurValue() {
        return this.curValue;
    }

    /**
     * Gives access to the parser's internal namespace support object.
     *
     * @return the internal {@code NamespaceSupport} instance (not a copy),
     *  or null if none has been created
     */
    public NamespaceSupport getNamespaceSupport() {
        return this.ns;
    }

    /**
     * Gives access to the parser's internal attribute list.
     *
     * @return the internal attribute list (not a copy); null when no
     *  list is currently set
     */
    public AttributesEx getAttributes() {
        return this.attr;
    }

    /**
     * Returns the line number reported by the parser's locator.
     *
     * <p>NOTE(review): this dereferences {@code locator} without a null
     * check; confirm a locator is always installed before callers use it.
     *
     * @return the locator's current line number
     */
    public int getLineNumber() {
        final int line = locator.getLineNumber();
        return line;
    }

    /**
     * Returns the column number reported by the parser's locator.
     *
     * <p>NOTE(review): this dereferences {@code locator} without a null
     * check; confirm a locator is always installed before callers use it.
     *
     * @return the locator's current column number
     */
    public int getColumnNumber() {
        final int column = locator.getColumnNumber();
        return column;
    }

    /**
     * Returns the public identifier reported by the parser's locator.
     *
     * <p>NOTE(review): this dereferences {@code locator} without a null
     * check; confirm a locator is always installed before callers use it.
     *
     * @return the locator's public identifier
     */
    public String getPublicId() {
        final String publicId = locator.getPublicId();
        return publicId;
    }

    /**
     * Returns the system identifier reported by the parser's locator.
     *
     * <p>NOTE(review): this dereferences {@code locator} without a null
     * check; confirm a locator is always installed before callers use it.
     *
     * @return the locator's system identifier
     */
    public String getSystemId() {
        final String systemId = locator.getSystemId();
        return systemId;
    }

    /**
     * Picks the diagnostic locale from a list of client preferences.
     * The message catalog is asked to choose among the given language
     * specifiers; when it returns a locale, that locale is installed
     * with {@code setLocale()}.  Such a list could come from a variety of
     * user preference mechanisms, including the HTTP Accept-Language
     * header field.
     *
     * @see com.sun.xml.rpc.sp.MessageCatalog
     *
     * @param languages Array of language specifiers, ordered with the most
     *  preferable one at the front.  For example, "en-ca" then "fr-ca",
     *  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
     * @return The chosen locale, or null (in which case the current
     *  locale is left untouched).
     * @throws ParseException propagated from {@code setLocale()}
     */
    public Locale chooseLocale(String languages[]) throws ParseException {
        final Locale chosen = messages.chooseLocale(languages);

        if (chosen == null) {
            return null;
        }
        setLocale(chosen);
        return chosen;
    }

    /**
     * Lets applications control entity resolution.
     *
     * @param r the resolver to use; stored as-is (a null value is
     *  replaced by a default resolver the next time {@code init()} runs)
     */
    public void setEntityResolver(EntityResolver r) {
        this.resolver = r;
    }

    /**
     * Returns the object used to resolve entities.
     *
     * @return the current resolver, possibly null if none has been set
     */
    public EntityResolver getEntityResolver() {
        return this.resolver;
    }

    /**
     * Setting this flag enables faster processing of valid standalone
     * documents: external DTD information is not processed, and no
     * attribute normalization or defaulting is done.  This optimization
     * is only permitted in non-validating parsers; for validating
     * parsers, this mode is silently disabled.
     *
     * 

For documents which are declared as standalone, but which are * not valid, a fatal error may be reported for references to externally * defined entities. That could happen in any nonvalidating parser which * did not read externally defined entities. Also, if any attribute * values need normalization or defaulting, it will not be done. */ public void setFastStandalone(boolean value) { fastStandalone = value; } /** * Returns true if standalone documents skip processing of * all external DTD information. */ public boolean isFastStandalone() { return fastStandalone; } // makes sure the parser's reset to "before a document" private void init() { in = null; // alloc temporary data used in parsing attTmp = new AttributesExImpl(); strTmp = new StringBuffer(); nameTmp = new char[20]; nameCache = new NameCache(); if (namespace) { if (ns == null) ns = new NamespaceSupport(); else ns.reset(); } // reset doc info isStandalone = false; rootElementName = null; isInAttribute = false; inExternalPE = false; doLexicalPE = false; donePrologue = false; doneEpilogue = false; doneContent = false; attr = null; attrIndex = 0; startEmptyStack = true; entities.clear(); notations.clear(); params.clear(); elements.clear(); ignoreDeclarations = false; stack.clear(); piQueue.clear(); // initialize predefined references ... 
re-interpreted later builtin("amp", "&"); builtin("lt", "<"); builtin("gt", ">"); builtin("quot", "\""); builtin("apos", "'"); if (locale == null) locale = Locale.getDefault(); if (resolver == null) resolver = new Resolver(); } private void builtin(String entityName, String entityValue) { InternalEntity entity; entity = new InternalEntity(entityName, entityValue.toCharArray()); entities.put(entityName, entity); } // package private -- for subclass void afterRoot() throws ParseException { } // package private -- for subclass void afterDocument() { } // role is for diagnostics private void whitespace(String roleId) throws IOException, ParseException // [3] S ::= (#x20 | #x9 | #xd | #xa)+ { if (!maybeWhitespace()) fatal("P-004", new Object[] { messages.getMessage(locale, roleId)}); } // S? private boolean maybeWhitespace() throws IOException, ParseException { if (!(inExternalPE && doLexicalPE)) return in.maybeWhitespace(); // see getc() for the PE logic -- this lets us splice // expansions of PEs in "anywhere". getc() has smarts, // so for external PEs we don't bypass it. // we can marginally speed PE handling, and certainly // be cleaner (hence potentially more correct), by using // the observations that expanded PEs only start and stop // where whitespace is allowed. getc wouldn't need any // "lexical" PE expansion logic, and no other method needs // to handle termination of PEs. (parsing of literals would // still need to pop entities, but not parsing of references // in content.) char c = getc(); boolean saw = false; while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { saw = true; // this gracefully ends things when we stop playing // with internal parameters. caller should have a // grammar rule allowing whitespace at end of entity. if (in.isEOF() && !in.isInternal()) return saw; c = getc(); } ungetc(); return saw; } private String maybeGetName() throws IOException, ParseException { NameCacheEntry entry = maybeGetNameCacheEntry(); return (entry == null) ? 
null : entry.name; } private NameCacheEntry maybeGetNameCacheEntry() throws IOException, ParseException { // [5] Name ::= (Letter|'_'|':') (Namechar)* char c = getc(); if (!XmlChars.isLetter(c) && c != ':' && c != '_') { ungetc(); return null; } return nameCharString(c); } // Used when parsing enumerations private String getNmtoken() throws ParseException, IOException { // [7] Nmtoken ::= (Namechar)+ char c = getc(); if (!XmlChars.isNameChar(c)) fatal("P-006", new Object[] { new Character(c)}); return nameCharString(c).name; } // n.b. this gets used when parsing attribute values (for // internal references) so we can't use strTmp; it's also // a hotspot for CPU and memory in the parser (called at least // once for each element) so this has been optimized a bit. private NameCacheEntry nameCharString(char c) throws IOException, ParseException { int i = 1; nameTmp[0] = c; for (;;) { if ((c = in.getNameChar()) == 0) break; if (i >= nameTmp.length) { char tmp[] = new char[nameTmp.length + 10]; System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); nameTmp = tmp; } nameTmp[i++] = c; } return nameCache.lookupEntry(nameTmp, i); } // // much similarity between parsing entity values in DTD // and attribute values (in DTD or content) ... both follow // literal parsing rules, newline canonicalization, etc // // leaves value in 'strTmp' ... either a "replacement text" (4.5), // or else partially normalized attribute value (the first bit // of 3.3.3's spec, without the "if not CDATA" bits). // private void parseLiteral(boolean isEntityValue) throws IOException, ParseException { // [9] EntityValue ::= // '"' ([^"&%] | Reference | PEReference)* '"' // | "'" ([^'&%] | Reference | PEReference)* "'" // [10] AttValue ::= // '"' ([^"&] | Reference )* '"' // | "'" ([^'&] | Reference )* "'" // Only expand PEs in getc() when processing entity value literals // and do not expand when processing AttValue. Save state of // doLexicalPE and restore it before returning. 
boolean savedLexicalPE = doLexicalPE; doLexicalPE = isEntityValue; char quote = getc(); char c; InputEntity source = in; if (quote != '\'' && quote != '"') fatal("P-007"); // don't report entity expansions within attributes, // they're reported "fully expanded" via SAX isInAttribute = !isEntityValue; // get value into strTmp strTmp = new StringBuffer(); // scan, allowing entity push/pop wherever ... // expanded entities can't terminate the literal! for (;;) { if (in != source && in.isEOF()) { // we don't report end of parsed entities // within attributes (no SAX hooks) in = in.pop(); continue; } if ((c = getc()) == quote && in == source) break; // // Basically the "reference in attribute value" // row of the chart in section 4.4 of the spec // if (c == '&') { String entityName = maybeGetName(); if (entityName != null) { nextChar(';', "F-020", entityName); // 4.4 says: bypass these here ... we'll catch // forbidden refs to unparsed entities on use if (isEntityValue) { strTmp.append('&'); strTmp.append(entityName); strTmp.append(';'); continue; } expandEntityInLiteral(entityName, entities, isEntityValue); // character references are always included immediately } else if ((c = getc()) == '#') { int tmp = parseCharNumber(); if (tmp > 0xffff) { tmp = surrogatesToCharTmp(tmp); strTmp.append(charTmp[0]); if (tmp == 2) strTmp.append(charTmp[1]); } else strTmp.append((char) tmp); } else fatal("P-009"); continue; } // expand parameter entities only within entity value literals if (c == '%' && isEntityValue) { String entityName = maybeGetName(); if (entityName != null) { nextChar(';', "F-021", entityName); if (inExternalPE) expandEntityInLiteral( entityName, params, isEntityValue); else fatal("P-010", new Object[] { entityName }); continue; } else fatal("P-011"); } // For attribute values ... if (!isEntityValue) { // 3.3.3 says whitespace normalizes to space... 
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { strTmp.append(' '); continue; } // "<" not legal in parsed literals ... if (c == '<') fatal("P-012"); } strTmp.append(c); } isInAttribute = false; doLexicalPE = savedLexicalPE; } // does a SINGLE expansion of the entity (often reparsed later) private void expandEntityInLiteral( String name, SimpleHashtable table, boolean isEntityValue) throws ParseException, IOException { Object entity = table.get(name); // // Note: if entity is a PE (value.isPE) there is an XML // requirement that the content be "markkupdecl", but that error // is ignored here (as permitted by the XML spec). // if (entity instanceof InternalEntity) { InternalEntity value = (InternalEntity) entity; pushReader(value.buf, name, !value.isPE); } else if (entity instanceof ExternalEntity) { if (!isEntityValue) // must be a PE ... fatal("P-013", new Object[] { name }); // if this returns false ... pushReader((ExternalEntity) entity); } else if (entity == null) { // // Note: much confusion about whether spec requires such // errors to be fatal in many cases, but none about whether // it allows "normal" errors to be unrecoverable! // fatal((table == params) ? "V-022" : "P-014", new Object[] { name }); } } // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") // for PUBLIC and SYSTEM literals, also "' // NOTE: XML spec should explicitly say that PE ref syntax is // ignored in PIs, comments, SystemLiterals, and Pubid Literal // values ... can't process the XML spec's own DTD without doing // that for comments. 
private String getQuotedString(String type, String extra) throws IOException, ParseException { // use in.getc to bypass PE processing char quote = in.getc(); if (quote != '\'' && quote != '"') fatal( "P-015", new Object[] { messages.getMessage( locale, type, new Object[] { extra }) }); char c; strTmp = new StringBuffer(); while ((c = in.getc()) != quote) strTmp.append((char) c); return strTmp.toString(); } private String parsePublicId() throws IOException, ParseException { // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'") // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%] String retval = getQuotedString("F-033", null); for (int i = 0; i < retval.length(); i++) { char c = retval.charAt(i); if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1 && !(c >= 'A' && c <= 'Z') && !(c >= 'a' && c <= 'z')) fatal("P-016", new Object[] { new Character(c)}); } strTmp = new StringBuffer(); strTmp.append(retval); return normalize(false); } // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) // handled by: InputEntity.parsedContent() private boolean maybeComment(boolean skipStart) throws IOException, ParseException { // [15] Comment ::= '' if (!in.peek(skipStart ? "!--" : "