com.sun.xml.rpc.sp.Parser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of webservices-rt Show documentation
This module contains the Metro runtime code.
There is a newer version: 4.0.4
/*
 * Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package com.sun.xml.rpc.sp;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

//
// NOTE:  when maintaining this code, take care to keep the message
// catalogue(s) up to date!!  It's important that the diagnostics
// be informative.
//

/**
 * This implements a fast non-validating top down parser.  This one always 
 * processes external parsed entities, strictly adheres to the XML 1.0
 * specification, and provides useful diagnostics.  It supports an optimization
 * allowing faster processing of valid standalone XML documents.  For
 * multi-language applications (such as web servers using XML processing
 * to create dynamic content), a method supports choosing a locale for
 * parser diagnostics which is both understood by the message recipient
 * and supported by the parser.
 *
 * @author David Brownell
 * @author Zhenghua Li
 * @author JAX-RPC RI Development Team
 */
public final class Parser {

    // Name and value of the most recently parsed item; returned by
    // getCurName() and getCurValue().
    private String curName = null;
    private String curValue = null;
    // namespace support; returned by getCurURI()
    private String curURI = null;

    // stack of input entities being merged
    private InputEntity in;

    // temporaries reused during parsing
    private AttributesExImpl attTmp;
    private String[] parts = new String[3];
    // scratch buffer: parseLiteral() and getQuotedString() leave their
    // result here
    private StringBuffer strTmp;
    // scratch array holding the characters of the name being read;
    // allocated in init() and grown on demand by nameCharString()
    private char nameTmp[];
    private NameCache nameCache;
    // two-char scratch array; parseLiteral() appends charTmp[0] and
    // charTmp[1] after calling surrogatesToCharTmp() for character
    // references above 0xffff
    private char charTmp[] = new char[2];

    // namespace support: init() creates or resets 'ns' only when
    // 'namespace' is true
    private boolean namespace = false;
    private NamespaceSupport ns = null;

    // parsing modes
    // (set by parseLiteral() while an attribute value literal is being read)
    private boolean isInAttribute = false;

    // temporary DTD parsing state
    private boolean inExternalPE;
    private boolean doLexicalPE;
    private boolean donePrologue;
    private boolean doneEpilogue;
    private boolean doneContent;

    private AttributesExImpl attr = null;
    private int attrIndex = 0;
    private boolean startEmptyStack = true;

    // info about the document
    private boolean isStandalone;
    private String rootElementName;

    // DTD state, used during parsing
    private boolean ignoreDeclarations;
    private SimpleHashtable elements = new SimpleHashtable(47);
    // parameter entities, looked up by name (see expandEntityInLiteral())
    private SimpleHashtable params = new SimpleHashtable(7);

    // exposed to package-private subclass
    // NOTE(review): this class is declared final, so "subclass" here looks
    // historical -- confirm who else in the package reads these fields.
    Map notations = new HashMap(7);
    // general entities, looked up by name; init() seeds the five
    // predefined ones (amp, lt, gt, quot, apos)
    SimpleHashtable entities = new SimpleHashtable(17);

    // string constants -- use these copies so "==" works
    // package private
    static final String strANY = "ANY";
    static final String strEMPTY = "EMPTY";

    // locale used for diagnostics; init() falls back to the default locale
    private Locale locale;
    // init() installs a Resolver when none has been set
    private EntityResolver resolver;
    // source of the position information returned by getLineNumber(),
    // getColumnNumber(), getPublicId() and getSystemId()
    Locator locator;
    private boolean fastStandalone = false;

    ////////////////////////////////////////////////////////////////
    //
    // PARSER methods
    //
    ////////////////////////////////////////////////////////////////

    /**
     * Used by applications to request locale for diagnostics.
     *
     * @param l The locale to use, or null to use system defaults
     *	(which may include only message IDs).
     * @throws ParseException If no diagnostic messages are available
     *	in that locale.
     */
    public void setLocale(Locale l) throws ParseException {
        // A null locale is always accepted; any other locale must be one
        // the message catalog supports.
        if (l != null && !messages.isLocaleSupported(l.toString())) {
            // Report through fatal() the same way every other diagnostic in
            // this class does: pass the message ID together with its
            // parameters.  The previous code formatted the text first and
            // then handed that text to fatal(String), which the other call
            // sites use with a message ID, not with message text.
            fatal("P-078", new Object[] { l });
        }
        locale = l;
    }

    /** Returns the diagnostic locale. */
    public Locale getLocale() {
        // whatever setLocale() stored, or the default chosen by init()
        return this.locale;
    }

    /** Returns the name of the most recently parsed item. */
    public String getCurName() {
        return this.curName;
    }

    /**
     * Returns {@code curURI}, the URI kept next to the current name and
     * value as part of namespace support.
     */
    public String getCurURI() {
        return this.curURI;
    }

    /** Returns the value of the most recently parsed item. */
    public String getCurValue() {
        return this.curValue;
    }

    /**
     * Returns the line number reported by the parser's locator.
     * NOTE(review): there is no null check here; this assumes the locator
     * has been assigned before the call -- confirm against the callers.
     */
    public int getLineNumber() {
        final int line = locator.getLineNumber();
        return line;
    }

    /**
     * Returns the column number reported by the parser's locator.
     * NOTE(review): there is no null check here; this assumes the locator
     * has been assigned before the call -- confirm against the callers.
     */
    public int getColumnNumber() {
        final int column = locator.getColumnNumber();
        return column;
    }

    /**
     * Returns the public identifier reported by the parser's locator.
     * NOTE(review): there is no null check here; this assumes the locator
     * has been assigned before the call -- confirm against the callers.
     */
    public String getPublicId() {
        final String publicId = locator.getPublicId();
        return publicId;
    }

    /**
     * Returns the system identifier reported by the parser's locator.
     * NOTE(review): there is no null check here; this assumes the locator
     * has been assigned before the call -- confirm against the callers.
     */
    public String getSystemId() {
        final String systemId = locator.getSystemId();
        return systemId;
    }

    /**
     * Chooses a client locale to use for diagnostics, using the first
     * language specified in the list that is supported by this parser.
     * That locale is then set using {@link #setLocale(Locale)}.  Such a
     * list could be provided by a variety of user
     * preference mechanisms, including the HTTP Accept-Language
     * header field.
     *
     * @see com.sun.xml.rpc.sp.MessageCatalog
     *
     * @param languages Array of language specifiers, ordered with the most
     *	preferable one at the front.  For example, "en-ca" then "fr-ca",
     *  followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
     * @return The chosen locale, or null.
     */
    public Locale chooseLocale(String languages[]) throws ParseException {
        final Locale chosen = messages.chooseLocale(languages);

        // nothing suitable in the list: leave the current locale alone
        if (chosen == null) {
            return null;
        }

        // otherwise make the selection the diagnostic locale
        setLocale(chosen);
        return chosen;
    }

    /** Lets applications control entity resolution. */
    public void setEntityResolver(EntityResolver r) {
        // stored as-is; init() only installs its own resolver when this is null
        this.resolver = r;
    }

    /** Returns the object used to resolve entities */
    public EntityResolver getEntityResolver() {
        // the resolver given to setEntityResolver(), or the one init() installed
        return this.resolver;
    }

    /**
     * Setting this flag enables faster processing of valid standalone
     * documents: external DTD information is not processed, and no
     * attribute normalization or defaulting is done.  This optimization
     * is only permitted in non-validating parsers; for validating
     * parsers, this mode is silently disabled.
     *
     *  For documents which are declared as standalone, but which are
     * not valid, a fatal error may be reported for references to externally
     * defined entities.  That could happen in any nonvalidating parser which
     * did not read externally defined entities.  Also, if any attribute
     * values need normalization or defaulting, it will not be done.
     */
    public void setFastStandalone(boolean value) {
        // only records the flag; it is read back through isFastStandalone()
        this.fastStandalone = value;
    }

    /**
     * Returns true if standalone documents skip processing of
     * all external DTD information.
     */
    public boolean isFastStandalone() {
        // the flag last stored by setFastStandalone(); false by default
        return this.fastStandalone;
    }

    // makes sure the parser's reset to "before a document"
    private void init() {
        // no input entity until a document is supplied
        in = null;

        // fresh scratch objects for the upcoming parse
        attTmp = new AttributesExImpl();
        strTmp = new StringBuffer();
        nameTmp = new char[20];
        nameCache = new NameCache();

        // namespace tracking: created on first use, reset afterwards
        if (namespace) {
            if (ns != null) {
                ns.reset();
            } else {
                ns = new NamespaceSupport();
            }
        }

        // per-document information
        isStandalone = false;
        rootElementName = null;
        isInAttribute = false;

        // DTD and document-section progress flags
        inExternalPE = false;
        doLexicalPE = false;
        donePrologue = false;
        doneEpilogue = false;
        doneContent = false;

        // attribute iteration state
        attr = null;
        attrIndex = 0;
        startEmptyStack = true;

        // forget every declaration collected from a previous document
        entities.clear();
        notations.clear();
        params.clear();
        elements.clear();
        ignoreDeclarations = false;

        stack.clear();
        piQueue.clear();

        // predefined references ... re-interpreted later
        final String[] predefinedNames = { "amp", "lt", "gt", "quot", "apos" };
        final String[] predefinedValues = { "&", "<", ">", "\"", "'" };
        for (int i = 0; i < predefinedNames.length; i++) {
            builtin(predefinedNames[i], predefinedValues[i]);
        }

        // fall back to defaults for anything the application did not set
        if (locale == null) {
            locale = Locale.getDefault();
        }
        if (resolver == null) {
            resolver = new Resolver();
        }
    }

    /**
     * Registers an internal entity named {@code entityName} whose
     * replacement text is {@code entityValue} in the entities table.
     */
    private void builtin(String entityName, String entityValue) {
        entities.put(
            entityName,
            new InternalEntity(entityName, entityValue.toCharArray()));
    }

    // package private -- for subclass 
    void afterRoot() throws ParseException {
        // Intentionally empty hook.
        // NOTE(review): this class is declared final, so nothing can
        // override it; the call sites are outside this excerpt.
    }

    // package private -- for subclass 
    void afterDocument() {
        // Intentionally empty hook.
        // NOTE(review): this class is declared final, so nothing can
        // override it; the call sites are outside this excerpt.
    }

    // role is for diagnostics
    private void whitespace(String roleId) throws IOException, ParseException {
        // [3] S ::= (#x20 | #x9 | #xd | #xa)+
        if (maybeWhitespace()) {
            return;
        }

        // Mandatory whitespace is missing: roleId is looked up in the
        // message catalog and passed as the parameter of diagnostic P-004.
        fatal("P-004", new Object[] { messages.getMessage(locale, roleId) });
    }

    // S?
    private boolean maybeWhitespace() throws IOException, ParseException {
        // Outside the "external PE with lexical PE expansion" mode the
        // input entity can skip the whitespace by itself.
        if (!inExternalPE || !doLexicalPE) {
            return in.maybeWhitespace();
        }

        // see getc() for the PE logic -- this lets us splice
        // expansions of PEs in "anywhere".  getc() has smarts,
        // so for external PEs we don't bypass it.

        // we can marginally speed PE handling, and certainly
        // be cleaner (hence potentially more correct), by using
        // the observations that expanded PEs only start and stop
        // where whitespace is allowed.  getc wouldn't need any
        // "lexical" PE expansion logic, and no other method needs
        // to handle termination of PEs.  (parsing of literals would
        // still need to pop entities, but not parsing of references
        // in content.)

        boolean sawSpace = false;

        for (char ch = getc();
            ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
            ch = getc()) {
            sawSpace = true;

            // this gracefully ends things when we stop playing
            // with internal parameters.  caller should have a
            // grammar rule allowing whitespace at end of entity.
            if (in.isEOF() && !in.isInternal()) {
                return true;
            }
        }

        // the character that ended the loop is not whitespace: push it back
        ungetc();
        return sawSpace;
    }

    /**
     * Reads a name if one starts at the current position.
     *
     * @return the name, or null when maybeGetNameCacheEntry() found none
     */
    private String maybeGetName() throws IOException, ParseException {
        final NameCacheEntry cached = maybeGetNameCacheEntry();
        if (cached != null) {
            return cached.name;
        }
        return null;
    }

    /**
     * Reads a name if the next character can start one.
     *
     * @return the cache entry for the name, or null (with the character
     *	pushed back) when the next character cannot start a name
     */
    private NameCacheEntry maybeGetNameCacheEntry()
        throws IOException, ParseException {
        // [5] Name ::= (Letter|'_'|':') (Namechar)*
        final char first = getc();
        final boolean startsName =
            XmlChars.isLetter(first) || first == ':' || first == '_';

        if (startsName) {
            return nameCharString(first);
        }

        // not a name: leave the character for the caller
        ungetc();
        return null;
    }

    // Used when parsing enumerations
    private String getNmtoken() throws ParseException, IOException {
        // [7] Nmtoken ::= (Namechar)+
        //
        // Unlike a Name, an Nmtoken may begin with any name character, so
        // only the "is a name character" test is applied to the first char.
        char c = getc();
        if (!XmlChars.isNameChar(c)) {
            // Character.valueOf replaces the deprecated Character(char)
            // constructor; the offending character is the parameter of
            // diagnostic P-006.
            fatal("P-006", new Object[] { Character.valueOf(c) });
        }
        // the remaining name characters are consumed by nameCharString()
        return nameCharString(c).name;
    }

    // n.b. this gets used when parsing attribute values (for
    // internal references) so we can't use strTmp; it's also
    // a hotspot for CPU and memory in the parser (called at least
    // once for each element) so this has been optimized a bit.

    private NameCacheEntry nameCharString(char c)
        throws IOException, ParseException {
        // c is the first character of the name; it was already consumed
        // by the caller
        nameTmp[0] = c;
        int length = 1;

        // in.getNameChar() yields 0 once the name has ended
        char next = in.getNameChar();
        while (next != 0) {
            if (length >= nameTmp.length) {
                // scratch array is full: grow it by ten characters
                char grown[] = new char[nameTmp.length + 10];
                System.arraycopy(nameTmp, 0, grown, 0, nameTmp.length);
                nameTmp = grown;
            }
            nameTmp[length] = next;
            length++;
            next = in.getNameChar();
        }

        // resolve the collected characters through the name cache
        return nameCache.lookupEntry(nameTmp, length);
    }

    //
    // much similarity between parsing entity values in DTD
    // and attribute values (in DTD or content) ... both follow
    // literal parsing rules, newline canonicalization, etc
    //
    // leaves value in 'strTmp' ... either a "replacement text" (4.5),
    // or else partially normalized attribute value (the first bit
    // of 3.3.3's spec, without the "if not CDATA" bits).
    //
    private void parseLiteral(boolean isEntityValue)
        throws IOException, ParseException {
        // Parses one quoted literal, starting at its opening quote, and
        // leaves the result in strTmp.  isEntityValue selects the
        // EntityValue rules (true) or the AttValue rules (false).
        //
        // [9] EntityValue ::=
        //	'"' ([^"&%] | Reference | PEReference)* '"'
        //    |	"'" ([^'&%] | Reference | PEReference)* "'"
        // [10] AttValue ::=
        //	'"' ([^"&]  | Reference		     )* '"'
        //    |	"'" ([^'&]  | Reference		     )* "'"

        // Only expand PEs in getc() when processing entity value literals
        // and do not expand when processing AttValue.  Save state of
        // doLexicalPE and restore it before returning.
        // NOTE(review): the restore at the bottom only runs on normal
        // completion; if fatal() throws, doLexicalPE and isInAttribute keep
        // the values set here -- presumably harmless because the parse is
        // abandoned, but confirm.
        boolean savedLexicalPE = doLexicalPE;
        doLexicalPE = isEntityValue;

        char quote = getc();
        char c;
        // the entity in which the literal started; the closing quote only
        // counts when it is read from this same entity
        InputEntity source = in;

        if (quote != '\'' && quote != '"')
            fatal("P-007");

        // don't report entity expansions within attributes,
        // they're reported "fully expanded" via SAX
        isInAttribute = !isEntityValue;

        // get value into strTmp
        strTmp = new StringBuffer();

        // scan, allowing entity push/pop wherever ...
        // expanded entities can't terminate the literal!
        for (;;) {
            if (in != source && in.isEOF()) {
                // we don't report end of parsed entities
                // within attributes (no SAX hooks)
                in = in.pop();
                continue;
            }
            if ((c = getc()) == quote && in == source)
                break;

            //
            // Basically the "reference in attribute value"
            // row of the chart in section 4.4 of the spec
            //
            if (c == '&') {
                String entityName = maybeGetName();

                if (entityName != null) {
                    nextChar(';', "F-020", entityName);

                    // 4.4 says:  bypass these here ... we'll catch
                    // forbidden refs to unparsed entities on use
                    if (isEntityValue) {
                        // keep the general entity reference unexpanded in
                        // the replacement text
                        strTmp.append('&');
                        strTmp.append(entityName);
                        strTmp.append(';');
                        continue;
                    }
                    // attribute value: splice the entity's text into the
                    // input so the loop reads it next
                    expandEntityInLiteral(entityName, entities, isEntityValue);

                    // character references are always included immediately
                } else if ((c = getc()) == '#') {
                    int tmp = parseCharNumber();

                    if (tmp > 0xffff) {
                        // above the 16-bit range: surrogatesToCharTmp()
                        // fills charTmp; its result is used here as the
                        // number of chars to append (second one only when
                        // it is 2)
                        tmp = surrogatesToCharTmp(tmp);
                        strTmp.append(charTmp[0]);
                        if (tmp == 2)
                            strTmp.append(charTmp[1]);
                    } else
                        strTmp.append((char) tmp);
                } else
                    // '&' followed by neither a name nor '#'
                    fatal("P-009");
                continue;

            }

            // expand parameter entities only within entity value literals
            if (c == '%' && isEntityValue) {
                String entityName = maybeGetName();

                if (entityName != null) {
                    nextChar(';', "F-021", entityName);
                    // PE references inside a literal are only expanded
                    // while reading an external parameter entity;
                    // otherwise P-010 is reported
                    if (inExternalPE)
                        expandEntityInLiteral(
                            entityName,
                            params,
                            isEntityValue);
                    else
                        fatal("P-010", new Object[] { entityName });
                    continue;
                } else
                    // '%' not followed by a name
                    fatal("P-011");
            }

            // For attribute values ...
            if (!isEntityValue) {
                // 3.3.3 says whitespace normalizes to space...
                if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
                    strTmp.append(' ');
                    continue;
                }

                // "<" not legal in parsed literals ...
                if (c == '<')
                    fatal("P-012");
            }

            // ordinary character: copy it through unchanged
            strTmp.append(c);
        }

        // literal complete: leave attribute mode and restore the caller's
        // PE expansion setting
        isInAttribute = false;
        doLexicalPE = savedLexicalPE;
    }

    // does a SINGLE expansion of the entity (often reparsed later)
    private void expandEntityInLiteral(
        String name,
        SimpleHashtable table,
        boolean isEntityValue)
        throws ParseException, IOException {
        final Object found = table.get(name);

        if (found == null) {
            //
            // Note:  much confusion about whether spec requires such
            // errors to be fatal in many cases, but none about whether
            // it allows "normal" errors to be unrecoverable!
            //
            // An undeclared parameter entity is reported as V-022, any
            // other undeclared entity as P-014.
            final String messageId = (table == params) ? "V-022" : "P-014";
            fatal(messageId, new Object[] { name });
            return;
        }

        //
        // Note:  if entity is a PE (value.isPE) there is an XML
        // requirement that the content be "markupdecl", but that error
        // is ignored here (as permitted by the XML spec).
        //
        if (found instanceof InternalEntity) {
            final InternalEntity internal = (InternalEntity) found;
            pushReader(internal.buf, name, !internal.isPE);
            return;
        }

        if (found instanceof ExternalEntity) {
            // must be a PE ...
            if (!isEntityValue) {
                fatal("P-013", new Object[] { name });
            }
            // if this returns false ...
            pushReader((ExternalEntity) found);
        }
    }

    // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
    // for PUBLIC and SYSTEM literals, also "'

    // NOTE:  XML spec should explicitly say that PE ref syntax is
    // ignored in PIs, comments, SystemLiterals, and Pubid Literal
    // values ... can't process the XML spec's own DTD without doing
    // that for comments.

    private String getQuotedString(String type, String extra)
        throws IOException, ParseException {
        // use in.getc to bypass PE processing
        final char opener = in.getc();

        if (opener != '\'' && opener != '"') {
            // 'type' is a message ID describing what was expected and
            // 'extra' is its parameter; the formatted description becomes
            // the parameter of diagnostic P-015
            final String expected =
                messages.getMessage(locale, type, new Object[] { extra });
            fatal("P-015", new Object[] { expected });
        }

        // collect everything up to the matching quote, verbatim
        strTmp = new StringBuffer();
        for (char ch = in.getc(); ch != opener; ch = in.getc()) {
            strTmp.append(ch);
        }
        return strTmp.toString();
    }

    /**
     * Reads a quoted public identifier, checks that every character is a
     * legal PubidChar, and returns the result of {@code normalize(false)}
     * applied to it.
     *
     * @return the public identifier as returned by normalize(false)
     * @throws ParseException via fatal("P-016") for the first character
     *	that is not a PubidChar
     */
    private String parsePublicId() throws IOException, ParseException {
        // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
        // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
        String retval = getQuotedString("F-033", null);
        for (int i = 0; i < retval.length(); i++) {
            char c = retval.charAt(i);
            boolean isLetter = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
            if (!isLetter
                && " \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1) {
                // Character.valueOf replaces the deprecated
                // Character(char) constructor
                fatal("P-016", new Object[] { Character.valueOf(c) });
            }
        }
        // normalize() takes its input from strTmp, so load the literal there
        strTmp = new StringBuffer(retval);
        return normalize(false);
    }

    // [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
    // handled by:  InputEntity.parsedContent()

    private boolean maybeComment(boolean skipStart)
        throws IOException, ParseException {
        // [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
        if (!in.peek(skipStart ? "!--" : "