All downloads are free. Search and download functionality uses the official Maven repository.

com.sun.xml.rpc.sp.Parser Maven / Gradle / Ivy

There is a newer version: 4.0.4
Show newest version
/*
 * Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Public License v. 2.0, which is available at
 * http://www.eclipse.org/legal/epl-2.0.
 *
 * This Source Code may also be made available under the following Secondary
 * Licenses when the conditions for such availability set forth in the
 * Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
 * version 2 with the GNU Classpath Exception, which is available at
 * https://www.gnu.org/software/classpath/license.html.
 *
 * SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
 */

package com.sun.xml.rpc.sp;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;

//
// NOTE:  when maintaining this code, take care to keep the message
// catalogue(s) up to date!!  It's important that the diagnostics
// be informative.
//

/**
 * This implements a fast non-validating top down parser.  This one always 
 * processes external parsed entities, strictly adheres to the XML 1.0
 * specification, and provides useful diagnostics.  It supports an optimization
 * allowing faster processing of valid standalone XML documents.  For
 * multi-language applications (such as web servers using XML processing
 * to create dynamic content), a method supports choosing a locale for
 * parser diagnostics which is both understood by the message recipient
 * and supported by the parser.
 *
 * @author David Brownell
 * @author Zhenghua Li
 * @author JAX-RPC RI Development Team
 */
public final class Parser {

    // these are the name and value of the most
    // recently parsed item
    private String curName = null;
    private String curValue = null;
    // namespace support
    private String curURI = null;

    // stack of input entities being merged
    private InputEntity in;

    // temporaries reused during parsing
    private AttributesExImpl attTmp;
    private String[] parts = new String[3];
    // scratch buffer; literal parsing leaves its result here
    private StringBuffer strTmp;
    // scratch buffer for name characters, grown on demand by nameCharString()
    private char nameTmp[];
    private NameCache nameCache;
    // receives the char(s) produced by surrogatesToCharTmp() when a
    // character reference above 0xffff is parsed
    private char charTmp[] = new char[2];

    // namespace support
    private boolean namespace = false;
    private NamespaceSupport ns = null;

    // parsing modes
    // set while an attribute value literal is being parsed
    private boolean isInAttribute = false;

    // temporary DTD parsing state
    private boolean inExternalPE;
    private boolean doLexicalPE;
    private boolean donePrologue;
    private boolean doneEpilogue;
    private boolean doneContent;

    private AttributesExImpl attr = null;
    private int attrIndex = 0;
    private boolean startEmptyStack = true;

    // info about the document
    private boolean isStandalone;
    private String rootElementName;

    // DTD state, used during parsing
    private boolean ignoreDeclarations;
    private SimpleHashtable elements = new SimpleHashtable(47);
    // parameter entities by name (looked up for '%' references
    // inside entity value literals)
    private SimpleHashtable params = new SimpleHashtable(7);

    // exposed to package-private subclass
    Map notations = new HashMap(7);
    // general entities by name; init() seeds the five predefined ones
    SimpleHashtable entities = new SimpleHashtable(17);

    // string constants -- use these copies so "==" works
    // package private
    static final String strANY = "ANY";
    static final String strEMPTY = "EMPTY";

    // locale for diagnostics; init() falls back to Locale.getDefault()
    // when none has been set
    private Locale locale;
    // entity resolver; init() installs a default Resolver when none is set
    private EntityResolver resolver;
    // source of the line/column/public-id/system-id getters below;
    // NOTE(review): not assigned anywhere in this part of the file
    Locator locator;
    // see setFastStandalone()
    private boolean fastStandalone = false;

    ////////////////////////////////////////////////////////////////
    //
    // PARSER methods
    //
    ////////////////////////////////////////////////////////////////

    /**
     * Used by applications to request locale for diagnostics.
     *
     * @param l The locale to use, or null to use system defaults
     *	(which may include only message IDs).
     * @throws ParseException If no diagnostic messages are available
     *	in that locale.
     */
    public void setLocale(Locale l) throws ParseException {
        // A null locale is always accepted (it selects the defaults);
        // anything else must be known to the message catalog.
        final boolean unsupported =
            l != null && !messages.isLocaleSupported(l.toString());

        if (unsupported) {
            // The diagnostic is still rendered in the *current* locale,
            // since the requested one has no messages available.
            final Object[] args = { l };
            fatal(messages.getMessage(locale, "P-078", args));
        }

        locale = l;
    }

    /**
     * Returns the locale used for diagnostics.
     *
     * @return the diagnostic locale; may be {@code null} when no locale
     *	has been chosen yet
     */
    public Locale getLocale() {
        return this.locale;
    }

    /**
     * Returns the name of the most recently parsed item.
     *
     * @return the current name, or {@code null} if none has been recorded
     */
    public String getCurName() {
        return this.curName;
    }

    /**
     * Returns the parser's current URI value, which is maintained as part
     * of its namespace support.
     *
     * @return the current URI, or {@code null} if none has been recorded
     */
    public String getCurURI() {
        return this.curURI;
    }

    /**
     * Returns the value of the most recently parsed item.
     *
     * @return the current value, or {@code null} if none has been recorded
     */
    public String getCurValue() {
        return this.curValue;
    }

    /**
     * Returns the line number reported by the parser's locator.
     *
     * NOTE(review): dereferences the locator unconditionally, so this fails
     * with a NullPointerException if no locator has been installed --
     * confirm callers only use it while a document is being parsed.
     */
    public int getLineNumber() {
        final Locator where = locator;
        return where.getLineNumber();
    }

    /**
     * Returns the column number reported by the parser's locator.
     *
     * NOTE(review): dereferences the locator unconditionally, so this fails
     * with a NullPointerException if no locator has been installed --
     * confirm callers only use it while a document is being parsed.
     */
    public int getColumnNumber() {
        final Locator where = locator;
        return where.getColumnNumber();
    }

    /**
     * Returns the public identifier reported by the parser's locator.
     *
     * NOTE(review): dereferences the locator unconditionally, so this fails
     * with a NullPointerException if no locator has been installed --
     * confirm callers only use it while a document is being parsed.
     */
    public String getPublicId() {
        final Locator where = locator;
        return where.getPublicId();
    }

    /**
     * Returns the system identifier reported by the parser's locator.
     *
     * NOTE(review): dereferences the locator unconditionally, so this fails
     * with a NullPointerException if no locator has been installed --
     * confirm callers only use it while a document is being parsed.
     */
    public String getSystemId() {
        final Locator where = locator;
        return where.getSystemId();
    }

    /**
     * Picks a client locale for diagnostics: the message catalog is asked
     * for the first language in the list that this parser supports, and
     * that locale is then installed with {@link #setLocale(Locale)}.
     * Such a list could be provided by a variety of user preference
     * mechanisms, including the HTTP <em>Accept-Language</em> header field.
     *
     * @see com.sun.xml.rpc.sp.MessageCatalog
     *
     * @param languages Array of language specifiers, ordered with the most
     *	preferable one at the front.  For example, "en-ca" then "fr-ca",
     *	followed by "zh_CN".  Both RFC 1766 and Java styles are supported.
     * @return The chosen locale, or null.  When null is returned the
     *	current locale is left untouched.
     * @throws ParseException propagated from {@link #setLocale(Locale)}
     */
    public Locale chooseLocale(String languages[]) throws ParseException {
        final Locale chosen = messages.chooseLocale(languages);

        // Nothing suitable: report that, and keep the current setting.
        if (chosen == null) {
            return null;
        }

        setLocale(chosen);
        return chosen;
    }

    /**
     * Lets applications control entity resolution.
     *
     * @param r the resolver to use; when this is {@code null} at the time
     *	the parser is (re)initialized, a default resolver is installed
     */
    public void setEntityResolver(EntityResolver r) {
        this.resolver = r;
    }

    /**
     * Returns the object used to resolve entities.
     *
     * @return the current resolver; may be {@code null} if none has been
     *	set or installed yet
     */
    public EntityResolver getEntityResolver() {
        return this.resolver;
    }

    /**
     * Setting this flag enables faster processing of valid standalone
     * documents: external DTD information is not processed, and no
     * attribute normalization or defaulting is done.  This optimization
     * is only permitted in non-validating parsers; for validating
     * parsers, this mode is silently disabled.
     *
     * 

For documents which are declared as standalone, but which are * not valid, a fatal error may be reported for references to externally * defined entities. That could happen in any nonvalidating parser which * did not read externally defined entities. Also, if any attribute * values need normalization or defaulting, it will not be done. */ public void setFastStandalone(boolean value) { fastStandalone = value; } /** * Returns true if standalone documents skip processing of * all external DTD information. */ public boolean isFastStandalone() { return fastStandalone; } // makes sure the parser's reset to "before a document" private void init() { in = null; // alloc temporary data used in parsing attTmp = new AttributesExImpl(); strTmp = new StringBuffer(); nameTmp = new char[20]; nameCache = new NameCache(); if (namespace) { if (ns == null) ns = new NamespaceSupport(); else ns.reset(); } // reset doc info isStandalone = false; rootElementName = null; isInAttribute = false; inExternalPE = false; doLexicalPE = false; donePrologue = false; doneEpilogue = false; doneContent = false; attr = null; attrIndex = 0; startEmptyStack = true; entities.clear(); notations.clear(); params.clear(); elements.clear(); ignoreDeclarations = false; stack.clear(); piQueue.clear(); // initialize predefined references ... 
re-interpreted later builtin("amp", "&"); builtin("lt", "<"); builtin("gt", ">"); builtin("quot", "\""); builtin("apos", "'"); if (locale == null) locale = Locale.getDefault(); if (resolver == null) resolver = new Resolver(); } private void builtin(String entityName, String entityValue) { InternalEntity entity; entity = new InternalEntity(entityName, entityValue.toCharArray()); entities.put(entityName, entity); } // package private -- for subclass void afterRoot() throws ParseException { } // package private -- for subclass void afterDocument() { } // role is for diagnostics private void whitespace(String roleId) throws IOException, ParseException // [3] S ::= (#x20 | #x9 | #xd | #xa)+ { if (!maybeWhitespace()) fatal("P-004", new Object[] { messages.getMessage(locale, roleId)}); } // S? private boolean maybeWhitespace() throws IOException, ParseException { if (!(inExternalPE && doLexicalPE)) return in.maybeWhitespace(); // see getc() for the PE logic -- this lets us splice // expansions of PEs in "anywhere". getc() has smarts, // so for external PEs we don't bypass it. // we can marginally speed PE handling, and certainly // be cleaner (hence potentially more correct), by using // the observations that expanded PEs only start and stop // where whitespace is allowed. getc wouldn't need any // "lexical" PE expansion logic, and no other method needs // to handle termination of PEs. (parsing of literals would // still need to pop entities, but not parsing of references // in content.) char c = getc(); boolean saw = false; while (c == ' ' || c == '\t' || c == '\n' || c == '\r') { saw = true; // this gracefully ends things when we stop playing // with internal parameters. caller should have a // grammar rule allowing whitespace at end of entity. if (in.isEOF() && !in.isInternal()) return saw; c = getc(); } ungetc(); return saw; } private String maybeGetName() throws IOException, ParseException { NameCacheEntry entry = maybeGetNameCacheEntry(); return (entry == null) ? 
null : entry.name; } private NameCacheEntry maybeGetNameCacheEntry() throws IOException, ParseException { // [5] Name ::= (Letter|'_'|':') (Namechar)* char c = getc(); if (!XmlChars.isLetter(c) && c != ':' && c != '_') { ungetc(); return null; } return nameCharString(c); } // Used when parsing enumerations private String getNmtoken() throws ParseException, IOException { // [7] Nmtoken ::= (Namechar)+ char c = getc(); if (!XmlChars.isNameChar(c)) fatal("P-006", new Object[] { new Character(c)}); return nameCharString(c).name; } // n.b. this gets used when parsing attribute values (for // internal references) so we can't use strTmp; it's also // a hotspot for CPU and memory in the parser (called at least // once for each element) so this has been optimized a bit. private NameCacheEntry nameCharString(char c) throws IOException, ParseException { int i = 1; nameTmp[0] = c; for (;;) { if ((c = in.getNameChar()) == 0) break; if (i >= nameTmp.length) { char tmp[] = new char[nameTmp.length + 10]; System.arraycopy(nameTmp, 0, tmp, 0, nameTmp.length); nameTmp = tmp; } nameTmp[i++] = c; } return nameCache.lookupEntry(nameTmp, i); } // // much similarity between parsing entity values in DTD // and attribute values (in DTD or content) ... both follow // literal parsing rules, newline canonicalization, etc // // leaves value in 'strTmp' ... either a "replacement text" (4.5), // or else partially normalized attribute value (the first bit // of 3.3.3's spec, without the "if not CDATA" bits). // private void parseLiteral(boolean isEntityValue) throws IOException, ParseException { // [9] EntityValue ::= // '"' ([^"&%] | Reference | PEReference)* '"' // | "'" ([^'&%] | Reference | PEReference)* "'" // [10] AttValue ::= // '"' ([^"&] | Reference )* '"' // | "'" ([^'&] | Reference )* "'" // Only expand PEs in getc() when processing entity value literals // and do not expand when processing AttValue. Save state of // doLexicalPE and restore it before returning. 
boolean savedLexicalPE = doLexicalPE; doLexicalPE = isEntityValue; char quote = getc(); char c; InputEntity source = in; if (quote != '\'' && quote != '"') fatal("P-007"); // don't report entity expansions within attributes, // they're reported "fully expanded" via SAX isInAttribute = !isEntityValue; // get value into strTmp strTmp = new StringBuffer(); // scan, allowing entity push/pop wherever ... // expanded entities can't terminate the literal! for (;;) { if (in != source && in.isEOF()) { // we don't report end of parsed entities // within attributes (no SAX hooks) in = in.pop(); continue; } if ((c = getc()) == quote && in == source) break; // // Basically the "reference in attribute value" // row of the chart in section 4.4 of the spec // if (c == '&') { String entityName = maybeGetName(); if (entityName != null) { nextChar(';', "F-020", entityName); // 4.4 says: bypass these here ... we'll catch // forbidden refs to unparsed entities on use if (isEntityValue) { strTmp.append('&'); strTmp.append(entityName); strTmp.append(';'); continue; } expandEntityInLiteral(entityName, entities, isEntityValue); // character references are always included immediately } else if ((c = getc()) == '#') { int tmp = parseCharNumber(); if (tmp > 0xffff) { tmp = surrogatesToCharTmp(tmp); strTmp.append(charTmp[0]); if (tmp == 2) strTmp.append(charTmp[1]); } else strTmp.append((char) tmp); } else fatal("P-009"); continue; } // expand parameter entities only within entity value literals if (c == '%' && isEntityValue) { String entityName = maybeGetName(); if (entityName != null) { nextChar(';', "F-021", entityName); if (inExternalPE) expandEntityInLiteral( entityName, params, isEntityValue); else fatal("P-010", new Object[] { entityName }); continue; } else fatal("P-011"); } // For attribute values ... if (!isEntityValue) { // 3.3.3 says whitespace normalizes to space... 
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') { strTmp.append(' '); continue; } // "<" not legal in parsed literals ... if (c == '<') fatal("P-012"); } strTmp.append(c); } isInAttribute = false; doLexicalPE = savedLexicalPE; } // does a SINGLE expansion of the entity (often reparsed later) private void expandEntityInLiteral( String name, SimpleHashtable table, boolean isEntityValue) throws ParseException, IOException { Object entity = table.get(name); // // Note: if entity is a PE (value.isPE) there is an XML // requirement that the content be "markkupdecl", but that error // is ignored here (as permitted by the XML spec). // if (entity instanceof InternalEntity) { InternalEntity value = (InternalEntity) entity; pushReader(value.buf, name, !value.isPE); } else if (entity instanceof ExternalEntity) { if (!isEntityValue) // must be a PE ... fatal("P-013", new Object[] { name }); // if this returns false ... pushReader((ExternalEntity) entity); } else if (entity == null) { // // Note: much confusion about whether spec requires such // errors to be fatal in many cases, but none about whether // it allows "normal" errors to be unrecoverable! // fatal((table == params) ? "V-022" : "P-014", new Object[] { name }); } } // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") // for PUBLIC and SYSTEM literals, also "' // NOTE: XML spec should explicitly say that PE ref syntax is // ignored in PIs, comments, SystemLiterals, and Pubid Literal // values ... can't process the XML spec's own DTD without doing // that for comments. 
private String getQuotedString(String type, String extra)
    throws IOException, ParseException {
    // Reads a quoted string and returns its contents without the quotes.
    // 'type' is a message id and 'extra' its argument; both are used only
    // to build the P-015 diagnostic when no opening quote is found.

    // use in.getc to bypass PE processing
    char quote = in.getc();

    if (quote != '\'' && quote != '"')
        fatal(
            "P-015",
            new Object[] {
                 messages.getMessage(locale, type, new Object[] { extra })
            });

    char c;

    strTmp = new StringBuffer();
    while ((c = in.getc()) != quote)
        strTmp.append(c);
    return strTmp.toString();
}

// Reads a public identifier literal, rejects any character outside the
// PubidChar set with P-016, and returns the result of normalize(false).
private String parsePublicId() throws IOException, ParseException {
    // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
    // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
    String retval = getQuotedString("F-033", null);

    for (int i = 0; i < retval.length(); i++) {
        char c = retval.charAt(i);

        if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
            && !(c >= 'A' && c <= 'Z')
            && !(c >= 'a' && c <= 'z'))
            fatal("P-016", new Object[] { Character.valueOf(c) });
    }

    // normalize() works on strTmp, so hand it the validated literal
    strTmp = new StringBuffer();
    strTmp.append(retval);
    return normalize(false);
}

// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
// handled by:  InputEntity.parsedContent()

private boolean maybeComment(boolean skipStart) throws IOException, ParseException { // [15] Comment ::= '' if (!in.peek(skipStart ? "!--" : "