com.sun.xml.rpc.sp.Parser Maven / Gradle / Ivy
Show all versions of webservices-rt Show documentation
/*
* Copyright (c) 1997, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This program and the accompanying materials are made available under the
* terms of the Eclipse Public License v. 2.0, which is available at
* http://www.eclipse.org/legal/epl-2.0.
*
* This Source Code may also be made available under the following Secondary
* Licenses when the conditions for such availability set forth in the
* Eclipse Public License v. 2.0 are satisfied: GNU General Public License,
* version 2 with the GNU Classpath Exception, which is available at
* https://www.gnu.org/software/classpath/license.html.
*
* SPDX-License-Identifier: EPL-2.0 OR GPL-2.0 WITH Classpath-exception-2.0
*/
package com.sun.xml.rpc.sp;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
//
// NOTE: when maintaining this code, take care to keep the message
// catalogue(s) up to date!! It's important that the diagnostics
// be informative.
//
/**
* This implements a fast non-validating top down parser. This one always
* processes external parsed entities, strictly adheres to the XML 1.0
* specification, and provides useful diagnostics. It supports an optimization
* allowing faster processing of valid standalone XML documents. For
* multi-language applications (such as web servers using XML processing
* to create dynamic content), a method supports choosing a locale for
* parser diagnostics which is both understood by the message recipient
* and supported by the parser.
*
* @author David Brownell
* @author Zhenghua Li
* @author JAX-RPC RI Development Team
*/
public final class Parser {
// these are the name and value of the most
// recently parsed item; they are exposed read-only through
// getCurName() and getCurValue()
private String curName = null;
private String curValue = null;
// namespace support; exposed through getCurURI()
private String curURI = null;
// stack of input entities being merged; this field is the entity
// currently being read (parseLiteral pops it via in.pop() when a
// nested entity reaches EOF)
private InputEntity in;
// temporaries reused during parsing
private AttributesExImpl attTmp;
private String[] parts = new String[3];
// accumulates literal text; parseLiteral() and getQuotedString()
// each allocate a fresh buffer and leave their result here
private StringBuffer strTmp;
// scratch buffer for name characters collected by nameCharString();
// init() allocates 20 chars and nameCharString() grows it on demand
private char nameTmp[];
// maps the characters collected in nameTmp to a NameCacheEntry
private NameCache nameCache;
// receives the char(s) written by surrogatesToCharTmp() for
// character references above 0xFFFF (see parseLiteral)
private char charTmp[] = new char[2];
// namespace support; init() only creates or resets 'ns' when
// 'namespace' is true
private boolean namespace = false;
private NamespaceSupport ns = null;
// parsing modes
// true only while parseLiteral() is reading an attribute value
private boolean isInAttribute = false;
// temporary DTD parsing state
private boolean inExternalPE;
// when true, getc() expands parameter entities (see the notes in
// maybeWhitespace() and parseLiteral())
private boolean doLexicalPE;
private boolean donePrologue;
private boolean doneEpilogue;
private boolean doneContent;
private AttributesExImpl attr = null;
private int attrIndex = 0;
private boolean startEmptyStack = true;
// info about the document
private boolean isStandalone;
private String rootElementName;
// DTD state, used during parsing
private boolean ignoreDeclarations;
private SimpleHashtable elements = new SimpleHashtable(47);
// parameter entities by name; consulted for "%name;" references
// inside entity value literals
private SimpleHashtable params = new SimpleHashtable(7);
// exposed to package-private subclass
// NOTE(review): Parser is declared final in this file, so no subclass
// can exist here -- confirm whether package-private access is still needed
Map notations = new HashMap(7);
// general entities by name; init() preloads the five predefined
// ones (amp, lt, gt, quot, apos)
SimpleHashtable entities = new SimpleHashtable(17);
// string constants -- use these copies so "==" works
// package private
static final String strANY = "ANY";
static final String strEMPTY = "EMPTY";
// locale for diagnostics; init() falls back to Locale.getDefault()
// when this is still null
private Locale locale;
// entity resolver; init() installs a new Resolver when this is still null
private EntityResolver resolver;
// position source behind getLineNumber(), getColumnNumber(),
// getPublicId() and getSystemId(); not assigned anywhere in this part
// of the file
Locator locator;
// see setFastStandalone()
private boolean fastStandalone = false;
////////////////////////////////////////////////////////////////
//
// PARSER methods
//
////////////////////////////////////////////////////////////////
/**
* Used by applications to request locale for diagnostics.
*
* @param l The locale to use, or null to use system defaults
* (which may include only message IDs).
* @throws ParseException If no diagnostic messages are available
* in that locale.
*/
public void setLocale(Locale l) throws ParseException {
    // A null locale is always accepted; only a non-null locale is
    // checked against the message catalog.
    final boolean unsupported =
        l != null && !messages.isLocaleSupported(l.toString());
    if (unsupported) {
        // The complaint is formatted with the locale currently in effect.
        fatal(messages.getMessage(locale, "P-078", new Object[] { l }));
    }
    locale = l;
}
/** Returns the diagnostic locale. */
public Locale getLocale() {
    // May be null until setLocale() or init() has assigned a value.
    return this.locale;
}
/**
 * Returns the name of the most recently parsed item.
 *
 * @return the current name; null until one has been recorded
 */
public String getCurName() {
    return this.curName;
}
/**
 * Returns the namespace-support URI recorded for the current item.
 *
 * @return the current URI; null until one has been recorded
 */
public String getCurURI() {
    return this.curURI;
}
/**
 * Returns the value of the most recently parsed item.
 *
 * @return the current value; null until one has been recorded
 */
public String getCurValue() {
    return this.curValue;
}
/**
 * Returns the line number reported by this parser's {@code locator}.
 *
 * @return whatever {@code locator.getLineNumber()} reports
 */
public int getLineNumber() {
    return this.locator.getLineNumber();
}
/**
 * Returns the column number reported by this parser's {@code locator}.
 *
 * @return whatever {@code locator.getColumnNumber()} reports
 */
public int getColumnNumber() {
    return this.locator.getColumnNumber();
}
/**
 * Returns the public identifier reported by this parser's {@code locator}.
 *
 * @return whatever {@code locator.getPublicId()} reports
 */
public String getPublicId() {
    return this.locator.getPublicId();
}
/**
 * Returns the system identifier reported by this parser's {@code locator}.
 *
 * @return whatever {@code locator.getSystemId()} reports
 */
public String getSystemId() {
    return this.locator.getSystemId();
}
/**
* Chooses a client locale to use for diagnostics, using the first
* language specified in the list that is supported by this parser.
* That locale is then set using
* setLocale(). Such a list could be provided by a variety of user
* preference mechanisms, including the HTTP Accept-Language
* header field.
*
* @see com.sun.xml.rpc.sp.MessageCatalog
*
* @param languages Array of language specifiers, ordered with the most
* preferable one at the front. For example, "en-ca" then "fr-ca",
* followed by "zh_CN". Both RFC 1766 and Java styles are supported.
* @return The chosen locale, or null.
*/
public Locale chooseLocale(String languages[]) throws ParseException {
    final Locale chosen = messages.chooseLocale(languages);
    if (chosen == null) {
        // Nothing was selected: the diagnostic locale is left untouched.
        return null;
    }
    setLocale(chosen);
    return chosen;
}
/** Lets applications control entity resolution. */
public void setEntityResolver(EntityResolver r) {
    // If this is left null, init() substitutes a new Resolver.
    this.resolver = r;
}
/** Returns the object used to resolve entities */
public EntityResolver getEntityResolver() {
    // May be null until setEntityResolver() or init() has assigned one.
    return this.resolver;
}
/**
* Setting this flag enables faster processing of valid standalone
* documents: external DTD information is not processed, and no
* attribute normalization or defaulting is done. This optimization
* is only permitted in non-validating parsers; for validating
* parsers, this mode is silently disabled.
*
* For documents which are declared as standalone, but which are
* not valid, a fatal error may be reported for references to externally
* defined entities. That could happen in any nonvalidating parser which
* did not read externally defined entities. Also, if any attribute
* values need normalization or defaulting, it will not be done.
*/
public void setFastStandalone(boolean value) {
    // Only records the request; nothing else happens here.
    this.fastStandalone = value;
}
/**
* Returns true if standalone documents skip processing of
* all external DTD information.
*/
public boolean isFastStandalone() {
    // Reports the flag last stored by setFastStandalone() (false by default).
    return this.fastStandalone;
}
// makes sure the parser's reset to "before a document"
private void init() {
    // no input entity is active before a document starts
    in = null;

    // fresh scratch objects used while parsing
    attTmp = new AttributesExImpl();
    strTmp = new StringBuffer();
    nameTmp = new char[20];
    nameCache = new NameCache();

    // namespace tracking is created lazily and reset on reuse
    if (namespace) {
        if (ns != null) {
            ns.reset();
        } else {
            ns = new NamespaceSupport();
        }
    }

    // per-document information
    isStandalone = false;
    rootElementName = null;

    // parsing-mode and progress flags
    isInAttribute = false;
    inExternalPE = false;
    doLexicalPE = false;
    donePrologue = false;
    doneEpilogue = false;
    doneContent = false;
    attr = null;
    attrIndex = 0;
    startEmptyStack = true;

    // drop every declaration and queue left from a previous document
    entities.clear();
    notations.clear();
    params.clear();
    elements.clear();
    ignoreDeclarations = false;
    stack.clear();
    piQueue.clear();

    // predefined references ... re-interpreted later
    final String[][] predefined = {
        { "amp", "&" },
        { "lt", "<" },
        { "gt", ">" },
        { "quot", "\"" },
        { "apos", "'" }
    };
    for (int i = 0; i < predefined.length; i++) {
        builtin(predefined[i][0], predefined[i][1]);
    }

    // defaults for anything the application did not configure
    if (locale == null) {
        locale = Locale.getDefault();
    }
    if (resolver == null) {
        resolver = new Resolver();
    }
}
/**
 * Registers an internal entity under {@code entityName} in the
 * {@code entities} table.
 *
 * @param entityName name the entity is stored under
 * @param entityValue text of the entity, stored as a char array
 */
private void builtin(String entityName, String entityValue) {
    final char[] replacement = entityValue.toCharArray();
    entities.put(entityName, new InternalEntity(entityName, replacement));
}
// package private -- for subclass
/**
 * Hook with an intentionally empty body. It declares
 * {@link ParseException} so that an implementation could report errors.
 * Presumably invoked once the root element has been processed; the call
 * site is outside this part of the file -- confirm.
 *
 * NOTE(review): Parser is declared final in this file, so nothing can
 * override this hook here; confirm whether it is still needed.
 */
void afterRoot() throws ParseException {
}
// package private -- for subclass
/**
 * Hook with an intentionally empty body. Presumably invoked once the
 * whole document has been processed; the call site is outside this part
 * of the file -- confirm.
 *
 * NOTE(review): Parser is declared final in this file, so nothing can
 * override this hook here; confirm whether it is still needed.
 */
void afterDocument() {
}
// role is for diagnostics
private void whitespace(String roleId) throws IOException, ParseException {
    // [3] S ::= (#x20 | #x9 | #xd | #xa)+
    if (maybeWhitespace()) {
        return;
    }
    // Required whitespace is missing: the role text is looked up in the
    // message catalog and passed as the parameter of diagnostic P-004.
    final String role = messages.getMessage(locale, roleId);
    fatal("P-004", new Object[] { role });
}
// S?
private boolean maybeWhitespace() throws IOException, ParseException {
    // Unless we are lexically expanding PEs inside an external PE, the
    // input entity can skip the whitespace itself.
    if (!inExternalPE || !doLexicalPE) {
        return in.maybeWhitespace();
    }

    // Otherwise go through getc() (see its PE logic) so that expansions
    // of PEs can be spliced in "anywhere"; getc() has smarts, so for
    // external PEs it is not bypassed.
    //
    // PE handling could be marginally faster, and certainly cleaner
    // (hence potentially more correct), by relying on the observation
    // that expanded PEs only start and stop where whitespace is allowed:
    // getc() would need no "lexical" PE expansion logic and no other
    // method would need to handle termination of PEs. (Parsing of
    // literals would still need to pop entities, but parsing of
    // references in content would not.)
    boolean sawWhitespace = false;
    for (char ch = getc();
            ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
            ch = getc()) {
        sawWhitespace = true;
        // End gracefully when we stop playing with internal parameters;
        // the caller should have a grammar rule allowing whitespace at
        // the end of an entity.
        if (in.isEOF() && !in.isInternal()) {
            return sawWhitespace;
        }
    }
    // the last character read was not whitespace: push it back
    ungetc();
    return sawWhitespace;
}
/**
 * Reads a Name if one starts at the current position.
 *
 * @return the name text, or null when maybeGetNameCacheEntry() found none
 */
private String maybeGetName() throws IOException, ParseException {
    final NameCacheEntry cached = maybeGetNameCacheEntry();
    if (cached == null) {
        return null;
    }
    return cached.name;
}
private NameCacheEntry maybeGetNameCacheEntry()
        throws IOException, ParseException {
    // [5] Name ::= (Letter|'_'|':') (Namechar)*
    final char first = getc();
    if (XmlChars.isLetter(first) || first == ':' || first == '_') {
        return nameCharString(first);
    }
    // Not a name start character: push it back and report "no name".
    ungetc();
    return null;
}
// Used when parsing enumerations
/**
 * Reads an Nmtoken starting at the current position.
 *
 * @return the token text
 * @throws ParseException declared for the diagnostics raised through
 *         fatal() (P-006 when the first character is not a name character)
 * @throws IOException declared for failures while reading input
 */
private String getNmtoken() throws ParseException, IOException {
    // [7] Nmtoken ::= (Namechar)+
    char c = getc();
    if (!XmlChars.isNameChar(c)) {
        // Character.valueOf replaces the deprecated Character(char)
        // constructor; the boxed value is only a message parameter.
        fatal("P-006", new Object[] { Character.valueOf(c) });
    }
    return nameCharString(c).name;
}
// n.b. this gets used when parsing attribute values (for
// internal references) so we can't use strTmp; it's also
// a hotspot for CPU and memory in the parser (called at least
// once for each element) so this has been optimized a bit.
private NameCacheEntry nameCharString(char c)
        throws IOException, ParseException {
    // The caller has already consumed and validated the first character.
    nameTmp[0] = c;
    int length = 1;

    // in.getNameChar() yields 0 once the next character is not a name
    // character, which ends the name.
    char next;
    while ((next = in.getNameChar()) != 0) {
        if (length >= nameTmp.length) {
            // buffer is full: enlarge it by ten characters
            char[] grown = new char[nameTmp.length + 10];
            System.arraycopy(nameTmp, 0, grown, 0, nameTmp.length);
            nameTmp = grown;
        }
        nameTmp[length++] = next;
    }
    return nameCache.lookupEntry(nameTmp, length);
}
//
// much similarity between parsing entity values in DTD
// and attribute values (in DTD or content) ... both follow
// literal parsing rules, newline canonicalization, etc
//
// leaves value in 'strTmp' ... either a "replacement text" (4.5),
// or else partially normalized attribute value (the first bit
// of 3.3.3's spec, without the "if not CDATA" bits).
//
private void parseLiteral(boolean isEntityValue)
throws IOException, ParseException {
// Reads one quoted literal and leaves its text in strTmp.
// isEntityValue == true  : parse as EntityValue [9]
// isEntityValue == false : parse as AttValue [10]
// [9] EntityValue ::=
// '"' ([^"&%] | Reference | PEReference)* '"'
// | "'" ([^'&%] | Reference | PEReference)* "'"
// [10] AttValue ::=
// '"' ([^"&] | Reference )* '"'
// | "'" ([^'&] | Reference )* "'"
// Only expand PEs in getc() when processing entity value literals
// and do not expand when processing AttValue. Save state of
// doLexicalPE and restore it before returning.
boolean savedLexicalPE = doLexicalPE;
doLexicalPE = isEntityValue;
char quote = getc();
char c;
// remember the entity the literal started in: only a quote read
// while 'in' is this same entity may close the literal
InputEntity source = in;
if (quote != '\'' && quote != '"')
fatal("P-007");
// don't report entity expansions within attributes,
// they're reported "fully expanded" via SAX
isInAttribute = !isEntityValue;
// get value into strTmp
strTmp = new StringBuffer();
// scan, allowing entity push/pop wherever ...
// expanded entities can't terminate the literal!
for (;;) {
if (in != source && in.isEOF()) {
// we don't report end of parsed entities
// within attributes (no SAX hooks)
in = in.pop();
continue;
}
// the matching quote ends the literal only at the original
// entity level; inside an expanded entity it is ordinary text
if ((c = getc()) == quote && in == source)
break;
//
// Basically the "reference in attribute value"
// row of the chart in section 4.4 of the spec
//
if (c == '&') {
String entityName = maybeGetName();
if (entityName != null) {
// presumably requires the terminating ';' and reports
// F-020 otherwise -- nextChar() is defined elsewhere
nextChar(';', "F-020", entityName);
// 4.4 says: bypass these here ... we'll catch
// forbidden refs to unparsed entities on use
if (isEntityValue) {
// keep the general entity reference unexpanded
// in the entity's replacement text
strTmp.append('&');
strTmp.append(entityName);
strTmp.append(';');
continue;
}
// attribute value: expand the general entity in place
expandEntityInLiteral(entityName, entities, isEntityValue);
// character references are always included immediately
} else if ((c = getc()) == '#') {
int tmp = parseCharNumber();
if (tmp > 0xffff) {
// value does not fit in one char: surrogatesToCharTmp()
// fills charTmp; a result of 2 means charTmp[1]
// is appended as well
tmp = surrogatesToCharTmp(tmp);
strTmp.append(charTmp[0]);
if (tmp == 2)
strTmp.append(charTmp[1]);
} else
strTmp.append((char) tmp);
} else
// '&' followed by neither a name nor '#'
fatal("P-009");
continue;
}
// expand parameter entities only within entity value literals
if (c == '%' && isEntityValue) {
String entityName = maybeGetName();
if (entityName != null) {
nextChar(';', "F-021", entityName);
// PE references are expanded only while inside an
// external PE; otherwise P-010 is reported
if (inExternalPE)
expandEntityInLiteral(
entityName,
params,
isEntityValue);
else
fatal("P-010", new Object[] { entityName });
continue;
} else
// '%' not followed by a name
fatal("P-011");
}
// For attribute values ...
if (!isEntityValue) {
// 3.3.3 says whitespace normalizes to space...
if (c == ' ' || c == '\t' || c == '\n' || c == '\r') {
strTmp.append(' ');
continue;
}
// "<" not legal in parsed literals ...
if (c == '<')
fatal("P-012");
}
// any other character is copied through unchanged
strTmp.append(c);
}
// restore the modes changed at the top of this method
isInAttribute = false;
doLexicalPE = savedLexicalPE;
}
// does a SINGLE expansion of the entity (often reparsed later)
private void expandEntityInLiteral(
        String name,
        SimpleHashtable table,
        boolean isEntityValue)
        throws ParseException, IOException {
    final Object found = table.get(name);

    if (found == null) {
        //
        // Note: much confusion about whether the spec requires such
        // errors to be fatal in many cases, but none about whether
        // it allows "normal" errors to be unrecoverable!
        //
        final String messageId = (table == params) ? "V-022" : "P-014";
        fatal(messageId, new Object[] { name });
        return;
    }

    //
    // Note: if the entity is a PE (isPE) there is an XML requirement
    // that the content be "markupdecl", but that error is ignored
    // here (as permitted by the XML spec).
    //
    if (found instanceof InternalEntity) {
        final InternalEntity internal = (InternalEntity) found;
        pushReader(internal.buf, name, !internal.isPE);
        return;
    }

    if (found instanceof ExternalEntity) {
        // outside an entity value this is not a PE reference
        if (!isEntityValue) {
            fatal("P-013", new Object[] { name });
        }
        // the result of pushReader() is deliberately not examined
        pushReader((ExternalEntity) found);
    }
}
// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
// used for both PUBLIC and SYSTEM literals; either quote character (" or ') may delimit the value
// NOTE: XML spec should explicitly say that PE ref syntax is
// ignored in PIs, comments, SystemLiterals, and Pubid Literal
// values ... can't process the XML spec's own DTD without doing
// that for comments.
private String getQuotedString(String type, String extra)
        throws IOException, ParseException {
    // in.getc() is called directly to bypass PE processing
    final char delimiter = in.getc();
    if (!(delimiter == '\'' || delimiter == '"')) {
        // describe what was being read, then report P-015
        final String what =
            messages.getMessage(locale, type, new Object[] { extra });
        fatal("P-015", new Object[] { what });
    }

    // collect everything up to the matching delimiter into strTmp
    strTmp = new StringBuffer();
    for (char ch = in.getc(); ch != delimiter; ch = in.getc()) {
        strTmp.append(ch);
    }
    return strTmp.toString();
}
/**
 * Reads a quoted public identifier, checks that every character is a
 * legal PubidChar, and returns the result of {@code normalize(false)}
 * applied to it.
 *
 * @return the value produced by normalize(false); presumably the
 *         whitespace-normalized identifier -- confirm against normalize()
 * @throws ParseException declared for the diagnostics raised through
 *         fatal() (P-016 names the first offending character)
 * @throws IOException declared for failures while reading input
 */
private String parsePublicId() throws IOException, ParseException {
    // [12] PubidLiteral ::= ('"' PubidChar* '"') | ("'" PubidChar* "'")
    // [13] PubidChar ::= #x20|#xd|#xa|[a-zA-Z0-9]|[-'()+,./:=?;!*#@$_%]
    String retval = getQuotedString("F-033", null);
    for (int i = 0; i < retval.length(); i++) {
        char c = retval.charAt(i);
        if (" \r\n-'()+,./:=?;!*#@$_%0123456789".indexOf(c) == -1
            && !(c >= 'A' && c <= 'Z')
            && !(c >= 'a' && c <= 'z')) {
            // Character.valueOf replaces the deprecated Character(char)
            // constructor; the boxed value is only a message parameter.
            fatal("P-016", new Object[] { Character.valueOf(c) });
        }
    }
    // normalize() is given the literal through strTmp
    strTmp = new StringBuffer();
    strTmp.append(retval);
    return normalize(false);
}
// [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
// handled by: InputEntity.parsedContent()
private boolean maybeComment(boolean skipStart)
throws IOException, ParseException {
// [15] Comment ::= ''
if (!in.peek(skipStart ? "!--" : "