
org.enhydra.xml.xmlc.parsers.xerces.XercesParser Maven / Gradle / Ivy
The newest version!
/*
* enhydra Java Application Server Project
*
* The contents of this file are subject to the Enhydra Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License on
* the Enhydra web site ( http://www.enhydra.org/ ).
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific terms governing rights and limitations
* under the License.
*
* The Initial Developer of the Enhydra Application Server is Lutris
* Technologies, Inc. The Enhydra Application Server and portions created
* by Lutris Technologies, Inc. are Copyright Lutris Technologies, Inc.
* All Rights Reserved.
*
* Contributor(s):
*
* $Id: XercesParser.java,v 1.3 2005/01/26 08:29:24 jkjome Exp $
*/
package org.enhydra.xml.xmlc.parsers.xerces;
import java.io.IOException;
import org.enhydra.apache.xerces.framework.XMLAttrList;
import org.enhydra.apache.xerces.framework.XMLContentSpec;
import org.enhydra.apache.xerces.framework.XMLDocumentHandler;
import org.enhydra.apache.xerces.framework.XMLParser;
import org.enhydra.apache.xerces.readers.XMLEntityHandler;
import org.enhydra.apache.xerces.utils.QName;
import org.enhydra.apache.xerces.validators.common.XMLAttributeDecl;
import org.enhydra.xml.io.ErrorReporter;
import org.enhydra.xml.io.XMLEntityResolver;
import org.enhydra.xml.xmlc.XMLCError;
import org.enhydra.xml.xmlc.XMLCException;
import org.enhydra.xml.xmlc.dom.XMLCDocument;
import org.enhydra.xml.xmlc.dom.XMLCDomFactory;
import org.enhydra.xml.xmlc.metadata.MetaData;
import org.enhydra.xml.xmlc.metadata.Parser;
import org.enhydra.xml.xmlc.misc.LineNumberMap;
import org.enhydra.xml.xmlc.parsers.DocBuilder;
import org.enhydra.xml.xmlc.parsers.ParseTracer;
import org.enhydra.xml.xmlc.parsers.XMLCParser;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/*
 * FIXME:
 * - Decide how to handle built-in entities (see the Xerces DOMParser).
 * - Split the handlers and entity resolver into inner classes, or turn
 *   this into a minimal parser, or just use the native DOMParser
 *   (however, it must work with other DOMs).
 * - The DOM builder doesn't build ElementDecl nodes correctly.
 * - Need a hack to org.enhydra.apache.xerces.dom.EntityReferenceImpl so
 *   that entity references are not created read-only. Need to figure out
 *   how to make this conform to what Xerces does (likewise for Entity
 *   nodes).
 */
/**
* Parse using the Xerces parser.
*/
public class XercesParser extends XMLParser
implements XMLCParser, EntityResolver,
XMLDocumentHandler, XMLDocumentHandler.DTDHandler {
/**
* Error handler class that handles mapping of line numbers
*/
private class MappingErrorHandler implements ErrorHandler {
private ErrorReporter fErrorReporter;
private LineNumberMap fLineNumberMap;
/**
* Constructor.
*/
public MappingErrorHandler(ErrorReporter errorReporter,
LineNumberMap lineNumberMap) {
fErrorReporter = errorReporter;
fLineNumberMap = lineNumberMap;
}
/**
* Wrap a SAXParseException with a SAXParseException with the
* location mapped. If there is no line number map, return the
* exception unchanged.
*/
private SAXParseException mapException(SAXParseException exception) {
if (fLineNumberMap == null) {
return exception;
} else {
LineNumberMap.Line line
= fLineNumberMap.getLineFromLineNum(exception.getLineNumber());
return new SAXParseException(exception.getMessage(),
exception.getPublicId(),
line.getFileName(),
line.getLineNum(),
exception.getColumnNumber(),
exception);
}
}
/**
* Receive notification of a SAX recoverable error.
*/
public void error(SAXParseException exception) throws SAXException {
fErrorReporter.error(mapException(exception));
}
/**
* Receive notification of a SAX warning.
*/
public void warning(SAXParseException exception) throws SAXException {
fErrorReporter.warning(mapException(exception));
}
/**
* Receive notification of a SAX non-recoverable error.
*/
public void fatalError(SAXParseException exception) throws SAXException {
fErrorReporter.fatalError(mapException(exception));
}
}
/**
 * Verbose tracing. Created anew by each call to parse() and invoked
 * from every parser callback in this class.
 */
private XercesTracer fTracer;
/**
 * The document builder. Created anew by each call to parse(); the
 * callbacks feed it and parse() returns the document it produced.
 */
private DocBuilder fDocBuilder;
/**
 * Is a CDATASection being processed? Set by startCDATA(), cleared by
 * endCDATA(), and consulted by characters(char[], int, int).
 */
private boolean fProcessingCDATASection = false;
/**
 * Currently processing a document.
 */
private static final int PROCESSING_DOCUMENT = 0;
/**
 * Currently processing the external subset.
 */
private static final int PROCESSING_EXTERNAL_SUBSET = 1;
/**
 * Currently processing the internal subset.
 */
private static final int PROCESSING_INTERNAL_SUBSET = 2;
/**
 * Part of the document being processed; one of the PROCESSING_*
 * constants above. Changed by startDTD() and endDTD().
 */
private int fProcessingState;
/**
 * Classpath/XCatalog entity resolver. Created by parse() and used by
 * resolveEntity(), which tolerates it being null.
 */
private XMLEntityResolver fResolver;
/*
 * String pool indexes of built-in character entities
 * (amp, lt, gt, apos, quot), assigned by initCharEntity().
 */
private int fAmpIndex;
private int fLtIndex;
private int fGtIndex;
private int fAposIndex;
private int fQuotIndex;
/**
 * Parse a document with Xerces and build an XMLC document from it.
 *
 * All per-parse state of this object is reinitialized first, so that a
 * previous parse that terminated abnormally cannot influence this one.
 *
 * @param input source of the document to parse.
 * @param lineNumberMap if not null, parse errors are reported with their
 *        location translated through this map; if null, errors go
 *        directly to <code>errorReporter</code>.
 * @param domFactory factory handed to the document builder.
 * @param metaData supplies the parser settings (validation flag and
 *        XCatalog URLs).
 * @param errorReporter receiver of warnings and errors.
 * @param tracer parse tracer; it is dereferenced unconditionally, so it
 *        must not be null.
 * @return the document produced by the document builder.
 * @see XMLCParser
 */
public XMLCDocument parse(InputSource input,
                          LineNumberMap lineNumberMap,
                          XMLCDomFactory domFactory,
                          MetaData metaData,
                          ErrorReporter errorReporter,
                          ParseTracer tracer)
        throws IOException, XMLCException, SAXException {
    Parser parser = metaData.getParser();

    // Reset every piece of per-parse state. The CDATA flag must be
    // cleared too: if an earlier parse aborted inside a CDATA section,
    // endCDATA() was never called and the flag would still be set.
    fTracer = new XercesTracer(fStringPool, tracer);
    fDocBuilder = new DocBuilder(domFactory);
    fProcessingState = PROCESSING_DOCUMENT;
    fProcessingCDATASection = false;

    // Configure parser.
    initCharEntity();
    initHandlers(true, this, this);
    setEntityResolver(this);
    if (lineNumberMap != null) {
        setErrorHandler(new MappingErrorHandler(errorReporter,
                                                lineNumberMap));
    } else {
        setErrorHandler(errorReporter);
    }
    setAllowJavaEncodings(true);
    setNamespaces(true);
    // Validation defaults to on when the metadata does not specify it.
    Boolean validate = parser.getValidate();
    setValidation((validate == null) ? true : validate.booleanValue());

    // Setup entity resolver
    fResolver = new XMLEntityResolver();
    if (tracer.enabled()) {
        fResolver.setDebugWriter(tracer);
    }

    // Add defaults before adding catalogs so defaults can be overridden.
    fResolver.setDefaultResolving();
    String[] xCatalog = parser.getXCatalogURLs();
    for (int idx = 0; idx < xCatalog.length; idx++) {
        fResolver.loadCatalog(new InputSource(xCatalog[idx]));
    }

    super.parse(input);
    fDocBuilder.finish();
    return fDocBuilder.getDocument();
}
/**
 * Look up the string stored in the parser's string pool under the
 * given index.
 *
 * @param index string pool index.
 * @return whatever the string pool returns for that index.
 */
private String getString(int index) {
    final String pooled = fStringPool.toString(index);
    return pooled;
}
/**
 * Register the names of the five built-in character entities
 * (amp, lt, gt, apos, quot) as symbols in the string pool and remember
 * the index of each one for use by isCharEntity().
 */
private void initCharEntity() {
    fAmpIndex  = fStringPool.addSymbol("amp");
    fLtIndex   = fStringPool.addSymbol("lt");
    fGtIndex   = fStringPool.addSymbol("gt");
    fAposIndex = fStringPool.addSymbol("apos");
    fQuotIndex = fStringPool.addSymbol("quot");
}
/**
 * Determine if an entity is one of the standard character entities.
 *
 * @param entityName string pool index of the entity name.
 * @return true if the index equals one of the indexes recorded by
 *         initCharEntity(); false otherwise.
 */
boolean isCharEntity(int entityName) {
    if (entityName == fAmpIndex) {
        return true;
    }
    if (entityName == fGtIndex) {
        return true;
    }
    if (entityName == fLtIndex) {
        return true;
    }
    if (entityName == fAposIndex) {
        return true;
    }
    return entityName == fQuotIndex;
}
/**
 * Resolve an entity by delegating to the configured resolver, if any,
 * and trace the outcome.
 *
 * @return the source supplied by the resolver, or null when no
 *         resolver is configured.
 * @see EntityResolver#resolveEntity
 */
public InputSource resolveEntity(String publicId, String systemId)
        throws SAXException, IOException {
    final InputSource resolved = (fResolver == null)
        ? null
        : fResolver.resolveEntity(publicId, systemId);
    fTracer.resolveEntity(publicId, systemId, resolved);
    return resolved;
}
/**
 * Handle start of document. The event is only traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startDocument
 */
public void startDocument()
        throws Exception {
    fTracer.startDocument();
}
/**
 * Handle end of document. The event is only traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endDocument
 */
public void endDocument()
        throws Exception {
    fTracer.endDocument();
}
/**
 * Handle the XML declaration: trace it, then record the version and
 * encoding strings in the document builder. The standalone value is
 * traced but not otherwise used.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#xmlDecl
 */
public void xmlDecl(int version,
                    int encoding,
                    int standalone) throws Exception {
    fTracer.xmlDecl(version, encoding, standalone);

    final String versionText = getString(version);
    fDocBuilder.setXMLVersion(versionText);

    final String encodingText = getString(encoding);
    fDocBuilder.setEncoding(encodingText);
}
/**
 * Handle a text declaration. The event is only traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#textDecl
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#textDecl
 */
public void textDecl(int version,
                     int encoding) throws Exception {
    fTracer.textDecl(version, encoding);
}
/**
 * Handle start of a namespace declaration scope. The event is only
 * traced; nothing else is needed because the URI is carried in the
 * QName.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startNamespaceDeclScope
 */
public void startNamespaceDeclScope(int prefix, int uri)
        throws Exception {
    fTracer.startNamespaceDeclScope(prefix, uri);
}
/**
 * Handle end of a namespace declaration scope. The event is only
 * traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endNamespaceDeclScope
 */
public void endNamespaceDeclScope(int prefix)
        throws Exception {
    fTracer.endNamespaceDeclScope(prefix);
}
/**
 * Handle start of element: trace it, open the element in the document
 * builder using its namespace URI and raw name, then add each attribute
 * of the attribute list that is flagged as specified.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startElement
 */
public void startElement(QName element,
                         XMLAttrList attrList,
                         int attrListHandle) throws Exception {
    fTracer.startElement(element, attrList, attrListHandle);
    fDocBuilder.startElement(getString(element.uri),
                             getString(element.rawname));

    // Walk the attribute chain; a negative index terminates it.
    for (int attr = attrListHandle; attr >= 0; attr = attrList.getNextAttr(attr)) {
        if (!attrList.isSpecified(attr)) {
            continue;
        }
        final String attrUri = getString(attrList.getAttrURI(attr));
        final String attrName = getString(attrList.getAttrName(attr));
        final String attrValue = getString(attrList.getAttValue(attr));
        fDocBuilder.addAttribute(attrUri, attrName, attrValue);
    }
}
/**
 * Handle end of element: trace it and tell the document builder that
 * the current element is finished.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endElement
 */
public void endElement(QName element)
        throws Exception {
    fTracer.endElement(element);
    fDocBuilder.finishElement();
}
/**
 * Determine if an entity reference should be processed.
 *
 * A reference is ignored when it has no name (negative name index; the
 * callbacks are invoked this way around the external DTD), when the
 * parser is not currently in the document body, or when it occurs in
 * an attribute value (those callbacks arrive before the start of the
 * element).
 */
private boolean shouldProcessEntityReference(int entityName,
                                             int entityContext) {
    if (entityName < 0) {
        return false;
    }
    if (fProcessingState != PROCESSING_DOCUMENT) {
        return false;
    }
    return entityContext != XMLEntityHandler.ENTITYREF_IN_ATTVALUE;
}
/**
 * Handle the start of an entity reference. If it is one of the
 * standard character entity references, no entity reference node is
 * started; the replacement text is simply appended in its place.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startEntityReference
 */
public void startEntityReference(int entityName,
                                 int entityType,
                                 int entityContext) throws Exception {
    final boolean process
        = shouldProcessEntityReference(entityName, entityContext);
    fTracer.startEntityReference(entityName, entityType, entityContext,
                                 process);
    if (!process || isCharEntity(entityName)) {
        return;
    }
    fDocBuilder.startEntityReference(getString(entityName));
}
/**
 * Handle the end of an entity reference. The builder is notified under
 * exactly the same conditions as in startEntityReference().
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endEntityReference
 */
public void endEntityReference(int entityName,
                               int entityType,
                               int entityContext) throws Exception {
    final boolean process
        = shouldProcessEntityReference(entityName, entityContext);
    fTracer.endEntityReference(entityName, entityType, entityContext, process);
    if (!process || isCharEntity(entityName)) {
        return;
    }
    fDocBuilder.endEntityReference();
}
/**
 * Not used; this string-pool variant of the callback always fails with
 * an XMLCError.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#characters
 */
public void characters(int data)
        throws Exception {
    final String message = "fatal error: method that should not be invoked called";
    throw new XMLCError(message);
}
/**
 * Not used; this string-pool variant of the callback always fails with
 * an XMLCError.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#ignorableWhitespace
 */
public void ignorableWhitespace(int data)
        throws Exception {
    final String message = "fatal error: method that should not be invoked called";
    throw new XMLCError(message);
}
/**
 * Handle start of CDATA section: trace it and flag that character data
 * received from now on belongs to a CDATA section.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#startCDATA
 */
public void startCDATA()
{
    fTracer.startCDATA();
    fProcessingCDATASection = true;
}
/**
 * Handle end of CDATA section: trace it and clear the CDATA flag.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#endCDATA
 */
public void endCDATA()
{
    fTracer.endCDATA();
    fProcessingCDATASection = false;
}
/**
 * Handle processing instruction. It is always traced, but only added
 * to the document when the parser is in the document body (not while
 * in a DTD subset).
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#processingInstruction
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#processingInstruction
 */
public void processingInstruction(int target, int data)
        throws Exception {
    fTracer.processingInstruction(target, data);
    if (fProcessingState != PROCESSING_DOCUMENT) {
        return;
    }
    final String targetText = getString(target);
    final String dataText = getString(data);
    fDocBuilder.addProcessingInstruction(targetText, dataText);
}
/**
 * Handle a comment. It is always traced, but only added to the
 * document when the parser is in the document body (not while in a
 * DTD subset).
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#comment
 */
public void comment(int comment)
        throws Exception {
    fTracer.comment(comment);
    if (fProcessingState != PROCESSING_DOCUMENT) {
        return;
    }
    fDocBuilder.addComment(getString(comment));
}
/**
 * Handle characters: trace them, then add them to the document as a
 * CDATA section if one is currently open, or as a text node otherwise.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#characters
 */
public void characters(char[] ch, int start, int length)
        throws Exception {
    fTracer.characters(ch, start, length);
    final String text = new String(ch, start, length);
    if (!fProcessingCDATASection) {
        fDocBuilder.addTextNode(text);
    } else {
        fDocBuilder.addCDATASection(text);
    }
}
/**
 * Handle ignorable whitespace. The event is traced and the whitespace
 * is otherwise dropped; nothing is added to the document.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler#ignorableWhitespace
 */
public void ignorableWhitespace(char[] ch, int start, int length)
        throws Exception {
    //FIXME: is it right to ignore these?
    fTracer.ignorableWhitespace(ch, start, length);
}
/**
 * Handle start of the DTD: trace it, switch the processing state, and
 * record the document type name, public id and system id in the
 * document builder.
 *
 * The state becomes "internal subset" when neither a public nor a
 * system id is given (both indexes negative) and "external subset"
 * otherwise.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#startDTD
 */
public void startDTD(QName rootElement, int publicId, int systemId) {
    fTracer.startDTD(rootElement, publicId, systemId);

    final boolean noExternalId = (publicId < 0) && (systemId < 0);
    fProcessingState = noExternalId
        ? PROCESSING_INTERNAL_SUBSET
        : PROCESSING_EXTERNAL_SUBSET;

    final String docTypeName = getString(rootElement.rawname);
    fDocBuilder.setDocumentTypeName(docTypeName);
    final String publicIdText = getString(publicId);
    fDocBuilder.setPublicId(publicIdText);
    final String systemIdText = getString(systemId);
    fDocBuilder.setSystemId(systemIdText);
}
/**
 * Handle the internal subset: trace it and store its text in the
 * document builder.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#internalSubset
 */
public void internalSubset(int internalSubset)
{
    fTracer.internalSubset(internalSubset);
    final String subsetText = getString(internalSubset);
    fDocBuilder.setInternalSubset(subsetText);
}
/**
 * Handle end of the DTD: trace it and return the processing state to
 * "document".
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#endDTD
 */
public void endDTD()
{
    fTracer.endDTD();
    fProcessingState = PROCESSING_DOCUMENT;
}
/**
 * Recursively search a content spec tree for a #PCDATA leaf.
 *
 * The content spec object is passed in to avoid reallocating; its
 * contents will be wiped out, since every level of the recursion loads
 * its node into it. The fields needed after a recursive call are
 * therefore copied to locals first.
 *
 * @param contentSpecIndex index of the node to examine.
 * @param provider source of content spec nodes.
 * @param contentSpec scratch object, overwritten by this call.
 * @return true if a #PCDATA leaf (a leaf whose value and otherValue
 *         are both -1) is found at or below the node; false otherwise,
 *         including when the provider has no node for the index.
 */
private boolean searchForPCData(int contentSpecIndex,
                                XMLContentSpec.Provider provider,
                                XMLContentSpec contentSpec) {
    if (!provider.getContentSpec(contentSpecIndex, contentSpec)) {
        return false;
    }

    // Snapshot the children before recursion clobbers contentSpec.
    final int left = contentSpec.value;
    final int right = contentSpec.otherValue;

    switch (contentSpec.type) {
    case XMLContentSpec.CONTENTSPECNODE_LEAF:
        // A leaf with no name and no URI is #PCDATA.
        return (left == -1) && (right == -1);

    case XMLContentSpec.CONTENTSPECNODE_ZERO_OR_ONE:
    case XMLContentSpec.CONTENTSPECNODE_ZERO_OR_MORE:
    case XMLContentSpec.CONTENTSPECNODE_ONE_OR_MORE:
        // Unary operators: only the left child exists.
        return searchForPCData(left, provider, contentSpec);

    case XMLContentSpec.CONTENTSPECNODE_CHOICE:
    case XMLContentSpec.CONTENTSPECNODE_SEQ:
        // Binary operators: left child first, then right.
        return searchForPCData(left, provider, contentSpec)
            || searchForPCData(right, provider, contentSpec);

    default:
        // All remaining node types (the ANY variants and ALL) are
        // not descended into.
        return false;
    }
}
/**
 * Determine if #PCDATA is part of the content spec, by searching the
 * tree rooted at the given index with a freshly allocated scratch
 * node. Logic for this method stolen from
 * org.enhydra.apache.xerces.framework.XMLContentSpec.
 */
private boolean hasPCData(int contentSpecIndex,
                          XMLContentSpec.Provider contentSpecProvider) {
    final XMLContentSpec scratch = new XMLContentSpec();
    return searchForPCData(contentSpecIndex, contentSpecProvider, scratch);
}
/**
 * Handle an element declaration: trace it and, when its content spec
 * contains #PCDATA, register the element's raw name with the document
 * builder as a PCDATA-content element.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#elementDecl
 */
public void elementDecl(QName elementDecl,
                        int contentSpecType,
                        int contentSpecIndex,
                        XMLContentSpec.Provider contentSpecProvider)
        throws Exception {
    fTracer.elementDecl(elementDecl, contentSpecType, contentSpecIndex,
                        contentSpecProvider);
    if (!hasPCData(contentSpecIndex, contentSpecProvider)) {
        return;
    }
    fDocBuilder.addPCDataContentElement(getString(elementDecl.rawname));
}
/**
 * Handle an attribute declaration: trace it and, when the attribute is
 * of type ID, register it with the document builder under the local
 * parts of the element and attribute names.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#attlistDecl
 */
public void attlistDecl(QName elementDecl,
                        QName attributeDecl,
                        int attType,
                        boolean attList,
                        String enumString,
                        int attDefaultType,
                        int attDefaultValue) throws Exception {
    fTracer.attlistDecl(elementDecl, attributeDecl, attType, attList,
                        enumString, attDefaultType, attDefaultValue);
    if (attType != XMLAttributeDecl.TYPE_ID) {
        return;
    }
    final String elementName = getString(elementDecl.localpart);
    final String attributeName = getString(attributeDecl.localpart);
    fDocBuilder.addIdAttribute(elementName, attributeName);
}
/**
 * Handle an internal parameter entity declaration. The event is only
 * traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#internalPEDecl
 */
public void internalPEDecl(int entityName, int entityValue)
{
    fTracer.internalPEDecl(entityName, entityValue);
}
/**
 * Handle an external parameter entity declaration. The event is only
 * traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#externalPEDecl
 */
public void externalPEDecl(int entityName, int publicId, int systemId)
{
    fTracer.externalPEDecl(entityName, publicId, systemId);
}
/**
 * Handle an internal general entity declaration. The event is only
 * traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#internalEntityDecl
 */
public void internalEntityDecl(int entityName, int entityValue)
{
    fTracer.internalEntityDecl(entityName, entityValue);
}
/**
 * Handle an external general entity declaration. The event is only
 * traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#externalEntityDecl
 */
public void externalEntityDecl(int entityName, int publicId, int systemId)
{
    fTracer.externalEntityDecl(entityName, publicId, systemId);
}
/**
 * Handle an unparsed entity declaration. The event is only traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#unparsedEntityDecl
 */
public void unparsedEntityDecl(int entityName, int publicId,
                               int systemId, int notationName)
{
    fTracer.unparsedEntityDecl(entityName, publicId, systemId, notationName);
}
/**
 * Handle a notation declaration. The event is only traced.
 *
 * @see org.enhydra.apache.xerces.framework.XMLDocumentHandler.DTDHandler#notationDecl
 */
public void notationDecl(int notationName, int publicId, int systemId)
{
    fTracer.notationDecl(notationName, publicId, systemId);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy