org.apache.excalibur.xml.impl.JaxpParser Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.excalibur.xml.impl;
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.apache.avalon.excalibur.pool.Poolable;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.logger.AbstractLogEnabled;
import org.apache.avalon.framework.parameters.ParameterException;
import org.apache.avalon.framework.parameters.Parameterizable;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.excalibur.xml.EntityResolver;
import org.apache.excalibur.xml.dom.DOMParser;
import org.apache.excalibur.xml.sax.SAXParser;
import org.w3c.dom.Document;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.LexicalHandler;
import org.xml.sax.helpers.DefaultHandler;
/**
* An XMLParser that is only dependant on JAXP 1.1 compliant parsers.
*
* The configuration can contain the following parameters :
*
* - validate (boolean, default =
false
) : should the parser
* validate parsed documents ?
*
* - namespace-prefixes (boolean, default =
false
) : do we want
* namespaces declarations also as 'xmlns:' attributes ?
* Note : setting this to true
confuses some XSL
* processors (e.g. Saxon).
*
* - stop-on-warning (boolean, default =
true
) : should the parser
* stop parsing if a warning occurs ?
*
* - stop-on-recoverable-error (boolean, default =
true
) : should the parser
* stop parsing if a recoverable error occurs ?
*
* - reuse-parsers (boolean, default =
true
) : do we want to reuse
* parsers or create a new parser for each parse ?
* Note : even if this parameter is true
, parsers are not
* recycled in case of parsing errors : some parsers (e.g. Xerces) don't like
* to be reused after failure.
*
* - sax-parser-factory (string, optional) : the name of the
SAXParserFactory
* implementation class to be used instead of using the standard JAXP mechanism
* (SAXParserFactory.newInstance()
). This allows to choose
* unambiguously the JAXP implementation to be used when several of them are
* available in the classpath.
*
* - drop-dtd-comments : should comment() events from DTD's be dropped? Since this implementation
* does not support the DeclHandler interface anyway, it is quite useless to only have the comments
* from DTD. And the comment events from the internal DTD subset would appear in the serialized output
* again.
*
*
*
* @author Avalon Development Team
* @version CVS $Revision: 1.7 $ $Date: 2004/03/28 18:56:45 $
* @avalon.component
* @avalon.service type=SAXParser
* @avalon.service type=DOMParser
* @x-avalon.info name=jaxp-parser
* @x-avalon.lifestyle type=poolable
*/
public final class JaxpParser
extends AbstractLogEnabled
implements SAXParser, DOMParser,
Poolable, Parameterizable, Serviceable, Disposable, ErrorHandler, Component
{
private static final ContentHandler NULL = new DefaultHandler();
/** the SAX Parser factory */
private SAXParserFactory m_factory;
/** The SAX reader. It is created lazily by {@link #setupXMLReader()}
and cleared if a parsing error occurs. */
private XMLReader m_reader;
/** the Entity Resolver */
private EntityResolver m_resolver;
/** do we want namespaces also as attributes ? */
private boolean m_nsPrefixes;
/** do we want to reuse parsers ? */
private boolean m_reuseParsers;
/** do we stop on warnings ? */
private boolean m_stopOnWarning;
/** do we stop on recoverable errors ? */
private boolean m_stopOnRecoverableError;
/** the hint to the entity resolver */
private String m_resolverHint;
/** the Document Builder factory */
private DocumentBuilderFactory m_docFactory;
/** The DOM builder. It is created lazily by {@link #setupDocumentBuilder()}
and cleared if a parsing error occurs. */
private DocumentBuilder m_docBuilder;
/** Should comments appearing between start/endDTD events be dropped ? */
private boolean m_dropDtdComments;
/** The serviec manager */
private ServiceManager m_manager;
/**
* Get the Entity Resolver from the component m_manager
*
* @avalon.dependency type="EntityResolver" optional="true"
*/
public void service( final ServiceManager manager )
throws ServiceException
{
m_manager = manager;
if( manager.hasService( EntityResolver.ROLE ) )
{
if ( m_resolverHint != null )
{
// select the configured resolver
m_resolver = (EntityResolver)manager.lookup( EntityResolver.ROLE + "/" + m_resolverHint );
}
else
{
// use default resolver
m_resolver = (EntityResolver)manager.lookup( EntityResolver.ROLE );
}
if( getLogger().isDebugEnabled() )
{
getLogger().debug( "JaxpParser: Using EntityResolver: " + m_resolver );
}
}
}
/* (non-Javadoc)
* @see org.apache.avalon.framework.activity.Disposable#dispose()
*/
public void dispose()
{
if ( m_manager != null )
{
m_manager.release( m_resolver );
m_manager = null;
m_resolver = null;
}
}
public void parameterize( final Parameters params )
throws ParameterException
{
// Validation and namespace prefixes parameters
boolean validate = params.getParameterAsBoolean( "validate", false );
m_nsPrefixes = params.getParameterAsBoolean( "namespace-prefixes", false );
m_reuseParsers = params.getParameterAsBoolean( "reuse-parsers", true );
m_stopOnWarning = params.getParameterAsBoolean( "stop-on-warning", true );
m_stopOnRecoverableError = params.getParameterAsBoolean( "stop-on-recoverable-error", true );
m_dropDtdComments = params.getParameterAsBoolean( "drop-dtd-comments", false );
m_resolverHint = params.getParameter( "resolver-hint", null );
// Get the SAXFactory
final String saxParserFactoryName = params.getParameter( "sax-parser-factory",
"javax.xml.parsers.SAXParserFactory" );
if( "javax.xml.parsers.SAXParserFactory".equals( saxParserFactoryName ) )
{
m_factory = SAXParserFactory.newInstance();
}
else
{
try
{
final Class factoryClass = loadClass( saxParserFactoryName );
m_factory = (SAXParserFactory)factoryClass.newInstance();
}
catch( Exception e )
{
throw new ParameterException( "Cannot load SAXParserFactory class " + saxParserFactoryName, e );
}
}
m_factory.setNamespaceAware( true );
m_factory.setValidating( validate );
// Get the DocumentFactory
final String documentBuilderFactoryName = params.getParameter( "document-builder-factory",
"javax.xml.parsers.DocumentBuilderFactory" );
if( "javax.xml.parsers.DocumentBuilderFactory".equals( documentBuilderFactoryName ) )
{
m_docFactory = DocumentBuilderFactory.newInstance();
}
else
{
try
{
final Class factoryClass = loadClass( documentBuilderFactoryName );
m_docFactory = (DocumentBuilderFactory)factoryClass.newInstance();
}
catch( Exception e )
{
throw new ParameterException( "Cannot load DocumentBuilderFactory class " + documentBuilderFactoryName, e );
}
}
m_docFactory.setNamespaceAware( true );
m_docFactory.setValidating( validate );
if( getLogger().isDebugEnabled() )
{
getLogger().debug( "JaxpParser: validating: " + validate +
", namespace-prefixes: " + m_nsPrefixes +
", reuse parser: " + m_reuseParsers +
", stop on warning: " + m_stopOnWarning +
", stop on recoverable-error: " + m_stopOnRecoverableError +
", saxParserFactory: " + saxParserFactoryName +
", documentBuilderFactory: " + documentBuilderFactoryName +
", resolver hint: " + m_resolverHint );
}
}
/**
* Load a class
*/
private Class loadClass( String name ) throws Exception
{
ClassLoader loader = Thread.currentThread().getContextClassLoader();
if( loader == null )
{
loader = getClass().getClassLoader();
}
return loader.loadClass( name );
}
/**
* Parse the InputSource
and send
* SAX events to the consumer.
* Attention: the consumer can implement the
* LexicalHandler
as well.
* The parse should take care of this.
*/
public void parse( final InputSource in,
final ContentHandler contentHandler,
final LexicalHandler lexicalHandler )
throws SAXException, IOException
{
setupXMLReader();
// Ensure we will use a fresh new parser at next parse in case of failure
XMLReader tmpReader = m_reader;
m_reader = null;
try
{
LexicalHandler theLexicalHandler = null;
if ( null == lexicalHandler
&& contentHandler instanceof LexicalHandler)
{
theLexicalHandler = (LexicalHandler)contentHandler;
}
if( null != lexicalHandler )
{
theLexicalHandler = lexicalHandler;
}
if (theLexicalHandler != null)
{
if (m_dropDtdComments)
theLexicalHandler = new DtdCommentEater(theLexicalHandler);
tmpReader.setProperty( "http://xml.org/sax/properties/lexical-handler",
theLexicalHandler );
}
}
catch( final SAXException e )
{
final String message =
"SAX2 driver does not support property: " +
"'http://xml.org/sax/properties/lexical-handler'";
getLogger().warn( message );
}
tmpReader.setErrorHandler( this );
tmpReader.setContentHandler( contentHandler );
if( null != m_resolver )
{
tmpReader.setEntityResolver( m_resolver );
}
tmpReader.parse( in );
// Here, parsing was successful : restore reader
if( m_reuseParsers )
{
try
{
tmpReader.setProperty( "http://xml.org/sax/properties/lexical-handler",
null );
}
catch( final SAXException e )
{
// Already Logged
}
tmpReader.setContentHandler( NULL );
m_reader = tmpReader;
}
}
/**
* Parse the {@link InputSource} and send
* SAX events to the consumer.
* Attention: the consumer can implement the
* {@link LexicalHandler} as well.
* The parse should take care of this.
*/
public void parse( InputSource in, ContentHandler consumer )
throws SAXException, IOException
{
this.parse( in, consumer,
(consumer instanceof LexicalHandler ? (LexicalHandler)consumer : null));
}
/**
* Creates a new {@link XMLReader} if needed.
*/
private void setupXMLReader()
throws SAXException
{
if( null == m_reader )
{
// Create the XMLReader
try
{
m_reader = m_factory.newSAXParser().getXMLReader();
}
catch( final ParserConfigurationException pce )
{
final String message = "Cannot produce a valid parser";
throw new SAXException( message, pce );
}
m_reader.setFeature( "http://xml.org/sax/features/namespaces", true );
if( m_nsPrefixes )
{
try
{
m_reader.setFeature( "http://xml.org/sax/features/namespace-prefixes",
m_nsPrefixes );
}
catch( final SAXException se )
{
final String message =
"SAX2 XMLReader does not support setting feature: " +
"'http://xml.org/sax/features/namespace-prefixes'";
getLogger().warn( message );
}
}
}
}
/**
* Parses a new Document object from the given InputSource.
*/
public Document parseDocument( final InputSource input )
throws SAXException, IOException
{
setupDocumentBuilder();
// Ensure we will use a fresh new parser at next parse in case of failure
DocumentBuilder tmpBuilder = m_docBuilder;
m_docBuilder = null;
if( null != m_resolver )
{
tmpBuilder.setEntityResolver( m_resolver );
}
Document result = tmpBuilder.parse( input );
// Here, parsing was successful : restore builder
if( m_reuseParsers )
{
m_docBuilder = tmpBuilder;
}
return result;
}
/**
* Creates a new {@link DocumentBuilder} if needed.
*/
private void setupDocumentBuilder()
throws SAXException
{
if( null == m_docBuilder )
{
try
{
m_docBuilder = m_docFactory.newDocumentBuilder();
}
catch( final ParserConfigurationException pce )
{
final String message = "Could not create DocumentBuilder";
throw new SAXException( message, pce );
}
}
}
/**
* Return a new {@link Document}.
*/
public Document createDocument()
throws SAXException
{
setupDocumentBuilder();
return m_docBuilder.newDocument();
}
/**
* Receive notification of a recoverable error.
*/
public void error( final SAXParseException spe )
throws SAXException
{
final String message =
"Error parsing " + spe.getSystemId() + " (line " +
spe.getLineNumber() + " col. " + spe.getColumnNumber() +
"): " + spe.getMessage();
if( m_stopOnRecoverableError )
{
throw new SAXException( message, spe );
}
getLogger().error( message, spe );
}
/**
* Receive notification of a fatal error.
*/
public void fatalError( final SAXParseException spe )
throws SAXException
{
final String message =
"Fatal error parsing " + spe.getSystemId() + " (line " +
spe.getLineNumber() + " col. " + spe.getColumnNumber() +
"): " + spe.getMessage();
throw new SAXException( message, spe );
}
/**
* Receive notification of a warning.
*/
public void warning( final SAXParseException spe )
throws SAXException
{
final String message =
"Warning parsing " + spe.getSystemId() + " (line " +
spe.getLineNumber() + " col. " + spe.getColumnNumber() +
"): " + spe.getMessage();
if( m_stopOnWarning )
{
throw new SAXException( message, spe );
}
getLogger().warn( message, spe );
}
/**
* A LexicalHandler implementation that strips all comment events between
* startDTD and endDTD. In all other cases the events are forwarded to another
* LexicalHandler.
*/
private static class DtdCommentEater implements LexicalHandler
{
private LexicalHandler next;
private boolean inDTD;
public DtdCommentEater(LexicalHandler nextHandler)
{
this.next = nextHandler;
}
public void startDTD (String name, String publicId, String systemId)
throws SAXException
{
inDTD = true;
next.startDTD(name, publicId, systemId);
}
public void endDTD ()
throws SAXException
{
inDTD = false;
next.endDTD();
}
public void startEntity (String name)
throws SAXException
{
next.startEntity(name);
}
public void endEntity (String name)
throws SAXException
{
next.endEntity(name);
}
public void startCDATA ()
throws SAXException
{
next.startCDATA();
}
public void endCDATA ()
throws SAXException
{
next.endCDATA();
}
public void comment (char ch[], int start, int length)
throws SAXException
{
if (!inDTD)
next.comment(ch, start, length);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy