com.sun.msv.reader.dtd.DTDReader Maven / Gradle / Ivy
/*
* Copyright (c) 2001-2013 Oracle and/or its affiliates. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Oracle nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.sun.msv.reader.dtd;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.relaxng.datatype.Datatype;
import org.relaxng.datatype.DatatypeException;
import org.relaxng.datatype.DatatypeLibrary;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.LocatorImpl;
import com.sun.msv.datatype.xsd.ngimpl.DataTypeLibraryImpl;
import com.sun.msv.grammar.ChoiceNameClass;
import com.sun.msv.grammar.Expression;
import com.sun.msv.grammar.ExpressionPool;
import com.sun.msv.grammar.NameClass;
import com.sun.msv.grammar.ReferenceExp;
import com.sun.msv.grammar.SimpleNameClass;
import com.sun.msv.grammar.dtd.LocalNameClass;
import com.sun.msv.grammar.trex.ElementPattern;
import com.sun.msv.grammar.trex.TREXGrammar;
import com.sun.msv.reader.Controller;
import com.sun.msv.reader.GrammarReaderController;
import com.sun.msv.scanner.dtd.DTDEventListener;
import com.sun.msv.scanner.dtd.DTDParser;
import com.sun.msv.scanner.dtd.InputEntity;
import com.sun.msv.util.StringPair;
/**
* Constructs a {@link TREXGrammar} object that exactly matches
* the parsed DTD.
*
*
* Each element declaration will have its own ReferenceExp by the name
* of the element.
*
*
* Note that this class does NOT extend GrammarReader, because DTD
* is not written in XML format.
*
* @author Kohsuke KAWAGUCHI
*/
public class DTDReader implements DTDEventListener {
/**
 * Creates a reader that builds a new {@link TREXGrammar}.
 *
 * @param controller
 *      wrapped in a {@link Controller}; errors and warnings found
 *      while reading the DTD are reported through that wrapper.
 * @param pool
 *      expression pool handed to the newly created grammar.
 */
public DTDReader( GrammarReaderController controller, ExpressionPool pool ) {
    this.controller = new Controller( controller );
    this.grammar = new TREXGrammar( pool );
}
/**
 * Parses a DTD with a freshly created {@link ExpressionPool}.
 *
 * This is a convenience form of the three-argument overload and
 * returns whatever that overload returns.
 *
 * @param source      the DTD to read.
 * @param controller  receives errors and resolves entities.
 */
public static TREXGrammar parse( InputSource source,
    GrammarReaderController controller ) {
    final ExpressionPool freshPool = new ExpressionPool();
    return parse( source, controller, freshPool );
}
/**
 * Parses a DTD and returns the grammar built from it.
 *
 * @param source      the DTD to read.
 * @param controller  used as the entity resolver of the DTD parser and
 *                    as the receiver of error reports.
 * @param pool        expression pool used to build the grammar.
 * @return
 *      the result of the reader, or <code>null</code> if any exception
 *      was thrown while reading.
 */
public static TREXGrammar parse( InputSource source,
    GrammarReaderController controller,
    ExpressionPool pool ) {

    TREXGrammar result = null;
    try {
        final DTDReader listener = new DTDReader( controller, pool );
        final DTDParser scanner = new DTDParser();
        scanner.setDtdHandler( listener );
        scanner.setEntityResolver( controller );
        scanner.parse( source );
        result = listener.getResult();
    } catch( SAXParseException alreadyReported ) {
        // this error was already handled by GrammarReaderController,
        // so there is nothing left to do but to give up.
    } catch( Exception e ) {
        // anything else has not been reported yet; report it without a location.
        controller.error( new Locator[0], e.getMessage(), e );
    }
    return result;
}
/**
 * Wrapper around the {@link GrammarReaderController} given to the constructor.
 * Errors and warnings raised by this reader are reported through it.
 */
protected final Controller controller;
/**
 * Used to create Datatype objects. This datatype library
 * should be able to handle XML Schema datatypes because
 * XML Schema type names (such as "normalizedString" and "token")
 * are what this reader asks it for.
 *
 * Defaults to {@link DataTypeLibraryImpl}; it can be replaced
 * through setDatatypeLibrary.
 */
private DatatypeLibrary datatypeLibrary = new DataTypeLibraryImpl();
/**
 * Replaces the datatype library that createDatatype consults.
 *
 * @param datatypeLibrary
 *      the library to be used from now on.
 */
public void setDatatypeLibrary( DatatypeLibrary datatypeLibrary ) {
    this.datatypeLibrary = datatypeLibrary;
}
/**
 * Obtains a Datatype object from its name.
 *
 * Two DTD type names are translated before the datatype library is
 * consulted: {@link DTDParser#TYPE_CDATA} becomes "normalizedString"
 * and {@link DTDParser#TYPE_ENUMERATION} becomes "token". Every other
 * name is passed to the library unchanged.
 *
 * @param name
 *      attribute type name as reported by the DTD parser.
 * @return
 *      the datatype created by the current datatype library.
 * @throws InternalError
 *      if the datatype library cannot create the requested type.
 *      The library is expected to be complete, so this indicates a
 *      configuration problem. The original DatatypeException is
 *      attached as the cause.
 */
public Datatype createDatatype( String name ) {
    // translate DTD-specific type names into the names the library is asked for.
    final String libraryTypeName;
    if( DTDParser.TYPE_CDATA.equals(name) )
        libraryTypeName = "normalizedString";
    else if( DTDParser.TYPE_ENUMERATION.equals(name) )
        libraryTypeName = "token";
    else
        libraryTypeName = name;

    try {
        return datatypeLibrary.createDatatype( libraryTypeName );
    } catch( DatatypeException e ) {
        // we expect this datatype library to be complete.
        // Keep the original exception as the cause instead of dumping it
        // to stderr, so that whoever catches the error can see why it failed.
        InternalError failure = new InternalError(
            "unable to create datatype \"" + libraryTypeName
            + "\" for attribute type \"" + name + "\"" );
        failure.initCause( e );
        throw failure;
    }
}
/**
 * Map from a namespace prefix (String) to the Set of namespace URIs
 * that may be bound to that prefix.
 * The default namespace (without prefix) is stored by using "" as a key.
 * A set may also contain ABANDON_URI_SNIFFING instead of real URIs.
 */
protected final Map namespaces = createInitialNamespaceMap();
/**
 * Builds the initial prefix-to-URI-set map.
 *
 * @return
 *      a new mutable map with a single entry: the prefix "xml" mapped to
 *      a new mutable set holding "http://www.w3.org/XML/1998/namespace".
 */
protected final static Map createInitialNamespaceMap() {
    // prefix xml is implicitly declared, so it is registered up front.
    Set xmlUris = new java.util.HashSet();
    xmlUris.add("http://www.w3.org/XML/1998/namespace");

    Map prefixToUris = new java.util.HashMap();
    prefixToUris.put("xml", xmlUris);
    return prefixToUris;
}
/**
 * Marker value stored in a URI set of the namespaces map.
 * When this value is in the set for a prefix, that indicates
 * we couldn't detect what URIs are going to be used with that prefix
 * (for example, an xmlns attribute was declared without a default value).
 */
protected static final String ABANDON_URI_SNIFFING = "*";
protected NameClass getNameClass( String maybeQName, boolean handleAsAttribute ) {
String[] s = splitQName(maybeQName);
if(s[0].length()==0 && handleAsAttribute )
// if this is an attribute and unprefixed, it is local to the element.
return new SimpleNameClass(s[0],s[1]);
Set vec = (Set)namespaces.get(s[0]/*uri*/);
if(vec==null) {
if(s[0].equals(""))
// this DTD does not attempt to use namespace.
// this is OK and we assume anonymous namespace.
return new SimpleNameClass("",s[1]);
// we found element name like "html:p" but
// we haven't see any "xmlns:html" attribute declaration.
// this is considered as an error for MSV.
controller.error( new Locator[]{locator},
Localizer.localize( ERR_UNDECLARED_PREFIX, s[0] ), null );
// recover by returning something
return new LocalNameClass( s[1]/*local*/ );
}
if( vec.contains(ABANDON_URI_SNIFFING) ) {
// System.out.println("sniffing abandoned for "+s[0]);
// possibly multiple URI can be assigned.
// so fall back to use LocalNameClass to at least check local part.
return new LocalNameClass( s[1] );
}
// create choice of all possible namespace, and
// return it.
String[] candidates = (String[])vec.toArray(new String[vec.size()]);
NameClass nc = new SimpleNameClass( candidates[0], s[1] );
// System.out.println("candidate for "+s[0]+" is "+ candidates[0] );
for( int i=1; i
* Its definition will be later filled by the choice of all elements.
*/
protected final ReferenceExp getAnyExp() {
    // the pattern is registered among the named patterns under this fixed key;
    // it is created on the first request and reused afterwards.
    final String reservedName = "$ all $";
    return grammar.namedPatterns.getOrCreate( reservedName );
}
/**
 * Finishes the content model of an element declaration.
 *
 * The expression accumulated in contentModel is adjusted according to
 * the content model type, stored in elementDecls under the element name,
 * and contentModel is reset to null.
 *
 * @param elementName  name of the element being declared.
 * @param type         one of the CONTENT_MODEL_* constants.
 */
public void endContentModel( String elementName, short type ) {
    if( contentModel==null ) {
        // assertion failed.
        // this must be a bug of DTDScanner.
        throw new Error();
    }

    if( type==CONTENT_MODEL_EMPTY ) {
        contentModel = Expression.epsilon;
    } else if( type==CONTENT_MODEL_MIXED ) {
        if( contentModel==Expression.nullSet ) {
            // this happens when mixed content model is #PCDATA only.
            contentModel = Expression.anyString;
        } else {
            contentModel = grammar.pool.createMixed(
                grammar.pool.createZeroOrMore( contentModel ) );
        }
    }
    // CONTENT_MODEL_CHILDREN, CONTENT_MODEL_ANY and any other value:
    // the accumulated expression is used as it is.

    // memorize the location
    setDeclaredLocationOf( grammar.namedPatterns.getOrCreate( elementName ) );

    // memorize parsed content model, then get ready for the next declaration.
    elementDecls.put( elementName, contentModel );
    contentModel = null;
}
/**
 * Applies an occurrence indicator (none, +, * or ?) to the given expression.
 *
 * @param item       expression to be wrapped.
 * @param occurence  one of the OCCURENCE_* constants.
 * @return
 *      <code>item</code> itself for OCCURENCE_ONCE; otherwise the
 *      one-or-more, zero-or-more or optional expression created from it.
 */
protected Expression processOccurs( Expression item, short occurence ) {
    if( occurence==OCCURENCE_ONCE )
        return item;
    if( occurence==OCCURENCE_ONE_OR_MORE )
        return grammar.pool.createOneOrMore( item );
    if( occurence==OCCURENCE_ZERO_OR_MORE )
        return grammar.pool.createZeroOrMore( item );
    if( occurence==OCCURENCE_ZERO_OR_ONE )
        return grammar.pool.createOptional( item );

    // assertion failed. this must be a bug of DTDScanner.
    throw new Error();
}
/**
 * One saved frame of the model group stack.
 *
 * A frame keeps the expression and the connector type that were current
 * when a nested model group was entered, plus a link to the frame below it.
 */
protected class Context {
    /** frame below this one; whatever the stack top was when this frame was pushed. */
    final Context previous;
    /** expression that was being built when this frame was pushed. */
    final Expression exp;
    /** connector type that was in effect when this frame was pushed. */
    final short connectorType;

    Context( Context prev, Expression exp, short connector ) {
        this.previous = prev;
        this.exp = exp;
        this.connectorType = connector;
    }
}
/** Top of the stack of saved model group states; pushed by startModelGroup and popped by endModelGroup. */
protected Context contextStack;
/** Expression built so far for the current model group; null when nothing has been added yet. */
protected Expression contentModel;
/** Connector used within the current model group, or CONNECTOR_UNKNOWN if none has been seen yet. */
protected short connectorType;
/** Value of connectorType while the connector of the current model group is still unknown. */
protected final short CONNECTOR_UNKNOWN = -999;
/**
 * Adds a reference to a child element to the current model group.
 *
 * @param elementName  name of the referenced element.
 * @param occurence    occurrence indicator attached to the reference.
 */
public void childElement( String elementName, short occurence ) {
    final ReferenceExp ref = grammar.namedPatterns.getOrCreate( elementName );
    final Expression particle = processOccurs( ref, occurence );

    if( connectorType!=CONNECTOR_UNKNOWN ) {
        // a connector has been seen, so join this item to what we already have.
        combineToContentModel( particle );
        return;
    }

    // this must be the first child element within this model group.
    if( contentModel!=null )
        throw new Error();
    contentModel = particle;
}
/**
 * Joins the given expression to contentModel by using the connector
 * of the current model group.
 *
 * @param exp  expression to be appended as the right-hand operand.
 */
protected void combineToContentModel( Expression exp ) {
    final Expression merged;
    if( connectorType==CHOICE ) {
        merged = grammar.pool.createChoice( contentModel, exp );
    } else if( connectorType==SEQUENCE ) {
        merged = grammar.pool.createSequence( contentModel, exp );
    } else {
        // assertion failed. no such connector.
        throw new Error();
    }
    contentModel = merged;
}
/**
 * Adds an element name to a mixed content model as another choice branch.
 *
 * @param elementName  name of the element allowed in the mixed content.
 */
public void mixedElement( String elementName ) {
    final Expression soFar = contentModel;
    if( soFar==null ) {
        // assertion failed. contentModel must be prepared by startContentModel method.
        throw new Error();
    }
    contentModel = grammar.pool.createChoice(
        soFar, grammar.namedPatterns.getOrCreate( elementName ) );
}
/**
 * Enters a nested model group: the current expression and connector are
 * saved on the context stack and a blank state is set up for the new group.
 */
public void startModelGroup() {
    // push context
    final Context saved = new Context( contextStack, contentModel, connectorType );
    contextStack = saved;

    // start the nested group from scratch
    connectorType = CONNECTOR_UNKNOWN;
    contentModel = null;
}
/**
 * Leaves a model group: the finished group gets its occurrence indicator,
 * the enclosing state is restored from the context stack, and the group
 * is added to the enclosing content model.
 *
 * @param occurence  occurrence indicator attached to the whole group.
 */
public void endModelGroup( short occurence ) {
    final Expression finished = processOccurs( contentModel, occurence );

    // pop context
    final Context enclosing = contextStack;
    contextStack = enclosing.previous;
    contentModel = enclosing.exp;
    connectorType = enclosing.connectorType;

    if( contentModel!=null ) {
        combineToContentModel( finished );
        return;
    }
    // this model group is the first item in the parent model group.
    contentModel = finished;
}
/**
 * Records the connector found between two items of the current model group.
 *
 * @param type  connector type reported by the DTD parser.
 */
public void connector( short type ) throws SAXException {
    if( connectorType==CONNECTOR_UNKNOWN ) {
        // first connector of this group; remember it.
        connectorType = type;
        return;
    }
    if( connectorType!=type ) {
        // assertion failed.
        // within a model group, operator must be the same.
        throw new Error();
    }
}
/**
 * Returns the set of candidate namespace URIs registered for a prefix,
 * registering a new empty set first if the prefix has none yet.
 *
 * @param prefix  namespace prefix; "" stands for the default namespace.
 */
private Set getPossibleNamespaces( String prefix ) {
    Set uris = (Set)namespaces.get( prefix );
    if( uris==null ) {
        uris = new java.util.HashSet();
        namespaces.put( prefix, uris );
    }
    return uris;
}
/**
 * This flag is set to true once an xmlns attribute declaration has been
 * seen, which is when WRN_ATTEMPT_TO_USE_NAMESPACE is reported.
 * It is used to prevent issuing the same warning more than once.
 */
private boolean reportedXmlnsWarning = false;
/**
 * Handles one attribute declaration found in the DTD.
 *
 * A declaration of {@code xmlns} or {@code xmlns:prefix} is treated as
 * a namespace declaration: its default value (or ABANDON_URI_SNIFFING when
 * there is no default value) is recorded as a candidate URI for the prefix,
 * and nothing is added to the attribute constraints. Every other attribute
 * is turned into an AttModel and stored in attributeDecls under the
 * element name.
 *
 * @param elementName    name of the element that owns the attribute.
 * @param attributeName  name of the attribute as written in the DTD.
 * @param attributeType  attribute type; passed on to createAttributeBody.
 * @param enums          enumerated values; passed on to createAttributeBody.
 * @param attributeUse   attribute use; the attribute is marked as required
 *                       when this equals USE_REQUIRED.
 * @param defaultValue   default value, or null if none was declared.
 */
public void attributeDecl(
    String elementName, String attributeName, String attributeType,
    String[] enums, short attributeUse, String defaultValue )
    throws SAXException {

    // Only "xmlns" and "xmlns:<prefix>" declare namespaces. Testing just
    // startsWith("xmlns") would also catch ordinary attributes such as
    // "xmlnsfoo", drop them from the attribute constraints and register
    // a bogus prefix for them.
    final boolean declaresDefaultNamespace = attributeName.equals("xmlns");
    if( declaresDefaultNamespace || attributeName.startsWith("xmlns:") ) {
        // this is namespace declaration
        if( !reportedXmlnsWarning )
            controller.warning( new Locator[]{locator},
                Localizer.localize( WRN_ATTEMPT_TO_USE_NAMESPACE ) );
        reportedXmlnsWarning = true;

        if( defaultValue==null )
            // we don't have a default value, so no way to determine URI.
            defaultValue = ABANDON_URI_SNIFFING;

        Set s;
        if( declaresDefaultNamespace )
            s = getPossibleNamespaces("");
        else
            // strip the leading "xmlns:" to obtain the prefix.
            s = getPossibleNamespaces( attributeName.substring( "xmlns:".length() ) );
        s.add( defaultValue );

        // xmlns:* cannot be added to attr constraint expression.
        return;
    }

    Map attList = (Map)attributeDecls.get(elementName);
    if( attList==null ) {
        // the first attribute for this element.
        attList = new java.util.HashMap();
        attributeDecls.put(elementName,attList);
    }

    Expression body = createAttributeBody(
        elementName,attributeName,attributeType,enums,
        attributeUse,defaultValue);

    AttModel am = new AttModel( body, attributeUse==USE_REQUIRED );
    setDeclaredLocationOf(am);

    // add it to the list.
    attList.put( attributeName, am );
}
/**
* Creates an attribute body from the declaration
* found in the DTD.
*/
protected Expression createAttributeBody(
String elementName, String attributeName, String attributeType,
String[] enums, short attributeUse, String defaultValue )
throws SAXException {
// create Datatype that validates attribute value.
Datatype dt = createDatatype(attributeType);
StringPair str = new StringPair("",attributeType);
if(enums!=null) {
Expression exp = Expression.nullSet;
for( int i=0; i