com.sun.msv.writer.relaxng.RELAXNGWriter Maven / Gradle / Ivy
/*
* Copyright (c) 2001-2013 Oracle and/or its affiliates. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Oracle nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.sun.msv.writer.relaxng;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import org.xml.sax.DocumentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.LocatorImpl;
import com.sun.msv.grammar.AttributeExp;
import com.sun.msv.grammar.BinaryExp;
import com.sun.msv.grammar.ChoiceExp;
import com.sun.msv.grammar.ChoiceNameClass;
import com.sun.msv.grammar.ConcurExp;
import com.sun.msv.grammar.DataExp;
import com.sun.msv.grammar.DifferenceNameClass;
import com.sun.msv.grammar.ElementExp;
import com.sun.msv.grammar.Expression;
import com.sun.msv.grammar.ExpressionCloner;
import com.sun.msv.grammar.ExpressionVisitor;
import com.sun.msv.grammar.Grammar;
import com.sun.msv.grammar.InterleaveExp;
import com.sun.msv.grammar.ListExp;
import com.sun.msv.grammar.MixedExp;
import com.sun.msv.grammar.NameClass;
import com.sun.msv.grammar.NameClassVisitor;
import com.sun.msv.grammar.NamespaceNameClass;
import com.sun.msv.grammar.OneOrMoreExp;
import com.sun.msv.grammar.OtherExp;
import com.sun.msv.grammar.ReferenceExp;
import com.sun.msv.grammar.SequenceExp;
import com.sun.msv.grammar.SimpleNameClass;
import com.sun.msv.grammar.ValueExp;
import com.sun.msv.grammar.util.ExpressionWalker;
import com.sun.msv.grammar.util.PossibleNamesCollector;
import com.sun.msv.reader.datatype.xsd.XSDVocabulary;
import com.sun.msv.reader.trex.ng.RELAXNGReader;
import com.sun.msv.util.StringPair;
import com.sun.msv.writer.GrammarWriter;
import com.sun.msv.writer.SAXRuntimeException;
import com.sun.msv.writer.XMLWriter;
/**
* converts any Grammar into RELAX NG XML representation through SAX1 events.
*
* How it works
*
*
* {@link Grammar} object can be thought as a (possibly) cyclic graph
* made from {@link Expression}. For example, the following simple
* TREX pattern will be represented as following AGM.
*
*
*
*
*
*
* abc
*
*
*
*
*
*
* abc
*
*
*
*
*
*
*
*
*
*
* Note that
*
*
* - sub expressions are shared (see <string> expression).
*
* - there is a cycle in the graph.
*
* - several syntax elements are replaced by others
* (e.g., <optional>P</optional> -> <choice><empty/>P</choice>)
*
*
*
* To write these expressions into RELAX NG XML representation,
* we have to take care of cycles, since cyclic references cannot be written into
* XML without first cutting them and using a <ref>/<define> pair.
*
*
*
* First, this algorithm splits the grammar into "islands".
* Island is a tree of expressions; it has a head expression
* and most importantly it doesn't contain any cycles in it. Member of an island
* can be always reached from its head.
*
*
*
* TREXWriter will make every {@link ElementExp} and
* {@link ReferenceExp} a head of their own island. So each of them
* has their own island.
*
* It is guaranteed that this split will always give islands without inner cycles.
* Several islands can form a cycle, but one island can never have a cycle in it.
* This is because there is always at least one ElementExp in any cycle.
*
*
*
* Note that since expressions are shared, one expression can be
* a member of several islands (although this isn't depicted in the above figure.)
*
*
* Then, this algorithm merges some islands. For example, island E is
* referenced only once (from island D). This means that there is no need to
* give a name to this pattern. Instead, island E can simply be written as a
* subordinate of island D.
*
* In other words, any island that is referenced at most once is merged
* into its referrer. This step makes the output more compact.
*
*
*
* Next, the writer assigns a name to each island. It tries to use the name of
* the head expression. If a head is an anonymous ReferenceExp (a ReferenceExp whose
* name field is null) or there is a name conflict, the writer
* will add some suffix to make the name unique.
*
* Finally, each island is written as one named pattern under <define>
* element. All inter-island references are replaced by <ref> element.
*
*
* Why SAX1?
*
* Due to bugs in, and insufficient support for, serialization through SAX2,
* the decision was made to use SAX1. SAX1 allows us to control namespace prefix
* mappings better than SAX2.
*
*
* @author Kohsuke KAWAGUCHI
*/
public class RELAXNGWriter implements GrammarWriter, Context {
/** Low-level XML writer through which every event of the output is produced. */
protected XMLWriter writer = new XMLWriter();

/**
 * Gives access to the underlying XML writer.
 *
 * @return the {@link XMLWriter} instance held by this object.
 */
public XMLWriter getWriter() {
    return this.writer;
}
/**
 * Sets the SAX1 handler that will receive the generated document.
 * The handler is simply passed on to the underlying XML writer.
 *
 * @param handler
 *      handler to be installed on the XML writer.
 */
public void setDocumentHandler( DocumentHandler handler ) {
    this.writer.setDocumentHandler( handler );
}
/**
 * Writes the grammar, choosing the default namespace automatically.
 *
 * The namespace URI is sniffed from the top-level expression of the grammar
 * and then passed, together with the grammar, to the two-argument version
 * of this method.
 *
 * @param g
 *      grammar to be written.
 */
public void write( Grammar g ) throws SAXException {
    // namespace URI that can serve as the default "ns" attribute (may be null).
    final String sniffedNs = sniffDefaultNs( g.getTopLevel() );
    write( g, sniffedNs );
}
/**
 * Writes the specified grammar as a RELAX NG document, reporting it as
 * SAX1 events to the {@link DocumentHandler} installed on the XML writer.
 *
 * The work is done in the following steps:
 *
 * - the grammar is traversed from its top-level expression, and every
 *   ElementExp/ReferenceExp that is reached more than once becomes the
 *   head of an island;
 * - a unique name is assigned to each head;
 * - the top-level expression is written inside a "start" element and
 *   every named head is written inside its own "define" element.
 *
 * Heads and names are kept in insertion-ordered collections, so for a given
 * traversal order the generated names and the order of the "define" elements
 * are reproducible instead of depending on hash iteration order.
 *
 * @param g
 *      grammar to be written.
 * @param _defaultNs
 *      if specified, this namespace URI is used as "ns" attribute
 *      of grammar element. Can be null.
 *
 * @exception SAXException
 *      the exception wrapped in a {@link SAXRuntimeException} raised while
 *      writing is unwrapped and rethrown; the handler's own
 *      startDocument/endDocument calls are also made from here.
 * @exception IllegalArgumentException
 *      If the given grammar is beyond the expressive power of the output
 *      language, then this exception is thrown.
 *      NOTE(review): not thrown directly by this method; presumably raised
 *      by the pattern writer - confirm.
 */
public void write( Grammar g, String _defaultNs ) throws SAXException {
    this.defaultNs = _defaultNs;
    this.grammar = g;

    // all ElementExps and ReferenceExps reached so far.
    final Set nodes = new HashSet();
    // ElementExps and ReferenceExps who are referenced more than once.
    // LinkedHashSet keeps them in discovery order, which makes the
    // generated names and the order of <define>s reproducible.
    final Set heads = new LinkedHashSet();

    g.getTopLevel().visit( new ExpressionWalker(){
        // ExpressionWalker class traverses expressions in depth-first order.
        // So this invocation traverses all the expressions reachable from
        // the top level expression.
        // Whenever visiting elements and RefExps, they are memorized
        // to identify head of islands.
        public void onElement( ElementExp exp ) {
            if( !nodes.add(exp) ) {
                // second visit: this element is shared.
                heads.add(exp);
                return; // prevent infinite recursion.
            }
            super.onElement(exp);
        }
        public void onRef( ReferenceExp exp ) {
            if( !nodes.add(exp) ) {
                // second visit: this reference is shared.
                heads.add(exp);
                return; // prevent infinite recursion.
            }
            super.onRef(exp);
        }
    });

    // now heads contain all expressions that work as heads of islands.
    // create (name->expression) map while resolving name conflicts.
    final Map name2exp = new LinkedHashMap();
    {
        int cnt=0;  // used to name anonymous RefExps and unnamed elements.
        for( Iterator itr = heads.iterator(); itr.hasNext(); ) {
            Expression exp = (Expression)itr.next();

            if( exp instanceof ReferenceExp ) {
                ReferenceExp rexp = (ReferenceExp)exp;
                if( rexp.name == null ) {
                    // generate unique name
                    while( name2exp.containsKey("anonymous"+cnt) )
                        cnt++;
                    name2exp.put( "anonymous"+cnt, exp );
                } else if( name2exp.containsKey(rexp.name) ) {
                    // name conflict. try to add suffix.
                    int i = 2;
                    while( name2exp.containsKey(rexp.name+i) )
                        i++;
                    name2exp.put( rexp.name+i, exp );
                } else {
                    // name of this RefExp can be directly used without modification.
                    name2exp.put( rexp.name, exp );
                }
            } else if( exp instanceof ElementExp ) {
                NameClass nc = ((ElementExp)exp).getNameClass();
                if( nc instanceof SimpleNameClass
                &&  !name2exp.containsKey( ((SimpleNameClass)nc).localName ) ) {
                    // the local name of the element is still available.
                    name2exp.put( ((SimpleNameClass)nc).localName, exp );
                } else {
                    // generate unique name
                    while( name2exp.containsKey("element"+cnt) )
                        cnt++;
                    name2exp.put( "element"+cnt, exp );
                }
            } else {
                // assertion failed. it must be ElementExp or ReferenceExp.
                throw new Error( "unexpected head expression: "+exp.getClass().getName() );
            }
        }
    }

    // then reverse name2exp to exp2name. Every head received exactly one
    // name above, so the reversed map has the same number of entries.
    exp2name = new LinkedHashMap();
    for( Iterator itr = name2exp.entrySet().iterator(); itr.hasNext(); ) {
        Map.Entry e = (Map.Entry)itr.next();
        exp2name.put( e.getValue(), e.getKey() );
    }

    nameClassWriter = createNameClassWriter();

    // generates SAX events
    try {
        final DocumentHandler handler = writer.getDocumentHandler();
        handler.setDocumentLocator( new LocatorImpl() );
        handler.startDocument();

        // to work around the bug of current serializer,
        // report xmlns declarations as attributes.
        if( defaultNs!=null )
            writer.start("grammar",new String[]{
                "ns",defaultNs,
                "xmlns",RELAXNGReader.RELAXNGNamespace,
                "datatypeLibrary", XSDVocabulary.XMLSchemaNamespace });
        else
            writer.start("grammar", new String[]{
                "xmlns",RELAXNGReader.RELAXNGNamespace,
                "datatypeLibrary", XSDVocabulary.XMLSchemaNamespace });

        {// write start pattern.
            writer.start("start");
            writeIsland( g.getTopLevel() );
            writer.end("start");
        }

        // write all named expressions
        for( Iterator itr = exp2name.entrySet().iterator(); itr.hasNext(); ) {
            Map.Entry e = (Map.Entry)itr.next();
            Expression exp = (Expression)e.getKey();
            String name = (String)e.getValue();

            // a named ReferenceExp is written as the pattern it refers to.
            if( exp instanceof ReferenceExp )
                exp = ((ReferenceExp)exp).exp;

            writer.start("define",new String[]{"name",name});
            writeIsland( exp );
            writer.end("define");
        }

        writer.end("grammar");
        handler.endDocument();
    } catch( SAXRuntimeException sw ) {
        // unwrap the SAXException that was tunneled through the visitors.
        throw sw.e;
    }
}
/**
 * Writes one island, i.e. a tree of expressions, starting from its head.
 *
 * An ElementExp head is written through the element-writing entry point of
 * the pattern writer; any other expression is handed to its visitUnary method.
 *
 * @param exp
 *      head expression of the island to be written.
 */
protected void writeIsland( Expression exp ) {
    // the pattern writer traverses the island and produces its XML representation.
    if( !(exp instanceof ElementExp) ) {
        patternWriter.visitUnary( exp );
        return;
    }
    final ElementExp head = (ElementExp)exp;
    patternWriter.writeElement( head );
}
/** Grammar object which we are writing. Assigned at the beginning of each two-argument write call. */
protected Grammar grammar;
/**
 * map from ReferenceExp/ElementExp to its unique name.
 * "unique name" is used to write/reference this ReferenceExp.
 * ReferenceExps that are not in this map can be directly written into XML.
 * The map is rebuilt by each two-argument write call.
 */
protected Map exp2name;
/**
 * Looks for a namespace URI that can be used as the default 'ns' attribute.
 *
 * The expression is searched for elements and attributes. When the name class
 * of one of them is a {@link SimpleNameClass}, its namespace URI is the answer.
 * For binary expressions the left branch is tried first and the right branch
 * is consulted only if the left one yields null. Elements and attributes are
 * not searched any deeper.
 *
 * @param exp
 *      expression from which the search starts.
 * @return
 *      the namespace URI found, or null if there is none.
 */
protected String sniffDefaultNs( Expression exp ) {
    final ExpressionVisitor sniffer = new ExpressionVisitor(){

        /** namespace URI of a simple name class; null for any other name class. */
        protected String sniff( NameClass nc ) {
            return ( nc instanceof SimpleNameClass )
                ? ((SimpleNameClass)nc).namespaceURI
                : null;
        }

        // --- elements and attributes are the places where a namespace is found.
        public Object onAttribute( AttributeExp attr ) {
            return sniff( attr.nameClass );
        }
        public Object onElement( ElementExp elem ) {
            return sniff( elem.getNameClass() );
        }

        // --- binary expressions: left branch first, then the right one.
        public Object onBinExp( BinaryExp bin ) {
            final Object left = bin.exp1.visit(this);
            return ( left != null ) ? left : bin.exp2.visit(this);
        }
        public Object onConcur( ConcurExp exp ) {
            return onBinExp(exp);
        }
        public Object onInterleave( InterleaveExp exp ) {
            return onBinExp(exp);
        }
        public Object onSequence( SequenceExp exp ) {
            return onBinExp(exp);
        }
        public Object onChoice( ChoiceExp exp ) {
            return onBinExp(exp);
        }

        // --- unary expressions: look into the child.
        public Object onOther( OtherExp other ) {
            return other.exp.visit(this);
        }
        public Object onRef( ReferenceExp ref ) {
            return ref.exp.visit(this);
        }
        public Object onOneOrMore( OneOrMoreExp oneOrMore ) {
            return oneOrMore.exp.visit(this);
        }
        public Object onMixed( MixedExp mixed ) {
            return mixed.exp.visit(this);
        }

        // --- these expressions carry no name, hence no namespace.
        public Object onList( ListExp exp ) {
            return null;
        }
        public Object onValue( ValueExp exp ) {
            return null;
        }
        public Object onData( DataExp exp ) {
            return null;
        }
        public Object onAnyString() {
            return null;
        }
        public Object onEpsilon() {
            return null;
        }
        public Object onNullSet() {
            return null;
        }
    };
    return (String)exp.visit( sniffer );
}
/**
 * namespace URI currently implied through "ns" attribute propagation.
 */
protected String defaultNs;

/**
 * Returns the namespace URI currently held in the defaultNs field.
 *
 * @return the current default namespace URI; whatever value the field holds.
 */
public String getTargetNamespace() {
    return this.defaultNs;
}
public void writeNameClass( NameClass src ) {
final String MAGIC = PossibleNamesCollector.MAGIC;
Set names = PossibleNamesCollector.calc(src);
// convert a name class to the canonical form.
StringPair[] values = (StringPair[])names.toArray(new StringPair[names.size()]);
Set uriset = new HashSet();
for( int i=0; i in the attribute.
visitUnary(exp.exp);
this.writer.end("attribute");
}
/**
 * Writes an "element" pattern together with its content model.
 *
 * When the name class is a simple name whose namespace URI equals the
 * current default namespace, the compact "name" attribute form is used.
 * Otherwise the name class is written as a child of the element.
 * The content model is simplified before it is written.
 *
 * @param exp
 *      element expression to be written.
 */
protected void writeElement( ElementExp exp ) {
    final NameClass nc = exp.getNameClass();
    final boolean inDefaultNs =
        nc instanceof SimpleNameClass
        && ((SimpleNameClass)nc).namespaceURI.equals(defaultNs);

    if( !inDefaultNs ) {
        // the name cannot be abbreviated; spell out the name class.
        this.writer.start("element");
        writeNameClass( exp.getNameClass() );
    } else {
        // we can use name attribute to simplify output.
        final String localName = ((SimpleNameClass)nc).localName;
        this.writer.start( "element", new String[]{ "name", localName } );
    }

    visitUnary( simplify(exp.contentModel) );
    this.writer.end("element");
}
/**
 * Strips ReferenceExps and OtherExps that do not need to appear in the output.
 *
 * A ReferenceExp that has a unique name assigned stays as it is, because it
 * will be written as a named pattern; any other ReferenceExp, and every
 * OtherExp, is replaced by its (recursively simplified) body. ElementExps and
 * AttributeExps are returned untouched and are not searched any deeper.
 * This sometimes makes the content model smaller.
 *
 * @param exp
 *      content model to be simplified.
 * @return
 *      the simplified expression.
 */
public Expression simplify( Expression exp ) {
    final ExpressionCloner refStripper = new ExpressionCloner( grammar.getPool() ){
        public Expression onAttribute( AttributeExp attr ) {
            return attr;
        }
        public Expression onElement( ElementExp elem ) {
            return elem;
        }
        public Expression onOther( OtherExp other ) {
            return other.exp.visit(this);
        }
        public Expression onRef( ReferenceExp ref ) {
            if( !exp2name.containsKey(ref) ) {
                // no name is assigned: bind the contents in place.
                return ref.exp.visit(this);
            }
            // this ReferenceExp will be written as a named pattern.
            return ref;
        }
    };
    return exp.visit( refStripper );
}
};
}