com.sun.msv.verifier.regexp.ExpressionAcceptor Maven / Gradle / Ivy
/*
* Copyright (c) 2001-2013 Oracle and/or its affiliates. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of Oracle nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
* IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
* THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package com.sun.msv.verifier.regexp;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Vector;
import org.relaxng.datatype.DatatypeException;
import com.sun.msv.grammar.AttributeExp;
import com.sun.msv.grammar.ChoiceExp;
import com.sun.msv.grammar.DataOrValueExp;
import com.sun.msv.grammar.ElementExp;
import com.sun.msv.grammar.Expression;
import com.sun.msv.grammar.IDContextProvider;
import com.sun.msv.grammar.IDContextProvider2;
import com.sun.msv.grammar.NameClass;
import com.sun.msv.grammar.NamespaceNameClass;
import com.sun.msv.grammar.NotNameClass;
import com.sun.msv.grammar.SimpleNameClass;
import com.sun.msv.grammar.ValueExp;
import com.sun.msv.grammar.util.IDContextProviderWrapper;
import com.sun.msv.util.DatatypeRef;
import com.sun.msv.util.StartTagInfo;
import com.sun.msv.util.StringRef;
import com.sun.msv.verifier.Acceptor;
/**
* {@link Acceptor} implementation.
*
*
* When you are using <code>REDocumentDeclaration</code>, then the acceptor
* is always guaranteed to be a subclass of this class.
*
* Therefore, by using this regexp implementation of VGM, you can always downcast
* {@link Acceptor} to this class and access its contents to get more information.
*
*
* If you consider VGM as an automaton,
* this class can be thought as a lazy automaton acceptor.
*
*
* @author Kohsuke KAWAGUCHI
*/
public abstract class ExpressionAcceptor implements Acceptor {
/** The residual content model; reassigned each time this acceptor consumes a token. */
private Expression expression;
/**
 * Returns the residual content model.
 *
 * <p>
 * The residual is the expression describing the content this acceptor
 * still expects to read. For example, if the original content model is
 * (A,(B|C)) and this acceptor has already read A, the residual is (B|C).
 *
 * <p>
 * The residual is useful to find out what elements can appear next.
 *
 * <p>
 * If you consider VGM as an automaton, the residual content model can be
 * thought of as the current state. At the same time it is the right
 * language: a regular expression that represents everything that can be
 * accepted from now on.
 *
 * @return the current residual expression held by this acceptor.
 */
public Expression getExpression() {
    return this.expression;
}
/**
 * Owner declaration that provides the shared function objects used by this
 * acceptor (for example the residual calculator, the attribute feeder,
 * pruner and remover, the expression pool, the cached attribute token)
 * and the message localization.
 */
protected final REDocumentDeclaration docDecl;
/**
 * If true, this acceptor will ignore all undeclared attributes.
 * If false, this acceptor will signal an error for an undeclared attribute.
 *
 * <p>
 * This flag is used to implement the semantics of RELAX Core, where
 * undeclared attributes are allowed. It is passed as-is to the attribute
 * feeder whenever an attribute is consumed.
 */
protected final boolean ignoreUndeclaredAttributes;
/**
 * Creates an acceptor whose initial residual is the given expression.
 *
 * @param docDecl
 *      declaration object that supplies the shared function objects.
 * @param exp
 *      initial content model of this acceptor.
 * @param ignoreUndeclaredAttributes
 *      whether undeclared attributes are silently ignored.
 */
public ExpressionAcceptor(
        REDocumentDeclaration docDecl,
        Expression exp,
        boolean ignoreUndeclaredAttributes ) {

    this.expression = exp;
    this.ignoreUndeclaredAttributes = ignoreUndeclaredAttributes;
    this.docDecl = docDecl;
}
/**
 * Creates the child acceptor for the given start tag.
 *
 * <p>
 * The combined child content pattern is computed from the current
 * expression and the tag. When it is not the null set, the child acceptor
 * is built from that content, its continuation and the matched elements.
 * Otherwise the start tag is an error of the input document: either
 * <code>null</code> is returned (no <code>errRef</code>), or a diagnosis
 * is stored in <code>errRef</code> and a recovery acceptor is returned.
 *
 * <p>
 * NOTE: the original documentation warns not to keep the returned object
 * for too long because it is reused whenever this method is called.
 *
 * @param tag
 *      start tag to be accepted.
 * @param errRef
 *      receives the error message on failure; may be null, in which case
 *      no diagnosis or recovery is attempted.
 * @return
 *      the child acceptor; null if errRef is null and this expression
 *      cannot accept the given start tag. When errRef is non-null and the
 *      tag is rejected, whatever the recovery acceptor creation returns.
 */
public Acceptor createChildAcceptor( StartTagInfo tag, StringRef errRef ) {
    final CombinedChildContentExpCreator creator = docDecl.cccec;

    // the fully combined child content pattern for this start tag
    final CombinedChildContentExpCreator.ExpressionPair pair = creator.get(expression,tag);

    if( pair.content==Expression.nullSet ) {
        // no element declaration is satisfied by this start tag,
        // so the input document must be in error.
        // (some ElementExp may have accepted the tag name by itself,
        //  but no expression was left once the content was combined.)
        if( errRef==null ) {
            // the caller did not ask for a diagnosis: just report the failure.
            return null;
        }

        errRef.str = diagnoseBadTagName(tag);
        if( errRef.str==null ) {
            // no detailed message could be prepared; fall back to the generic one.
            errRef.str = docDecl.localizeMessage(
                REDocumentDeclaration.DIAG_BAD_TAGNAME_GENERIC, tag.qName );
        }

        // continue validation with a child acceptor built for recovery.
        return createRecoveryAcceptors();
    }

    // the start tag was accepted.
    if( com.sun.msv.driver.textui.Debug.debug ) {
        System.out.println("accept start tag <"+ tag.qName+">. combined content pattern is");
        System.out.println(com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(pair.content));
        if( pair.continuation==null ) {
            System.out.println("no continuation");
        } else {
            System.out.println("continuation is:\n"+
                com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(pair.continuation)
                );
        }
    }

    return createAcceptor(
        pair.content,
        pair.continuation,
        creator.getMatchedElements(),
        creator.numMatchedElements() );
}
/**
 * Factory method through which this class creates child acceptors.
 *
 * <p>
 * It is called with the combined content and its continuation together
 * with the matched elements when a start tag is accepted, and with
 * <code>null</code> / <code>0</code> as the elements when a recovery
 * acceptor is created.
 *
 * @param contentModel
 *      content model the new acceptor starts with.
 * @param continuation
 *      continuation expression paired with the content model; can be null.
 * @param primitives
 *      element declarations matched by the start tag; null when called
 *      for error recovery.
 * @param numPrimitives
 *      number of matched elements passed along with <code>primitives</code>;
 *      0 when called for error recovery.
 */
protected abstract Acceptor createAcceptor(
Expression contentModel, Expression continuation/*can be null*/,
ElementExp[] primitives, int numPrimitives );
/**
 * Legacy entry point that takes an {@link IDContextProvider}.
 *
 * <p>
 * The context is wrapped by {@link IDContextProviderWrapper#create} and the
 * call is forwarded to
 * {@link #onAttribute2(String, String, String, String, IDContextProvider2, StringRef, DatatypeRef)}.
 *
 * @deprecated
 *      use the <code>onAttribute2</code> method, to which this one delegates.
 */
public final boolean onAttribute(
        String namespaceURI, String localName, String qName, String value,
        IDContextProvider context, StringRef refErr, DatatypeRef refType ) {

    final IDContextProvider2 wrapped = IDContextProviderWrapper.create(context);
    return onAttribute2( namespaceURI, localName, qName, value, wrapped, refErr, refType );
}
/**
 * Feeds one attribute to this acceptor.
 *
 * <p>
 * The attribute value is wrapped into a new {@link StringToken}; the
 * attribute token itself is not allocated but the cached instance held by
 * the document declaration is re-initialized and reused.
 *
 * @return
 *      the result of {@link #onAttribute(AttributeToken, StringRef)} for
 *      the re-initialized token.
 */
public final boolean onAttribute2(
        String namespaceURI, String localName, String qName, String value,
        IDContextProvider2 context, StringRef refErr, DatatypeRef refType ) {

    final StringToken valueToken = new StringToken(docDecl,value,context,refType);

    // reuse the cached attribute token rather than creating a new one each time.
    docDecl.attToken.reinit( namespaceURI, localName, qName, valueToken );

    return onAttribute( docDecl.attToken, refErr );
}
/**
 * Consumes an attribute token and, on failure, diagnoses the error.
 *
 * <p>
 * When the attribute is rejected and <code>refErr</code> is given, the
 * attribute is fed again as a recovery (wild card) token to tell a wrong
 * attribute name apart from a wrong attribute value.
 *
 * @param token
 *      attribute to be consumed.
 * @param refErr
 *      receives the error message; may be null, in which case a rejected
 *      attribute simply makes this method return false.
 * @return
 *      true if the attribute was consumed, or if it was rejected but
 *      <code>refErr</code> was provided (the error is then recovered);
 *      false if it was rejected and <code>refErr</code> is null.
 */
protected boolean onAttribute( AttributeToken token, StringRef refErr ) {
    final Expression fed =
        docDecl.attFeeder.feed( this.expression, token, ignoreUndeclaredAttributes );

    if( fed!=Expression.nullSet ) {
        // the attribute was properly consumed.
        expression = fed;

        if(com.sun.msv.driver.textui.Debug.debug) {
            System.out.println("-- residual after :" +
                com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(fed));
        }
        return true;
    }

    if( refErr==null ) {
        // nobody is interested in the reason. bail out now.
        return false;
    }

    //
    // diagnose the error
    //

    // the attribute was not accepted; its value may be the culprit.
    // feed a wild card instead and see whether that one is accepted.
    final AttributeRecoveryToken wildcard = token.createRecoveryAttToken();
    final Expression recovered =
        docDecl.attFeeder.feed( this.expression, wildcard, ignoreUndeclaredAttributes );

    if( recovered!=Expression.nullSet ) {
        // the wild card was accepted, so the value must be wrong.
        refErr.str = diagnoseBadAttributeValue( wildcard );
        if( refErr.str==null ) {
            // no detailed error message is available; use the generic one.
            refErr.str = docDecl.localizeMessage(
                REDocumentDeclaration.DIAG_BAD_ATTRIBUTE_VALUE_GENERIC, token.qName );
        }

        // recover by assuming that a valid value was specified for this attribute.
        this.expression = recovered;
        return true;
    }

    // even the wild card was rejected. there are two possibilities:
    //  - the attribute name is not allowed to appear, which is the most
    //    typical case (e.g., a typo in the attribute name).
    //  - the content model of the element is equal to the nullSet and
    //    nothing can be accepted. this is usually a problem of the schema.
    if( this.expression==Expression.nullSet ) {
        refErr.str = docDecl.localizeMessage(
            REDocumentDeclaration.DIAG_CONTENT_MODEL_IS_NULLSET, null );
    } else {
        // the attribute is simply not specified by the grammar.
        refErr.str = docDecl.localizeMessage(
            REDocumentDeclaration.DIAG_UNDECLARED_ATTRIBUTE, token.qName );
    }

    // recover by keeping the current expression as it is.
    // TODO: possibly we can make all attributes optional or something.
    //       (because this might be caused by a typo.)
    return true;
}
/**
 * Signals that all the attributes of the start tag have been fed.
 *
 * <p>
 * The current expression is pruned by the attribute pruner. A null-set
 * result is an error (specifically, some required attributes are
 * missing); with <code>refErr</code> given, the error is diagnosed and
 * the unconsumed attributes are removed from the expression to recover.
 *
 * @param sti
 *      start tag whose attributes were fed; used for the diagnosis.
 * @param refErr
 *      receives the error message; may be null.
 * @return
 *      false only when there was an error and <code>refErr</code> is null.
 */
public boolean onEndAttributes( StartTagInfo sti, StringRef refErr ) {
    final Expression pruned = docDecl.attPruner.prune( this.expression );

    if( pruned==Expression.nullSet ) {
        // there was an error.
        if( refErr==null ) {
            // refErr was not provided. bail out.
            return false;
        }

        if( this.expression==Expression.nullSet ) {
            // the content model itself is equal to the nullSet.
            refErr.str = docDecl.localizeMessage(
                REDocumentDeclaration.DIAG_CONTENT_MODEL_IS_NULLSET, null );
        } else {
            refErr.str = diagnoseMissingAttribute(sti);
            if( refErr.str==null ) {
                // no detailed error message is available; use the generic one.
                refErr.str = docDecl.localizeMessage(
                    REDocumentDeclaration.DIAG_MISSING_ATTRIBUTE_GENERIC,
                    sti.qName );
            }
        }

        // recover by removing the unconsumed attributes.
        this.expression = this.expression.visit( docDecl.attRemover );
        return true;
    }

    // there was no error.
    this.expression = pruned;
    return true;
}
/**
 * Advances this acceptor by one token.
 *
 * <p>
 * The residual of the current expression by the token becomes the new
 * current expression, unless it is the null set. In that case the token
 * cannot be accepted: the current expression is left untouched, and a
 * diagnosis is stored to <code>errRef</code> if it is given and the token
 * is a {@link StringToken}.
 *
 * @param token
 *      token to be consumed.
 * @param errRef
 *      receives the error message; may be null.
 * @return
 *      true if the token was accepted, false otherwise (even when
 *      <code>errRef</code> is provided).
 */
protected boolean stepForward( Token token, StringRef errRef ) {
    final Expression next = docDecl.resCalc.calcResidual( expression, token );

    if( com.sun.msv.driver.textui.Debug.debug ) {
        System.out.println("residual of stepForward("+token+")");
        System.out.print(com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(expression));
        System.out.print("   ->   ");
        System.out.println(com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(next));
    }

    if( next!=Expression.nullSet ) {
        expression = next;
        return true;
    }

    // error: this token cannot be accepted.
    // no member variable is mutated here, because the caller may call
    // stepForward again with error recovery.
    if( errRef!=null && token instanceof StringToken ) {
        // diagnose the error.
        errRef.str = diagnoseUnexpectedLiteral( (StringToken)token );
    }
    // TODO: diagnosis for ElementToken
    // recovery is done by ignoring this token.
    // TODO: should we modify this to choice(expression,EoCR)?
    //       we need some measures to prevent redundant choice
    return false;
}
/**
 * Legacy entry point that takes an {@link IDContextProvider}.
 *
 * <p>
 * The context is wrapped by {@link IDContextProviderWrapper#create} and the
 * call is forwarded to
 * {@link #onText2(String, IDContextProvider2, StringRef, DatatypeRef)}.
 *
 * @deprecated
 *      use the <code>onText2</code> method, to which this one delegates.
 */
public final boolean onText(
        String literal, IDContextProvider context,
        StringRef refErr, DatatypeRef refType ) {

    final IDContextProvider2 wrapped = IDContextProviderWrapper.create(context);
    return onText2( literal, wrapped, refErr, refType );
}
/**
 * Feeds a chunk of text to this acceptor.
 *
 * <p>
 * The literal is wrapped into a {@link StringToken} and consumed through
 * {@link #stepForward(Token, StringRef)}.
 *
 * @return the result of the <code>stepForward</code> call.
 */
public boolean onText2(
        String literal, IDContextProvider2 provider,
        StringRef refErr, DatatypeRef refType ) {

    final StringToken textToken = new StringToken(docDecl,literal,provider,refType);
    return stepForward( textToken, refErr );
}
/**
 * Replaces the current expression with the given continuation.
 *
 * @param continuation
 *      expression to continue with. The null set means that the
 *      transition failed.
 * @param errRef
 *      currently has no effect on the outcome: no diagnosis is produced
 *      yet, and a failed transition returns false with or without it.
 * @return
 *      true if the continuation was adopted; false if it is the null set.
 */
public final boolean stepForwardByContinuation( Expression continuation, StringRef errRef ) {
    if( continuation==Expression.nullSet ) {
        // the transition failed, whether or not errRef was provided.
        // TODO: diagnose uncompleted content model.
        return false;
    }

    // successful transition
    if( com.sun.msv.driver.textui.Debug.debug ) {
        System.out.println("stepForwardByCont. :  " +
            com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(continuation));
    }
    expression = continuation;
    return true;
}
/**
 * Checks if this Acceptor is satisfied, that is, whether the current
 * expression is epsilon-reducible.
 *
 * @param errRef
 *      if non-null and the acceptor is not satisfied, receives the
 *      diagnosis of the uncompleted content.
 * @return
 *      true if the current expression is epsilon-reducible.
 */
public boolean isAcceptState( StringRef errRef ) {
    final boolean satisfied = expression.isEpsilonReducible();

    if( !satisfied && errRef!=null ) {
        // error. provide diagnosis
        errRef.str = diagnoseUncompletedContent();
    }
    return satisfied;
}
/**
 * Returns the string care level of the current expression.
 *
 * <p>
 * The value is cached in the {@link OptimizationTag} attached to the
 * expression's <code>verifierTag</code>; the tag is created and the level
 * is calculated on first use.
 */
public int getStringCareLevel() {
    OptimizationTag tag = (OptimizationTag)expression.verifierTag;

    if( tag==null ) {
        // no tag has been attached to this expression yet.
        tag = new OptimizationTag();
        expression.verifierTag = tag;
    }

    if( tag.stringCareLevel==OptimizationTag.STRING_NOTCOMPUTED ) {
        // not cached yet: calculate it now.
        tag.stringCareLevel = StringCareLevelCalculator.calc(expression);
    }

    return tag.stringCareLevel;
}
// error recovery
//==================================================
/**
 * Merges two continuations into one.
 *
 * <p>
 * A continuation that is null or the null set contributes nothing, so the
 * other one is returned as it is (which yields null when the first is
 * absent and the second is null). Otherwise the choice of the two is
 * created from the expression pool.
 */
private final Expression mergeContinuation( Expression exp1, Expression exp2 ) {
    final boolean firstAbsent = ( exp1==null || exp1==Expression.nullSet );
    if( firstAbsent ) {
        // also covers the case where both are null: exp2 (null) is returned.
        return exp2;
    }

    final boolean secondAbsent = ( exp2==null || exp2==Expression.nullSet );
    if( secondAbsent ) {
        return exp1;
    }

    return docDecl.pool.createChoice( exp1, exp2 );
}
/**
 * Creates an Acceptor that recovers from an illegal start tag.
 *
 * <p>
 * The content model of the new acceptor is the combined child content
 * obtained from the current expression without a start tag. Its
 * continuation is the choice of two expressions, which prepares for two
 * possibilities of recovery:
 *
 * <ol>
 *  <li>We may get back to sync by ignoring the newly found illegal element
 *      (this is for a mistake like "abcXdefg"). This is the current
 *      expression itself.
 *  <li>We may get back to sync by replacing the newly found illegal element
 *      by one of the valid elements (this is for a mistake like "abcXefg").
 *      This is the residual of the current expression by an arbitrary
 *      element.
 * </ol>
 *
 * <p>
 * This method does not itself assign the current expression of this
 * acceptor.
 */
private final Acceptor createRecoveryAcceptors() {
    final CombinedChildContentExpCreator creator = docDecl.cccec;

    final CombinedChildContentExpCreator.ExpressionPair combined =
        creator.get( expression, null, false );

    // residual of the current expression after consuming "any element".
    final Expression anyElementResidual =
        docDecl.resCalc.calcResidual( expression, AnyElementToken.theInstance );

    final Expression recoveryContinuation =
        docDecl.pool.createChoice( expression, anyElementResidual );

    final Expression recoveryContent = combined.content;

    if( com.sun.msv.driver.textui.Debug.debug ) {
        System.out.println("content model of recovery acceptor:"+
            com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(recoveryContent) );
        System.out.println("continuation of recovery acceptor:"+
            com.sun.msv.grammar.util.ExpressionPrinter.printSmallest(recoveryContinuation) );
    }

    // by passing null as elements of concern and using the continuation,
    // we are effectively "generating" the content model for error recovery.
    return createAcceptor( recoveryContent, recoveryContinuation, null, 0 );
}
/**
* format list of candidates to one string.
*
* this method
* (1) inserts separator into appropriate positions
* (2) appends "more" message when items are only a portion of candidates.
*/
private String concatenateMessages( List items, boolean more,
String separatorStr, String moreStr )
{
String r="";
String sep = docDecl.localizeMessage(separatorStr,null);
Collections.sort(items,
new Comparator(){
public int compare( Object o1, Object o2 ) {
return ((String)o1).compareTo((String)o2);
}
}); // sort candidates.
for( int i=0; i is used.
// there is no easy way to tell which what tag name is expected.
// TODO: we can reduce strength by treating concur as choice.
// do it.
return null;
}
// we are now sure that combined child content expression will be
// the choice of all elements of concern.
// so if tag name satisfies one of those elements,
// it can be accepted.
// therefore we can provide candidates for users.
Set s = new java.util.HashSet();
boolean more = false;
// if there is a SimpleNameClass with the same localName
// but with a different namespace URI,
// this variable will receive that URI.
String wrongNamespace = null;
final ElementExp[] eocs = cccc.getMatchedElements();
final int len = cccc.numMatchedElements();
for( int i=0; is.
//
// this is also a frequently used pattern by TREX.
// an expression like
//
//
//
// yes no
//
//
//
// falls into this pattern.
final Set items = new java.util.HashSet();
boolean more = false;
ChoiceExp ch = (ChoiceExp)constraint;
Expression[] children = ch.getChildren();
for( int i=0; i