All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.msv.verifier.regexp.ExpressionAcceptor Maven / Gradle / Ivy

There is a newer version: 2.3.0
Show newest version
/*
 * Copyright (c) 2001-2013 Oracle and/or its affiliates. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *   - Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *
 *   - Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 *   - Neither the name of Oracle nor the names of its
 *     contributors may be used to endorse or promote products derived
 *     from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package com.sun.msv.verifier.regexp;

import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.Vector;

import org.relaxng.datatype.DatatypeException;

import com.sun.msv.grammar.AttributeExp;
import com.sun.msv.grammar.ChoiceExp;
import com.sun.msv.grammar.DataOrValueExp;
import com.sun.msv.grammar.ElementExp;
import com.sun.msv.grammar.Expression;
import com.sun.msv.grammar.IDContextProvider;
import com.sun.msv.grammar.IDContextProvider2;
import com.sun.msv.grammar.NameClass;
import com.sun.msv.grammar.NamespaceNameClass;
import com.sun.msv.grammar.NotNameClass;
import com.sun.msv.grammar.SimpleNameClass;
import com.sun.msv.grammar.ValueExp;
import com.sun.msv.grammar.util.IDContextProviderWrapper;
import com.sun.msv.util.DatatypeRef;
import com.sun.msv.util.StartTagInfo;
import com.sun.msv.util.StringRef;
import com.sun.msv.verifier.Acceptor;

/**
 * {@link Acceptor} implementation.
 *
 * <p>
 * When you are using {@code REDocumentDeclaration}, the acceptor
 * is always guaranteed to be a subclass of this class.
 * Therefore, by using this regexp implementation of VGM, you can always downcast
 * {@link Acceptor} to this class and access its contents to get more information.
 *
 * <p>
 * If you consider VGM as an automaton,
 * this class can be thought of as a lazy automaton acceptor.
 *
 * @author Kohsuke KAWAGUCHI
 */
public abstract class ExpressionAcceptor implements Acceptor {

    /** The residual content model exposed through {@link #getExpression()}. */
    private Expression expression;

    /**
     * Gets the residual content model.
     *
     * <p>
     * This method returns the expression that represents the content model
     * this acceptor still expects to read.
     * For example, if the original content model is (A,(B|C)) and this acceptor
     * has already read A, then this method returns (B|C).
     *
     * <p>
     * The returned residual is useful to find out what elements can appear next.
     *
     * <p>
     * If you consider VGM as an automaton, the residual content model
     * can be thought of as the current state. At the same time, it is the
     * right language (a regular expression that represents
     * the language it can accept from now on).
     *
     * @return the current residual expression
     */
    public Expression getExpression() {
        return this.expression;
    }

    /** this object provides various function objects */
    protected final REDocumentDeclaration docDecl;

    /**
     * If true, this acceptor will ignore all undeclared attributes.
     * If false, this acceptor will signal an error for an undeclared attribute.
     *
     * <p>

* This flag is used to implement the semantics of RELAX Core, where * undeclared attributes are allowed. */ protected final boolean ignoreUndeclaredAttributes; public ExpressionAcceptor( REDocumentDeclaration docDecl, Expression exp, boolean ignoreUndeclaredAttributes ) { this.docDecl = docDecl; this.expression = exp; this.ignoreUndeclaredAttributes = ignoreUndeclaredAttributes; } /** * creates combined child acceptor and primitive child acceptors (if necessary). * * be careful not to keep returned object too long because * it is reused whenever the method is called. * * @return null * if errRef is null and this expression cannot accept given start tag. * if errRef is non-null and error recovery is not possible. */ public Acceptor createChildAcceptor( StartTagInfo tag, StringRef errRef ) { final CombinedChildContentExpCreator cccc = docDecl.cccec; // obtains fully combined child content pattern CombinedChildContentExpCreator.ExpressionPair e = cccc.get(expression,tag); if( e.content!=Expression.nullSet ) { // successful. if( com.sun.msv.driver.textui.Debug.debug ) { System.out.println("accept start tag <"+ tag.qName+">. combined content pattern is"); System.out.println(com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(e.content)); if( e.continuation!=null ) System.out.println("continuation is:\n"+ com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(e.continuation) ); else System.out.println("no continuation"); } return createAcceptor( e.content, e.continuation, cccc.getMatchedElements(), cccc.numMatchedElements() ); } // no element declaration is satisfied by this start tag. // this must be an error of input document. if( errRef==null ) // bail out now to notify the caller that an error was found. return null; // no ElementExp accepts this tag name // (actually, some ElementExp may have possibly accepted this tag name, // but as a result of , no expression left ). 
errRef.str = diagnoseBadTagName(tag); if( errRef.str==null ) // no detailed error message was prepared. // use some generic one. errRef.str = docDecl.localizeMessage( REDocumentDeclaration.DIAG_BAD_TAGNAME_GENERIC, tag.qName ); // prepare child acceptor. return createRecoveryAcceptors(); } protected abstract Acceptor createAcceptor( Expression contentModel, Expression continuation/*can be null*/, ElementExp[] primitives, int numPrimitives ); /** * @deprecated */ public final boolean onAttribute( String namespaceURI, String localName, String qName, String value, IDContextProvider context, StringRef refErr, DatatypeRef refType ) { return onAttribute2( namespaceURI, localName, qName, value, IDContextProviderWrapper.create(context), refErr, refType ); } public final boolean onAttribute2( String namespaceURI, String localName, String qName, String value, IDContextProvider2 context, StringRef refErr, DatatypeRef refType ) { // instead of creating a new object each time, // use a cached copy. docDecl.attToken.reinit( namespaceURI,localName,qName, new StringToken(docDecl,value,context,refType) ); return onAttribute( docDecl.attToken, refErr ); } protected boolean onAttribute( AttributeToken token, StringRef refErr ) { Expression r = docDecl.attFeeder.feed( this.expression, token, ignoreUndeclaredAttributes ); if( r!=Expression.nullSet ) { // this attribute is properly consumed. expression = r; if(com.sun.msv.driver.textui.Debug.debug) System.out.println("-- residual after :" + com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(r)); return true; } if( refErr==null ) { // refErr was not provided. bail out now. return false; } // // diagnose the error // // this attribute was not accepted. // its value may be wrong. // try feeding wild card and see if it's accepted. 
AttributeRecoveryToken rtoken = token.createRecoveryAttToken(); r = docDecl.attFeeder.feed( this.expression, rtoken, ignoreUndeclaredAttributes ); if( r==Expression.nullSet ) { // even the wild card was rejected. // now there are two possibilities. // the first is that this attribute name is not allowed to appear, // which is the most typical case. (e.g., type miss of the attribute name, etc). // the second is that the content model of the element is equal to the // nullSet, thus nothing can be accepted. This is usually // a problem of the schema. if( this.expression==Expression.nullSet ) { // the content model is equal to the nullSet. refErr.str = docDecl.localizeMessage( REDocumentDeclaration.DIAG_CONTENT_MODEL_IS_NULLSET, null ); } else { // the content model is not equal to the nullSet. // this means that this attribute // is not specified by the grammar. refErr.str = docDecl.localizeMessage( REDocumentDeclaration.DIAG_UNDECLARED_ATTRIBUTE, token.qName ); } // recover by using the current expression. // TODO: possibly we can make all attributes optional or something. // (because this might be a caused by the typo.) return true; } else { // wild card was accepted, so the value must be wrong. refErr.str = diagnoseBadAttributeValue( rtoken ); if( refErr.str==null ) { // no detailed error message can be provided // so use generic one. refErr.str = docDecl.localizeMessage( REDocumentDeclaration.DIAG_BAD_ATTRIBUTE_VALUE_GENERIC, token.qName ); } // now we know the reason. // recover by assuming that the valid value was specified for this attribute. this.expression = r; return true; } } public boolean onEndAttributes( StartTagInfo sti, StringRef refErr ) { Expression r = docDecl.attPruner.prune( this.expression ); if( r!=Expression.nullSet ) { // there was no error. this.expression = r; return true; } // there was an error. // specifically, some required attributes are missing. if( refErr==null ) return false; // refErr was not provided. bail out. 
if( this.expression==Expression.nullSet ) { // the content model is equal to the nullSet. refErr.str = docDecl.localizeMessage( REDocumentDeclaration.DIAG_CONTENT_MODEL_IS_NULLSET, null ); } else { refErr.str = diagnoseMissingAttribute(sti); if( refErr.str==null ) // no detailed error message can be provided // so use generic one. refErr.str = docDecl.localizeMessage( REDocumentDeclaration.DIAG_MISSING_ATTRIBUTE_GENERIC, sti.qName ); } // remove unconsumed attributes this.expression = this.expression.visit( docDecl.attRemover ); return true; } protected boolean stepForward( Token token, StringRef errRef ) { Expression residual = docDecl.resCalc.calcResidual( expression, token ); if( com.sun.msv.driver.textui.Debug.debug ) { System.out.println("residual of stepForward("+token+")"); System.out.print(com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(expression)); System.out.print(" -> "); System.out.println(com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(residual)); } if( residual==Expression.nullSet ) { // error: we can't accept this token if( errRef!=null ) { // diagnose error. if( token instanceof StringToken ) errRef.str = diagnoseUnexpectedLiteral( (StringToken)token ); // docDecl.localizeMessage( docDecl.DIAG_BAD_LITERAL_VALUE_WRAPUP, // TODO: diagnosis for ElementToken // recovery by ignoring this token. // TODO: should we modify this to choice(expression,EoCR)? // we need some measures to prevent redundant choice } else { // do not mutate any member variables. // caller may call stepForward again with error recovery. 
} return false; } expression = residual; return true; } /** * @deprecated */ public final boolean onText( String literal, IDContextProvider context, StringRef refErr, DatatypeRef refType ) { return onText2( literal, IDContextProviderWrapper.create(context), refErr, refType ); } public boolean onText2( String literal, IDContextProvider2 provider, StringRef refErr, DatatypeRef refType ) { return stepForward( new StringToken(docDecl,literal,provider,refType), refErr ); } public final boolean stepForwardByContinuation( Expression continuation, StringRef errRef ) { if( continuation!=Expression.nullSet ) { // successful transition if( com.sun.msv.driver.textui.Debug.debug ) System.out.println("stepForwardByCont. : " + com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(continuation)); expression = continuation; return true; } if( errRef==null ) return false; // fail immediately. // TODO: diagnose uncompleted content model. return false; } /** checks if this Acceptor is satisifed */ public boolean isAcceptState( StringRef errRef ) { if( errRef==null ) return expression.isEpsilonReducible(); else { if(expression.isEpsilonReducible()) return true; // error. provide diagnosis errRef.str = diagnoseUncompletedContent(); return false; } } public int getStringCareLevel() { // if the value is cached, return cached value. // otherwise, calculate it now. 
OptimizationTag ot = (OptimizationTag)expression.verifierTag; if(ot==null) expression.verifierTag = ot = new OptimizationTag(); if(ot.stringCareLevel==OptimizationTag.STRING_NOTCOMPUTED) ot.stringCareLevel = StringCareLevelCalculator.calc(expression); return ot.stringCareLevel; } // error recovery //================================================== private final Expression mergeContinuation( Expression exp1, Expression exp2 ) { if(exp1==null && exp2==null) return null; if(exp1==null || exp1==Expression.nullSet) return exp2; if(exp2==null || exp2==Expression.nullSet) return exp1; return docDecl.pool.createChoice(exp1,exp2); } /** * creates Acceptor that recovers from errors. * * This method also modifies the current expression in preparation to * accept newly created child acceptor. * * Recovery will be done by preparing to accept two possibilities. * *

    * <ol>
    *  <li>We may get back to sync by ignoring the newly found illegal element
    *      (this is for a mistake like "abcXdefg").</li>
    *  <li>We may get back to sync by replacing the newly found illegal element
    *      with one of the valid elements
    *      (this is for a mistake like "abcXefg").</li>
    * </ol>
*/ private final Acceptor createRecoveryAcceptors() { final CombinedChildContentExpCreator cccc = docDecl.cccec; CombinedChildContentExpCreator.ExpressionPair combinedEoC = cccc.get( expression, null, false ); // get residual of EoC. Expression eocr = docDecl.resCalc.calcResidual( expression, AnyElementToken.theInstance ); Expression continuation = docDecl.pool.createChoice( expression, eocr ); Expression contentModel = combinedEoC.content; if( com.sun.msv.driver.textui.Debug.debug ) { System.out.println("content model of recovery acceptor:"+ com.sun.msv.grammar.util.ExpressionPrinter.printContentModel(contentModel) ); System.out.println("continuation of recovery acceptor:"+ com.sun.msv.grammar.util.ExpressionPrinter.printSmallest(continuation) ); } // by passing null as elements of concern and // using continuation, we are effectively "generating" // the content model for error recovery. return createAcceptor( contentModel, continuation, null, 0 ); } /** * format list of candidates to one string. * * this method * (1) inserts separator into appropriate positions * (2) appends "more" message when items are only a portion of candidates. */ private String concatenateMessages( List items, boolean more, String separatorStr, String moreStr ) { String r=""; String sep = docDecl.localizeMessage(separatorStr,null); Collections.sort(items, new Comparator(){ public int compare( Object o1, Object o2 ) { return ((String)o1).compareTo((String)o2); } }); // sort candidates. for( int i=0; i is used. // there is no easy way to tell which what tag name is expected. // TODO: we can reduce strength by treating concur as choice. // do it. return null; } // we are now sure that combined child content expression will be // the choice of all elements of concern. // so if tag name satisfies one of those elements, // it can be accepted. // therefore we can provide candidates for users. 
Set s = new java.util.HashSet(); boolean more = false; // if there is a SimpleNameClass with the same localName // but with a different namespace URI, // this variable will receive that URI. String wrongNamespace = null; final ElementExp[] eocs = cccc.getMatchedElements(); final int len = cccc.numMatchedElements(); for( int i=0; is. // // this is also a frequently used pattern by TREX. // an expression like // // // // yesno // // // // falls into this pattern. final Set items = new java.util.HashSet(); boolean more = false; ChoiceExp ch = (ChoiceExp)constraint; Expression[] children = ch.getChildren(); for( int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy