// net.sf.saxon.event.RegularSequenceChecker (Saxon-HE)
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2018-2023 Saxonica Limited
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.event;
import net.sf.saxon.om.*;
import net.sf.saxon.s9api.Location;
import net.sf.saxon.str.UnicodeString;
import net.sf.saxon.trans.XPathException;
import net.sf.saxon.transpile.CSharpSimpleEnum;
import net.sf.saxon.type.SchemaType;
import net.sf.saxon.type.Type;
import java.util.HashMap;
import java.util.Map;
import java.util.Stack;
/**
* A {@code RegularSequenceChecker} is a filter that can be inserted into a Receiver pipeline
* to check that the sequence of events passed in is a regular event sequence. Many
* (though not all) implementations of {@link Outputter} require the sequence of events to
* be regular according to this definition.
* A sequence of {@code Receiver} events is regular if the following conditions
* are satisfied:
*
* - Calls to {@link Outputter#startElement(NodeName, SchemaType, Location, int)}, {@link #endElement()},
* {@link Outputter#startDocument(int)}, and {@link #endDocument()} are properly paired and nested.
* - Events must only occur in a state where they are permitted; the states and transitions
* between states are defined by the table below. The initial state is initial,
* and the final state must be final.
*
*
* Permitted state transitions
*
* State Events Next State
*
*
* initial {@link #open()} open
* open {@link #open()} open
* open {@link Outputter#append(Item, Location, int)}, {@link #append(Item)},
* {@link Receiver#characters(UnicodeString, Location, int)}, {@link Receiver#comment(UnicodeString, Location, int)},
* {@link Receiver#processingInstruction(String, UnicodeString, Location, int)} open
* open {@link Outputter#startDocument(int)} content
* open {@link Outputter#startElement(NodeName, SchemaType, Location, int)} content
* content {@link Receiver#characters(UnicodeString, Location, int)}, {@link Receiver#comment(UnicodeString, Location, int)},
* {@link Receiver#processingInstruction(String, UnicodeString, Location, int)} content
* content {@link Outputter#startElement(NodeName, SchemaType, Location, int)} content
* content {@link #endDocument()}, {@link #endElement()} if the stack is then empty, open; otherwise content
* (any) close final
* final close final
*
*
* This class is not normally used in production within Saxon, but is available for diagnostics when needed.
* Some implementations of {@code Receiver} accept sequences of events that are not regular; indeed, some
* implementations are explicitly designed to produce a regular sequence from an irregular sequence.
* Examples of such irregularities are append or startDocument events appearing within
* element content, or attribute events being followed by text events with no intervening
* startContent.
* The rules for a regular sequence imply that the top level events (any events not surrounded
* by startElement-endElement or startDocument-endDocument) can represent any sequence of items, including
* for example multiple document nodes, free-standing attribute and namespace nodes, maps, arrays, and functions;
* but within a startElement-endElement or startDocument-endDocument pair, the events represent content
* that has been normalized and validated according to the XSLT rules for constructing complex content, or
* the XQuery equivalent: for example, attributes and namespaces must appear before child nodes,
* adjacent text nodes should
* have been merged, zero-length text nodes should have been eliminated, all namespaces should be explicitly
* declared, document nodes should be replaced by their children.
* Element nodes in "composed form" (that is, existing as a tree in memory) may be passed through
* the {@link #append(Item)} method at the top level, but within a startElement-endElement or
* startDocument-endDocument pair, elements must be represented in "decomposed form" as a sequence
* of events.
* A call to {@link #close} is permitted in any state, but it should only be called in Open
* state except on an error path; on error paths calling {@link #close} is recommended to ensure that
* resources are released.
*/
public class RegularSequenceChecker extends ProxyReceiver {
private final Stack stack = new Stack<>();
@CSharpSimpleEnum
public enum State {INITIAL, OPEN, START_TAG, CONTENT, FINAL, FAILED}
// StartTag is used only in an incremental Receiver where attributes and namespaces are notified separately
@CSharpSimpleEnum
private enum Transition {
OPEN, APPEND, TEXT, COMMENT, PI, START_DOCUMENT,
START_ELEMENT, END_ELEMENT, END_DOCUMENT, CLOSE}
private State state;
private boolean fullChecking = false;
private static final Map> machine = new HashMap<>();
private static void edge(State from, Transition event, State to) {
@SuppressWarnings("Convert2Diamond") // for C#
Map edges = machine.computeIfAbsent(from, s -> new HashMap());
edges.put(event, to);
}
static {
edge(State.INITIAL, Transition.OPEN, State.OPEN);
edge(State.OPEN, Transition.APPEND, State.OPEN);
edge(State.OPEN, Transition.TEXT, State.OPEN);
edge(State.OPEN, Transition.COMMENT, State.OPEN);
edge(State.OPEN, Transition.PI, State.OPEN);
edge(State.OPEN, Transition.START_DOCUMENT, State.CONTENT);
edge(State.OPEN, Transition.START_ELEMENT, State.CONTENT);
edge(State.CONTENT, Transition.TEXT, State.CONTENT);
edge(State.CONTENT, Transition.COMMENT, State.CONTENT);
edge(State.CONTENT, Transition.PI, State.CONTENT);
edge(State.CONTENT, Transition.START_ELEMENT, State.CONTENT);
edge(State.CONTENT, Transition.END_ELEMENT, State.CONTENT); // or Open if the stack is empty
edge(State.CONTENT, Transition.END_DOCUMENT, State.OPEN);
edge(State.OPEN, Transition.CLOSE, State.FINAL);
edge(State.FAILED, Transition.CLOSE, State.FAILED);
//edge(State.Final, "close", State.Final); // This was a concession to poor practice, but apparently no longer needed
}
private void transition(Transition event) {
final Map map = machine.get(state);
assert map != null;
if (map.containsKey(event)) {
state = map.get(event);
} else {
throw new IllegalStateException("Event " + event + " is not permitted in state " + state);
}
}
/**
* Create a RegularSequenceChecker and allocate a unique Id.
*
* @param nextReceiver the underlying receiver to which the events will be sent (without change)
* @param fullChecking requests full validation of the content passed across the interface. If false,
* the only checking is that the sequence of events is correct. If true, more thorough
* validation is carried out (though this does not necessarily mean that every violation
* is detected).
*/
public RegularSequenceChecker(Receiver nextReceiver, boolean fullChecking) {
super(nextReceiver);
state = State.INITIAL;
this.fullChecking = fullChecking;
}
/**
* Append an arbitrary item (node or atomic value) to the output. In a regular sequence, append
* events occur only at the top level, that is, when the document / element stack is empty.
* @param item the item to be appended
* @param locationId the location of the calling instruction, for diagnostics
* @param copyNamespaces if the item is an element node, this indicates whether its namespaces
* need to be copied. Values are {@link ReceiverOption#ALL_NAMESPACES}; the default (0) means
*/
@Override
public void append(Item item, Location locationId, int copyNamespaces) throws XPathException {
try {
transition(Transition.APPEND);
nextReceiver.append(item, locationId, copyNamespaces);
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* Character data (corresponding to a text node). For character data within content (that is, events occurring
* when the startDocument / startElement stack is non-empty), character data events will never be consecutive
* and will never be zero-length.
*/
@Override
public void characters(UnicodeString chars, Location locationId, int properties) throws XPathException {
transition(Transition.TEXT);
if (chars.isEmpty() && !stack.isEmpty()) {
throw new IllegalStateException("Zero-length text nodes not allowed within document/element content");
}
try {
nextReceiver.characters(chars, locationId, properties);
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* End of sequence
*/
@Override
public void close() throws XPathException {
if (state != State.FINAL && state != State.FAILED) {
if (!stack.isEmpty()) {
throw new IllegalStateException("Unclosed element or document nodes at end of stream");
}
nextReceiver.close();
state = State.FINAL;
}
}
/**
* Output a comment
*/
@Override
public void comment(UnicodeString chars, Location locationId, int properties) throws XPathException {
transition(Transition.COMMENT);
try {
nextReceiver.comment(chars, locationId, properties);
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* Notify the end of a document node
*/
@Override
public void endDocument() throws XPathException {
transition(Transition.END_DOCUMENT);
if (stack.isEmpty() || stack.pop() != Type.DOCUMENT) {
throw new IllegalStateException("Unmatched endDocument() call");
}
try {
nextReceiver.endDocument();
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* End of element
*/
@Override
public void endElement() throws XPathException {
transition(Transition.END_ELEMENT);
if (stack.isEmpty() || stack.pop() != Type.ELEMENT) {
throw new IllegalStateException("Unmatched endElement() call");
}
if (stack.isEmpty()) {
state = State.OPEN;
}
try {
nextReceiver.endElement();
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* Start of event stream
*/
@Override
public void open() throws XPathException {
transition(Transition.OPEN);
try {
nextReceiver.open();
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* Processing Instruction
*/
@Override
public void processingInstruction(String target, UnicodeString data, Location locationId, int properties) throws XPathException {
transition(Transition.PI);
try {
nextReceiver.processingInstruction(target, data, locationId, properties);
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* Start of a document node.
* @param properties properties of the document node.
*/
@Override
public void startDocument(int properties) throws XPathException {
transition(Transition.START_DOCUMENT);
stack.push(Type.DOCUMENT);
try {
nextReceiver.startDocument(properties);
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
/**
* Notify the start of an element.
*
*
* All attributes must satisfy the following constraints:
*
* - The namespace prefix and URI must either both be present (non-zero-length) or both absent
* - The prefix "xml" and the URI "http://www.w3.org/XML/1998/namespace"
* are allowed only in combination.
* - The namespace URI "http://www.w3.org/2000/xmlns/" is not allowed.
* - The namespace prefix "xmlns" is not allowed.
* - The local name "xmlns" is not allowed in the absence of a namespace prefix and URI.
*
*
* The following additional constraints apply to the set of attributes as a whole:
*
* - No two attributes may have the same (local-name, namespace URI) combination.
* - No namespace prefix may be used in conjunction with more than one namespace URI.
* - Every (namespace prefix, namespace URI) combination must correspond to an in-scope namespace:
* that is, unless the (prefix, URI) pair is ("", "") or ("xml", "http://www.w3.org/XML/1998/namespace"),
* it must be present in the in-scope namespaces.
*
*
* These constraints are not all enforced by this class.
*
* @param elemName the name of the element. If the name is in a namespace (non-empty namespace URI)
* then the {@link Outputter#namespace(String, NamespaceUri, int)} event must include
* a binding for the relevant prefix (or absence of a prefix) to the relevant URI.
* @param type the type annotation of the element.
* @param attributes the attributes of the element
* @param namespaces the in-scope namespaces of the element
* @param location provides information such as line number and system ID.
* @param properties properties of the element node
*/
@Override
public void startElement(NodeName elemName, SchemaType type,
AttributeMap attributes, NamespaceMap namespaces,
Location location, int properties) throws XPathException {
transition(Transition.START_ELEMENT);
stack.push(Type.ELEMENT);
if (fullChecking) {
attributes.verify();
String prefix = elemName.getPrefix();
if (prefix.isEmpty()) {
NamespaceUri declaredDefaultUri = namespaces.getDefaultNamespace();
if (!declaredDefaultUri.equals(elemName.getNamespaceUri())) {
throw new IllegalStateException("URI of element Q{" + elemName.getNamespaceUri() +
"}" + elemName.getLocalPart() +
" does not match declared default namespace {"
+ declaredDefaultUri + "}");
}
} else {
NamespaceUri declaredUri = namespaces.getNamespaceUri(prefix);
if (declaredUri == null) {
throw new IllegalStateException("Prefix " + prefix + " has not been declared");
} else if (!declaredUri.equals(elemName.getNamespaceUri())) {
throw new IllegalStateException("Prefix " + prefix + " is bound to the wrong namespace");
}
}
for (AttributeInfo att : attributes) {
NodeName name = att.getNodeName();
if (!name.getNamespaceUri().isEmpty()) {
String attPrefix = name.getPrefix();
NamespaceUri declaredUri = namespaces.getNamespaceUri(attPrefix);
if (declaredUri == null) {
throw new IllegalStateException("Prefix " + attPrefix + " has not been declared for attribute " + att.getNodeName().getDisplayName());
} else if (!declaredUri.equals(name.getNamespaceUri())) {
throw new IllegalStateException("Prefix " + prefix + " is bound to the wrong namespace {" + declaredUri + "}");
}
}
}
}
try {
nextReceiver.startElement(elemName, type, attributes, namespaces, location, properties);
} catch (XPathException e) {
state = State.FAILED;
throw e;
}
}
}