All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.ctc.wstx.evt.WstxEventReader Maven / Gradle / Ivy

Go to download

Woodstox is a high-performance XML processor that implements Stax (JSR-173), SAX2 and Stax2 APIs

There is a newer version: 7.1.0
Show newest version
/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.evt;

import java.util.NoSuchElementException;

import javax.xml.stream.*;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.XMLEvent;
import javax.xml.stream.util.XMLEventAllocator;

import org.codehaus.stax2.XMLEventReader2;
import org.codehaus.stax2.XMLStreamReader2;

import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.exc.WstxParsingException;
import com.ctc.wstx.sr.StreamScanner;

/**
 * Woodstox version of {@link XMLEventReader2} (and {@link XMLEventReader}).
 *

* NOTE: up to Woodstox 5.1, this was based on Stax2 Reference Implementation * ({@link org.codehaus.stax2.ri.Stax2EventReaderImpl}), but due to various issues * has temporarily (?) been cut-paste-modified here. Ideally it would be reconciled * once Stax2-api version 4.2 can be relied as baseline, but that may take time. */ public class WstxEventReader // extends Stax2EventReaderImpl // before 5.2 implements XMLEventReader2, XMLStreamConstants { // // // Enumerated state ids protected final static int STATE_INITIAL = 1; protected final static int STATE_END_OF_INPUT = 2; protected final static int STATE_CONTENT = 3; // // // Enumerated error case ids /** * Current state when getElementText() called not START_ELEMENT */ protected final static int ERR_GETELEMTEXT_NOT_START_ELEM = 1; /** * Encountered non-textual event (other than closing END_ELEMENT) * when collecting text for getElementText() */ protected final static int ERR_GETELEMTEXT_NON_TEXT_EVENT = 2; /** * Encountered CHARACTERS or CDATA that contains non-white space * char(s), when trying to locate tag with nextTag() */ protected final static int ERR_NEXTTAG_NON_WS_TEXT = 3; /** * Encountered non-skippable non-text/element event with * nextTag() */ protected final static int ERR_NEXTTAG_WRONG_TYPE = 4; /* /********************************************************************** /* Configuration /********************************************************************** */ protected final XMLEventAllocator mAllocator; protected final XMLStreamReader2 mReader; /* /********************************************************************** /* State /********************************************************************** */ /** * Event that has been peeked, ie. loaded without call to * {@link #nextEvent}; will be returned and cleared by * call to {@link #nextEvent} (or, returned again if peeked * again) */ protected XMLEvent mPeekedEvent = null; /** * High-level state indicator, with currently three values: * whether we are initializing (need to synthetize START_DOCUMENT), * at END_OF_INPUT (end-of-doc), or otherwise, normal operation. * Useful in simplifying some methods, as well as to make sure * that independent of how stream reader handles things, event reader * can reliably detect End-Of-Document. */ protected int mState = STATE_INITIAL; /** * This variable keeps track of the type of the 'previous' event * when peeking for the next Event. It is needed for some functionality, * to remember state even when underlying parser has to move to peek * the next event. */ protected int mPrePeekEvent = START_DOCUMENT; /* /********************************************************************** /* Woodstox-specific /********************************************************************** */ /** * Marker flag to allow specialized handling in "multi-document" reading * mode. */ protected final boolean mCfgMultiDocMode; /* /********************************************************************** /* Construction /********************************************************************** */ public WstxEventReader(XMLEventAllocator a, XMLStreamReader2 r) { mAllocator = a; mReader = r; mCfgMultiDocMode = (r instanceof StreamScanner) && ((StreamScanner) r).getConfig().inputParsingModeDocuments(); } /* /********************************************************************** /* Abstract methods that Stax2EventReaderImpl would expose /********************************************************************** */ @Override public boolean isPropertySupported(String name) { return ((XMLStreamReader2)getStreamReader()).isPropertySupported(name); } @Override public boolean setProperty(String name, Object value) { return ((XMLStreamReader2)getStreamReader()).setProperty(name, value); } /** * Method called upon encountering a problem that should result * in an exception being thrown. If non-null String is returned. * that will be used as the message of exception thrown; if null, * a standard message will be used instead. * * @param errorType Type of the problem, one of ERR_ * constants * @param currEvent Type of the event that triggered the problem, * if any; -1 if not available. */ protected String getErrorDesc(int errorType, int currEvent) { // Defaults are mostly fine, except we can easily add event type desc switch (errorType) { case ERR_GETELEMTEXT_NOT_START_ELEM: return ErrorConsts.ERR_STATE_NOT_STELEM+", got "+ErrorConsts.tokenTypeDesc(currEvent); case ERR_GETELEMTEXT_NON_TEXT_EVENT: return "Expected a text token, got "+ErrorConsts.tokenTypeDesc(currEvent); case ERR_NEXTTAG_NON_WS_TEXT: return "Only all-whitespace CHARACTERS/CDATA (or SPACE) allowed for nextTag(), got "+ErrorConsts.tokenTypeDesc(currEvent); case ERR_NEXTTAG_WRONG_TYPE: return "Got "+ErrorConsts.tokenTypeDesc(currEvent)+", instead of START_ELEMENT, END_ELEMENT or SPACE"; } return null; } /* /********************************************************************** /* XMLEventReader API /********************************************************************** */ @Override public void close() throws XMLStreamException { mReader.close(); } @Override public String getElementText() throws XMLStreamException { /* Simple, if no peeking occured: can just forward this to the * underlying parser */ if (mPeekedEvent == null) { return mReader.getElementText(); } XMLEvent evt = mPeekedEvent; mPeekedEvent = null; /* Otherwise need to verify that we are currently over START_ELEMENT. * Problem is we have already went past it... */ if (mPrePeekEvent != START_ELEMENT) { reportProblem(findErrorDesc(ERR_GETELEMTEXT_NOT_START_ELEM, mPrePeekEvent)); } // ??? do we need to update mPrePeekEvent now String str = null; StringBuffer sb = null; // Ok, fine, then just need to loop through and get all the text... for (; true; evt = nextEvent()) { if (evt.isEndElement()) { break; } int type = evt.getEventType(); if (type == COMMENT || type == PROCESSING_INSTRUCTION) { // can/should just ignore them continue; } if (!evt.isCharacters()) { reportProblem(findErrorDesc(ERR_GETELEMTEXT_NON_TEXT_EVENT, type)); } String curr = evt.asCharacters().getData(); if (str == null) { str = curr; } else { if (sb == null) { sb = new StringBuffer(str.length() + curr.length()); sb.append(str); } sb.append(curr); } } if (sb != null) { return sb.toString(); } return (str == null) ? "" : str; } @Override public Object getProperty(String name) { return mReader.getProperty(name); } @Override public boolean hasNext() { return (mState != STATE_END_OF_INPUT); } @Override public XMLEvent nextEvent() throws XMLStreamException { if (mState == STATE_END_OF_INPUT) { throwEndOfInput(); } else if (mState == STATE_INITIAL) { mState = STATE_CONTENT; return createStartDocumentEvent(); } if (mPeekedEvent != null) { XMLEvent evt = mPeekedEvent; mPeekedEvent = null; if (evt.isEndDocument()) { updateStateEndDocument(); } return evt; } return createNextEvent(true, mReader.next()); } @Override public Object next() { try { return nextEvent(); } catch (XMLStreamException sex) { throwUnchecked(sex); return null; } } @Override public XMLEvent nextTag() throws XMLStreamException { // If we have peeked something, need to process it if (mPeekedEvent != null) { XMLEvent evt = mPeekedEvent; mPeekedEvent = null; int type = evt.getEventType(); switch (type) { case END_DOCUMENT: return null; case START_DOCUMENT: // Need to skip START_DOCUMENT to get the root elem break; case SPACE: // Ignorable WS is just fine break; /* !!! 07-Dec-2004, TSa: Specs are mum about Comments and PIs. * But why would they not be skipped just like what * the stream reader does? */ case COMMENT: case PROCESSING_INSTRUCTION: break; case CDATA: case CHARACTERS: if (((Characters) evt).isWhiteSpace()) { break; } reportProblem(findErrorDesc(ERR_NEXTTAG_NON_WS_TEXT, type)); break; // never gets here, but some compilers whine without... case START_ELEMENT: case END_ELEMENT: return evt; default: reportProblem(findErrorDesc(ERR_NEXTTAG_WRONG_TYPE, type)); } } else { /* 13-Sep-2005, TSa: As pointed out by Patrick, we may need to * initialize the state here, too; otherwise peek() won't work * correctly. The problem is that following loop's get method * does not use event reader's method but underlying reader's. * As such, it won't update state: most importantly, initial * state may not be changed to non-initial. */ if (mState == STATE_INITIAL) { mState = STATE_CONTENT; } } while (true) { int next = mReader.next(); switch (next) { case END_DOCUMENT: return null; case SPACE: case COMMENT: case PROCESSING_INSTRUCTION: continue; case CDATA: case CHARACTERS: if (mReader.isWhiteSpace()) { continue; } reportProblem(findErrorDesc(ERR_NEXTTAG_NON_WS_TEXT, next)); break; // just to keep Jikes happy... case START_ELEMENT: case END_ELEMENT: return createNextEvent(false, next); default: reportProblem(findErrorDesc(ERR_NEXTTAG_WRONG_TYPE, next)); } } } @Override public XMLEvent peek() throws XMLStreamException { if (mPeekedEvent == null) { if (mState == STATE_END_OF_INPUT) { // 06-Mar-2006, TSa: Fixed as per Arjen's suggestion: //throwEndOfInput(); return null; } if (mState == STATE_INITIAL) { // Not sure what it should be... but this should do: mPrePeekEvent = START_DOCUMENT; mPeekedEvent = createStartDocumentEvent(); mState = STATE_CONTENT; } else { mPrePeekEvent = mReader.getEventType(); mPeekedEvent = createNextEvent(false, mReader.next()); } } return mPeekedEvent; } /** * Note: only here because we implement Iterator interface. Will not * work, don't bother calling it. */ @Override public void remove() { throw new UnsupportedOperationException("Can not remove events from XMLEventReader."); } /** * Method called when we are about to return END_DOCUMENT event. * Usually this should change state to STATE_END_OF_INPUT, but * may vary for some alternative read modes (like multi-document) * * @since 4.2 */ protected void updateStateEndDocument() throws XMLStreamException { if (mCfgMultiDocMode) { // As per [woodstox-core#42] should allow reading over multiple documents... if (mReader.hasNext()) { // Let's sanity-check that we get token we expect however: int next = mReader.next(); if (next == START_DOCUMENT) { mPrePeekEvent = START_DOCUMENT; mPeekedEvent = createStartDocumentEvent(); mState = STATE_CONTENT; return; } reportProblem("Unexpected token ("+ErrorConsts.tokenTypeDesc(next) +") after END_DOCUMENT in multi-document mode, XMLStreamReader.hasNext() returning true"); } } mState = STATE_END_OF_INPUT; } /* /********************************************************************** /* XMLEventReader2 API /********************************************************************** */ /** *

* Note: although the interface allows implementations to * throw an {@link XMLStreamException}, the reference implementation * doesn't currently need to. * It's still declared, in case in future there is need to throw * such an exception. */ @Override public boolean hasNextEvent() throws XMLStreamException { return (mState != STATE_END_OF_INPUT); } /* /********************************************************************** /* Overridable factory methods /********************************************************************** */ protected XMLEvent createNextEvent(boolean checkEOD, int type) throws XMLStreamException { try { XMLEvent evt = mAllocator.allocate(mReader); if (checkEOD && type == END_DOCUMENT) { updateStateEndDocument(); } return evt; } catch (RuntimeException rex) { throw _checkUnwrap(rex); } } protected XMLStreamException _checkUnwrap(RuntimeException rex) { /* 29-Mar-2008, TSa: Due to some problems with Stax API * (lack of 'throws XMLStreamException' in signature of * XMLStreamReader.getText(), for one) it is possible * we will get a wrapped XMLStreamException. If so, * we should be able to unwrap it. */ Throwable t = rex.getCause(); while (t != null) { if (t instanceof XMLStreamException) { return (XMLStreamException) t; } t = t.getCause(); } // Nope, need to re-throw as is throw rex; } /** * Method called to create the very first event (START_DOCUMENT). */ protected XMLEvent createStartDocumentEvent() throws XMLStreamException { XMLEvent start = mAllocator.allocate(mReader); return start; } /* /********************************************************************** /* Overridable error reporting methods /********************************************************************** */ // note: `private` before 4.2 protected void throwEndOfInput() { throw new NoSuchElementException(); } protected void throwUnchecked(XMLStreamException sex) { // Wrapped root cause? Let's only unwrap one layer; one that // must have been used to expose the problem (if any) Throwable t = (sex.getNestedException() == null) ? sex : sex.getNestedException(); // Unchecked? Can re-throw as is if (t instanceof RuntimeException) { throw (RuntimeException) t; } if (t instanceof Error) { throw (Error) t; } // Otherwise, let's just wrap it throw new RuntimeException("[was "+t.getClass()+"] "+t.getMessage(), t); } protected void reportProblem(String msg) throws XMLStreamException { reportProblem(msg, mReader.getLocation()); } protected void reportProblem(String msg, Location loc) throws XMLStreamException { if (loc == null) { throw new WstxParsingException(msg); } throw new WstxParsingException(msg, loc); } /* /********************************************************************** /* Package methods for sub-classes /********************************************************************** */ protected XMLStreamReader getStreamReader() { return mReader; } /* /********************************************************************** /* Other internal methods /********************************************************************** */ // note: `private` before 4.2 /** * Method used to locate error message description to use. * Calls sub-classes getErrorDesc() first, and only * if no message found, uses default messages defined here. */ protected final String findErrorDesc(int errorType, int currEvent) { String msg = getErrorDesc(errorType, currEvent); if (msg != null) { return msg; } switch (errorType) { case ERR_GETELEMTEXT_NOT_START_ELEM: return "Current state not START_ELEMENT when calling getElementText()"; case ERR_GETELEMTEXT_NON_TEXT_EVENT: return "Expected a text token"; case ERR_NEXTTAG_NON_WS_TEXT: return "Only all-whitespace CHARACTERS/CDATA (or SPACE) allowed for nextTag()"; case ERR_NEXTTAG_WRONG_TYPE: return "Should only encounter START_ELEMENT/END_ELEMENT, SPACE, or all-white-space CHARACTERS"; } // should never happen, but it'd be bad to throw another exception... return "Internal error (unrecognized error type: "+errorType+")"; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy