org.apache.axiom.util.stax.TextFromElementReader Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.axiom.util.stax;
import java.io.IOException;
import java.io.Reader;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
/**
* {@link Reader} implementation that extracts the text nodes from an element given by an
* {@link XMLStreamReader}. The expected input is a document with only a document
* element (as produced by {@link org.apache.axiom.om.OMElement#getXMLStreamReader()}).
* The class will extract the text nodes that are direct children of that element, i.e. it uses
* the same conventions as {@link org.apache.axiom.om.OMElement#getText()}.
* It will call {@link XMLStreamReader#close()} when the end of the document is reached or when
* {@link #close()} is called.
*
* The main purpose of this class is to provide a convenient and efficient way to get the text
* content of an element without converting it first to a string, i.e. without using
* {@link org.apache.axiom.om.OMElement#getText()}. This is important for potentially
* large contents, for which this class guarantees constant memory usage.
*
* Note that this class should in general not be used directly. Instead,
* {@link org.apache.axiom.om.util.ElementHelper#getTextAsStream(org.apache.axiom.om.OMElement)}
* should be called to get the most efficient stream implementation for a given an element.
*/
// This class has package access -> use XMLStreamReaderUtils#getElementTextAsStream
class TextFromElementReader extends Reader {
private final XMLStreamReader stream;
private final boolean allowNonTextChildren;
/**
* Flag indicating that we have reached the end of the document and that the underlying
* parser has been closed.
*/
private boolean endOfStream;
/**
* The current depth relative to the document element (not the document). A value greater than
* 0 indicates that we are inside a nested element and that we need to skip text nodes.
*/
private int skipDepth;
/**
* The current position in the character data of the event, or -1 if all the character data
* has been consumed and a new event needs to be requested from the parser.
*/
private int sourceStart = -1;
TextFromElementReader(XMLStreamReader stream, boolean allowNonTextChildren) {
this.stream = stream;
this.allowNonTextChildren = allowNonTextChildren;
}
public int read(char[] cbuf, int off, int len) throws IOException {
if (endOfStream) {
return -1;
}
int read = 0;
try {
while (true) {
if (sourceStart == -1) {
eventLoop: while (true) {
int type = stream.next();
switch (type) {
case XMLStreamReader.CHARACTERS:
case XMLStreamReader.CDATA:
if (skipDepth == 0) {
sourceStart = 0;
break eventLoop;
}
break;
case XMLStreamReader.START_ELEMENT:
if (allowNonTextChildren) {
skipDepth++;
} else {
throw new IOException("Unexpected START_ELEMENT event");
}
break;
case XMLStreamReader.END_ELEMENT:
if (skipDepth == 0) {
endOfStream = true;
return read == 0 ? -1 : read;
} else {
skipDepth--;
}
}
}
}
int c = stream.getTextCharacters(sourceStart, cbuf, off, len);
sourceStart += c;
off += c;
len -= c;
read += c;
if (len > 0) {
sourceStart = -1;
} else {
return read;
}
}
} catch (XMLStreamException ex) {
throw new XMLStreamIOException(ex);
}
}
public void close() throws IOException {
// Do nothing
}
}