com.fasterxml.aalto.out.ByteXmlWriter Maven / Gradle / Ivy
Show all versions of aalto-xml Show documentation
/* Woodstox Lite ("wool") XML processor
*
* Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
*
* Licensed under the License specified in the file LICENSE which is
* included with the source code.
* You may not use this file except in compliance with the License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.fasterxml.aalto.out;
import java.io.*;
import javax.xml.stream.*;
import org.codehaus.stax2.ri.typed.AsciiValueEncoder;
import com.fasterxml.aalto.impl.ErrorConsts;
import com.fasterxml.aalto.util.XmlCharTypes;
import com.fasterxml.aalto.util.XmlChars;
import com.fasterxml.aalto.util.XmlConsts;
import static com.fasterxml.aalto.out.OutputCharTypes.*;
/**
* This abstract base class (partial implementation of {@link XmlWriter})
* is used if the destination is byte-based {@link java.io.OutputStream}.
*
* Further, all existing implementations are for encodings that
* are 7-bit ascii compatible. This is important since this means
* that marker and separator characters are identical independent
* of actual encoding. This would not hold if encodings
* like EBCDIC were supported using this class.
*/
public abstract class ByteXmlWriter
extends XmlWriter
{
/**
* Default size for the "full" buffer.
* NOTE(review): judging by the name (and by the pairing of
* <code>freeFullBBuffer</code> with <code>_outputBuffer</code> in
* <code>_releaseBuffers()</code>) this presumably sizes the byte output
* buffer, not the char copy buffer; the allocation site is not visible
* in this part of the file -- confirm.
*/
final static int DEFAULT_FULL_BUFFER_SIZE = 4000;
/**
* Default intermediate copy buffer size, to be used for efficient
* access to String content. Smaller, since it's in characters, plus
* will not be used for actual write operations
*/
final static int DEFAULT_COPY_BUFFER_SIZE = 1000;
/**
* Let's try to avoid short writes, since some output streams have
* high per-call penalty (like network streams).
* NOTE(review): presumably a byte-count threshold; the code that consults
* it is not visible in this part of the file.
*/
final static int SMALL_WRITE = 250;
// Single-byte forms of ASCII punctuation used in markup; usable as-is
// because all supported encodings are 7-bit ASCII compatible.
final static byte BYTE_SPACE = (byte) ' ';
final static byte BYTE_COLON = (byte) ':';
final static byte BYTE_SEMICOLON = (byte) ';';
final static byte BYTE_LBRACKET = (byte) '[';
final static byte BYTE_RBRACKET = (byte) ']';
final static byte BYTE_QMARK = (byte) '?';
final static byte BYTE_EQ = (byte) '=';
final static byte BYTE_SLASH = (byte) '/';
final static byte BYTE_HASH = (byte) '#';
final static byte BYTE_HYPHEN = (byte) '-';
final static byte BYTE_LT = (byte) '<';
final static byte BYTE_GT = (byte) '>';
final static byte BYTE_AMP = (byte) '&';
final static byte BYTE_QUOT = (byte) '"';
final static byte BYTE_APOS = (byte) '\'';
// Single-byte forms of lower-case ASCII letters (presumably used when
// composing entity names such as "amp", "lt", "quot" -- usage is not
// visible in this part of the file).
final static byte BYTE_A = (byte) 'a';
final static byte BYTE_G = (byte) 'g';
final static byte BYTE_L = (byte) 'l';
final static byte BYTE_M = (byte) 'm';
final static byte BYTE_O = (byte) 'o';
final static byte BYTE_P = (byte) 'p';
final static byte BYTE_Q = (byte) 'q';
final static byte BYTE_S = (byte) 's';
final static byte BYTE_T = (byte) 't';
final static byte BYTE_U = (byte) 'u';
final static byte BYTE_X = (byte) 'x';
// Pre-encoded delimiters for CDATA sections and comments. These are written
// verbatim by writeCDataStart()/writeCDataEnd() and
// writeCommentStart()/writeCommentEnd(), so they must carry the actual XML
// markers (an empty marker would silently produce malformed output).
final static byte[] BYTES_CDATA_START = getAscii("<![CDATA[");
final static byte[] BYTES_CDATA_END = getAscii("]]>");
final static byte[] BYTES_COMMENT_START = getAscii("<!--");
final static byte[] BYTES_COMMENT_END = getAscii("-->");
final static byte[] BYTES_XMLDECL_START = getAscii("= SURR1_FIRST && ch <= SURR2_LAST) {
// Can't start with surr2...
if (ch >= SURR2_FIRST) {
reportNwfName("Illegal surrogate pairing in name: first character ("+XmlChars.getCharDesc(ch)+") not valid surrogate first character");
}
// Unpaired? Not good either
if (len < 2) {
reportNwfName("Illegal surrogate pairing in name: incomplete surrogate (missing second half)");
}
// Otherwise let's decode code point for verification
ch = calcSurrogate(ch, part.charAt(1), " in name");
i = 2; // and skip second half of surrogate pair
} else {
i = 1;
}
if (!XmlChars.is10NameStartChar(ch)) {
reportNwfName("Invalid name start character "+XmlChars.getCharDesc(ch)+" (name \""+part+"\")");
}
// Also, names can not use entities, must be natively expressable
final int lastValid = getHighestEncodable();
if (ch > lastValid) {
reportNwfName("Illegal name start character "+XmlChars.getCharDesc(ch)+" (name \""+part+"\"): can not be expressed using effective encoding ("+_config.getActualEncoding()+")");
}
for (; i < len; ++i) {
ch = part.charAt(i);
if (ch >= SURR1_FIRST && ch <= SURR2_LAST) {
// Can't start with surr2...
if (ch >= SURR2_FIRST) {
reportNwfName("Illegal surrogate pairing in name: character at #"+i+" ("+XmlChars.getCharDesc(ch)+") not valid surrogate first character");
}
// Unpaired? Not good either
++i;
if (i >= len) {
reportNwfName("Illegal surrogate pairing in name: name ends with incomplete surrogate pair");
}
// Otherwise let's decode code point for verification
ch = calcSurrogate(ch, part.charAt(i), " in name");
}
if (ch > lastValid) {
reportNwfName("Illegal name character "+XmlChars.getCharDesc(ch)+" (name \""+part+"\", index #"+i+"): can not be expressed using effective encoding ("+_config.getActualEncoding()+")");
}
if (!XmlChars.is10NameChar(ch)) {
reportNwfName("Invalid name character "+XmlChars.getCharDesc(ch)+") in name (\""+part+"\"), index #"+i);
}
}
}
/*
/**********************************************************************
/* Abstract methods
/**********************************************************************
*/
/**
* Method called to output a composite character, result of
* combining 2 surrogate characters.
*<p>
* Within this class it is invoked with the pending <code>_surrogate</code>
* value as the first argument and the next input character as the second.
*
* @param surr1 First character of the surrogate pair
* @param surr2 Second character of the surrogate pair
*/
abstract protected void outputSurrogates(int surr1, int surr2)
throws IOException, XMLStreamException;
/**
* Method called to output a character that the active character type
* table classifies as <code>CT_MULTIBYTE_2</code>. Callers in this class
* store their local write position back to <code>_outputPtr</code> before
* the call and re-read it afterwards, so implementations are expected to
* write via <code>_outputPtr</code>.
*
* @param ch Character to output
*/
abstract protected void output2ByteChar(int ch)
throws IOException, XMLStreamException;
/**
* Method called to output a character beyond basic 1- or 2-byte
* encoding (code 0x0800 and above), without being able to use
* character entities
*
* @param ch Character to output
* @param cbuf Input buffer the character was read from
* @param inputOffset Offset within <code>cbuf</code>
* (NOTE(review): no caller is visible in this part of the file; presumably
* same conventions as {@link #outputMultiByteChar} -- confirm)
* @param inputLen Input limit (see note above)
*
* @return presumably the input offset to continue from -- confirm
*/
abstract protected int outputStrictMultiByteChar(int ch, char[] cbuf, int inputOffset, int inputLen)
throws IOException, XMLStreamException;
/**
* Method called to output a character beyond basic 1- or 2-byte
* encoding (code 0x0800 and above); possibly using character
* entities, if necessary
*
* @param ch Character to output (already read from <code>cbuf</code>)
* @param cbuf Input buffer the character was read from
* @param inputOffset Offset of the input character <b>following</b>
* <code>ch</code>: callers in this class advance past <code>ch</code>
* before calling
* @param inputLen Despite the name, callers in this class pass the
* end index of the input (offset + length), not a length
*
* @return Input offset from which the caller continues processing
* (callers assign the result straight to their offset variable)
*/
abstract protected int outputMultiByteChar(int ch, char[] cbuf, int inputOffset, int inputLen)
throws IOException, XMLStreamException;
/*
/**********************************************************************
/* Low-level (pass-through) methods
/**********************************************************************
*/
/**
 * Hands the byte output buffer and the char copy buffer back to the
 * configuration object (after letting the super class release its own),
 * and clears the references so a repeated call is a no-op.
 */
@Override
public void _releaseBuffers()
{
    super._releaseBuffers();

    final byte[] byteBuf = _outputBuffer;
    if (byteBuf != null) {
        _config.freeFullBBuffer(byteBuf);
        _outputBuffer = null;
    }

    final char[] charBuf = _copyBuffer;
    if (charBuf != null) {
        _config.freeFullCBuffer(charBuf);
        _copyBuffer = null;
    }
}
/**
 * Closes the underlying output stream, but only when asked to; the
 * stream reference is dropped only when the stream was actually closed.
 *
 * @param doClose Whether the underlying stream should be closed
 */
@Override
public void _closeTarget(boolean doClose) throws IOException
{
    // Nothing to do if closing was not requested, or if the stream is
    // already gone (method may be called multiple times)
    if (!doClose || _out == null) {
        return;
    }
    _out.close();
    _out = null;
}
/**
 * Pushes buffered bytes to the underlying stream and then flushes the
 * stream itself; does nothing if there is no stream.
 */
@Override
public final void flush() throws IOException
{
    if (_out == null) {
        return;
    }
    flushBuffer();
    _out.flush();
}
/*
/**********************************************************************
/* Write methods, raw
/**********************************************************************
*/
/**
 * Writes part of a String as-is, by copying it through the intermediate
 * char buffer one buffer-full at a time and delegating each piece to
 * the char-array variant.
 *
 * @param text String to take content from
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 */
@Override
public final void writeRaw(String text, int offset, int len)
    throws IOException, XMLStreamException
{
    int pos = offset;
    for (int remaining = len; remaining > 0; ) {
        final char[] chunk = _copyBuffer;
        final int count = Math.min(remaining, chunk.length);
        text.getChars(pos, pos + count, chunk, 0);
        writeRaw(chunk, 0, count);
        pos += count;
        remaining -= count;
    }
}
/**
* Writes given characters without escaping. How characters map to bytes
* is heavily encoding-dependent, so the implementation is deferred
* to sub-classes.
*
* @param cbuf Buffer that contains characters to write
* @param offset Index of the first character to write
* @param len Number of characters to write
*/
@Override
public abstract void writeRaw(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException;
/*
/**********************************************************************
/* Write methods, elements
/**********************************************************************
*/
/**
 * Writes the opening '&lt;' and the element name of a start tag.
 *
 * @param name Pre-serialized element name
 */
@Override
public final void writeStartTagStart(WName name)
    throws IOException, XMLStreamException
{
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    final int start = _outputPtr;
    final int required = start + name.serializedLength() + 1; // +1 for '<'
    if (required <= _outputBufferLen) {
        // Fast path: everything fits in the current buffer
        final byte[] out = _outputBuffer;
        int pos = start;
        out[pos++] = BYTE_LT;
        pos += name.appendBytes(out, pos);
        _outputPtr = pos;
        return;
    }
    // Slow path (flush and/or over-long name) is handled elsewhere
    writeName(BYTE_LT, name);
}
/**
 * Writes the '&gt;' that closes a start tag. The single-byte write is
 * done inline (rather than via a helper) since this is called very often.
 */
@Override
public final void writeStartTagEnd()
    throws IOException, XMLStreamException
{
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    final boolean bufferFull = (_outputPtr >= _outputBufferLen);
    if (bufferFull) {
        flushBuffer();
    }
    _outputBuffer[_outputPtr++] = BYTE_GT;
}
/**
 * Writes the "/&gt;" that closes an empty element tag, flushing first
 * if fewer than two bytes are free in the output buffer.
 */
@Override
public void writeStartTagEmptyEnd()
    throws IOException
{
    if (_outputPtr + 2 > _outputBufferLen) {
        flushBuffer();
    }
    final byte[] out = _outputBuffer;
    final int pos = _outputPtr;
    out[pos] = BYTE_SLASH;
    out[pos + 1] = BYTE_GT;
    _outputPtr = pos + 2;
}
/**
 * Writes a complete end tag: "&lt;/", the element name and "&gt;".
 *
 * @param name Pre-serialized element name
 */
@Override
public final void writeEndTag(WName name)
    throws IOException, XMLStreamException
{
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    final int nameLen = name.serializedLength();
    final int tagLen = nameLen + 3; // '<', '/', name, '>'

    if (_outputPtr + tagLen > _outputBufferLen) {
        flushBuffer();
        if (tagLen > _outputBufferLen) {
            // Tag is longer than the whole buffer: bypass the buffer for
            // everything but the closing '>', which fits after the flush
            _out.write(BYTE_LT);
            _out.write(BYTE_SLASH);
            name.writeBytes(_out);
            _outputBuffer[_outputPtr++] = BYTE_GT;
            return;
        }
    }
    final byte[] out = _outputBuffer;
    int pos = _outputPtr;
    out[pos++] = BYTE_LT;
    out[pos++] = BYTE_SLASH;
    pos += name.appendBytes(out, pos);
    out[pos++] = BYTE_GT;
    _outputPtr = pos;
}
/*
/**********************************************************************
/* Write methods, attributes
/**********************************************************************
*/
/**
 * Writes an attribute whose value is given as a String. Values that
 * fit in the copy buffer are copied there and handed to the char-array
 * variant; longer ones take the chunked path.
 *
 * @param name Pre-serialized attribute name
 * @param value Attribute value (unescaped)
 */
@Override
public final void writeAttribute(WName name, String value)
    throws IOException, XMLStreamException
{
    final int valueLen = value.length();
    if (valueLen <= _copyBufferLen) {
        final char[] chars = _copyBuffer;
        if (valueLen > 0) {
            value.getChars(0, valueLen, chars, 0);
        }
        writeAttribute(name, chars, 0, valueLen);
        return;
    }
    // Rare case: value does not fit in the copy buffer
    writeLongAttribute(name, value, valueLen);
}
/**
* Writes an attribute as: space, name, '=', double quote, escaped value,
* double quote. Uses the unchecked fast value writer when the whole
* attribute is known to fit in the output buffer, and the bounds-checked
* value writer otherwise.
*
* @param name Pre-serialized attribute name
* @param vbuf Buffer that contains the (unescaped) attribute value
* @param offset Index of the first value character in <code>vbuf</code>
* @param vlen Number of value characters
*/
@Override
public final void writeAttribute(WName name, char[] vbuf, int offset, int vlen)
throws IOException, XMLStreamException
{
if (_surrogate != 0) {
throwUnpairedSurrogate();
}
// Enough room?
int ptr = _outputPtr;
byte[] bbuf = _outputBuffer;
// ">=" (not ">") since the leading space needs one byte beyond the name
if ((ptr + name.serializedLength()) >= _outputBufferLen) {
// Slow path: helper flushes, then writes space + name
writeName(BYTE_SPACE, name);
ptr = _outputPtr;
} else {
bbuf[ptr++] = BYTE_SPACE;
ptr += name.appendBytes(bbuf, ptr);
}
// And then the value
// (3 extra bytes: '=', opening quote, closing quote)
if ((ptr + 3 + vlen) > _outputBufferLen) { // won't fit
_outputPtr = ptr;
flushBuffer();
// NOTE(review): bbuf was captured before the flush; this assumes
// flushBuffer() keeps using the same backing array -- confirm
bbuf[_outputPtr++] = BYTE_EQ;
bbuf[_outputPtr++] = BYTE_QUOT;
// Still too long even for the freshly flushed buffer: use the
// variant that checks buffer bounds for every byte
if ((_outputPtr + vlen + 1) > _outputBufferLen) {
writeAttrValue(vbuf, offset, vlen);
writeRaw(BYTE_QUOT);
return;
}
ptr = _outputPtr;
} else {
bbuf[ptr++] = BYTE_EQ;
bbuf[ptr++] = BYTE_QUOT;
}
if (vlen > 0) {
ptr = fastWriteAttrValue(vbuf, offset, vlen, bbuf, ptr);
}
// Fast writer leaves room for the closing quote
bbuf[ptr++] = BYTE_QUOT;
_outputPtr = ptr;
}
/**
* Method called to copy given attribute value, when it's known that
* it will completely fit in the output buffer without further checks
* (as long as only plain single-byte characters are encountered; after
* each escaped or multi-byte character the remaining room is re-verified
* and the buffer flushed if needed).
*
* @param vbuf Buffer that contains the value
* @param offset Index of the first value character
* @param len Number of value characters
* @param bbuf Output buffer to append to
* @param ptr Position in <code>bbuf</code> to start writing at
*
* @return Output position after the value; caller is responsible for
* storing it in <code>_outputPtr</code>
*/
protected final int fastWriteAttrValue(char[] vbuf, int offset, int len,
byte[] bbuf, int ptr)
throws IOException, XMLStreamException
{
len += offset; // now marks the end
main_loop:
while (offset < len) {
final int[] charTypes = _charTypes.ATTR_CHARS;
// Tight loop: copy characters that can be output as a single byte
inner_loop:
while (true) {
int ch = (int) vbuf[offset];
if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
break inner_loop;
}
if (charTypes[ch] != XmlCharTypes.CT_OK) {
// Here we do want to quote linefeed, too
break;
}
bbuf[ptr++] = (byte)ch;
if (++offset >= len) {
break main_loop;
}
}
// Helpers below write via _outputPtr, so sync it with local position
_outputPtr = ptr;
// Ok, so what did we hit? Invalid, or quotable?
int ch = (int) vbuf[offset++];
if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
switch (charTypes[ch]) {
case CT_INVALID:
reportInvalidChar(ch);
break;
case CT_MULTIBYTE_2:
output2ByteChar(ch);
break;
default:
writeAsEntity(ch);
}
} else {
// Returns the input offset to continue from
offset = outputMultiByteChar(ch, vbuf, offset, len);
}
/* Ok, need to mess with buffers a bit: plus, it's possible
* that we may even need to flush the buffer as the guarantee
* for fitting may not necessarily hold (but it will after
* flushing)
* Still enough room? (also for following quote -- caller
* relies on that -- that's why >=, not >)
*/
if ((len - offset) >= (_outputBufferLen - _outputPtr)) {
flushBuffer();
}
ptr = _outputPtr;
}
return ptr;
}
/**
* Writes (part of) an attribute value with escaping, checking for
* available buffer space before every byte; used when the value is
* not known to fit in the output buffer. If a first surrogate is pending
* from earlier input, the first character here is consumed as its pair.
*
* @param vbuf Buffer that contains the value
* @param offset Index of the first value character
* @param len Number of value characters
*/
protected final void writeAttrValue(char[] vbuf, int offset, int len)
throws IOException, XMLStreamException
{
if (_surrogate != 0) {
// NOTE(review): reads vbuf[offset] without checking len > 0; visible
// callers only pass non-empty chunks when a surrogate can be pending
outputSurrogates(_surrogate, vbuf[offset]);
++offset;
--len;
}
len += offset; // now marks the end
main_loop:
while (offset < len) {
final int[] charTypes = _charTypes.ATTR_CHARS;
// Copy characters that can be output as a single byte
inner_loop:
while (true) {
int ch = (int) vbuf[offset];
if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
break inner_loop;
}
if (charTypes[ch] != XmlCharTypes.CT_OK) {
break;
}
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte)ch;
if (++offset >= len) {
break main_loop;
}
}
// Ok, so what did we hit?
int ch = (int) vbuf[offset++];
if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
switch (charTypes[ch]) {
case CT_INVALID:
// NOTE(review): no break here, unlike fastWriteAttrValue(); this
// relies on reportInvalidChar() throwing. If it can return, the
// character is output via output2ByteChar() -- confirm intent
reportInvalidChar(ch);
case CT_MULTIBYTE_2:
output2ByteChar(ch);
break;
default:
writeAsEntity(ch);
break;
}
} else {
// Returns the input offset to continue from
offset = outputMultiByteChar(ch, vbuf, offset, len);
continue main_loop;
}
}
}
/**
 * Writes an attribute whose String value is longer than the copy
 * buffer: the value is escaped and written one copy-buffer-full
 * at a time.
 *
 * @param name Pre-serialized attribute name
 * @param value Attribute value (unescaped)
 * @param vlen Length of <code>value</code>
 */
protected final void writeLongAttribute(WName name, String value, int vlen)
    throws IOException, XMLStreamException
{
    writeRaw(BYTE_SPACE);

    // First the name; may need to flush, or even bypass the buffer
    final int nameLen = name.serializedLength();
    boolean viaBuffer = true;
    if ((_outputPtr + nameLen) > _outputBufferLen) {
        flushBuffer();
        if (nameLen > _outputBufferLen) {
            name.writeBytes(_out);
            viaBuffer = false;
        }
    }
    if (viaBuffer) {
        _outputPtr += name.appendBytes(_outputBuffer, _outputPtr);
    }
    writeRaw(BYTE_EQ, BYTE_QUOT);

    // Then the value, in chunks
    int pos = 0;
    for (int remaining = vlen; remaining > 0; ) {
        final char[] chunk = _copyBuffer;
        final int count = Math.min(remaining, chunk.length);
        value.getChars(pos, pos + count, chunk, 0);
        writeAttrValue(chunk, 0, count);
        pos += count;
        remaining -= count;
    }
    writeRaw(BYTE_QUOT);
}
/*
/**********************************************************************
/* Write methods, names
/**********************************************************************
*/
/**
 * Writes the serialized form of the given name, flushing the output
 * buffer first if the name does not fit in the remaining space.
 *
 * @param name Pre-serialized name to write
 */
protected final void writeName(WName name)
    throws IOException
{
    int pos = _outputPtr;
    final int nameLen = name.serializedLength();
    final boolean fits = (pos + nameLen) <= _outputBufferLen;
    if (!fits) {
        flushBuffer();
        if (nameLen >= _outputBufferLen) {
            // Too long for the buffer: write directly to the stream
            name.writeBytes(_out);
            return;
        }
        pos = _outputPtr;
    }
    _outputPtr = pos + name.appendBytes(_outputBuffer, pos);
}
/**
 * Slow-path helper that always flushes the output buffer and then
 * writes one byte followed by the serialized name.
 *
 * @param preChar Byte to write before the name
 * @param name Pre-serialized name to write
 */
protected final void writeName(byte preChar, WName name)
    throws IOException
{
    flushBuffer();
    final int nameLen = name.serializedLength();
    if (nameLen < _outputBufferLen) {
        final byte[] out = _outputBuffer;
        int pos = _outputPtr;
        out[pos++] = preChar;
        pos += name.appendBytes(out, pos);
        _outputPtr = pos;
        return;
    }
    // Name (plus prefix byte) can not fit in the buffer: write both
    // directly to the stream
    _out.write(preChar);
    name.writeBytes(_out);
}
/**
 * Slow-path helper that always flushes the output buffer and then
 * writes the serialized name followed by one byte.
 *
 * @param name Pre-serialized name to write
 * @param postChar Byte to write after the name
 */
protected final void writeName(WName name, byte postChar)
    throws IOException
{
    flushBuffer();
    if (name.serializedLength() < _outputBufferLen) {
        final byte[] out = _outputBuffer;
        int pos = _outputPtr;
        pos += name.appendBytes(out, pos);
        out[pos++] = postChar;
        _outputPtr = pos;
        return;
    }
    // Name (plus trailing byte) can not fit in the buffer: write both
    // directly to the stream
    name.writeBytes(_out);
    _out.write(postChar);
}
/**
 * Writes the leading part of an attribute: space, name, '=' and the
 * opening double quote.
 *
 * @param name Pre-serialized attribute name
 */
private final void writeAttrNameEqQ(WName name)
    throws IOException, XMLStreamException
{
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    // Space, '=' and '"' on top of the name itself
    final int needed = name.serializedLength() + 3;

    if ((_outputPtr + needed) >= _outputBufferLen) {
        flushBuffer();
        if ((_outputPtr + needed) >= _outputBufferLen) {
            // Does not fit even in an emptied buffer: output in pieces
            writeName(BYTE_SPACE, name);
            writeRaw(BYTE_EQ);
            writeRaw(BYTE_QUOT);
            return;
        }
    }
    final byte[] out = _outputBuffer;
    int pos = _outputPtr;
    out[pos++] = BYTE_SPACE;
    pos += name.appendBytes(out, pos);
    out[pos++] = BYTE_EQ;
    out[pos++] = BYTE_QUOT;
    _outputPtr = pos;
}
/*
/**********************************************************************
/* Write methods, textual content
/**********************************************************************
*/
/**
 * Writes a CDATA section with given content, feeding the content
 * through the copy buffer one chunk at a time.
 *
 * @param data Content of the CDATA section
 *
 * @return -1 to indicate successful write, or index (within
 *   <code>data</code>) of the problematic character as reported by
 *   the content writer; in the latter case the end marker is NOT written
 */
@Override
public int writeCData(String data)
    throws IOException, XMLStreamException
{
    writeCDataStart(); // will check surrogates

    int pos = 0;
    for (int remaining = data.length(); remaining > 0; ) {
        final char[] chunk = _copyBuffer;
        final int count = Math.min(chunk.length, remaining);
        data.getChars(pos, pos + count, chunk, 0);
        final int badIx = writeCDataContents(chunk, 0, count);
        if (badIx >= 0) {
            // Translate chunk-relative index to index within the String
            return pos + badIx;
        }
        pos += count;
        remaining -= count;
    }
    writeCDataEnd(); // will check surrogates
    return -1;
}
/**
 * Writes a CDATA section with content from given buffer.
 *
 * @param cbuf Buffer that contains the content
 * @param offset Index of the first content character
 * @param len Number of content characters
 *
 * @return Value returned by the content writer: negative on success
 *   (end marker written), otherwise index of the problematic
 *   character (end marker NOT written)
 */
@Override
public int writeCData(char[] cbuf, int offset, int len)
    throws IOException, XMLStreamException
{
    writeCDataStart(); // will check surrogates
    final int result = writeCDataContents(cbuf, offset, len);
    if (result >= 0) {
        return result;
    }
    writeCDataEnd(); // will check surrogates
    return result;
}
/**
 * Writes the content of a CDATA section (without start/end markers).
 * Content can not be escaped within CDATA, so an embedded "]]&gt;" is
 * handled by closing the section after "]]" and opening a new one
 * that starts with "&gt;".
 *<p>
 * Note: a "]]&gt;" sequence that is split across two calls (chunk
 * boundary) is not detected.
 *
 * @param cbuf Buffer that contains the content
 * @param offset Index of the first content character
 * @param len Number of content characters
 *
 * @return Always -1 in this implementation (problems are either
 *   fixed or reported via exceptions)
 */
protected int writeCDataContents(char[] cbuf, int offset, int len)
    throws IOException, XMLStreamException
{
    /* Unlike with writeCharacters() and fastWriteName(), let's not
     * worry about split buffers here: this is unlikely to become
     * performance bottleneck. This allows keeping it simple; and
     * should it matter, we could start doing fast version here
     * as well.
     */
    len += offset; // now marks the end

    main_loop:
    while (offset < len) {
        final int[] charTypes = _charTypes.OTHER_CHARS;

        // Copy characters that can be output as a single byte
        while (true) {
            int ch = (int) cbuf[offset];
            if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                break;
            }
            if (charTypes[ch] != XmlCharTypes.CT_OK) {
                break;
            }
            if (_outputPtr >= _outputBufferLen) {
                flushBuffer();
            }
            _outputBuffer[_outputPtr++] = (byte) ch;
            if (++offset >= len) {
                break main_loop;
            }
        }

        // Ok, so what did we hit?
        int ch = (int) cbuf[offset++];
        if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
            switch (charTypes[ch]) {
            case CT_INVALID:
                // NOTE(review): falls through (as before); relies on
                // reportInvalidChar() throwing
                reportInvalidChar(ch);
            case CT_WS_CR: // No way to escape within CDATA
            case CT_WS_LF:
                ++_locRowNr;
                break;
            case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
                // NOTE(review): falls through (as before); relies on
                // reportFailedEscaping() throwing
                reportFailedEscaping("CDATA", ch);
            case CT_MULTIBYTE_2:
                // To off-line or not?
                output2ByteChar(ch);
                continue main_loop;
            case CT_RBRACKET:
                /* Only a complete "]]>" needs special handling. Anything
                 * else (including "]]" NOT followed by '>') must fall
                 * out of the switch so that this bracket gets written:
                 * skipping the write would silently drop one ']'.
                 */
                if ((offset + 1) < len
                        && cbuf[offset] == ']' && cbuf[offset+1] == '>') {
                    // Ok, need to output ']]' first, then end
                    offset += 2;
                    writeRaw(BYTE_RBRACKET, BYTE_RBRACKET);
                    writeCDataEnd();
                    // Then new start, and '>'
                    writeCDataStart();
                    writeRaw(BYTE_GT);
                    continue main_loop;
                }
                break;
            default: // Everything else should be outputtable as is
                break;
            }
            if (_outputPtr >= _outputBufferLen) {
                flushBuffer();
            }
            _outputBuffer[_outputPtr++] = (byte)ch;
        } else { // beyond 2-byte encodables; 3-byte, surrogates?
            offset = outputMultiByteChar(ch, cbuf, offset, len);
        }
    }
    return -1;
}
/**
 * Writes given String as (escaped) character content. Content that
 * fits in the copy buffer is written in one go; longer content is
 * handled in chunks.
 *
 * @param text Text to write
 */
@Override
public final void writeCharacters(String text)
    throws IOException, XMLStreamException
{
    final int textLen = text.length();
    if (textLen > _copyBufferLen) {
        // Not so common case, handled separately
        longWriteCharacters(text);
    } else if (textLen > 0) {
        final char[] chars = _copyBuffer;
        text.getChars(0, textLen, chars, 0);
        writeCharacters(chars, 0, textLen);
    }
}
/**
 * Writes a String that does not fit in the copy buffer as character
 * content, one buffer-full at a time. Always writes at least one
 * (possibly empty) chunk.
 *
 * @param text Text to write
 */
private final void longWriteCharacters(String text)
    throws IOException, XMLStreamException
{
    final char[] chunk = _copyBuffer;
    final int chunkSize = chunk.length;
    int remaining = text.length();
    int pos = 0;
    do {
        final int count = Math.min(remaining, chunkSize);
        text.getChars(pos, pos + count, chunk, 0);
        writeCharacters(chunk, 0, count);
        pos += count;
        remaining -= count;
    } while (remaining > 0);
}
/**
* Writes given characters as text content, escaping as necessary.
* When input length does not exceed the room left in the output buffer,
* a fast loop without per-byte bounds checks is used (room is re-verified
* after every escaped or multi-byte character); otherwise work is
* delegated to the bounds-checking variant.
*
* @param cbuf Buffer that contains the text
* @param offset Index of the first character to write
* @param len Number of characters to write
*/
@Override
public final void writeCharacters(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
{
if (_surrogate != 0) {
// Pending first surrogate from earlier input: first char here is its pair.
// NOTE(review): reads cbuf[offset] without checking len > 0
outputSurrogates(_surrogate, cbuf[offset]);
++offset;
--len;
}
// Ok, let's offline (what's sure to be) slow case first:
// (with multi-byte chars, others may be, too).
int ptr = _outputPtr;
if ((ptr + len) > _outputBufferLen) {
writeSplitCharacters(cbuf, offset, len);
return;
}
len += offset; // now marks the end
main_loop:
while (offset < len) {
final int[] charTypes = _charTypes.TEXT_CHARS;
// Tight loop: copy characters that can be output as a single byte
inner_loop:
while (true) {
int ch = (int) cbuf[offset];
if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
break inner_loop;
}
if (charTypes[ch] != XmlCharTypes.CT_OK) {
// This may look weird, but profiling showed that handling of LFs
// for indentation has measurable effect; plus, that checking it
// here will not slow down inner loop either
if (ch != '\n') {
break inner_loop;
}
++_locRowNr;
}
_outputBuffer[ptr++] = (byte) ch;
if (++offset >= len) {
break main_loop;
}
}
// Ok, so what did we hit?
int ch = (int) cbuf[offset++];
if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
// Branches that call helpers first store local position in _outputPtr
// (helpers write through it) and exit via 'break', so that room check
// and re-sync below are done; branches that write the byte directly
// via 'ptr' use 'continue' instead
switch (charTypes[ch]) {
case CT_INVALID:
// NOTE(review): falls through; relies on reportInvalidChar() throwing
reportInvalidChar(ch);
case CT_WS_CR:
// !!! TBI: line count
// Also, CR to be quoted?
if (_config.willEscapeCR()) {
_outputPtr = ptr;
writeAsEntity(ch);
break;
}
_outputBuffer[ptr++] = (byte)ch;
++_locRowNr;
continue main_loop;
case CT_WS_LF: // never occurs (handled in loop), but don't want to leave gaps
// NOTE(review): if this ever were reached, 'ptr' would be reset from a
// stale _outputPtr below
break;
case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
case CT_LT:
case CT_AMP:
_outputPtr = ptr;
writeAsEntity(ch);
break;
case CT_MULTIBYTE_2:
// To off-line or not?
_outputPtr = ptr;
output2ByteChar(ch);
break;
case CT_RBRACKET: // may need to quote as well...
// Let's not quote if known not to be followed by '>'
// (end of this input also leads to quoting, since next char is unknown)
if (offset >= len || cbuf[offset] == '>') {
_outputPtr = ptr;
writeAsEntity(ch);
break;
}
// fall through
default:
_outputBuffer[ptr++] = (byte)ch;
continue main_loop;
}
} else { // beyond 2-byte encodables; 3-byte, surrogates?
_outputPtr = ptr;
// Returns the input offset to continue from
offset = outputMultiByteChar(ch, cbuf, offset, len);
}
/* At this point, it's not guaranteed any more that we'll
* be able to fit all output into buffer without checks.
* Let's verify: in the worst case, we'll just flush
* whatever we had, to gain more room.
*/
if ((len - offset) >= (_outputBufferLen - _outputPtr)) {
flushBuffer();
}
ptr = _outputPtr;
}
_outputPtr = ptr;
}
/**
* This method is called when it is possible that the output
* may cross the output buffer boundary. Because of this, code
* has to add more boundary checks: buffer is checked (and flushed
* if full) before every directly written byte.
*
* @param cbuf Buffer that contains the text
* @param offset Index of the first character to write
* @param len Number of characters to write
*/
private final void writeSplitCharacters(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
{
// Note: caller handled surrogate already
len += offset; // now marks the end
main_loop:
while (offset < len) {
final int[] charTypes = _charTypes.TEXT_CHARS;
// Copy characters that can be output as a single byte
inner_loop:
while (true) {
int ch = (int) cbuf[offset];
if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
break inner_loop;
}
if (charTypes[ch] != XmlCharTypes.CT_OK) {
// Linefeed is output as is; only row count needs updating
if (ch != '\n') {
break inner_loop;
}
++_locRowNr;
}
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte)ch;
if (++offset >= len) {
break main_loop;
}
}
// Ok, so what did we hit?
int ch = (int) cbuf[offset++];
if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
// 'continue' means character was fully handled by a helper;
// 'break' means it is to be written as is after the switch
switch (charTypes[ch]) {
case CT_INVALID:
// NOTE(review): falls through; relies on reportInvalidChar() throwing
reportInvalidChar(ch);
case CT_WS_CR:
// !!! TBI: line count
// Also, CR to be quoted?
if (_config.willEscapeCR()) {
writeAsEntity(ch);
continue main_loop;
}
++_locRowNr;
break;
case CT_WS_LF: // can not occur, handled above, but let's keep sequence
break;
case CT_OUTPUT_MUST_QUOTE:
case CT_LT:
case CT_AMP:
writeAsEntity(ch);
continue main_loop;
case CT_MULTIBYTE_2: // 3, 4 and N can never occur
// To off-line or not?
output2ByteChar(ch);
continue main_loop;
case CT_RBRACKET: // may need to quote as well...
// Let's not quote if known not to be followed by '>'
// (end of this input also leads to quoting, since next char is unknown)
if (offset >= len || cbuf[offset] == '>') {
writeAsEntity(ch);
continue main_loop;
}
break;
default:
break;
}
} else { // beyond 2-byte encodables; 3-byte, surrogates?
// Returns the input offset to continue from
offset = outputMultiByteChar(ch, cbuf, offset, len);
continue;
}
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte)ch;
}
}
/*
/**********************************************************************
/* Write methods, typed (element) content
/**********************************************************************
*/
/**
 * Writes a typed value using given encoder, which produces its
 * output directly into the byte output buffer; the buffer is flushed
 * and encoding resumed until the encoder reports completion.
 *
 * @param enc Encoder that produces the value
 */
@Override
public void writeTypedValue(AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    final int available = _outputBufferLen - _outputPtr;
    if (enc.bufferNeedsFlush(available)) {
        flush();
    }
    _outputPtr = enc.encodeMore(_outputBuffer, _outputPtr, _outputBufferLen);
    while (!enc.isCompleted()) {
        // Buffer filled up before encoder was done: make room, continue
        flushBuffer();
        _outputPtr = enc.encodeMore(_outputBuffer, _outputPtr, _outputBufferLen);
    }
}
/**
 * Writes an attribute whose value is produced by given encoder:
 * leading space, name, '=', opening quote, encoded value and
 * closing quote.
 *
 * @param name Pre-serialized attribute name
 * @param enc Encoder that produces the value
 */
@Override
public final void writeAttribute(WName name, AsciiValueEncoder enc)
    throws IOException, XMLStreamException
{
    writeAttrNameEqQ(name);

    // Value: same logic as in writeTypedValue(), inlined
    final int available = _outputBufferLen - _outputPtr;
    if (enc.bufferNeedsFlush(available)) {
        flush();
    }
    for (;;) {
        _outputPtr = enc.encodeMore(_outputBuffer, _outputPtr, _outputBufferLen);
        if (enc.isCompleted()) {
            break;
        }
        flushBuffer();
    }

    // And finally the closing quote
    final boolean bufferFull = (_outputPtr >= _outputBufferLen);
    if (bufferFull) {
        flushBuffer();
    }
    _outputBuffer[_outputPtr++] = BYTE_QUOT;
}
/*
/**********************************************************************
/* Write methods, other
/**********************************************************************
*/
/**
 * Writes a comment with given content, feeding the content through
 * the copy buffer one chunk at a time.
 *<p>
 * If the content writer reports a problem (non-negative index), that
 * index is translated to an index within <code>data</code> and
 * returned without writing the comment end marker.
 *
 * @param data Content of the comment
 *
 * @return -1 if the comment was written completely; otherwise index
 *   reported by the content writer, relative to <code>data</code>
 */
@Override
public int writeComment(String data) throws IOException, XMLStreamException
{
    writeCommentStart();

    int pos = 0;
    for (int remaining = data.length(); remaining > 0; ) {
        final char[] chunk = _copyBuffer;
        final int count = Math.min(remaining, chunk.length);
        data.getChars(pos, pos + count, chunk, 0);
        final int badIx = writeCommentContents(chunk, 0, count);
        if (badIx >= 0) {
            return pos + badIx;
        }
        // Advance by what was actually consumed; a short chunk can only
        // be the last one, so the loop ends either way
        pos += count;
        remaining -= count;
    }
    writeCommentEnd();
    return -1;
}
/**
* Writes the content of a comment (without start/end markers).
* Note: the only way to fix comment contents is to inject a space
* to split up consecutive '--' (or '-' that ends a comment); this
* is done for every hyphen that is followed by another hyphen, or
* that is the last character of the given input.
*
* @param cbuf Buffer that contains the content
* @param offset Index of the first content character
* @param len Number of content characters
*
* @return Always -1 in this implementation
*/
protected int writeCommentContents(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
{
/* Unlike with writeCharacters() and fastWriteName(), let's not
* worry about split buffers here: this is unlikely to become
* performance bottleneck. This allows keeping it simple; and
* should it matter, we could start doing fast version here
* as well.
*/
len += offset; // now marks the end
main_loop:
while (offset < len) {
final int[] charTypes = _charTypes.OTHER_CHARS;
// Copy characters that can be output as a single byte
inner_loop:
while (true) {
int ch = (int) cbuf[offset];
if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
break inner_loop;
}
if (charTypes[ch] != XmlCharTypes.CT_OK) {
break inner_loop;
}
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte) ch;
if (++offset >= len) {
break main_loop;
}
}
// Ok, so what did we hit?
int ch = (int) cbuf[offset++];
if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
// 'continue' means character was fully handled; 'break' means it
// is to be written as is after the switch
switch (charTypes[ch]) {
case CT_INVALID:
// NOTE(review): falls through; relies on reportInvalidChar() throwing
reportInvalidChar(ch);
case CT_WS_CR: // No way to escape within a comment
case CT_WS_LF:
++_locRowNr;
break;
case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
// NOTE(review): falls through; relies on reportFailedEscaping() throwing
reportFailedEscaping("comment", ch);
case CT_MULTIBYTE_2:
// To off-line or not?
output2ByteChar(ch);
continue main_loop;
case CT_HYPHEN:
// No need if followed by non hyphen
if (offset < len && cbuf[offset] != '-') {
break;
}
// Two hyphens, or hyphen at end; must append a space
writeRaw(BYTE_HYPHEN, BYTE_SPACE);
continue main_loop;
default: // Everything else should be outputtable as is
break;
}
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte)ch;
} else { // beyond 2-byte encodables; 3-byte, surrogates?
// Returns the input offset to continue from
offset = outputMultiByteChar(ch, cbuf, offset, len);
}
}
return -1;
}
/**
 * Writes a complete, caller-constructed DOCTYPE declaration as-is;
 * no validity checks are done on the content.
 *
 * @param data Full text of the declaration
 */
@Override
public void writeDTD(String data)
    throws IOException, XMLStreamException
{
    // !!! TBI: Check for char validity, similar to other methods?
    final int dataLen = data.length();
    writeRaw(data, 0, dataLen);
}
/**
 * Writes a DOCTYPE declaration built from its parts, instead of
 * silently dropping it:
 *<pre>
 *  &lt;!DOCTYPE root PUBLIC "publicId" "systemId" [internalSubset]&gt;
 *  &lt;!DOCTYPE root SYSTEM "systemId" [internalSubset]&gt;
 *</pre>
 * The external identifier is only written if a system identifier is
 * given (XML requires one whenever there is a public identifier);
 * the internal subset is only written if non-empty. Identifiers and
 * subset are written as-is, without validity checks.
 *
 * @param rootName Name of the root element
 * @param systemId System identifier of the external subset; may be null
 * @param publicId Public identifier of the external subset; may be null
 * @param internalSubset Internal subset; may be null or empty
 */
@Override
public void writeDTD(WName rootName, String systemId, String publicId,
        String internalSubset)
    throws IOException, XMLStreamException
{
    final String start = "<!DOCTYPE ";
    writeRaw(start, 0, start.length());
    writeName(rootName);

    if (systemId != null) {
        if (publicId != null) {
            // Public identifiers can not contain double quotes, so a
            // double quote is always a safe delimiter here
            final String intro = " PUBLIC \"";
            writeRaw(intro, 0, intro.length());
            writeRaw(publicId, 0, publicId.length());
            writeRaw(BYTE_QUOT, BYTE_SPACE);
        } else {
            final String intro = " SYSTEM ";
            writeRaw(intro, 0, intro.length());
        }
        // System literal may contain one kind of quote; delimit with the other
        final byte quote = (systemId.indexOf('"') < 0) ? BYTE_QUOT : BYTE_APOS;
        writeRaw(quote);
        writeRaw(systemId, 0, systemId.length());
        writeRaw(quote);
    }
    if (internalSubset != null && internalSubset.length() > 0) {
        writeRaw(BYTE_SPACE, BYTE_LBRACKET);
        writeRaw(internalSubset, 0, internalSubset.length());
        writeRaw(BYTE_RBRACKET);
    }
    writeRaw(BYTE_GT);
}
/**
* Writes the data part of a processing instruction. Content can not
* be escaped, so an embedded "?&gt;" can not be written: when one is
* found, writing stops before the '?' is output.
*
* @param cbuf Buffer that contains the data
* @param offset Index of the first data character
* @param len Number of data characters
*
* @return -1 if all data was written; otherwise index (within
* <code>cbuf</code>) of the '&gt;' of the offending "?&gt;" (offset has
* already been advanced past the '?' at that point)
*/
protected int writePIData(char[] cbuf, int offset, int len)
throws IOException, XMLStreamException
{
/* Unlike with writeCharacters() and fastWriteName(), let's not
* worry about split buffers here: this is unlikely to become
* performance bottleneck. This allows keeping it simple; and
* should it matter, we could start doing fast version here
* as well.
*/
len += offset; // now marks the end
main_loop:
while (offset < len) {
final int[] charTypes = _charTypes.OTHER_CHARS;
// Copy characters that can be output as a single byte
inner_loop:
while (true) {
int ch = (int) cbuf[offset];
if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
break inner_loop;
}
if (charTypes[ch] != XmlCharTypes.CT_OK) {
break inner_loop;
}
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte) ch;
if (++offset >= len) {
break main_loop;
}
}
// Ok, so what did we hit?
int ch = (int) cbuf[offset++];
if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
// 'continue' means character was fully handled; 'break' means it
// is to be written as is after the switch
switch (charTypes[ch]) {
case CT_INVALID:
// NOTE(review): falls through; relies on reportInvalidChar() throwing
reportInvalidChar(ch);
case CT_WS_CR: // No way to escape within a processing instruction
case CT_WS_LF:
++_locRowNr;
break;
case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
// NOTE(review): falls through; relies on reportFailedEscaping() throwing
reportFailedEscaping("processing instruction", ch);
case CT_MULTIBYTE_2:
// To off-line or not?
output2ByteChar(ch);
continue main_loop;
case CT_QMARK:
// Problem, if we have '?>'
// (a "?>" split across two calls is not detected)
if (offset < len && cbuf[offset] == '>') {
return offset;
}
break;
default: // Everything else should be outputtable as is
break;
}
if (_outputPtr >= _outputBufferLen) {
flushBuffer();
}
_outputBuffer[_outputPtr++] = (byte)ch;
} else { // beyond 2-byte encodables; 3-byte, surrogates?
// Returns the input offset to continue from
offset = outputMultiByteChar(ch, cbuf, offset, len);
}
}
return -1;
}
/**
 * Writes a general entity reference: '&amp;', the entity name and ';'.
 *
 * @param name Pre-serialized name of the entity
 */
@Override
public void writeEntityReference(WName name)
    throws IOException, XMLStreamException
{
    // Leading ampersand; this write also checks for a pending surrogate
    writeRaw(BYTE_AMP);
    // Name is output as is
    writeName(name);
    // And the terminating semicolon
    writeRaw(BYTE_SEMICOLON);
}
/**
 * Writes a processing instruction with given target and optional
 * data; data is fed through the copy buffer one chunk at a time.
 *
 * @param target Pre-serialized target name
 * @param data Data of the processing instruction; may be null, in
 *   which case only the target is written
 *
 * @return -1 if the processing instruction was written completely;
 *   otherwise index reported by the data writer, relative to
 *   <code>data</code> (the end marker is NOT written in that case)
 */
@Override
public int writePI(WName target, String data)
    throws IOException, XMLStreamException
{
    writeRaw(BYTE_LT, BYTE_QMARK);
    writeName(target);

    if (data != null) {
        // Space separates target and data
        writeRaw(BYTE_SPACE);
        int pos = 0;
        for (int remaining = data.length(); remaining > 0; ) {
            final char[] chunk = _copyBuffer;
            final int count = Math.min(chunk.length, remaining);
            data.getChars(pos, pos + count, chunk, 0);
            final int badIx = writePIData(chunk, 0, count);
            if (badIx >= 0) {
                return pos + badIx;
            }
            pos += count;
            remaining -= count;
        }
    }
    writeRaw(BYTE_QMARK, BYTE_GT);
    return -1;
}
/**
 * Writes given String as ignorable white space, by copying it
 * through the copy buffer one chunk at a time and delegating each
 * piece to the char-array variant.
 *
 * @param data White space to write
 */
@Override
public final void writeSpace(String data)
    throws IOException, XMLStreamException
{
    int pos = 0;
    for (int remaining = data.length(); remaining > 0; ) {
        final char[] chunk = _copyBuffer;
        final int count = Math.min(remaining, chunk.length);
        data.getChars(pos, pos + count, chunk, 0);
        writeSpace(chunk, 0, count);
        pos += count;
        remaining -= count;
    }
}
/**
 * Writes given characters as white space. Characters above 0x0020
 * are reported as non-white-space content, except that in XML 1.1
 * mode 0x0085 and 0x2028 are accepted.
 *<p>
 * Accepted non-ASCII characters are written through the
 * encoding-specific raw writer: casting them to a single byte
 * would corrupt output (0x2028 would come out as 0x28, '(').
 *
 * @param cbuf Buffer that contains the white space
 * @param offset Index of the first character to write
 * @param len Number of characters to write
 */
@Override
public void writeSpace(char[] cbuf, int offset, int len)
    throws IOException, XMLStreamException
{
    if (_out == null) {
        return;
    }
    if (_surrogate != 0) { // can this actually happen?
        reportNwfContent(ErrorConsts.WERR_SPACE_CONTENT, (int)_surrogate, offset-1);
    }
    len += offset; // now marks the end
    while (offset < len) {
        char ch = cbuf[offset++];
        if (ch > 0x0020) {
            if (!_config.isXml11() || (ch != 0x0085 && ch != 0x2028)) {
                reportNwfContent(ErrorConsts.WERR_SPACE_CONTENT, (int)ch, offset-1);
            }
            if (ch > 0x007F) {
                // Can not be expressed as a single ASCII byte; let the
                // encoding-specific method produce the right bytes
                writeRaw(cbuf, offset - 1, 1);
                continue;
            }
        }
        if (_outputPtr >= _outputBufferLen) {
            flushBuffer();
        }
        // !!! Line counts?
        _outputBuffer[_outputPtr++] = (byte)ch;
    }
}
/**
 * Writes the XML declaration. Version is always written; encoding is
 * written if non-null and non-empty, standalone if non-null. Values
 * are written as-is (no validity checks), each followed by a closing
 * apostrophe.
 *
 * @param version Version value to write
 * @param encoding Encoding value to write; may be null or empty
 * @param standalone Standalone value to write; may be null
 */
@Override
public void writeXmlDeclaration(String version, String encoding, String standalone)
    throws IOException, XMLStreamException
{
    writeRaw(BYTES_XMLDECL_START); // will check surrogates
    // !!! TBI: check validity
    final int versionLen = version.length();
    writeRaw(version, 0, versionLen);
    writeRaw(BYTE_APOS);

    final boolean hasEncoding = (encoding != null) && (encoding.length() > 0);
    if (hasEncoding) {
        writeRaw(BYTES_XMLDECL_ENCODING);
        // !!! TBI: check validity
        writeRaw(encoding, 0, encoding.length());
        writeRaw(BYTE_APOS);
    }

    final boolean hasStandalone = (standalone != null);
    if (hasStandalone) {
        writeRaw(BYTES_XMLDECL_STANDALONE);
        // !!! TBI: check validity
        writeRaw(standalone, 0, standalone.length());
        writeRaw(BYTE_APOS);
    }
    writeRaw(BYTE_QMARK, BYTE_GT);
}
/*
/**********************************************************************
/* Shared helper output methods
/**********************************************************************
*/
/**
 * Writes the bytes of {@code BYTES_CDATA_START} (presumably the marker
 * that opens a CDATA section) as raw output.
 *
 * @throws IOException If writing to the output fails, or if an unpaired
 *   surrogate is still pending
 */
protected final void writeCDataStart()
throws IOException
{
writeRaw(BYTES_CDATA_START);
}
/**
 * Writes the bytes of {@code BYTES_CDATA_END} (presumably the marker
 * that closes a CDATA section) as raw output.
 *
 * @throws IOException If writing to the output fails, or if an unpaired
 *   surrogate is still pending
 */
protected final void writeCDataEnd()
throws IOException
{
writeRaw(BYTES_CDATA_END);
}
/**
 * Writes the bytes of {@code BYTES_COMMENT_START} (presumably the
 * marker that opens a comment) as raw output.
 *
 * @throws IOException If writing to the output fails, or if an unpaired
 *   surrogate is still pending
 */
protected final void writeCommentStart()
throws IOException
{
writeRaw(BYTES_COMMENT_START);
}
/**
 * Writes the bytes of {@code BYTES_COMMENT_END} (presumably the marker
 * that closes a comment) as raw output.
 *
 * @throws IOException If writing to the output fails, or if an unpaired
 *   surrogate is still pending
 */
protected final void writeCommentEnd()
throws IOException
{
writeRaw(BYTES_COMMENT_END);
}
/*
/**********************************************************************
/* Write methods, raw (unprocessed) output
/**********************************************************************
*/
/**
 * Appends a single byte to the output buffer as is, flushing the
 * buffer first if it has no room left.
 *
 * @param b Byte to append
 *
 * @throws IOException If flushing fails, or if an unpaired surrogate
 *   is still pending
 */
protected final void writeRaw(byte b)
    throws IOException
{
    // A dangling first surrogate can not be followed by raw content
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    final boolean bufferFull = (_outputPtr >= _outputBufferLen);
    if (bufferFull) {
        flushBuffer();
    }
    _outputBuffer[_outputPtr++] = b;
}
/**
 * Appends two bytes to the output buffer as is, flushing the buffer
 * first unless it has room for both.
 *
 * @param b1 First byte to append
 * @param b2 Second byte to append
 *
 * @throws IOException If flushing fails, or if an unpaired surrogate
 *   is still pending
 */
protected final void writeRaw(byte b1, byte b2)
    throws IOException
{
    // A dangling first surrogate can not be followed by raw content
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    // Both bytes must fit without a flush in between
    final int room = _outputBufferLen - _outputPtr;
    if (room < 2) {
        flushBuffer();
    }
    _outputBuffer[_outputPtr++] = b1;
    _outputBuffer[_outputPtr++] = b2;
}
/**
 * Writes the whole contents of the given array as raw output, by
 * delegating to the offset/length variant with the range
 * {@code [0, buf.length)}.
 *
 * @param buf Bytes to write; must not be null
 *
 * @throws IOException If writing to the output fails, or if an unpaired
 *   surrogate is still pending
 */
protected final void writeRaw(byte[] buf)
throws IOException
{
writeRaw(buf, 0, buf.length);
}
/**
 * Writes {@code len} bytes of the given array as raw output.
 *<p>
 * If the bytes fit in the remaining space of the output buffer they
 * are simply appended. Otherwise any buffered content is written out
 * first; after that, chunks shorter than {@code SMALL_WRITE} are
 * buffered, and longer ones are written directly to the underlying
 * stream, bypassing the buffer.
 *
 * @param buf Array that contains the bytes to write
 * @param offset Index of the first byte to write
 * @param len Number of bytes to write
 *
 * @throws IOException If writing to the output fails, or if an unpaired
 *   surrogate is still pending
 */
protected final void writeRaw(byte[] buf, int offset, int len)
    throws IOException
{
    // A dangling first surrogate can not be followed by raw content
    if (_surrogate != 0) {
        throwUnpairedSurrogate();
    }
    int ptr = _outputPtr;
    // Common case: fits right in the buffer
    if ((ptr + len) <= _outputBufferLen) {
        System.arraycopy(buf, offset, _outputBuffer, ptr, len);
        _outputPtr += len;
        return;
    }
    /* Does not fit: drain our own buffer first. This only needs
     * flushBuffer(), same as the single- and two-byte variants;
     * calling the public flush() here would (presumably) also force
     * the underlying stream to flush on every spill-over.
     */
    if (ptr > 0) {
        flushBuffer();
        ptr = _outputPtr;
    }
    if (len < SMALL_WRITE) {
        // Short chunk: buffer it, to avoid a short write on the stream
        System.arraycopy(buf, offset, _outputBuffer, ptr, len);
        _outputPtr += len;
    } else {
        // NOTE(review): unlike flushBuffer(), this direct write does not
        // adjust the location counters -- confirm whether it should.
        _out.write(buf, offset, len);
    }
}
/*
/**********************************************************************
/* Internal methods, problem reporting
/**********************************************************************
*/
/**
 * Reports the currently pending surrogate as unpaired. The pending
 * state is cleared before the exception is thrown by
 * {@link #throwUnpairedSurrogate(int)}.
 *
 * @throws IOException Always
 */
protected final void throwUnpairedSurrogate()
    throws IOException
{
    final int pending = _surrogate;
    // Reset first, so the stale value is not reported a second time
    _surrogate = 0;
    throwUnpairedSurrogate(pending);
}
/**
 * Flushes the output and then throws an exception that reports the
 * given code as an unpaired surrogate character.
 *
 * @param code Character code of the unpaired surrogate; included in
 *   the exception message in hexadecimal
 *
 * @throws IOException Always
 */
protected final void throwUnpairedSurrogate(int code)
    throws IOException
{
    // Get everything written so far out, to make debugging easier
    flush();
    final String hex = Integer.toHexString(code);
    throw new IOException("Unpaired surrogate character (0x"+hex+")");
}
/*
/**********************************************************************
/* Helper methods for sub-classes
/**********************************************************************
*/
/**
 * Writes the buffered bytes to the underlying stream and resets the
 * buffer pointer. Does nothing if the buffer is empty or there is no
 * output stream. The underlying stream itself is not flushed.
 *
 * @throws IOException If the underlying stream fails to write
 */
protected final void flushBuffer()
    throws IOException
{
    final int count = _outputPtr;
    if (count <= 0 || _out == null) {
        return;
    }
    // Location info has to move along with the bytes leaving the buffer
    _locPastChars += count;
    _locRowStartOffset -= count;
    // Pointer is reset before the write, so it is cleared even on failure
    _outputPtr = 0;
    _out.write(_outputBuffer, 0, count);
}
/**
 * Appends an entity for the given character to the output buffer.
 *<p>
 * For the characters {@code & < > ' "} a named entity is written,
 * using the {@code BYTE_x} letter constants. Any other character is
 * written as a hexadecimal character reference with lowercase digits
 * and no leading zeroes.
 *
 * @param c Character code to write as an entity
 *
 * @throws IOException If the output buffer has to be flushed and that
 *   fails
 */
protected final void writeAsEntity(int c)
    throws IOException
{
    final byte[] out = _outputBuffer;
    int pos = _outputPtr;
    // Longest output: '&', '#', 'x', up to 6 hex digits, ';'
    if ((pos + 10) >= out.length) {
        flushBuffer();
        pos = _outputPtr;
    }
    out[pos++] = BYTE_AMP;

    if (c >= 256) {
        out[pos++] = BYTE_HASH;
        out[pos++] = BYTE_X;
        // Upper five nibbles: skipped until the first non-zero one
        final int firstDigitPos = pos;
        for (int shift = 20; shift > 0; shift -= 4) {
            final int nibble = (c >> shift) & 0xF;
            final boolean started = (pos != firstDigitPos);
            if (nibble > 0 || started) {
                out[pos++] = (byte) ((nibble < 10) ? ('0' + nibble) : (('a' - 10) + nibble));
            }
        }
        // Lowest nibble is always written
        c &= 0xF;
        out[pos++] = (byte) ((c < 10) ? ('0' + c) : (('a' - 10) + c));
    } else {
        // Not mandatory, but use the pre-defined entities where possible
        switch (c) {
        case '&':
            out[pos++] = BYTE_A;
            out[pos++] = BYTE_M;
            out[pos++] = BYTE_P;
            break;
        case '<':
            out[pos++] = BYTE_L;
            out[pos++] = BYTE_T;
            break;
        case '>':
            out[pos++] = BYTE_G;
            out[pos++] = BYTE_T;
            break;
        case '\'':
            out[pos++] = BYTE_A;
            out[pos++] = BYTE_P;
            out[pos++] = BYTE_O;
            out[pos++] = BYTE_S;
            break;
        case '"':
            out[pos++] = BYTE_Q;
            out[pos++] = BYTE_U;
            out[pos++] = BYTE_O;
            out[pos++] = BYTE_T;
            break;
        default:
            out[pos++] = BYTE_HASH;
            out[pos++] = BYTE_X;
            // One digit is enough below 16 (tab, cr, lf), else two
            if (c >= 16) {
                final int high = (c >> 4);
                out[pos++] = (byte) ((high < 10) ? ('0' + high) : (('a' - 10) + high));
                c &= 0xF;
            }
            out[pos++] = (byte) ((c < 10) ? ('0' + c) : (('a' - 10) + c));
            break;
        }
    }
    out[pos++] = ';';
    _outputPtr = pos;
}
/**
 * Combines the two halves of a surrogate pair into a single character
 * code.
 *<p>
 * The second half is checked against the range
 * {@code [SURR2_FIRST, SURR2_LAST]} and the result against
 * {@code XmlConsts.MAX_UNICODE_CHAR}; failures are passed to
 * {@code reportNwfContent} and {@code reportInvalidChar}, respectively
 * (which presumably throw). The first half is not checked here.
 *
 * @param surr1 First (high) surrogate; expected to be valid already
 * @param surr2 Second (low) surrogate
 * @param context Text inserted into the error message, right after
 *   "Incomplete surrogate pair"
 *
 * @return Character code calculated from the pair
 */
protected final int calcSurrogate(int surr1, int surr2, String context)
    throws XMLStreamException
{
    // First is known to be valid, but how about the other?
    final boolean secondOk = (surr2 >= SURR2_FIRST) && (surr2 <= SURR2_LAST);
    if (!secondOk) {
        reportNwfContent("Incomplete surrogate pair"+context+": first char 0x"+Integer.toHexString(surr1)+", second 0x"+Integer.toHexString(surr2));
    }
    final int highPart = (surr1 - SURR1_FIRST) << 10;
    final int lowPart = surr2 - SURR2_FIRST;
    final int code = 0x10000 + highPart + lowPart;
    if (code > XmlConsts.MAX_UNICODE_CHAR) { // illegal, as per RFC 3629
        reportInvalidChar(code);
    }
    return code;
}
/*
/**********************************************************************
/* Internal helper methods
/**********************************************************************
*/
/**
 * Converts the given String into a new byte array of the same length,
 * one byte per character, using
 * {@link #getAscii(String, byte[], int)}.
 *
 * @param str String to convert; must not be null
 *
 * @return Newly allocated array with one byte for each character
 */
protected final static byte[] getAscii(String str)
{
    final byte[] ascii = new byte[str.length()];
    getAscii(str, ascii, 0);
    return ascii;
}
/**
 * Copies the characters of the given String to the start of the given
 * array, one byte per character. Each character is narrowed with a
 * plain cast, so only its low 8 bits are kept.
 *
 * @param str String to convert; must not be null
 * @param result Array to fill, starting at index 0; needs room for at
 *   least {@code str.length()} bytes
 */
protected final static void getAscii(String str, byte[] result)
{
    for (int ix = 0, end = str.length(); ix < end; ++ix) {
        final char c = str.charAt(ix);
        result[ix] = (byte) c;
    }
}
/**
 * Copies the characters of the given String into the given array,
 * starting at the given index, one byte per character. Each character
 * is narrowed with a plain cast, so only its low 8 bits are kept.
 *
 * @param str String to convert; must not be null
 * @param result Array to fill; needs room for {@code str.length()}
 *   bytes from {@code offset} on
 * @param offset Index in {@code result} of the first byte to set
 */
protected final static void getAscii(String str, byte[] result, int offset)
{
    final int count = str.length();
    int dst = offset;
    for (int src = 0; src < count; ++src, ++dst) {
        result[dst] = (byte) str.charAt(src);
    }
}
}