com.fasterxml.aalto.out.ByteXmlWriter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of aalto-xml Show documentation
Ultra-high performance non-blocking XML processor (Stax/Stax2, SAX/SAX2)
There is a newer version: 1.3.3
/* Woodstox Lite ("wool") XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.out;

import java.io.*;

import javax.xml.stream.*;

import org.codehaus.stax2.ri.typed.AsciiValueEncoder;


import com.fasterxml.aalto.impl.ErrorConsts;
import com.fasterxml.aalto.util.XmlCharTypes;
import com.fasterxml.aalto.util.XmlChars;
import com.fasterxml.aalto.util.XmlConsts;

import static com.fasterxml.aalto.out.OutputCharTypes.*;

/**
 * This abstract base class (partial implementation of {@link XmlWriter})
 * is used if the destination is byte-based {@link java.io.OutputStream}.
 *
 * Further, all existing implementations are for encodings that
 * are 7-bit ascii compatible. This is important since this means
 * that marker and separator characters are identical independent
 * of actual encoding. This would not hold if support for encodings
 * like EBCDIC were supported using this class.
 */
public abstract class ByteXmlWriter
    extends XmlWriter
{
    /**
     * And this value determines size of the intermediate copy buffer
     * to use.
     */
    final static int DEFAULT_FULL_BUFFER_SIZE = 4000;

    /**
     * Default intermediate copy buffer size, to be used for efficient
     * access to String content. Smaller, since it's in characters, plus
     * will not be used for actual write operations
     */
    final static int DEFAULT_COPY_BUFFER_SIZE = 1000;

    /**
     * Let's try avoid short writes, since some output streams have
     * high per-call penalty (like network streams).
     */
    final static int SMALL_WRITE = 250;

    final static byte BYTE_SPACE = (byte) ' ';
    final static byte BYTE_COLON = (byte) ':';
    final static byte BYTE_SEMICOLON = (byte) ';';
    final static byte BYTE_LBRACKET = (byte) '[';
    final static byte BYTE_RBRACKET = (byte) ']';
    final static byte BYTE_QMARK = (byte) '?';
    final static byte BYTE_EQ = (byte) '=';
    final static byte BYTE_SLASH = (byte) '/';
    final static byte BYTE_HASH = (byte) '#';
    final static byte BYTE_HYPHEN = (byte) '-';

    final static byte BYTE_LT = (byte) '<';
    final static byte BYTE_GT = (byte) '>';
    final static byte BYTE_AMP = (byte) '&';
    final static byte BYTE_QUOT = (byte) '"';
    final static byte BYTE_APOS = (byte) '\'';

    final static byte BYTE_A = (byte) 'a';
    final static byte BYTE_G = (byte) 'g';
    final static byte BYTE_L = (byte) 'l';
    final static byte BYTE_M = (byte) 'm';
    final static byte BYTE_O = (byte) 'o';
    final static byte BYTE_P = (byte) 'p';
    final static byte BYTE_Q = (byte) 'q';
    final static byte BYTE_S = (byte) 's';
    final static byte BYTE_T = (byte) 't';
    final static byte BYTE_U = (byte) 'u';
    final static byte BYTE_X = (byte) 'x';

    final static byte[] BYTES_CDATA_START = getAscii("");
    final static byte[] BYTES_COMMENT_START = getAscii("");

    final static byte[] BYTES_XMLDECL_START = getAscii("= SURR1_FIRST && ch <= SURR2_LAST) {
            // Can't start with surr2...
            if (ch >= SURR2_FIRST) {
                reportNwfName("Illegal surrogate pairing in name: first character ("+XmlChars.getCharDesc(ch)+") not valid surrogate first character");
            }
            // Unpaired? Not good either
            if (len < 2) {
                reportNwfName("Illegal surrogate pairing in name: incomplete surrogate (missing second half)");
            }
            // Otherwise let's decode code point for verification
            ch = calcSurrogate(ch, part.charAt(1), " in name");
            i = 2; // and skip second half of surrogate pair
        } else {
            i = 1;
        }
        if (!XmlChars.is10NameStartChar(ch)) {
            reportNwfName("Invalid name start character "+XmlChars.getCharDesc(ch)+" (name \""+part+"\")");
        }
        // Also, names can not use entities, must be natively expressable
        final int lastValid = getHighestEncodable();
        if (ch > lastValid) {
            reportNwfName("Illegal name start character "+XmlChars.getCharDesc(ch)+" (name \""+part+"\"): can not be expressed using effective encoding ("+_config.getActualEncoding()+")");
        }

        for (; i < len; ++i) {
            ch = part.charAt(i);
            if (ch >= SURR1_FIRST && ch <= SURR2_LAST) {
                // Can't start with surr2...
                if (ch >= SURR2_FIRST) {
                    reportNwfName("Illegal surrogate pairing in name: character at #"+i+" ("+XmlChars.getCharDesc(ch)+") not valid surrogate first character");
                }
                // Unpaired? Not good either
                ++i;
                if (i >= len) {
                    reportNwfName("Illegal surrogate pairing in name: name ends with incomplete surrogate pair");
                }
                // Otherwise let's decode code point for verification
                ch = calcSurrogate(ch, part.charAt(i), " in name");
            }
            if (ch > lastValid) {
                reportNwfName("Illegal name character "+XmlChars.getCharDesc(ch)+" (name \""+part+"\", index #"+i+"): can not be expressed using effective encoding ("+_config.getActualEncoding()+")");
            }
            if (!XmlChars.is10NameChar(ch)) {
                reportNwfName("Invalid name character "+XmlChars.getCharDesc(ch)+") in name (\""+part+"\"), index #"+i);
            }
        }
    }

    /*
    /**********************************************************************
    /* Abstract methods
    /**********************************************************************
     */

    /**
     * Method called to output a composite character, result of
     * combining 2 surrogate characters.
     */
    abstract protected void outputSurrogates(int surr1, int surr2)
        throws IOException, XMLStreamException;

    abstract protected void output2ByteChar(int ch)
        throws IOException, XMLStreamException;

    /**
     * Method called to output a character beyond basic 1- or 2-byte
     * encoding (code 0x0800 and above), without being able to use
     * character entities
     */
    abstract protected int outputStrictMultiByteChar(int ch, char[] cbuf, int inputOffset, int inputLen)
        throws IOException, XMLStreamException;

    /**
     * Method called to output a character beyond basic 1- or 2-byte
     * encoding (code 0x0800 and above); possibly using character
     * entities, if necessary
     */
    abstract protected int outputMultiByteChar(int ch, char[] cbuf, int inputOffset, int inputLen)
        throws IOException, XMLStreamException;

    /*
    /**********************************************************************
    /* Low-level (pass-through) methods
    /**********************************************************************
     */

    @Override
    public void _releaseBuffers()
    {
        super._releaseBuffers();
        if (_outputBuffer != null) {
            _config.freeFullBBuffer(_outputBuffer);
            _outputBuffer = null;
        }
        if (_copyBuffer != null) {
            _config.freeFullCBuffer(_copyBuffer);
            _copyBuffer = null;
        }
    }

    @Override
    public void _closeTarget(boolean doClose) throws IOException
    {
        if (_out != null) { // just in case it's called multiple times
            if (doClose) {
                _out.close();
                _out = null;
            }
        }
    }

    @Override
    public final void flush() throws IOException
    {
        if (_out != null) {
            flushBuffer();
            _out.flush();
        }
    }

    /*
    /**********************************************************************
    /* Write methods, raw
    /**********************************************************************
     */

    @Override
    public final void writeRaw(String text, int offset, int len)
        throws IOException, XMLStreamException
    {
        while (len > 0) {
            char[] buf = _copyBuffer;
            final int blen = buf.length;
            final int len2 = (len < blen) ? len : blen;
            text.getChars(offset, offset+len2, buf, 0);
            writeRaw(buf, 0, len2);
            offset += len2;
            len -= len2;
        }
    }

    /**
     * This method is heavily encoding-dependant, so it needs
     * to be deferred to sub-classes
     */
    @Override
    public abstract void writeRaw(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException;

    /*
    /**********************************************************************
    /* Write methods, elements
    /**********************************************************************
     */

    @Override
    public final void writeStartTagStart(WName name)
        throws IOException, XMLStreamException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }
        int ptr = _outputPtr;
        if ((ptr + name.serializedLength() + 1) > _outputBufferLen) {
            writeName(BYTE_LT, name); // let's offline slow case
            return;
        }
        byte[] bbuf = _outputBuffer;
        bbuf[ptr++] = BYTE_LT;
        ptr += name.appendBytes(bbuf, ptr);
        _outputPtr = ptr;
    }

    @Override
    public final void writeStartTagEnd()
        throws IOException, XMLStreamException
    {
        // inlined writeRaw(), gets called so often
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }
        if (_outputPtr >= _outputBufferLen) {
            flushBuffer();
        }
        _outputBuffer[_outputPtr++] = BYTE_GT;
    }    

    @Override
    public void writeStartTagEmptyEnd()
        throws IOException
    {
        int ptr = _outputPtr;
        if ((ptr + 2) > _outputBufferLen) {
            flushBuffer();
            ptr = _outputPtr;
        }
        byte[] bbuf = _outputBuffer;
        bbuf[ptr++] = BYTE_SLASH;
        bbuf[ptr++] = BYTE_GT;
        _outputPtr = ptr;
    }

    @Override
    public final void writeEndTag(WName name)
        throws IOException, XMLStreamException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }
        int ptr = _outputPtr;
        int len = name.serializedLength();
        if ((ptr + len + 3) > _outputBufferLen) {
            flushBuffer();
            // name longer than the buffer? can write it straight out
            if ((len + 3) > _outputBufferLen) {
                _out.write(BYTE_LT);
                _out.write(BYTE_SLASH);
                name.writeBytes(_out);
                // Last byte will fit in buffer ok though
                _outputBuffer[_outputPtr++] = BYTE_GT;
                return;
            }
            ptr = _outputPtr;
        }
        byte[] bbuf = _outputBuffer;
        bbuf[ptr++] = BYTE_LT;
        bbuf[ptr++] = BYTE_SLASH;
        ptr += name.appendBytes(bbuf, ptr);
        bbuf[ptr++] = BYTE_GT;
        _outputPtr = ptr;
    }    

    /*
    /**********************************************************************
    /* Write methods, attributes
    /**********************************************************************
     */

    @Override
    public final void writeAttribute(WName name, String value)
        throws IOException, XMLStreamException
    {
        int vlen = value.length();
        // Let's off-line rare case:
        if (vlen > _copyBufferLen) {
            writeLongAttribute(name, value, vlen);
            return;
        }
        char[] cbuf = _copyBuffer;
        if (vlen > 0) {
            value.getChars(0, vlen, cbuf, 0);
        }
        writeAttribute(name, cbuf, 0, vlen);
    }

    @Override
    public final void writeAttribute(WName name, char[] vbuf, int offset, int vlen)
        throws IOException, XMLStreamException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }

        // Enough room?
        int ptr = _outputPtr;
        byte[] bbuf = _outputBuffer;

        if ((ptr + name.serializedLength()) >= _outputBufferLen) {
            writeName(BYTE_SPACE, name);
            ptr = _outputPtr;
        } else {
            bbuf[ptr++] = BYTE_SPACE;
            ptr += name.appendBytes(bbuf, ptr);
        }

        // And then the value
        if ((ptr + 3 + vlen) > _outputBufferLen) { // won't fit
            _outputPtr = ptr;
            flushBuffer();
            bbuf[_outputPtr++] = BYTE_EQ;
            bbuf[_outputPtr++] = BYTE_QUOT;
            if ((_outputPtr + vlen + 1) > _outputBufferLen) {
                writeAttrValue(vbuf, offset, vlen);
                writeRaw(BYTE_QUOT);
                return;
            }
            ptr = _outputPtr;
        } else {
            bbuf[ptr++] = BYTE_EQ;
            bbuf[ptr++] = BYTE_QUOT;
        }
        if (vlen > 0) {
            ptr = fastWriteAttrValue(vbuf, offset, vlen, bbuf, ptr);
        }
        bbuf[ptr++] = BYTE_QUOT;
        _outputPtr = ptr;
    }

    /**
     * Method called to copy given attribute value, when it's known that
     * it will completely fit in the output buffer without further checks
     */
    protected final int fastWriteAttrValue(char[] vbuf, int offset, int len,
                                           byte[] bbuf, int ptr)
        throws IOException, XMLStreamException
    {
        len += offset; // now marks the end

        main_loop:
        while (offset < len) {
            final int[] charTypes = _charTypes.ATTR_CHARS;

            inner_loop:
            while (true) {
                int ch = (int) vbuf[offset];
                if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                    break inner_loop;
                }
                if (charTypes[ch] != XmlCharTypes.CT_OK) {
                    // Here we do want to quote linefeed, too
                    break;
                }
                bbuf[ptr++] = (byte)ch;
                if (++offset >= len) {
                    break main_loop;
                }
            }

            _outputPtr = ptr;
            // Ok, so what did we hit? Invalid, or quotable?
            int ch = (int) vbuf[offset++];
            if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
                switch (charTypes[ch]) {
                case CT_INVALID:
                    reportInvalidChar(ch);
                    break;
                case CT_MULTIBYTE_2:
                    output2ByteChar(ch);
                    break;
                default:
                    writeAsEntity(ch);
                }
            } else {
                offset = outputMultiByteChar(ch, vbuf, offset, len);
            }

            /* Ok, need to mess with buffers a bit: plus, it's possible
             * that we may even need to flush the buffer as the guarantee
             * for fitting may not necessarily hold (but it will after
             * flushing)
             * Still enough room? (also for following quote -- caller
             * relies on that -- that's why >=, not >)
             */
            if ((len - offset) >= (_outputBufferLen - _outputPtr)) {
                flushBuffer();
            }
            ptr = _outputPtr;
        }

        return ptr;
    }

    protected final void writeAttrValue(char[] vbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        if (_surrogate != 0) {
            outputSurrogates(_surrogate, vbuf[offset]);
            ++offset;
            --len;
        }

        len += offset; // now marks the end

        main_loop:
        while (offset < len) {
            final int[] charTypes = _charTypes.ATTR_CHARS;

            inner_loop:
            while (true) {
                int ch = (int) vbuf[offset];
                if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                    break inner_loop;
                }
                if (charTypes[ch] != XmlCharTypes.CT_OK) {
                    break;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte)ch;
                if (++offset >= len) {
                    break main_loop;
                }
            }

            // Ok, so what did we hit?
            int ch = (int) vbuf[offset++];
            if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
                switch (charTypes[ch]) {
                case CT_INVALID:
                    reportInvalidChar(ch);
                case CT_MULTIBYTE_2:
                    output2ByteChar(ch);
                    break;
                default:
                    writeAsEntity(ch);
                    break;
                }
            } else {
                offset = outputMultiByteChar(ch, vbuf, offset, len);
                continue main_loop;
            }
        }
    }

    protected final void writeLongAttribute(WName name, String value, int vlen)
        throws IOException, XMLStreamException
    {
        writeRaw(BYTE_SPACE);
        int nlen = name.serializedLength();
        if ((_outputPtr + nlen) > _outputBufferLen) {
            flushBuffer();
            if (nlen > _outputBufferLen) {
                name.writeBytes(_out);
            } else {
                _outputPtr += name.appendBytes(_outputBuffer, _outputPtr);
            }
        } else {
            _outputPtr += name.appendBytes(_outputBuffer, _outputPtr);
        }
        writeRaw(BYTE_EQ, BYTE_QUOT);
        int offset = 0;
        while (vlen > 0) {
            char[] buf = _copyBuffer;
            final int blen = buf.length;
            int len2 = (vlen < blen) ? vlen : blen;
            value.getChars(offset, offset+len2, buf, 0);
            writeAttrValue(buf, 0, len2);
            offset += len2;
            vlen -= len2;
        }
        writeRaw(BYTE_QUOT);
    }

    /*
    /**********************************************************************
    /* Write methods, names
    /**********************************************************************
     */

    protected final void writeName(WName name)
        throws IOException
    {
        int ptr = _outputPtr;
        int len = name.serializedLength();
        if ((ptr + len) > _outputBufferLen) {
            flushBuffer();
            // name longer than the buffer? can write it straight out
            if (len >= _outputBufferLen) {
                name.writeBytes(_out);
                return;
            }
            ptr = _outputPtr;
        }
        ptr += name.appendBytes(_outputBuffer, ptr);
        _outputPtr = ptr;
    }

    protected final void writeName(byte preChar, WName name)
        throws IOException
    {
        flushBuffer();
        // name longer than the buffer? Need to write it straight out
        int len = name.serializedLength();
        if (len >= _outputBufferLen) {
            _out.write(preChar);
            name.writeBytes(_out);
            return;
        }
        int ptr = _outputPtr;
        byte[] buf = _outputBuffer;
        buf[ptr++] = preChar;
        ptr += name.appendBytes(buf, ptr);
        _outputPtr = ptr;
    }

    protected final void writeName(WName name, byte postChar)
        throws IOException
    {
        flushBuffer();
        // name longer than the buffer? Need to write it straight out
        if (name.serializedLength() >= _outputBufferLen) {
            name.writeBytes(_out);
            _out.write(postChar);
            return;
        }
        int ptr = _outputPtr;
        byte[] buf = _outputBuffer;
        ptr += name.appendBytes(buf, ptr);
        buf[ptr++] = postChar;
        _outputPtr = ptr;
    }

    private final void writeAttrNameEqQ(WName name)
        throws IOException, XMLStreamException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }
        // Enough room for ' attr="' part?
        int nlen = name.serializedLength();
        int ptr = _outputPtr;
        if ((ptr + nlen + 3) >= _outputBufferLen) {
            flushBuffer();
            ptr = _outputPtr;
            // Still won't fit in buffer? Let's output pieces separately
            if ((ptr + nlen + 3) >= _outputBufferLen) {
                writeName(BYTE_SPACE, name);
                writeRaw(BYTE_EQ);
                writeRaw(BYTE_QUOT);
                return;
            }
        }
        byte[] bbuf = _outputBuffer;
        bbuf[ptr++] = BYTE_SPACE;
        ptr += name.appendBytes(bbuf, ptr);
        bbuf[ptr++] = BYTE_EQ;
        bbuf[ptr++] = BYTE_QUOT;
        _outputPtr = ptr;
    }

    /*
    /**********************************************************************
    /* Write methods, textual content
    /**********************************************************************
     */

    /**
     * @return -1 to indicate succesful write, or index of the problematic
     *   character in input (first ']' from "]]>" sequence, in non-fixing
     *   mode)
     */
    @Override
    public int writeCData(String data)
        throws IOException, XMLStreamException
    {
        writeCDataStart(); // will check surrogates
        int len = data.length();
        int offset = 0;
        while (len > 0) {
            char[] buf = _copyBuffer;
            int blen = buf.length;

            // Can write all the rest?
            if (blen > len) {
                blen = len;
            }
            // Nope, can only do part
            data.getChars(offset, offset+blen, buf, 0);
            int cix = writeCDataContents(buf, 0, blen);
            if (cix >= 0) {
                return offset+cix;
            }
            offset += blen;
            len -= blen;
        }
        writeCDataEnd(); // will check surrogates
        return -1;
    }

    @Override
    public int writeCData(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        writeCDataStart(); // will check surrogates
        int ix = writeCDataContents(cbuf, offset, len);
        if (ix < 0) {
            writeCDataEnd(); // will check surrogates
        }
        return ix;
    }    

    protected int writeCDataContents(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        /* Unlike with writeCharacters() and fastWriteName(), let's not
         * worry about split buffers here: this is unlikely to become
         * performance bottleneck. This allows keeping it simple; and
         * should it matter, we could start doing fast version here
         * as well.
         */
        len += offset; // now marks the end

        main_loop:
        while (offset < len) {
            final int[] charTypes = _charTypes.OTHER_CHARS;

            inner_loop:
            while (true) {
                int ch = (int) cbuf[offset];
                if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                    break inner_loop;
                }
                if (charTypes[ch] != XmlCharTypes.CT_OK) {
                    break inner_loop;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte) ch;
                if (++offset >= len) {
                    break main_loop;
                }
            }

            // Ok, so what did we hit?
            int ch = (int) cbuf[offset++];
            if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
                switch (charTypes[ch]) {
                case CT_INVALID:
                    reportInvalidChar(ch);
                case CT_WS_CR: // No way to escape within CDATA
                case CT_WS_LF:
                    ++_locRowNr;
                    break;
                case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
                    reportFailedEscaping("CDATA", ch);
                case CT_MULTIBYTE_2:
                    // To off-line or not?
                    output2ByteChar(ch);
                    continue main_loop;
                case CT_RBRACKET:
                    /* !!! TBI: Need to split CData? Can do, but what about
                     *   content split around buffer boundary?
                     */
                    if (offset < len && cbuf[offset] == ']') {
                        if ((offset+1) < len && cbuf[offset+1] == '>') {
                            // Ok, need to output ']]' first, then end
                            offset += 2;
                            writeRaw(BYTE_RBRACKET, BYTE_RBRACKET);
                            writeCDataEnd();
                            // Then new start, and '>'
                            writeCDataStart();
                            writeRaw(BYTE_GT);
                        }
                        continue main_loop;
                    }
                    break;
                default: // Everything else should be outputtable as is
                    break;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte)ch;
            } else { // beyond 2-byte encodables; 3-byte, surrogates?
                offset = outputMultiByteChar(ch, cbuf, offset, len);
            }
        }
        return -1;
    }    

    @Override
    public final void writeCharacters(String text)
        throws IOException, XMLStreamException
    {
        final int len = text.length();

        // Not so common case, let's offline:
        if (len > _copyBufferLen) {
            longWriteCharacters(text);
            return;
        }
        if (len > 0) {
            char[] buf = _copyBuffer;
            text.getChars(0, len, buf, 0);
            writeCharacters(buf, 0, len);
        }
    }

    private final void longWriteCharacters(String text)
        throws IOException, XMLStreamException
    {
        int offset = 0;
        int len = text.length();
        char[] buf = _copyBuffer;

        do {
            final int blen = buf.length;
            int len2 = (len < blen) ? len : blen;
            text.getChars(offset, offset+len2, buf, 0);
            writeCharacters(buf, 0, len2);
            offset += len2;
            len -= len2;
        } while (len > 0);
    }

    @Override
    public final void writeCharacters(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        if (_surrogate != 0) {
            outputSurrogates(_surrogate, cbuf[offset]);
            ++offset;
            --len;
        }

        // Ok, let's offline (what's sure to be) slow case first:
        // (with multi-byte chars, others may be, too).
        int ptr = _outputPtr;
        if ((ptr + len) > _outputBufferLen) {
            writeSplitCharacters(cbuf, offset, len);
            return;
        }
        len += offset; // now marks the end

        main_loop:
        while (offset < len) {
            final int[] charTypes = _charTypes.TEXT_CHARS;

            inner_loop:
            while (true) {
                int ch = (int) cbuf[offset];
                if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                    break inner_loop;
                }
                if (charTypes[ch] != XmlCharTypes.CT_OK) {
                    // This may look weird, but profiling showed that handling of LFs
                    // for indentation has measurable effect; plus, that checking it
                    // here will not slow down inner loop either
                    if (ch != '\n') {
                        break inner_loop;
                    }
                    ++_locRowNr;
                }
                _outputBuffer[ptr++] = (byte) ch;
                if (++offset >= len) {
                    break main_loop;
                }
            }
            // Ok, so what did we hit?
            int ch = (int) cbuf[offset++];
            if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
                switch (charTypes[ch]) {
                case CT_INVALID:
                    reportInvalidChar(ch);
                case CT_WS_CR:
                    // !!! TBI: line count
                    // Also, CR to be quoted?
                    if (_config.willEscapeCR()) {
                        _outputPtr = ptr;
                        writeAsEntity(ch);
                        break;
                    }
                    _outputBuffer[ptr++] = (byte)ch;
                    ++_locRowNr;
                    continue main_loop;
                case CT_WS_LF: // never occurs (handled in loop), but don't want to leave gaps
                    break;
                case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
                case CT_LT:
                case CT_AMP:
                    _outputPtr = ptr;
                    writeAsEntity(ch);
                    break;
                case CT_MULTIBYTE_2:
                    // To off-line or not?
                    _outputPtr = ptr;
                    output2ByteChar(ch);
                    break;
                case CT_RBRACKET: // may need to quote as well...
                    // Let's not quote if known not to be followed by '>'
                    if (offset >= len || cbuf[offset] == '>') {
                        _outputPtr = ptr;
                        writeAsEntity(ch);
                        break;
                    }
                    //  fall through
                default:
                    _outputBuffer[ptr++] = (byte)ch;
                    continue main_loop;
                }
            } else { // beyond 2-byte encodables; 3-byte, surrogates?
                _outputPtr = ptr;
                offset = outputMultiByteChar(ch, cbuf, offset, len);
            }

            /* At this point, it's not guaranteed any more that we'll
             * be able to fit all output into buffer without checks.
             * Let's verify: in the worst case, we'll just flush
             * whatever we had, to gain more room.
             */
            if ((len - offset) >= (_outputBufferLen - _outputPtr)) {
                flushBuffer();
            }
            ptr = _outputPtr;
        }
        _outputPtr = ptr;
    }

    /**
     * This method is called when it is possible that the output
     * may cross the output buffer boundary. Because of this, code
     * has to add more boundary checks.
     */
    private final void writeSplitCharacters(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        // Note: caller handled surrogate already

        len += offset; // now marks the end

        main_loop:
        while (offset < len) {
            final int[] charTypes = _charTypes.TEXT_CHARS;

            inner_loop:
            while (true) {
                int ch = (int) cbuf[offset];
                if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                    break inner_loop;
                }
                if (charTypes[ch] != XmlCharTypes.CT_OK) {
                    if (ch != '\n') {
                        break inner_loop;
                    }
                    ++_locRowNr;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte)ch;
                if (++offset >= len) {
                    break main_loop;
                }
            }

            // Ok, so what did we hit?
            int ch = (int) cbuf[offset++];
            if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
                switch (charTypes[ch]) {
                case CT_INVALID:
                    reportInvalidChar(ch);
                case CT_WS_CR:
                    // !!! TBI: line count
                    // Also, CR to be quoted?
                    if (_config.willEscapeCR()) {
                        writeAsEntity(ch);
                        continue main_loop;
                    }
                    ++_locRowNr;
                    break;
                case CT_WS_LF: // can not occur, handled above, but let's keep sequence
                    break;
                case CT_OUTPUT_MUST_QUOTE:
                case CT_LT:
                case CT_AMP:
                    writeAsEntity(ch);
                    continue main_loop;
                case CT_MULTIBYTE_2: // 3, 4 and N can never occur
                    // To off-line or not?
                    output2ByteChar(ch);
                    continue main_loop;
                case CT_RBRACKET: // may need to quote as well...
                    // Let's not quote if known not to be followed by '>'
                    if (offset >= len || cbuf[offset] == '>') {
                        writeAsEntity(ch);
                        continue main_loop;
                    }
                    break;
                default:
                    break;
                }
            } else { // beyond 2-byte encodables; 3-byte, surrogates?
                offset = outputMultiByteChar(ch, cbuf, offset, len);
                continue;
            }

            if (_outputPtr >= _outputBufferLen) {
                flushBuffer();
            }
            _outputBuffer[_outputPtr++] = (byte)ch;
        }
    }    

    /*
    /**********************************************************************
    /* Write methods, typed (element) content
    /**********************************************************************
     */

    @Override
    public void writeTypedValue(AsciiValueEncoder enc)
        throws IOException, XMLStreamException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }
        int free = _outputBufferLen - _outputPtr;
        if (enc.bufferNeedsFlush(free)) {
            flush();
        }
        while (true) {
            _outputPtr = enc.encodeMore(_outputBuffer, _outputPtr, _outputBufferLen);
            if (enc.isCompleted()) {
                break;
            }
            flushBuffer();
        }
    }

    @Override
    public final void writeAttribute(WName name, AsciiValueEncoder enc)
        throws IOException, XMLStreamException
    {
        writeAttrNameEqQ(name);

        // (inlined writeTypedVAlue()...)

        int free = _outputBufferLen - _outputPtr;
        if (enc.bufferNeedsFlush(free)) {
            flush();
        }
        while (true) {
            _outputPtr = enc.encodeMore(_outputBuffer, _outputPtr, _outputBufferLen);
            if (enc.isCompleted()) {
                break;
            }
            flushBuffer();
        }

        // (end of inlined writeTypedVAlue()...)

        if (_outputPtr >= _outputBufferLen) {
            flushBuffer();
        }
        _outputBuffer[_outputPtr++] = BYTE_QUOT;
    }

    /*
    /**********************************************************************
    /* Write methods, other
    /**********************************************************************
     */

    /**
     * Method that will try to output the content as specified. If
     * the content passed in has embedded "--" in it, it will either
     * add an intervening space between consequtive hyphens (if content
     * fixing is enabled), or return the offset of the first hyphen in
     * multi-hyphen sequence.
     */
    @Override
    public int writeComment(String data) throws IOException, XMLStreamException
    {
        writeCommentStart();

        int len = data.length();
        int offset = 0;
        while (len > 0) {
            char[] buf = _copyBuffer;
            final int blen = buf.length;
            int len2 = (len < blen) ? len : blen;
            // Nope, can only do part
            data.getChars(offset, offset+len2, buf, 0);
            int cix = writeCommentContents(buf, 0, len2);
            if (cix >= 0) {
                return offset+cix;
            }
            offset += blen;
            len -= blen;
        }
        writeCommentEnd();
        return -1;
    }

    /**
     * Note: the only way to fix comment contents is to inject a space
     * to split up consequtive '--' (or '-' that ends a comment).
     */
    protected int writeCommentContents(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        /* Unlike with writeCharacters() and fastWriteName(), let's not
         * worry about split buffers here: this is unlikely to become
         * performance bottleneck. This allows keeping it simple; and
         * should it matter, we could start doing fast version here
         * as well.
         */
        len += offset; // now marks the end

        main_loop:
        while (offset < len) {
            final int[] charTypes = _charTypes.OTHER_CHARS;

            inner_loop:
            while (true) {
                int ch = (int) cbuf[offset];
                if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                    break inner_loop;
                }
                if (charTypes[ch] != XmlCharTypes.CT_OK) {
                    break inner_loop;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte) ch;
                if (++offset >= len) {
                    break main_loop;
                }
            }

            // Ok, so what did we hit?
            int ch = (int) cbuf[offset++];
            if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
                switch (charTypes[ch]) {
                case CT_INVALID:
                    reportInvalidChar(ch);
                case CT_WS_CR: // No way to escape within CDATA
                case CT_WS_LF:
                    ++_locRowNr;
                    break;
                case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
                    reportFailedEscaping("comment", ch);
                case CT_MULTIBYTE_2:
                    // To off-line or not?
                    output2ByteChar(ch);
                    continue main_loop;
                case CT_HYPHEN:
                    // No need if followed by non hyphen
                    if (offset < len && cbuf[offset] != '-') {
                        break;
                    }
                    // Two hyphens, or hyphen at end; must append a space
                    writeRaw(BYTE_HYPHEN, BYTE_SPACE);
                    continue main_loop;

                default: // Everything else should be outputtable as is
                    break;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte)ch;
            } else { // beyond 2-byte encodables; 3-byte, surrogates?
                offset = outputMultiByteChar(ch, cbuf, offset, len);
            }
        }
        return -1;
    }

    @Override
    public void writeDTD(String data)
        throws IOException, XMLStreamException
    {
        // !!! TBI: Check for char validity, similar to other methods?
        writeRaw(data, 0, data.length());
    }

    @Override
    public void writeDTD(WName rootName, String systemId, String publicId,
                         String internalSubset)
        throws IOException, XMLStreamException
    {
        // !!! TBI
        //if (true) throw new RuntimeException("DTD not implemented yet");
    }

    protected int writePIData(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        /* Unlike with writeCharacters() and fastWriteName(), let's not
         * worry about split buffers here: this is unlikely to become
         * performance bottleneck. This allows keeping it simple; and
         * should it matter, we could start doing fast version here
         * as well.
         */
        len += offset; // now marks the end

        main_loop:
        while (offset < len) {
            final int[] charTypes = _charTypes.OTHER_CHARS;

            inner_loop:
            while (true) {
                int ch = (int) cbuf[offset];
                if (ch >= OutputCharTypes.MAIN_TABLE_SIZE) {
                    break inner_loop;
                }
                if (charTypes[ch] != XmlCharTypes.CT_OK) {
                    break inner_loop;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte) ch;
                if (++offset >= len) {
                    break main_loop;
                }
            }

            // Ok, so what did we hit?
            int ch = (int) cbuf[offset++];
            if (ch < OutputCharTypes.MAIN_TABLE_SIZE) {
                switch (charTypes[ch]) {
                case CT_INVALID:
                    reportInvalidChar(ch);
                case CT_WS_CR: // No way to escape within CDATA
                case CT_WS_LF:
                    ++_locRowNr;
                    break;
                case CT_OUTPUT_MUST_QUOTE: // == MULTIBYTE_N value
                    reportFailedEscaping("processing instruction", ch);
                case CT_MULTIBYTE_2:
                    // To off-line or not?
                    output2ByteChar(ch);
                    continue main_loop;
                case CT_QMARK:
                    // Problem, if we have '?>'
                    if (offset < len && cbuf[offset] == '>') {
                        return offset;
                    }
                    break;
                default: // Everything else should be outputtable as is
                    break;
                }
                if (_outputPtr >= _outputBufferLen) {
                    flushBuffer();
                }
                _outputBuffer[_outputPtr++] = (byte)ch;
            } else { // beyond 2-byte encodables; 3-byte, surrogates?
                offset = outputMultiByteChar(ch, cbuf, offset, len);
            }
        }
        return -1;
    }

    @Override
    public void writeEntityReference(WName name)
        throws IOException, XMLStreamException
    {
        writeRaw(BYTE_AMP); // will check surrogates
        writeName(name);
        writeRaw(BYTE_SEMICOLON);
    }

    @Override
    public int writePI(WName target, String data)
        throws IOException, XMLStreamException
    {
        writeRaw(BYTE_LT, BYTE_QMARK);
        writeName(target);
        if (data != null) {
            // Need to split etc
            writeRaw(BYTE_SPACE);

            int len = data.length();
            int offset = 0;
            while (len > 0) {
                char[] buf = _copyBuffer;
                int blen = buf.length;

                // Can write all the rest?
                if (blen > len) {
                    blen = len;
                }
                // Nope, can only do part
                data.getChars(offset, offset+blen, buf, 0);
                int cix = writePIData(buf, 0, blen);
                if (cix >= 0) {
                    return offset+cix;
                }
                offset += blen;
                len -= blen;
            }
        }
        writeRaw(BYTE_QMARK, BYTE_GT);
        return -1;
    }

    @Override
    public final void writeSpace(String data)
        throws IOException, XMLStreamException
    {
        int len = data.length();
        int offset = 0;

        while (len > 0) {
            char[] buf = _copyBuffer;
            final int blen = buf.length;
            int len2 = (len < blen) ? len : blen;
            data.getChars(offset, offset+len2, buf, 0);
            writeSpace(buf, 0, len2);
            offset += len2;
            len -= len2;
        }
    }

    @Override
    public void writeSpace(char[] cbuf, int offset, int len)
        throws IOException, XMLStreamException
    {
        if (_out == null) {
            return;
        }
        if (_surrogate != 0) { // can this actually happen?
            reportNwfContent(ErrorConsts.WERR_SPACE_CONTENT, (int)_surrogate, offset-1);
        }

        len += offset; // now marks the end
        while (offset < len) {
            char ch = cbuf[offset++];
            if (ch > 0x0020) {
                if (!_config.isXml11() || (ch != 0x0085 && ch != 0x2028)) {
                    reportNwfContent(ErrorConsts.WERR_SPACE_CONTENT, (int)ch, offset-1);
                }
            }
            if (_outputPtr >= _outputBufferLen) {
                flushBuffer();
            }
            // !!! Line counts?
            _outputBuffer[_outputPtr++] = (byte)ch;
        }
    }

    @Override
    public void writeXmlDeclaration(String version, String encoding, String standalone)
        throws IOException, XMLStreamException
    {
        writeRaw(BYTES_XMLDECL_START); // will check surrogates
        // !!! TBI: check validity
        writeRaw(version, 0, version.length());
        writeRaw(BYTE_APOS);
        
        if (encoding != null && encoding.length() > 0) {
            writeRaw(BYTES_XMLDECL_ENCODING);
            // !!! TBI: check validity
            writeRaw(encoding, 0, encoding.length());
            writeRaw(BYTE_APOS);
        }
        if (standalone != null) {
            writeRaw(BYTES_XMLDECL_STANDALONE);
            // !!! TBI: check validity
            writeRaw(standalone, 0, standalone.length());
            writeRaw(BYTE_APOS);
        }
        writeRaw(BYTE_QMARK, BYTE_GT);
    }    

    /*
    /**********************************************************************
    /* Shared helper output methods
    /**********************************************************************
     */

    protected final void writeCDataStart()
        throws IOException
    {
        writeRaw(BYTES_CDATA_START);
    }

    protected final void writeCDataEnd()
        throws IOException
    {
        writeRaw(BYTES_CDATA_END);
    }

    protected final void writeCommentStart()
        throws IOException
    {
        writeRaw(BYTES_COMMENT_START);
    }

    protected final void writeCommentEnd()
        throws IOException
    {
        writeRaw(BYTES_COMMENT_END);
    }

    /*
    /**********************************************************************
    /* Write methods, raw (unprocessed) output
    /**********************************************************************
     */

    protected final void writeRaw(byte b)
        throws IOException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }
        if (_outputPtr >= _outputBufferLen) {
            flushBuffer();
        }
        _outputBuffer[_outputPtr++] = b;
    }

    protected final void writeRaw(byte b1, byte b2)
        throws IOException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }
        if ((_outputPtr + 1) >= _outputBufferLen) {
            flushBuffer();
        }
        _outputBuffer[_outputPtr++] = b1;
        _outputBuffer[_outputPtr++] = b2;
    }

    protected final void writeRaw(byte[] buf)
        throws IOException
    {
        writeRaw(buf, 0, buf.length);
    }

    protected final void writeRaw(byte[] buf, int offset, int len)
        throws IOException
    {
        if (_surrogate != 0) {
            throwUnpairedSurrogate();
        }

        int ptr = _outputPtr;
        // Common case: fits right in the buffer
        if ((ptr + len) <= _outputBufferLen) {
            System.arraycopy(buf, offset, _outputBuffer, ptr, len);
            _outputPtr += len;
            return;
        }

        // If not, should we just flush + write?
        if (ptr > 0) {
            flush();
            ptr = _outputPtr;
        }
        if (len < SMALL_WRITE) {
            System.arraycopy(buf, offset, _outputBuffer, ptr, len);
            _outputPtr += len;
        } else {
            _out.write(buf, offset, len);
        }
    }

    /*
    /**********************************************************************
    /* Internal methods, problem reporting
    /**********************************************************************
     */

    protected final void throwUnpairedSurrogate()
        throws IOException
    {
        int surr = _surrogate;
        _surrogate = 0;
        throwUnpairedSurrogate(surr);
    }

    protected final void throwUnpairedSurrogate(int code)
        throws IOException
    {
        // Let's flush to make debugging easier
        flush();
        throw new IOException("Unpaired surrogate character (0x"+Integer.toHexString(code)+")");
    }

    /*
    /**********************************************************************
    /* Helper methods for sub-classes
    /**********************************************************************
     */

    protected final void flushBuffer()
        throws IOException
    {
        if ((_outputPtr > 0) && (_out != null)) {
            int ptr = _outputPtr;
            // Need to update location info, to keep it in sync
            _locPastChars += ptr;
            _locRowStartOffset -= ptr;
            _outputPtr = 0;
            _out.write(_outputBuffer, 0, ptr);
        }
    }

    protected final void writeAsEntity(int c)
        throws IOException
    {
        // Quickie check to avoid 

        byte[] buf = _outputBuffer;
        int ptr = _outputPtr;
        if ((ptr + 10) >= buf.length) { // &#x [up to 6 hex digits] ;
            flushBuffer();
            ptr = _outputPtr;
        }
        buf[ptr++] = BYTE_AMP;

        // Can use more optimal notation for 8-bit ascii stuff:
        if (c < 256) {
            /* Also; although not really mandatory, let's also
             * use pre-defined entities where possible.
             */
            if (c == '&') {
                buf[ptr++] = BYTE_A;
                buf[ptr++] = BYTE_M;
                buf[ptr++] = BYTE_P;
            } else if (c == '<') {
                buf[ptr++] = BYTE_L;
                buf[ptr++] = BYTE_T;
            } else if (c == '>') {
                buf[ptr++] = BYTE_G;
                buf[ptr++] = BYTE_T;
            } else if (c == '\'') {
                buf[ptr++] = BYTE_A;
                buf[ptr++] = BYTE_P;
                buf[ptr++] = BYTE_O;
                buf[ptr++] = BYTE_S;
            } else if (c == '"') {
                buf[ptr++] = BYTE_Q;
                buf[ptr++] = BYTE_U;
                buf[ptr++] = BYTE_O;
                buf[ptr++] = BYTE_T;
            } else {
                buf[ptr++] = BYTE_HASH;
                buf[ptr++] = BYTE_X;
                // Can use shortest quoting for tab, cr, lf:
                if (c >= 16) {
                    int digit = (c >> 4);
                    buf[ptr++] = (byte) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
                    c &= 0xF;
                }
                buf[ptr++] = (byte) ((c < 10) ? ('0' + c) : (('a' - 10) + c));
            }
        } else {
            buf[ptr++] = BYTE_HASH;
            buf[ptr++] = BYTE_X;

            // Ok, let's write the shortest possible sequence then:
            int shift = 20;
            int origPtr = ptr;

            do {
                int digit = (c >> shift) & 0xF;
                if (digit > 0 || (ptr != origPtr)) {
                    buf[ptr++] = (byte) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit));
                }
                shift -= 4;
            } while (shift > 0);
            c &= 0xF;
            buf[ptr++] = (byte) ((c < 10) ? ('0' + c) : (('a' - 10) + c));
        }
        buf[ptr++] = ';';
        _outputPtr = ptr;
    }

    protected final int calcSurrogate(int surr1, int surr2, String context)
        throws XMLStreamException
    {
        // First is known to be valid, but how about the other?
        if (surr2 < SURR2_FIRST || surr2 > SURR2_LAST) {
            String msg = "Incomplete surrogate pair"+context+": first char 0x"+Integer.toHexString(surr1)+", second 0x"+Integer.toHexString(surr2);
            reportNwfContent(msg);
        }
        int c = 0x10000 + ((surr1 - SURR1_FIRST) << 10) + (surr2 - SURR2_FIRST);
        if (c > XmlConsts.MAX_UNICODE_CHAR) { // illegal, as per RFC 3629
            reportInvalidChar(c);
        }
        return c;
    }

    /*
    /**********************************************************************
    /* Internal helper methods
    /**********************************************************************
     */

    protected final static byte[] getAscii(String str)
    {
        int len = str.length();
        byte[] result = new byte[len];
        getAscii(str, result, 0);
        return result;
    }

    protected final static void getAscii(String str, byte[] result)
    {
        int len = str.length();
        for (int i = 0; i < len; ++i) {
            result[i] = (byte) str.charAt(i);
        }
    }

    protected final static void getAscii(String str, byte[] result, int offset)
    {
        int len = str.length();
        for (int i = 0; i < len; ++i) {
            result[offset+i] = (byte) str.charAt(i);
        }
    }
}