All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.java.com.ctc.wstx.sw.BufferingXmlWriter Maven / Gradle / Ivy

There is a newer version: 4.0.6
Show newest version
/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in file LICENSE, included with
 * the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.sw;

import java.io.*;

import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;

import org.codehaus.stax2.XMLStreamReader2;

import com.ctc.wstx.api.WriterConfig;
import com.ctc.wstx.io.CharsetNames;

/**
 * Concrete implementation of {@link XmlWriter} that will dispatch writes
 * to another writer (of type {@link java.io.Writer}, and will NOT handle
 * encoding. It will, however, do basic buffering such that the underlying
 * Writer need (and thus, should) not do buffering.
 *

* One design goal for this class is to avoid unnecessary buffering: since * there will be another Writer doing the actual encoding, amount of * buffering needed should still be limited. To this end, a threshold is * used to define what's the threshold of writes that we do want to * coalesce, ie. buffer. Writes bigger than this should in general proceed * without buffering. */ public final class BufferingXmlWriter extends XmlWriter implements XMLStreamConstants { /** * Let's use a typical default to have a compromise between large * enough chunks to output, and minimizing memory overhead. * Compared to encoding writers, buffer size can be bit smaller * since there's one more level of processing (at encoding), which * may use bigger buffering. */ final static int DEFAULT_BUFFER_SIZE = 1000; /** * Choosing threshold for 'small size' is a compromise between * excessive buffering (high small size), and too many fragmented * calls to the underlying writer (low small size). Let's just * use about 1/4 of the full buffer size. */ final static int DEFAULT_SMALL_SIZE = 256; /** * Highest valued character that may need to be encoded (minus charset * encoding requirements) when writing attribute values. */ protected final static int HIGHEST_ENCODABLE_ATTR_CHAR = (int)'<'; /** * Highest valued character that may need to be encoded (minus charset * encoding requirements) when writing attribute values. */ protected final static int HIGHEST_ENCODABLE_TEXT_CHAR = (int)'>'; /* //////////////////////////////////////////////// // Output state, buffering //////////////////////////////////////////////// */ /** * Actual Writer to use for outputting buffered data as appropriate. * During active usage, remains as the writer initially set; set to * null when this writer is closed. */ protected Writer mOut; protected char[] mOutputBuffer; /** * This is the threshold used to check what is considered a "small" * write; small writes will be buffered until resulting size will * be above the threshold. */ protected final int mSmallWriteSize; protected int mOutputPtr; protected int mOutputBufLen; /** * Actual physical stream that the writer is using, if known. */ protected OutputStream mUnderlyingStream; /* //////////////////////////////////////////////// // Encoding/escaping configuration //////////////////////////////////////////////// */ /** * First Unicode character (one with lowest value) after (and including) * which character entities have to be used. For */ private final int mEncHighChar; /** * Character that is considered to be the enclosing quote character; * for XML either single or double quote. */ final char mEncQuoteChar; /** * Entity String to use for escaping the quote character. */ final String mEncQuoteEntity; /* //////////////////////////////////////////////// // Life-cycle //////////////////////////////////////////////// */ /** * @param outs Underlying OutputStream that the writer * (out) is using, if known. Needed to support * (optional) access to the underlying stream */ public BufferingXmlWriter(Writer out, WriterConfig cfg, String enc, boolean autoclose, OutputStream outs) throws IOException { super(cfg, enc, autoclose); mOut = out; mOutputBuffer = cfg.allocFullCBuffer(DEFAULT_BUFFER_SIZE); mOutputBufLen = mOutputBuffer.length; mSmallWriteSize = DEFAULT_SMALL_SIZE; mOutputPtr = 0; mUnderlyingStream = outs; // Let's use double-quotes, as usual; alternative is apostrophe mEncQuoteChar = '"'; mEncQuoteEntity = """; /* Note: let's actually exclude couple of illegal chars for * unicode-based encoders. But we do not have to worry about * surrogates quite here, fortunately. */ int bitsize = guessEncodingBitSize(enc); mEncHighChar = ((bitsize < 16) ? (1 << bitsize) : 0xFFFE); } protected int getOutputPtr() { return mOutputPtr; } /* //////////////////////////////////////////////// // Raw access to underlying output objects //////////////////////////////////////////////// */ final protected OutputStream getOutputStream() { return mUnderlyingStream; } final protected Writer getWriter() { return mOut; } /* //////////////////////////////////////////////// // Low-level (pass-through) methods //////////////////////////////////////////////// */ public void close() throws IOException { if (mOut != null) { // just in case it's called multiple times flush(); Writer w = mOut; mOut = null; mUnderlyingStream = null; mTextWriter = null; mAttrValueWriter = null; char[] buf = mOutputBuffer; mOutputBuffer = null; mConfig.freeFullCBuffer(buf); if (mAutoCloseOutput) { w.close(); } } } public final void flush() throws IOException { if (mOut != null) { flushBuffer(); mOut.flush(); } } public void writeRaw(char[] cbuf, int offset, int len) throws IOException { if (mOut == null) { return; } // First; is the new request small or not? If yes, needs to be buffered if (len < mSmallWriteSize) { // yup // Does it fit in with current buffer? If not, need to flush first if ((mOutputPtr + len) > mOutputBufLen) { flushBuffer(); } System.arraycopy(cbuf, offset, mOutputBuffer, mOutputPtr, len); mOutputPtr += len; return; } // Ok, not a small request. But buffer may have existing content? int ptr = mOutputPtr; if (ptr > 0) { // If it's a small chunk, need to fill enough before flushing if (ptr < mSmallWriteSize) { /* Also, if we are to copy any stuff, let's make sure * that we either copy it all in one chunk, or copy * enough for non-small chunk, flush, and output remaining * non-small chink (former possible if chunk we were requested * to output is only slightly over 'small' size) */ int needed = (mSmallWriteSize - ptr); // Just need minimal copy: System.arraycopy(cbuf, offset, mOutputBuffer, ptr, needed); mOutputPtr = ptr + needed; len -= needed; offset += needed; } flushBuffer(); } // And then we'll just write whatever we have left: mOut.write(cbuf, offset, len); } public void writeRaw(String str) throws IOException { if (mOut == null) { return; } final int len = str.length(); // First; is the new request small or not? If yes, needs to be buffered if (len < mSmallWriteSize) { // yup // Does it fit in with current buffer? If not, need to flush first if ((mOutputPtr + len) >= mOutputBufLen) { flushBuffer(); } str.getChars(0, len, mOutputBuffer, mOutputPtr); mOutputPtr += len; return; } // Otherwise, let's just call the main method writeRaw(str, 0, len); } public void writeRaw(String str, int offset, int len) throws IOException { if (mOut == null) { return; } // First; is the new request small or not? If yes, needs to be buffered if (len < mSmallWriteSize) { // yup // Does it fit in with current buffer? If not, need to flush first if ((mOutputPtr + len) >= mOutputBufLen) { flushBuffer(); } str.getChars(offset, offset+len, mOutputBuffer, mOutputPtr); mOutputPtr += len; return; } // Ok, not a small request. But buffer may have existing content? int ptr = mOutputPtr; if (ptr > 0) { // If it's a small chunk, need to fill enough before flushing if (ptr < mSmallWriteSize) { /* Also, if we are to copy any stuff, let's make sure * that we either copy it all in one chunk, or copy * enough for non-small chunk, flush, and output remaining * non-small chunk (former possible if chunk we were requested * to output is only slightly over 'small' size) */ int needed = (mSmallWriteSize - ptr); // Just need minimal copy: str.getChars(offset, offset+needed, mOutputBuffer, ptr); mOutputPtr = ptr + needed; len -= needed; offset += needed; } flushBuffer(); } // And then we'll just write whatever we have left: mOut.write(str, offset, len); } /* //////////////////////////////////////////////// // "Trusted" low-level output methods //////////////////////////////////////////////// */ public final void writeCDataStart() throws IOException { fastWriteRaw(""); } public final void writeCommentStart() throws IOException { fastWriteRaw(""); } public final void writePIStart(String target, boolean addSpace) throws IOException { fastWriteRaw('<', '?'); fastWriteRaw(target); if (addSpace) { fastWriteRaw(' '); } } public final void writePIEnd() throws IOException { fastWriteRaw('?', '>'); } /* //////////////////////////////////////////////// // Higher-level output methods, text output //////////////////////////////////////////////// */ public int writeCData(String data) throws IOException { if (mCheckContent) { int ix = verifyCDataContent(data); if (ix >= 0) { if (!mFixContent) { // Can we fix it? return ix; } // Yes we can! (...Bob the Builder...) writeSegmentedCData(data, ix); return -1; } } fastWriteRaw(""); return -1; } public int writeCData(char[] cbuf, int offset, int len) throws IOException { if (mCheckContent) { int ix = verifyCDataContent(cbuf, offset, len); if (ix >= 0) { if (!mFixContent) { // Can we fix it? return ix; } // Yes we can! (...Bob the Builder...) writeSegmentedCData(cbuf, offset, len, ix); return -1; } } fastWriteRaw(""); return -1; } public void writeCharacters(String text) throws IOException { if (mOut == null) { return; } if (mTextWriter != null) { // custom escaping? mTextWriter.write(text); return; } int inPtr = 0; final int len = text.length(); int highChar = mEncHighChar; main_loop: while (true) { String ent = null; inner_loop: while (true) { if (inPtr >= len) { break main_loop; } char c = text.charAt(inPtr++); if (c <= HIGHEST_ENCODABLE_TEXT_CHAR) { if (c <= 0x0020) { if (c != ' ' && c != '\n' && c != '\t') { // fine as is if (c == '\r') { if (mEscapeCR) { break inner_loop; } } else { if (!mXml11 || c == 0) { throwInvalidChar(c); } break inner_loop; // need quoting ok } } } else if (c == '<') { ent = "<"; break inner_loop; } else if (c == '&') { ent = "&"; break inner_loop; } else if (c == '>') { // Let's be conservative; and if there's any // change it might be part of "]]>" quote it if (inPtr < 2 || text.charAt(inPtr-2) == ']') { ent = ">"; break inner_loop; } } } else if (c >= highChar) { break inner_loop; } if (mOutputPtr >= mOutputBufLen) { flushBuffer(); } mOutputBuffer[mOutputPtr++] = c; } if (ent != null) { writeRaw(ent); } else { writeAsEntity(text.charAt(inPtr-1)); } } } public void writeCharacters(char[] cbuf, int offset, int len) throws IOException { if (mOut == null) { return; } if (mTextWriter != null) { // custom escaping? mTextWriter.write(cbuf, offset, len); } else { // nope, default: len += offset; do { int c = 0; int highChar = mEncHighChar; int start = offset; String ent = null; for (; offset < len; ++offset) { c = cbuf[offset]; if (c <= HIGHEST_ENCODABLE_TEXT_CHAR) { if (c == '<') { ent = "<"; break; } else if (c == '&') { ent = "&"; break; } else if (c == '>') { /* Let's be conservative; and if there's any * change it might be part of "]]>" quote it */ if ((offset == start) || cbuf[offset-1] == ']') { ent = ">"; break; } } else if (c < 0x0020) { if (c == '\n' || c == '\t') { // fine as is ; } else if (c == '\r') { if (mEscapeCR) { break; } } else { if (!mXml11 || c == 0) { throwInvalidChar(c); } break; // need quoting ok } } } else if (c >= highChar) { break; } // otherwise ok } int outLen = offset - start; if (outLen > 0) { writeRaw(cbuf, start, outLen); } if (ent != null) { writeRaw(ent); ent = null; } else if (offset < len) { writeAsEntity(c); } } while (++offset < len); } } /** * Method that will try to output the content as specified. If * the content passed in has embedded "--" in it, it will either * add an intervening space between consequtive hyphens (if content * fixing is enabled), or return the offset of the first hyphen in * multi-hyphen sequence. */ public int writeComment(String data) throws IOException { if (mCheckContent) { int ix = verifyCommentContent(data); if (ix >= 0) { if (!mFixContent) { // Can we fix it? return ix; } // Yes we can! (...Bob the Builder...) writeSegmentedComment(data, ix); return -1; } } fastWriteRaw(""); return -1; } public void writeDTD(String data) throws IOException { writeRaw(data); } public void writeDTD(String rootName, String systemId, String publicId, String internalSubset) throws IOException, XMLStreamException { fastWriteRaw(" 0) { fastWriteRaw(' ', '['); fastWriteRaw(internalSubset); fastWriteRaw(']'); } fastWriteRaw('>'); } public void writeEntityReference(String name) throws IOException, XMLStreamException { if (mCheckNames) { verifyNameValidity(name, mNsAware); } fastWriteRaw('&'); fastWriteRaw(name); fastWriteRaw(';'); } public void writeXmlDeclaration(String version, String encoding, String standalone) throws IOException { fastWriteRaw(" 0) { fastWriteRaw(" encoding='"); fastWriteRaw(encoding); fastWriteRaw('\''); } if (standalone != null) { fastWriteRaw(" standalone='"); fastWriteRaw(standalone); fastWriteRaw('\''); } fastWriteRaw('?', '>'); } public int writePI(String target, String data) throws IOException, XMLStreamException { if (mCheckNames) { // As per namespace specs, can not have colon(s) verifyNameValidity(target, mNsAware); } fastWriteRaw('<', '?'); fastWriteRaw(target); if (data != null && data.length() > 0) { if (mCheckContent) { int ix = data.indexOf('?'); if (ix >= 0) { ix = data.indexOf("?>", ix); if (ix >= 0) { return ix; } } } fastWriteRaw(' '); // Data may be longer, let's call regular writeRaw method writeRaw(data); } fastWriteRaw('?', '>'); return -1; } /* //////////////////////////////////////////////////// // Write methods, elements //////////////////////////////////////////////////// */ public void writeStartTagStart(String localName) throws IOException, XMLStreamException { if (mCheckNames) { verifyNameValidity(localName, mNsAware); } int ptr = mOutputPtr; int extra = (mOutputBufLen - ptr) - (1 + localName.length()); if (extra < 0) { // split on boundary, slower fastWriteRaw('<'); fastWriteRaw(localName); } else { char[] buf = mOutputBuffer; buf[ptr++] = '<'; int len = localName.length(); localName.getChars(0, len, buf, ptr); mOutputPtr = ptr+len; } } public void writeStartTagStart(String prefix, String localName) throws IOException, XMLStreamException { if (prefix == null || prefix.length() == 0) { // shouldn't happen writeStartTagStart(localName); return; } if (mCheckNames) { verifyNameValidity(prefix, mNsAware); verifyNameValidity(localName, mNsAware); } int ptr = mOutputPtr; int len = prefix.length(); int extra = (mOutputBufLen - ptr) - (2 + localName.length() + len); if (extra < 0) { // across buffer boundary, slow case fastWriteRaw('<'); fastWriteRaw(prefix); fastWriteRaw(':'); fastWriteRaw(localName); } else { // fast case, all inlined char[] buf = mOutputBuffer; buf[ptr++] = '<'; prefix.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = ':'; len = localName.length(); localName.getChars(0, len, buf, ptr); mOutputPtr = ptr+len; } } public void writeStartTagEnd() throws IOException { fastWriteRaw('>'); } public void writeStartTagEmptyEnd() throws IOException { int ptr = mOutputPtr; if ((ptr + 3) >= mOutputBufLen) { if (mOut == null) { return; } flushBuffer(); ptr = mOutputPtr; } char[] buf = mOutputBuffer; buf[ptr++] = ' '; buf[ptr++] = '/'; buf[ptr++] = '>'; mOutputPtr = ptr; } public void writeEndTag(String localName) throws IOException { int ptr = mOutputPtr; int extra = (mOutputBufLen - ptr) - (3 + localName.length()); if (extra < 0) { fastWriteRaw('<', '/'); fastWriteRaw(localName); fastWriteRaw('>'); } else { char[] buf = mOutputBuffer; buf[ptr++] = '<'; buf[ptr++] = '/'; int len = localName.length(); localName.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = '>'; mOutputPtr = ptr; } } public void writeEndTag(String prefix, String localName) throws IOException { if (prefix == null || prefix.length() == 0) { writeEndTag(localName); return; } int ptr = mOutputPtr; int len = prefix.length(); int extra = (mOutputBufLen - ptr) - (4 + localName.length() + len); if (extra < 0) { fastWriteRaw('<', '/'); /* At this point, it is assumed caller knows that end tag * matches with start tag, and that it (by extension) has been * validated if and as necessary */ fastWriteRaw(prefix); fastWriteRaw(':'); fastWriteRaw(localName); fastWriteRaw('>'); } else { char[] buf = mOutputBuffer; buf[ptr++] = '<'; buf[ptr++] = '/'; prefix.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = ':'; len = localName.length(); localName.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = '>'; mOutputPtr = ptr; } } /* //////////////////////////////////////////////////// // Write methods, attributes/ns //////////////////////////////////////////////////// */ public void writeAttribute(String localName, String value) throws IOException, XMLStreamException { if (mOut == null) { return; } if (mCheckNames) { verifyNameValidity(localName, mNsAware); } int len = localName.length(); if (((mOutputBufLen - mOutputPtr) - (3 + len)) < 0) { fastWriteRaw(' '); fastWriteRaw(localName); fastWriteRaw('=', '"'); } else { int ptr = mOutputPtr; char[] buf = mOutputBuffer; buf[ptr++] = ' '; localName.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = '='; buf[ptr++] = '"'; mOutputPtr = ptr; } len = (value == null) ? 0 : value.length(); if (len > 0) { if (mAttrValueWriter != null) { // custom escaping? mAttrValueWriter.write(value, 0, len); } else { // nope, default writeAttrValue(value, len); } } fastWriteRaw('"'); } public void writeAttribute(String localName, char[] value, int offset, int vlen) throws IOException, XMLStreamException { if (mOut == null) { return; } if (mCheckNames) { verifyNameValidity(localName, mNsAware); } int len = localName.length(); if (((mOutputBufLen - mOutputPtr) - (3 + len)) < 0) { fastWriteRaw(' '); fastWriteRaw(localName); fastWriteRaw('=', '"'); } else { int ptr = mOutputPtr; char[] buf = mOutputBuffer; buf[ptr++] = ' '; localName.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = '='; buf[ptr++] = '"'; mOutputPtr = ptr; } if (vlen > 0) { if (mAttrValueWriter != null) { // custom escaping? mAttrValueWriter.write(value, offset, vlen); } else { // nope, default writeAttrValue(value, offset, vlen); } } fastWriteRaw('"'); } public void writeAttribute(String prefix, String localName, String value) throws IOException, XMLStreamException { if (mOut == null) { return; } if (mCheckNames) { verifyNameValidity(prefix, mNsAware); verifyNameValidity(localName, mNsAware); } int len = prefix.length(); if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + len)) < 0) { fastWriteRaw(' '); if (prefix != null && prefix.length() > 0) { fastWriteRaw(prefix); fastWriteRaw(':'); } fastWriteRaw(localName); fastWriteRaw('=', '"'); } else { int ptr = mOutputPtr; char[] buf = mOutputBuffer; buf[ptr++] = ' '; prefix.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = ':'; len = localName.length(); localName.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = '='; buf[ptr++] = '"'; mOutputPtr = ptr; } len = (value == null) ? 0 : value.length(); if (len > 0) { if (mAttrValueWriter != null) { // custom escaping? mAttrValueWriter.write(value, 0, len); } else { // nope, default writeAttrValue(value, len); } } fastWriteRaw('"'); } public void writeAttribute(String prefix, String localName, char[] value, int offset, int vlen) throws IOException, XMLStreamException { if (mOut == null) { return; } if (mCheckNames) { verifyNameValidity(prefix, mNsAware); verifyNameValidity(localName, mNsAware); } int len = prefix.length(); if (((mOutputBufLen - mOutputPtr) - (4 + localName.length() + len)) < 0) { fastWriteRaw(' '); if (prefix != null && prefix.length() > 0) { fastWriteRaw(prefix); fastWriteRaw(':'); } fastWriteRaw(localName); fastWriteRaw('=', '"'); } else { int ptr = mOutputPtr; char[] buf = mOutputBuffer; buf[ptr++] = ' '; prefix.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = ':'; len = localName.length(); localName.getChars(0, len, buf, ptr); ptr += len; buf[ptr++] = '='; buf[ptr++] = '"'; mOutputPtr = ptr; } if (vlen > 0) { if (mAttrValueWriter != null) { // custom escaping? mAttrValueWriter.write(value, offset, vlen); } else { // nope, default writeAttrValue(value, offset, vlen); } } fastWriteRaw('"'); } private final void writeAttrValue(String value, int len) throws IOException { int inPtr = 0; final char qchar = mEncQuoteChar; int highChar = mEncHighChar; main_loop: while (true) { // main_loop String ent = null; inner_loop: while (true) { if (inPtr >= len) { break main_loop; } char c = value.charAt(inPtr++); if (c <= HIGHEST_ENCODABLE_ATTR_CHAR) { // special char? if (c < 0x0020) { // tab, cr/lf need encoding too if (c != '\n' && c != '\r' && c != '\t') { if (!mXml11 || c == 0) { throwInvalidChar(c); } } break inner_loop; // need quoting ok } else if (c == qchar) { ent = mEncQuoteEntity; break inner_loop; } else if (c == '<') { ent = "<"; break inner_loop; } else if (c == '&') { ent = "&"; break inner_loop; } } else if (c >= highChar) { // out of range, have to escape break inner_loop; } if (mOutputPtr >= mOutputBufLen) { flushBuffer(); } mOutputBuffer[mOutputPtr++] = c; } if (ent != null) { writeRaw(ent); } else { writeAsEntity(value.charAt(inPtr-1)); } } } private final void writeAttrValue(char[] value, int offset, int len) throws IOException { len += offset; final char qchar = mEncQuoteChar; int highChar = mEncHighChar; main_loop: while (true) { // main_loop String ent = null; inner_loop: while (true) { if (offset >= len) { break main_loop; } char c = value[offset++]; if (c <= HIGHEST_ENCODABLE_ATTR_CHAR) { // special char? if (c < 0x0020) { // tab, cr/lf need encoding too if (c != '\n' && c != '\r' && c != '\t') { if (!mXml11 || c == 0) { throwInvalidChar(c); } } break inner_loop; // need quoting ok } else if (c == qchar) { ent = mEncQuoteEntity; break inner_loop; } else if (c == '<') { ent = "<"; break inner_loop; } else if (c == '&') { ent = "&"; break inner_loop; } } else if (c >= highChar) { // out of range, have to escape break inner_loop; } if (mOutputPtr >= mOutputBufLen) { flushBuffer(); } mOutputBuffer[mOutputPtr++] = c; } if (ent != null) { writeRaw(ent); } else { writeAsEntity(value[offset-1]); } } } /* //////////////////////////////////////////////////// // Internal methods, buffering //////////////////////////////////////////////////// */ private final void flushBuffer() throws IOException { if (mOutputPtr > 0 && mOut != null) { int ptr = mOutputPtr; // Need to update location info, to keep it in sync mLocPastChars += ptr; mLocRowStartOffset -= ptr; mOutputPtr = 0; mOut.write(mOutputBuffer, 0, ptr); } } private final void fastWriteRaw(char c) throws IOException { if (mOutputPtr >= mOutputBufLen) { if (mOut == null) { return; } flushBuffer(); } mOutputBuffer[mOutputPtr++] = c; } private final void fastWriteRaw(char c1, char c2) throws IOException { if ((mOutputPtr + 1) >= mOutputBufLen) { if (mOut == null) { return; } flushBuffer(); } mOutputBuffer[mOutputPtr++] = c1; mOutputBuffer[mOutputPtr++] = c2; } private final void fastWriteRaw(String str) throws IOException { int len = str.length(); int ptr = mOutputPtr; if ((ptr + len) >= mOutputBufLen) { if (mOut == null) { return; } /* It's even possible that String is longer than the buffer (not * likely, possible). If so, let's just call the full * method: */ if (len > mOutputBufLen) { writeRaw(str); return; } flushBuffer(); ptr = mOutputPtr; } str.getChars(0, len, mOutputBuffer, ptr); mOutputPtr = ptr+len; } /* //////////////////////////////////////////////////// // Internal methods, content verification/fixing //////////////////////////////////////////////////// */ /** * @return Index at which a problem was found, if any; -1 if there's * no problem. */ protected int verifyCDataContent(String content) { if (content != null && content.length() >= 3) { int ix = content.indexOf(']'); if (ix >= 0) { return content.indexOf("]]>", ix); } } return -1; } protected int verifyCDataContent(char[] c, int start, int end) { if (c != null) { start += 2; /* Let's do simple optimization for search... * (bayer-moore search algorithm) */ while (start < end) { char ch = c[start]; if (ch == ']') { ++start; // let's just move by one in this case continue; } if (ch == '>') { // match? if (c[start-1] == ']' && c[start-2] == ']') { return start-2; } } start += 2; } } return -1; } protected int verifyCommentContent(String content) { int ix = content.indexOf('-'); if (ix >= 0) { /* actually, it's illegal to just end with '-' too, since * that would cause invalid end marker '--->' */ if (ix < (content.length() - 1)) { ix = content.indexOf("--", ix); } } return ix; } protected void writeSegmentedCData(String content, int index) throws IOException { /* It's actually fairly easy, just split "]]>" into 2 pieces; * for each ']]>'; first one containing "]]", second one ">" * (as long as necessary) */ int start = 0; while (index >= 0) { fastWriteRaw(""); start = index+2; index = content.indexOf("]]>", start); } // Ok, then the last segment fastWriteRaw(""); } protected void writeSegmentedCData(char[] c, int start, int len, int index) throws IOException { int end = start + len; while (index >= 0) { fastWriteRaw(""); start = index+2; index = verifyCDataContent(c, start, end); } // Ok, then the last segment fastWriteRaw(""); } protected void writeSegmentedComment(String content, int index) throws IOException { int len = content.length(); // First the special case (last char is hyphen): if (index == (len-1)) { fastWriteRaw(""); return; } /* Fixing comments is more difficult than that of CDATA segments'; * this because CDATA can still contain embedded ']]'s, but * comment neither allows '--' nor ending with '-->'; which means * that it's impossible to just split segments. Instead we'll do * something more intrusive, and embed single spaces between all * '--' character pairs... it's intrusive, but comments are not * supposed to contain any data, so that should be fine (plus * at least result is valid, unlike contents as is) */ fastWriteRaw(""); } /** * Method used to figure out which part of the Unicode char set the * encoding can natively support. Values returned are 7, 8 and 16, * to indicate (respectively) "ascii", "ISO-Latin" and "native Unicode". * These just best guesses, but should work ok for the most common * encodings. */ public static int guessEncodingBitSize(String enc) { if (enc == null || enc.length() == 0) { // let's assume default is UTF-8... return 16; } // Let's see if we can find a normalized name, first: enc = CharsetNames.normalize(enc); // Ok, first, do we have known ones; starting with most common: if (enc == CharsetNames.CS_UTF8) { return 16; // meaning up to 2^16 can be represented natively } else if (enc == CharsetNames.CS_ISO_LATIN1) { return 8; } else if (enc == CharsetNames.CS_US_ASCII) { return 7; } else if (enc == CharsetNames.CS_UTF16 || enc == CharsetNames.CS_UTF16BE || enc == CharsetNames.CS_UTF16LE || enc == CharsetNames.CS_UTF32BE || enc == CharsetNames.CS_UTF32LE) { return 16; } /* Above and beyond well-recognized names, it might still be * good to have more heuristics for as-of-yet unhandled cases... * But, it's probably easier to only assume 8-bit clean (could * even make it just 7, let's see how this works out) */ return 8; } protected final void writeAsEntity(int c) throws IOException { char[] buf = mOutputBuffer; int ptr = mOutputPtr; if ((ptr + 10) >= buf.length) { // &#x [up to 6 hex digits] ; flushBuffer(); ptr = mOutputPtr; } buf[ptr++] = '&'; // Can use more optimal notation for 8-bit ascii stuff: if (c < 256) { /* Also; although not really mandatory, let's also * use pre-defined entities where possible. */ if (c == '&') { buf[ptr++] = 'a'; buf[ptr++] = 'm'; buf[ptr++] = 'p'; } else if (c == '<') { buf[ptr++] = 'l'; buf[ptr++] = 't'; } else if (c == '>') { buf[ptr++] = 'g'; buf[ptr++] = 't'; } else if (c == '\'') { buf[ptr++] = 'a'; buf[ptr++] = 'p'; buf[ptr++] = 'o'; buf[ptr++] = 's'; } else if (c == '"') { buf[ptr++] = 'q'; buf[ptr++] = 'u'; buf[ptr++] = 'o'; buf[ptr++] = 't'; } else { buf[ptr++] = '#';; buf[ptr++] = 'x';; // Can use shortest quoting for tab, cr, lf: if (c >= 16) { int digit = (c >> 4); buf[ptr++] = (char) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit)); c &= 0xF; } buf[ptr++] = (char) ((c < 10) ? ('0' + c) : (('a' - 10) + c)); } } else { buf[ptr++] = '#'; buf[ptr++] = 'x'; // Ok, let's write the shortest possible sequence then: int shift = 20; int origPtr = ptr; do { int digit = (c >> shift) & 0xF; if (digit > 0 || (ptr != origPtr)) { buf[ptr++] = (char) ((digit < 10) ? ('0' + digit) : (('a' - 10) + digit)); } shift -= 4; } while (shift > 0); c &= 0xF; buf[ptr++] = (char) ((c < 10) ? ('0' + c) : (('a' - 10) + c)); } buf[ptr++] = ';'; mOutputPtr = ptr; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy