All downloads are free. Search and download functionality uses the official Maven repository.

com.ctc.wstx.util.TextBuffer Maven / Gradle / Ivy

Go to download

Woodstox is a high-performance XML processor that implements Stax (JSR-173) and SAX2 APIs

The newest version!
package com.ctc.wstx.util;

import java.io.*;
import java.util.ArrayList;
import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;

import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;

import org.codehaus.stax2.typed.Base64Variant;
import org.codehaus.stax2.typed.TypedArrayDecoder;
import org.codehaus.stax2.typed.TypedValueDecoder;
import org.codehaus.stax2.typed.TypedXMLStreamException;
import org.codehaus.stax2.validation.XMLValidator;

import org.codehaus.stax2.ri.typed.CharArrayBase64Decoder;

import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.dtd.DTDEventListener;
import com.ctc.wstx.sr.InputProblemReporter;
import com.ctc.wstx.util.StringUtil;

/**
 * TextBuffer is a class similar to {@link StringBuffer}, with
 * following differences:
 *
    *
  • TextBuffer uses segments character arrays, to avoid having * to do additional array copies when array is not big enough. This * means that only reallocating that is necessary is done only once -- * if and when caller * wants to access contents in a linear array (char[], String). *
  • *
  • TextBuffer is not synchronized. *
  • *
*

* Over time more and more cruft has accumulated here, mostly to * support efficient access to collected text. Since access is * easiest to do efficiently using callbacks, this class now needs * to known interfaces of SAX classes and validators. *

* Notes about usage: for debugging purposes, it's suggested to use * {@link #toString} method, as opposed to * {@link #contentsAsArray} or {@link #contentsAsString}. Internally * resulting code paths may or may not be different, WRT caching. * * @author Tatu Saloranta */ public final class TextBuffer { /* 23-Mar-2006, TSa: Memory buffer clearing is a significant overhead * for small documents, no need to use huge buffer -- it will expand * as necessary for larger docs, but commonly text segments just * aren't that long. */ /** * Size of the first text segment buffer to allocate; need not contain * the biggest segment, since new ones will get allocated as needed. * However, it's sensible to use something that often is big enough * to contain segments. */ final static int DEF_INITIAL_BUFFER_SIZE = 500; // 1k /** * We will also restrict maximum length of individual segments * to allocate (not including cases where we must return a single * segment). Value is somewhat arbitrary, let's use it so that * memory used is no more than 1/2 megabytes. */ final static int MAX_SEGMENT_LENGTH = 256 * 1024; final static int INT_SPACE = 0x0020; // // // Configuration: private final ReaderConfig mConfig; // // // Shared read-only input buffer: /** * Shared input buffer; stored here in case some input can be returned * as is, without being copied to collector's own buffers. Note that * this is read-only for this Objet. */ private char[] mInputBuffer; /** * Character offset of first char in input buffer; -1 to indicate * that input buffer currently does not contain any useful char data */ private int mInputStart; /** * When using shared buffer, offset after the last character in * shared buffer */ private int mInputLen; // // // Internal non-shared collector buffers: private boolean mHasSegments = false; /** * List of segments prior to currently active segment. 
*/ private ArrayList mSegments; // // // Currently used segment; not (yet) contained in mSegments /** * Amount of characters in segments in {@link mSegments} */ private int mSegmentSize; private char[] mCurrentSegment; /** * Number of characters in currently active (last) segment */ private int mCurrentSize; // // // Temporary caching for Objects to return /** * String that will be constructed when the whole contents are * needed; will be temporarily stored in case asked for again. */ private String mResultString; private char[] mResultArray; // // // Canonical indentation objects (up to 32 spaces, 8 tabs) public final static int MAX_INDENT_SPACES = 32; public final static int MAX_INDENT_TABS = 8; // Let's add one more space at the end, for safety... private final static String sIndSpaces = // 123456789012345678901234567890123 "\n "; private final static char[] sIndSpacesArray = sIndSpaces.toCharArray(); private final static String[] sIndSpacesStrings = new String[sIndSpacesArray.length]; private final static String sIndTabs = // 1 2 3 4 5 6 7 8 9 "\n\t\t\t\t\t\t\t\t\t"; private final static char[] sIndTabsArray = sIndTabs.toCharArray(); private final static String[] sIndTabsStrings = new String[sIndTabsArray.length]; /* ////////////////////////////////////////////// // Life-cycle ////////////////////////////////////////////// */ private TextBuffer(ReaderConfig cfg) { mConfig = cfg; } public static TextBuffer createRecyclableBuffer(ReaderConfig cfg) { return new TextBuffer(cfg); } public static TextBuffer createTemporaryBuffer() { return new TextBuffer(null); } /** * Method called to indicate that the underlying buffers should now * be recycled if they haven't yet been recycled. Although caller * can still use this text buffer, it is not advisable to call this * method if that is likely, since next time a buffer is needed, * buffers need to reallocated. * Note: calling this method automatically also clears contents * of the buffer. 
*/ public void recycle(boolean force) { if (mConfig != null && mCurrentSegment != null) { if (force) { /* If we are allowed to wipe out all existing data, it's * quite easy; we'll just wipe out contents, and return * biggest buffer: */ resetWithEmpty(); } else { /* But if there's non-shared data (ie. buffer is still * in use), can't return it yet: */ if (mInputStart < 0 && (mSegmentSize + mCurrentSize) > 0) { return; } // If no data (or only shared data), can continue if (mSegments != null && mSegments.size() > 0) { // No need to use anything from list, curr segment not null mSegments.clear(); mSegmentSize = 0; } } char[] buf = mCurrentSegment; mCurrentSegment = null; mConfig.freeMediumCBuffer(buf); } } /** * Method called to clear out any content text buffer may have, and * initializes buffer to use non-shared data. */ public void resetWithEmpty() { mInputBuffer = null; mInputStart = -1; // indicates shared buffer not used mInputLen = 0; mResultString = null; mResultArray = null; // And then reset internal input buffers, if necessary: if (mHasSegments) { clearSegments(); } mCurrentSize = 0; } /** * Similar to {@link #resetWithEmpty}, but actively marks current * text content to be empty string (whereas former method leaves * content as undefined). */ public void resetWithEmptyString() { mInputBuffer = null; mInputStart = -1; // indicates shared buffer not used mInputLen = 0; mResultString = ""; mResultArray = null; if (mHasSegments) { clearSegments(); } mCurrentSize = 0; } /** * Method called to initialize the buffer with a shared copy of data; * this means that buffer will just have pointers to actual data. It * also means that if anything is to be appended to the buffer, it * will first have to unshare it (make a local copy). 
*/ public void resetWithShared(char[] buf, int start, int len) { // Let's first mark things we need about input buffer mInputBuffer = buf; mInputStart = start; mInputLen = len; // Then clear intermediate values, if any: mResultString = null; mResultArray = null; // And then reset internal input buffers, if necessary: if (mHasSegments) { clearSegments(); } } public void resetWithCopy(char[] buf, int start, int len) { mInputBuffer = null; mInputStart = -1; // indicates shared buffer not used mInputLen = 0; mResultString = null; mResultArray = null; // And then reset internal input buffers, if necessary: if (mHasSegments) { clearSegments(); } else { if (mCurrentSegment == null) { mCurrentSegment = allocBuffer(len); } mCurrentSize = mSegmentSize = 0; } append(buf, start, len); } /** * Method called to make sure there is a non-shared segment to use, without * appending any content yet. */ public void resetInitialized() { resetWithEmpty(); if (mCurrentSegment == null) { mCurrentSegment = allocBuffer(0); } } private final char[] allocBuffer(int needed) { int size = Math.max(needed, DEF_INITIAL_BUFFER_SIZE); char[] buf = null; if (mConfig != null) { buf = mConfig.allocMediumCBuffer(size); if (buf != null) { return buf; } } return new char[size]; } private final void clearSegments() { mHasSegments = false; /* Since the current segment should be the biggest one * (as we allocate 50% bigger each time), let's retain it, * and clear others */ mSegments.clear(); mCurrentSize = mSegmentSize = 0; } public void resetWithIndentation(int indCharCount, char indChar) { mInputStart = 0; mInputLen = indCharCount+1; String text; if (indChar == '\t') { // tabs? mInputBuffer = sIndTabsArray; text = sIndTabsStrings[indCharCount]; if (text == null) { sIndTabsStrings[indCharCount] = text = sIndTabs.substring(0, mInputLen); } } else { // nope, spaces (should assert indChar?) 
mInputBuffer = sIndSpacesArray; text = sIndSpacesStrings[indCharCount]; if (text == null) { sIndSpacesStrings[indCharCount] = text = sIndSpaces.substring(0, mInputLen); } } mResultString = text; /* Should not need the explicit non-shared array; no point in * pre-populating it (can be changed if this is not true) */ mResultArray = null; // And then reset internal input buffers, if necessary: if (mSegments != null && mSegments.size() > 0) { mSegments.clear(); mCurrentSize = mSegmentSize = 0; } } /* ////////////////////////////////////////////// // Accessors for implementing StAX interface: ////////////////////////////////////////////// */ /** * @return Number of characters currently stored by this collector */ public int size() { if (mInputStart >= 0) { // shared copy from input buf return mInputLen; } // local segmented buffers return mSegmentSize + mCurrentSize; } public int getTextStart() { /* Only shared input buffer can have non-zero offset; buffer * segments start at 0, and if we have to create a combo buffer, * that too will start from beginning of the buffer */ return (mInputStart >= 0) ? mInputStart : 0; } public char[] getTextBuffer() { // Are we just using shared input buffer? if (mInputStart >= 0) { return mInputBuffer; } // Nope; but does it fit in just one segment? 
if (mSegments == null || mSegments.size() == 0) { return mCurrentSegment; } // Nope, need to have/create a non-segmented array and return it return contentsAsArray(); } /* ///////////////////////////////////////////////// // Accessors for implementing StAX2 Typed access ///////////////////////////////////////////////// */ /** * Generic pass-through method which call given decoder * with accumulated data */ public void decode(TypedValueDecoder tvd) throws IllegalArgumentException { char[] buf; int start, end; if (mInputStart >= 0) { // shared buffer, common case buf = mInputBuffer; start = mInputStart; end = start + mInputLen; } else { buf = getTextBuffer(); start = 0; end = mSegmentSize + mCurrentSize; } // Need to trim first while (true) { if (start >= end) { tvd.handleEmptyValue(); return; } if (!StringUtil.isSpace(buf[start])) { break; } ++start; } // Trailing space? while (--end > start && StringUtil.isSpace(buf[end])) { } tvd.decode(buf, start, end+1); } /** * Pass-through decode method called to find find the next token, * decode it, and repeat the process as long as there are more * tokens and the array decoder accepts more entries. * All tokens processed will be "consumed", such that they will * not be visible via buffer. * * @return Number of tokens decoded; 0 means that no (more) tokens * were found from this buffer. */ public int decodeElements(TypedArrayDecoder tad, InputProblemReporter rep) throws TypedXMLStreamException { int count = 0; /* First: for simplicity, we require a single flat buffer to * decode from. 
Second: to be able to update start location * (to keep track of what's available), we need to fake that * we are using a shared buffer (since that has offset) */ if (mInputStart < 0) { if (mHasSegments) { mInputBuffer = buildResultArray(); mInputLen = mInputBuffer.length; // let's also clear segments since they are not needed any more clearSegments(); } else { // just current buffer, easier to fake mInputBuffer = mCurrentSegment; mInputLen = mCurrentSize; } mInputStart = 0; } // And then let's decode int ptr = mInputStart; final int end = ptr + mInputLen; final char[] buf = mInputBuffer; int start = ptr; try { decode_loop: while (ptr < end) { // First, any space to skip? while (buf[ptr] <= INT_SPACE) { if (++ptr >= end) { break decode_loop; } } // Then let's figure out non-space char (token) start = ptr; ++ptr; while (ptr < end && buf[ptr] > INT_SPACE) { ++ptr; } ++count; int tokenEnd = ptr; ++ptr; // to skip trailing space (or, beyond end) // And there we have it if (tad.decodeValue(buf, start, tokenEnd)) { break; } } } catch (IllegalArgumentException iae) { // Need to convert to a checked stream exception /* Hmmh. This is probably not an accurate location... but * we can't do much better as content we have has been * normalized already. */ Location loc = rep.getLocation(); // -1 to move it back after being advanced earlier (to skip trailing space) String lexical = new String(buf, start, (ptr-start-1)); throw new TypedXMLStreamException(lexical, iae.getMessage(), loc, iae); } finally { mInputStart = ptr; mInputLen = end-ptr; } return count; } /** * Method that needs to be called to configure given base64 decoder * with textual contents collected by this buffer. * * @param dec Decoder that will need data * @param firstChunk Whether this is the first segment fed or not; * if it is, state needs to be fullt reset; if not, only partially. 
*/ public void initBinaryChunks(Base64Variant v, CharArrayBase64Decoder dec, boolean firstChunk) { if (mInputStart < 0) { // non-shared dec.init(v, firstChunk, mCurrentSegment, 0, mCurrentSize, mSegments); } else { // shared dec.init(v, firstChunk, mInputBuffer, mInputStart, mInputLen, null); } } /* ////////////////////////////////////////////// // Accessors: ////////////////////////////////////////////// */ public String contentsAsString() { if (mResultString == null) { // Has array been requested? Can make a shortcut, if so: if (mResultArray != null) { mResultString = new String(mResultArray); } else { // Do we use shared array? if (mInputStart >= 0) { if (mInputLen < 1) { return (mResultString = ""); } mResultString = new String(mInputBuffer, mInputStart, mInputLen); } else { // nope... need to copy // But first, let's see if we have just one buffer int segLen = mSegmentSize; int currLen = mCurrentSize; if (segLen == 0) { // yup mResultString = (currLen == 0) ? "" : new String(mCurrentSegment, 0, currLen); } else { // no, need to combine StringBuffer sb = new StringBuffer(segLen + currLen); // First stored segments if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); sb.append(curr, 0, curr.length); } } // And finally, current segment: sb.append(mCurrentSegment, 0, mCurrentSize); mResultString = sb.toString(); } } } } return mResultString; } /** * Similar to {@link #contentsAsString}, but constructs a StringBuffer * for further appends. 
* * @param extraSpace Number of extra characters to preserve in StringBuffer * beyond space immediately needed to hold the contents */ public StringBuffer contentsAsStringBuffer(int extraSpace) { if (mResultString != null) { return new StringBuffer(mResultString); } if (mResultArray != null) { StringBuffer sb = new StringBuffer(mResultArray.length + extraSpace); sb.append(mResultArray, 0, mResultArray.length); return sb; } if (mInputStart >= 0) { // shared array if (mInputLen < 1) { return new StringBuffer(); } StringBuffer sb = new StringBuffer(mInputLen + extraSpace); sb.append(mInputBuffer, mInputStart, mInputLen); return sb; } int segLen = mSegmentSize; int currLen = mCurrentSize; StringBuffer sb = new StringBuffer(segLen + currLen + extraSpace); // First stored segments if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); sb.append(curr, 0, curr.length); } } // And finally, current segment: sb.append(mCurrentSegment, 0, currLen); return sb; } public void contentsToStringBuffer(StringBuffer sb) { if (mResultString != null) { sb.append(mResultString); } else if (mResultArray != null) { sb.append(mResultArray); } else if (mInputStart >= 0) { // shared array if (mInputLen > 0) { sb.append(mInputBuffer, mInputStart, mInputLen); } } else { // First stored segments if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); sb.append(curr, 0, curr.length); } } // And finally, current segment: sb.append(mCurrentSegment, 0, mCurrentSize); } } public char[] contentsAsArray() { char[] result = mResultArray; if (result == null) { mResultArray = result = buildResultArray(); } return result; } public int contentsToArray(int srcStart, char[] dst, int dstStart, int len) { // Easy to copy from shared buffer: if (mInputStart >= 0) { int amount = mInputLen - srcStart; if (amount > len) { amount = len; } else if (amount < 0) { amount = 0; } if (amount 
> 0) { System.arraycopy(mInputBuffer, mInputStart+srcStart, dst, dstStart, amount); } return amount; } /* Could also check if we have array, but that'd only help with * braindead clients that get full array first, then segments... * which hopefully aren't that common */ // Copying from segmented array is bit more involved: int totalAmount = 0; if (mSegments != null) { for (int i = 0, segc = mSegments.size(); i < segc; ++i) { char[] segment = (char[]) mSegments.get(i); int segLen = segment.length; int amount = segLen - srcStart; if (amount < 1) { // nothing from this segment? srcStart -= segLen; continue; } if (amount >= len) { // can get rest from this segment? System.arraycopy(segment, srcStart, dst, dstStart, len); return (totalAmount + len); } // Can get some from this segment, offset becomes zero: System.arraycopy(segment, srcStart, dst, dstStart, amount); totalAmount += amount; dstStart += amount; len -= amount; srcStart = 0; } } // Need to copy anything from last segment? if (len > 0) { int maxAmount = mCurrentSize - srcStart; if (len > maxAmount) { len = maxAmount; } if (len > 0) { // should always be true System.arraycopy(mCurrentSegment, srcStart, dst, dstStart, len); totalAmount += len; } } return totalAmount; } /** * Method that will stream contents of this buffer into specified * Writer. */ public int rawContentsTo(Writer w) throws IOException { // Let's first see if we have created helper objects: if (mResultArray != null) { w.write(mResultArray); return mResultArray.length; } if (mResultString != null) { w.write(mResultString); return mResultString.length(); } // Do we use shared array? 
if (mInputStart >= 0) { if (mInputLen > 0) { w.write(mInputBuffer, mInputStart, mInputLen); } return mInputLen; } // Nope, need to do full segmented output int rlen = 0; if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] ch = (char[]) mSegments.get(i); w.write(ch); rlen += ch.length; } } if (mCurrentSize > 0) { w.write(mCurrentSegment, 0, mCurrentSize); rlen += mCurrentSize; } return rlen; } public Reader rawContentsViaReader() throws IOException { // Let's first see if we have created helper objects: if (mResultArray != null) { return new CharArrayReader(mResultArray); } if (mResultString != null) { return new StringReader(mResultString); } // Do we use shared array? if (mInputStart >= 0) { if (mInputLen > 0) { return new CharArrayReader(mInputBuffer, mInputStart, mInputLen); } return new StringReader(""); } // or maybe it's all in the current segment if (mSegments == null || mSegments.size() == 0) { return new CharArrayReader(mCurrentSegment, 0, mCurrentSize); } // Nope, need to do full segmented output return new BufferReader(mSegments, mCurrentSegment, mCurrentSize); } public boolean isAllWhitespace() { if (mInputStart >= 0) { // using single shared buffer? char[] buf = mInputBuffer; int i = mInputStart; int last = i + mInputLen; for (; i < last; ++i) { if (buf[i] > INT_SPACE) { return false; } } return true; } // Nope, need to do full segmented output if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] buf = (char[]) mSegments.get(i); for (int j = 0, len2 = buf.length; j < len2; ++j) { if (buf[j] > INT_SPACE) { return false; } } } } char[] buf = mCurrentSegment; for (int i = 0, len = mCurrentSize; i < len; ++i) { if (buf[i] > INT_SPACE) { return false; } } return true; } /** * Method that can be used to check if the contents of the buffer end * in specified String. 
* * @return True if the textual content buffer contains ends with the * specified String; false otherwise */ public boolean endsWith(String str) { /* Let's just play this safe; should seldom if ever happen... * and because of that, can be sub-optimal, performancewise, to * alternatives. */ if (mInputStart >= 0) { unshare(16); } int segIndex = (mSegments == null) ? 0 : mSegments.size(); int inIndex = str.length() - 1; char[] buf = mCurrentSegment; int bufIndex = mCurrentSize-1; while (inIndex >= 0) { if (str.charAt(inIndex) != buf[bufIndex]) { return false; } if (--inIndex == 0) { break; } if (--bufIndex < 0) { if (--segIndex < 0) { // no more data? return false; } buf = (char[]) mSegments.get(segIndex); bufIndex = buf.length-1; } } return true; } /** * Note: it is assumed that this method is not used often enough to * be a bottleneck, or for long segments. Based on this, it is optimized * for common simple cases where there is only one single character * segment to use; fallback for other cases is to create such segment. */ public boolean equalsString(String str) { int expLen = str.length(); // First the easy check; if we have a shared buf: if (mInputStart >= 0) { if (mInputLen != expLen) { return false; } for (int i = 0; i < expLen; ++i) { if (str.charAt(i) != mInputBuffer[mInputStart+i]) { return false; } } return true; } // Otherwise, segments: if (expLen != size()) { return false; } char[] seg; if (mSegments == null || mSegments.size() == 0) { // just one segment, still easy seg = mCurrentSegment; } else { /* Ok; this is the sub-optimal case. Could obviously juggle through * segments, but probably not worth the hassle, we seldom if ever * get here... 
*/ seg = contentsAsArray(); } for (int i = 0; i < expLen; ++i) { if (seg[i] != str.charAt(i)) { return false; } } return true; } /* ////////////////////////////////////////////// // Access using SAX handlers: ////////////////////////////////////////////// */ public void fireSaxCharacterEvents(ContentHandler h) throws SAXException { if (mResultArray != null) { // already have single array? h.characters(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? h.characters(mInputBuffer, mInputStart, mInputLen); } else { if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] ch = (char[]) mSegments.get(i); h.characters(ch, 0, ch.length); } } if (mCurrentSize > 0) { h.characters(mCurrentSegment, 0, mCurrentSize); } } } public void fireSaxSpaceEvents(ContentHandler h) throws SAXException { if (mResultArray != null) { // only happens for indentation h.ignorableWhitespace(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? h.ignorableWhitespace(mInputBuffer, mInputStart, mInputLen); } else { if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] ch = (char[]) mSegments.get(i); h.ignorableWhitespace(ch, 0, ch.length); } } if (mCurrentSize > 0) { h.ignorableWhitespace(mCurrentSegment, 0, mCurrentSize); } } } public void fireSaxCommentEvent(LexicalHandler h) throws SAXException { // Comment can not be split, so may need to combine the array if (mResultArray != null) { // only happens for indentation h.comment(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? 
h.comment(mInputBuffer, mInputStart, mInputLen); } else if (mSegments != null && mSegments.size() > 0) { char[] ch = contentsAsArray(); h.comment(ch, 0, ch.length); } else { h.comment(mCurrentSegment, 0, mCurrentSize); } } public void fireDtdCommentEvent(DTDEventListener l) { // Comment can not be split, so may need to combine the array if (mResultArray != null) { // only happens for indentation l.dtdComment(mResultArray, 0, mResultArray.length); } else if (mInputStart >= 0) { // sharing input buffer? l.dtdComment(mInputBuffer, mInputStart, mInputLen); } else if (mSegments != null && mSegments.size() > 0) { char[] ch = contentsAsArray(); l.dtdComment(ch, 0, ch.length); } else { l.dtdComment(mCurrentSegment, 0, mCurrentSize); } } /* ////////////////////////////////////////////// // Support for validation ////////////////////////////////////////////// */ public void validateText(XMLValidator vld, boolean lastSegment) throws XMLStreamException { // Shared buffer? Let's just pass that if (mInputStart >= 0) { vld.validateText(mInputBuffer, mInputStart, mInputStart + mInputLen, lastSegment); } else { /* Otherwise, can either create a combine buffer, or construct * a String. While former could be more efficient, let's do latter * for now since current validator implementations work better * with Strings. */ vld.validateText(contentsAsString(), lastSegment); } } /* ////////////////////////////////////////////// // Public mutators: ////////////////////////////////////////////// */ /** * Method called to make sure that buffer is not using shared input * buffer; if it is, it will copy such contents to private buffer. */ public void ensureNotShared() { if (mInputStart >= 0) { unshare(16); } } public void append(char c) { // Using shared buffer so far? if (mInputStart >= 0) { unshare(16); } mResultString = null; mResultArray = null; // Room in current segment? 
char[] curr = mCurrentSegment; if (mCurrentSize >= curr.length) { expand(1); curr = mCurrentSegment; } curr[mCurrentSize++] = c; } public void append(char[] c, int start, int len) { // Can't append to shared buf (sanity check) if (mInputStart >= 0) { unshare(len); } mResultString = null; mResultArray = null; // Room in current segment? char[] curr = mCurrentSegment; int max = curr.length - mCurrentSize; if (max >= len) { System.arraycopy(c, start, curr, mCurrentSize, len); mCurrentSize += len; } else { // No room for all, need to copy part(s): if (max > 0) { System.arraycopy(c, start, curr, mCurrentSize, max); start += max; len -= max; } /* And then allocate new segment; we are guaranteed to now * have enough room in segment. */ expand(len); // note: curr != mCurrentSegment after this System.arraycopy(c, start, mCurrentSegment, 0, len); mCurrentSize = len; } } public void append(String str) { // Can't append to shared buf (sanity check) int len = str.length(); if (mInputStart >= 0) { unshare(len); } mResultString = null; mResultArray = null; // Room in current segment? char[] curr = mCurrentSegment; int max = curr.length - mCurrentSize; if (max >= len) { str.getChars(0, len, curr, mCurrentSize); mCurrentSize += len; } else { // No room for all, need to copy part(s): if (max > 0) { str.getChars(0, max, curr, mCurrentSize); len -= max; } /* And then allocate new segment; we are guaranteed to now * have enough room in segment. */ expand(len); str.getChars(max, max+len, mCurrentSegment, 0); mCurrentSize = len; } } /* ////////////////////////////////////////////// // Raw access, for high-performance use: ////////////////////////////////////////////// */ public char[] getCurrentSegment() { /* Since the intention of the caller is to directly add stuff into * buffers, we should NOT have anything in shared buffer... ie. may * need to unshare contents. 
*/ if (mInputStart >= 0) { unshare(1); } else { char[] curr = mCurrentSegment; if (curr == null) { mCurrentSegment = allocBuffer(0); } else if (mCurrentSize >= curr.length) { // Plus, we better have room for at least one more char expand(1); } } return mCurrentSegment; } public int getCurrentSegmentSize() { return mCurrentSize; } public void setCurrentLength(int len) { mCurrentSize = len; } public char[] finishCurrentSegment() { if (mSegments == null) { mSegments = new ArrayList(); } mHasSegments = true; mSegments.add(mCurrentSegment); int oldLen = mCurrentSegment.length; mSegmentSize += oldLen; char[] curr = new char[calcNewSize(oldLen)]; mCurrentSize = 0; mCurrentSegment = curr; return curr; } /** * Method used to determine size of the next segment to * allocate to contain textual content. */ private int calcNewSize(int latestSize) { // Let's grow segments by 50%, when over 8k int incr = (latestSize < 8000) ? latestSize : (latestSize >> 1); int size = latestSize + incr; // but let's not create too big chunks return Math.min(size, MAX_SEGMENT_LENGTH); } /* ////////////////////////////////////////////// // Standard methods: ////////////////////////////////////////////// */ /** * Note: calling this method may not be as efficient as calling * {@link #contentsAsString}, since it's not guaranteed that resulting * String is cached. */ public String toString() { return contentsAsString(); } /* ////////////////////////////////////////////// // Internal methods: ////////////////////////////////////////////// */ /** * Method called if/when we need to append content when we have been * initialized to use shared buffer. */ public void unshare(int needExtra) { int len = mInputLen; mInputLen = 0; char[] inputBuf = mInputBuffer; mInputBuffer = null; int start = mInputStart; mInputStart = -1; // Is buffer big enough, or do we need to reallocate? 
int needed = len+needExtra; if (mCurrentSegment == null || needed > mCurrentSegment.length) { mCurrentSegment = allocBuffer(needed); } if (len > 0) { System.arraycopy(inputBuf, start, mCurrentSegment, 0, len); } mSegmentSize = 0; mCurrentSize = len; } /** * Method called when current segment is full, to allocate new * segment. * * @param roomNeeded Number of characters that the resulting * new buffer must have */ private void expand(int roomNeeded) { // First, let's move current segment to segment list: if (mSegments == null) { mSegments = new ArrayList(); } char[] curr = mCurrentSegment; mHasSegments = true; mSegments.add(curr); int oldLen = curr.length; mSegmentSize += oldLen; int newSize = Math.max(roomNeeded, calcNewSize(oldLen)); curr = new char[newSize]; mCurrentSize = 0; mCurrentSegment = curr; } private char[] buildResultArray() { if (mResultString != null) { // Can take a shortcut... return mResultString.toCharArray(); } char[] result; // Do we use shared array? if (mInputStart >= 0) { if (mInputLen < 1) { return DataUtil.getEmptyCharArray(); } result = new char[mInputLen]; System.arraycopy(mInputBuffer, mInputStart, result, 0, mInputLen); } else { // nope int size = size(); if (size < 1) { return DataUtil.getEmptyCharArray(); } int offset = 0; result = new char[size]; if (mSegments != null) { for (int i = 0, len = mSegments.size(); i < len; ++i) { char[] curr = (char[]) mSegments.get(i); int currLen = curr.length; System.arraycopy(curr, 0, result, offset, currLen); offset += currLen; } } System.arraycopy(mCurrentSegment, 0, result, offset, mCurrentSize); } return result; } private final static class BufferReader extends Reader { ArrayList _Segments; char[] _CurrentSegment; final int _CurrentLength; int _SegmentIndex; int _SegmentOffset; int _CurrentOffset; public BufferReader(ArrayList segs, char[] currSeg, int currSegLen) { _Segments = segs; _CurrentSegment = currSeg; _CurrentLength = currSegLen; _SegmentIndex = 0; _SegmentOffset = _CurrentOffset = 0; } 
public void close() { _Segments = null; _CurrentSegment = null; } public void mark(int x) throws IOException { throw new IOException("mark() not supported"); } public boolean markSupported() { return false; } public int read(char[] cbuf, int offset, int len) { if (len < 1) { return 0; } int origOffset = offset; // First need to copy stuff from previous segments while (_Segments != null) { char[] curr = (char[]) _Segments.get(_SegmentIndex); int max = curr.length - _SegmentOffset; if (len <= max) { // this is enough System.arraycopy(curr, _SegmentOffset, cbuf, offset, len); _SegmentOffset += len; offset += len; return (offset - origOffset); } // Not enough, but helps... if (max > 0) { System.arraycopy(curr, _SegmentOffset, cbuf, offset, max); offset += max; } if (++_SegmentIndex >= _Segments.size()) { // last one _Segments = null; } else { _SegmentOffset = 0; } } // ok, anything to copy from the active segment? if (len > 0 && _CurrentSegment != null) { int max = _CurrentLength - _CurrentOffset; if (len >= max) { // reading it all len = max; System.arraycopy(_CurrentSegment, _CurrentOffset, cbuf, offset, len); _CurrentSegment = null; } else { System.arraycopy(_CurrentSegment, _CurrentOffset, cbuf, offset, len); _CurrentOffset += len; } offset += len; } return (origOffset == offset) ? -1 : (offset - origOffset); } public boolean ready() { return true; } public void reset() throws IOException { throw new IOException("reset() not supported"); } public long skip(long amount) { /* Note: implementation is almost identical to that of read(); * difference being that no data is copied. */ if (amount < 0) { return 0L; } long origAmount= amount; while (_Segments != null) { char[] curr = (char[]) _Segments.get(_SegmentIndex); int max = curr.length - _SegmentOffset; if (max >= amount) { // this is enough _SegmentOffset += (int) amount; return origAmount; } // Not enough, but helps... 
amount -= max; if (++_SegmentIndex >= _Segments.size()) { // last one _Segments = null; } else { _SegmentOffset = 0; } } // ok, anything left in the active segment? if (amount > 0 && _CurrentSegment != null) { int max = _CurrentLength - _CurrentOffset; if (amount >= max) { // reading it all amount -= max; _CurrentSegment = null; } else { amount = 0L; _CurrentOffset += (int) amount; } } return (amount == origAmount) ? -1L : (origAmount - amount); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy