All downloads are free. The search and download functionality uses the official Maven repository.

com.ctc.wstx.io.ReaderBootstrapper Maven / Gradle / Ivy

Go to download

Woodstox is a high-performance XML processor that implements Stax (JSR-173) and SAX2 APIs

The newest version!
/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in file LICENSE, included with
 * the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.io;

import java.io.*;
import java.text.MessageFormat;

import javax.xml.stream.Location;
import javax.xml.stream.XMLReporter;
import javax.xml.stream.XMLStreamException;

import org.codehaus.stax2.validation.XMLValidationProblem;

import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.cfg.ErrorConsts;
import com.ctc.wstx.cfg.ParsingErrorMsgs;
import com.ctc.wstx.exc.*;
import com.ctc.wstx.util.StringUtil;

/**
 * Input bootstrap class used when input comes from a Reader; in this case,
 * encoding is already known, and thus encoding from XML declaration (if
 * any) is only double-checked, not really used.
 *

* Note: since the actual Reader to use after bootstrapping is pre-constructed, * the local input buffer can (and should) be quite small. */ public final class ReaderBootstrapper extends InputBootstrapper { final static char CHAR_BOM_MARKER = (char) 0xFEFF; /* //////////////////////////////////////// // Configuration //////////////////////////////////////// */ /** * Underlying Reader to use for reading content. */ final Reader mIn; /** * Encoding identifier processing application passed in; if not null, * will be compared to actual xml declaration based encoding (if * declaration found) */ final String mInputEncoding; /* /////////////////////////////////////////////////////////////// // Input buffering /////////////////////////////////////////////////////////////// */ private char[] mCharBuffer; private int mInputPtr; private int mInputEnd; /* //////////////////////////////////////// // Life-cycle //////////////////////////////////////// */ private ReaderBootstrapper(String pubId, SystemId sysId, Reader r, String appEncoding) { super(pubId, sysId); mIn = r; if (appEncoding == null) { // may still be able to figure it out if (r instanceof InputStreamReader) { appEncoding = ((InputStreamReader) r).getEncoding(); } } mInputEncoding = appEncoding; } /* //////////////////////////////////////// // Public API //////////////////////////////////////// */ /** * @param r Eventual reader that will be reading actual content, after * bootstrapping finishes * @param appEncoding Encoding that application declared; may be null. * If not null, will be compared to actual declaration found; and * incompatibility reported as a potential (but not necessarily fatal) * problem. */ public static ReaderBootstrapper getInstance(String pubId, SystemId sysId, Reader r, String appEncoding) { return new ReaderBootstrapper(pubId, sysId, r, appEncoding); } /** * Method called to do actual bootstrapping. 
* * @return Actual reader to use for reading xml content */ public Reader bootstrapInput(ReaderConfig cfg, boolean mainDoc, int xmlVersion) throws IOException, XMLStreamException { /* First order of business: allocate input buffer. Not done during * construction for simplicity; that way config object need not be * passed before actual bootstrap method is called */ /* Let's make sure buffer is at least 6 chars (to know '") */ if (mInputEnd >= 7) { char c = mCharBuffer[mInputPtr]; // BOM to skip? if (c == CHAR_BOM_MARKER) { c = mCharBuffer[++mInputPtr]; } if (c == '<') { if (mCharBuffer[mInputPtr+1] == '?' && mCharBuffer[mInputPtr+2] == 'x' && mCharBuffer[mInputPtr+3] == 'm' && mCharBuffer[mInputPtr+4] == 'l' && mCharBuffer[mInputPtr+5] <= CHAR_SPACE) { // Yup, got the declaration ok! mInputPtr += 6; // skip declaration readXmlDecl(mainDoc, xmlVersion); if (mFoundEncoding != null && mInputEncoding != null) { verifyXmlEncoding(cfg); } } } else { /* We may also get something that would be invalid xml * ("garbage" char; neither '<' nor space). If so, and * it's one of "well-known" cases, we can not only throw * an exception but also indicate a clue as to what is likely * to be wrong. */ /* Specifically, UTF-8 read via, say, ISO-8859-1 reader, can * "leak" marker (0xEF, 0xBB, 0xBF). While we could just eat * it, there's bound to be other problems cropping up, so let's * inform about the problem right away. */ if (c == 0xEF) { throw new WstxIOException("Unexpected first character (char code 0xEF), not valid in xml document: could be mangled UTF-8 BOM marker. Make sure that the Reader uses correct encoding or pass an InputStream instead"); } } } /* Ok, now; do we have unused chars we have read that need to * be merged in? 
*/ if (mInputPtr < mInputEnd) { return new MergedReader(cfg, mIn, mCharBuffer, mInputPtr, mInputEnd); } return mIn; } public String getInputEncoding() { return mInputEncoding; } public int getInputTotal() { return mInputProcessed + mInputPtr; } public int getInputColumn() { return (mInputPtr - mInputRowStart); } /* //////////////////////////////////////// // Internal methods, parsing //////////////////////////////////////// */ protected void verifyXmlEncoding(ReaderConfig cfg) throws XMLStreamException { String inputEnc = mInputEncoding; // Close enough? if (StringUtil.equalEncodings(inputEnc, mFoundEncoding)) { return; } /* Ok, maybe the difference is just with endianness indicator? * (UTF-16BE vs. UTF-16)? */ // !!! TBI XMLReporter rep = cfg.getXMLReporter(); if (rep != null) { Location loc = getLocation(); String msg = MessageFormat.format(ErrorConsts.W_MIXED_ENCODINGS, new Object[] { mFoundEncoding, inputEnc }); String type = ErrorConsts.WT_XML_DECL; /* 30-May-2008, tatus: Should wrap all the info as XMValidationProblem * since that's Woodstox' contract wrt. relatedInformation field. */ XMLValidationProblem prob = new XMLValidationProblem(loc, msg, XMLValidationProblem.SEVERITY_WARNING, type); rep.report(msg, type, prob, loc); } } /* ///////////////////////////////////////////////////// // Internal methods, loading input data ///////////////////////////////////////////////////// */ protected boolean initialLoad(int minimum) throws IOException { mInputPtr = 0; mInputEnd = 0; while (mInputEnd < minimum) { int count = mIn.read(mCharBuffer, mInputEnd, mCharBuffer.length - mInputEnd); if (count < 1) { return false; } mInputEnd += count; } return true; } protected void loadMore() throws IOException, WstxException { /* Need to make sure offsets are properly updated for error * reporting purposes, and do this now while previous amounts * are still known. 
*/ mInputProcessed += mInputEnd; mInputRowStart -= mInputEnd; mInputPtr = 0; mInputEnd = mIn.read(mCharBuffer, 0, mCharBuffer.length); if (mInputEnd < 1) { throw new WstxEOFException(ParsingErrorMsgs.SUFFIX_IN_XML_DECL, getLocation()); } } /* ///////////////////////////////////////////////////// // Implementations of abstract parsing methods ///////////////////////////////////////////////////// */ protected void pushback() { --mInputPtr; } protected int getNext() throws IOException, WstxException { return (mInputPtr < mInputEnd) ? mCharBuffer[mInputPtr++] : nextChar(); } protected int getNextAfterWs(boolean reqWs) throws IOException, WstxException { int count = 0; while (true) { char c = (mInputPtr < mInputEnd) ? mCharBuffer[mInputPtr++] : nextChar(); if (c > CHAR_SPACE) { if (reqWs && count == 0) { reportUnexpectedChar(c, ERR_XMLDECL_EXP_SPACE); } return c; } if (c == CHAR_CR || c == CHAR_LF) { skipCRLF(c); } else if (c == CHAR_NULL) { reportNull(); } ++count; } } /** * @return First character that does not match expected, if any; * CHAR_NULL if match succeeded */ protected int checkKeyword(String exp) throws IOException, WstxException { int len = exp.length(); for (int ptr = 1; ptr < len; ++ptr) { char c = (mInputPtr < mInputEnd) ? mCharBuffer[mInputPtr++] : nextChar(); if (c != exp.charAt(ptr)) { return c; } if (c == CHAR_NULL) { reportNull(); } } return CHAR_NULL; } protected int readQuotedValue(char[] kw, int quoteChar) throws IOException, WstxException { int i = 0; int len = kw.length; while (true) { char c = (mInputPtr < mInputEnd) ? mCharBuffer[mInputPtr++] : nextChar(); if (c == CHAR_CR || c == CHAR_LF) { skipCRLF(c); } else if (c == CHAR_NULL) { reportNull(); } if (c == quoteChar) { return (i < len) ? 
i : -1; } // Let's just truncate longer values, but match quote if (i < len) { kw[i++] = c; } } } protected Location getLocation() { return new WstxInputLocation(null, mPublicId, mSystemId, mInputProcessed + mInputPtr - 1, mInputRow, mInputPtr - mInputRowStart); } /* ///////////////////////////////////////////////////// // Internal methods, single-byte access methods ///////////////////////////////////////////////////// */ protected char nextChar() throws IOException, WstxException { if (mInputPtr >= mInputEnd) { loadMore(); } return mCharBuffer[mInputPtr++]; } protected void skipCRLF(char lf) throws IOException, WstxException { if (lf == CHAR_CR) { char c = (mInputPtr < mInputEnd) ? mCharBuffer[mInputPtr++] : nextChar(); if (c != BYTE_LF) { --mInputPtr; // pushback if not 2-char/byte lf } } ++mInputRow; mInputRowStart = mInputPtr; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy