All downloads are free. Search and download functionality uses the official Maven repository.

com.fasterxml.aalto.in.InputBootstrapper Maven / Gradle / Ivy

There is a newer version: 1.3.3
Show newest version
/* Aalto XML processor
 *
 * Copyright (c) 2006- Tatu Saloranta, tatu.saloranta@iki.fi
 *
 * Licensed under the License specified in the file LICENSE which is
 * included with the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.fasterxml.aalto.in;

import java.io.*;

import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;

import com.fasterxml.aalto.WFCException;
import com.fasterxml.aalto.util.XmlConsts;

/**
 * Abstract base class that defines shared functionality between different
 * bootstrappers (byte stream, char Readers, block input)
 */
public abstract class InputBootstrapper
    implements XmlConsts
{
    /*
    /**********************************************************************
    /* Shared string consts
    /**********************************************************************
     */

    public final static String ERR_XMLDECL_KW_VERSION = "; expected keyword '"+XmlConsts.XML_DECL_KW_VERSION+"'";
    public final static String ERR_XMLDECL_KW_ENCODING = "; expected keyword '"+XmlConsts.XML_DECL_KW_ENCODING+"'";
    public final static String ERR_XMLDECL_KW_STANDALONE = "; expected keyword '"+XmlConsts.XML_DECL_KW_STANDALONE+"'";

    public final static String ERR_XMLDECL_END_MARKER = "; expected \"?>\" end marker";

    public final static String ERR_XMLDECL_EXP_SPACE = "; expected a white space";
    public final static String ERR_XMLDECL_EXP_EQ = "; expected '=' after ";
    public final static String ERR_XMLDECL_EXP_ATTRVAL = "; expected a quote character enclosing value for ";

    /*
    /**********************************************************************
    /* Input location data
    /**********************************************************************
     */

    /**
     * Current number of input units (bytes or chars) that were processed in
     * previous blocks,
     * before contents of current input buffer.
     *

* Note: includes possible BOMs, if those were part of the input. */ protected int _inputProcessed = 0; /** * Current row location of current point in input buffer, using * zero-based counting. */ protected int _inputRow = 0; /** * Current index of the first character of the current row in input * buffer. Needed to calculate column position, if necessary; benefit * of not having column itself is that this only has to be updated * once per line. */ protected int _inputRowStart = 0; /* /********************************************************************** /* Info passed by the caller /********************************************************************** */ final ReaderConfig _config; /* /********************************************************************** /* Info from XML declaration /********************************************************************** */ //boolean mHadDeclaration = false; /** * XML declaration from the input (1.0, 1.1 or 'unknown') */ int mDeclaredXmlVersion = XmlConsts.XML_V_UNKNOWN; /** * Value of encoding pseudo-attribute from xml declaration, if * one was found; null otherwise. */ String mFoundEncoding; String mStandalone; /* /********************************************************************** //* Temporary data /********************************************************************** */ /** * Need a short buffer to read in values of pseudo-attributes (version, * encoding, standalone). Don't really need tons of space; just enough * for the longest anticipated encoding id... 
and maybe few chars just * in case (for additional white space that we ignore) */ final char[] mKeyword; /* /********************************************************************** /* Life-cycle /********************************************************************** */ protected InputBootstrapper(ReaderConfig cfg) { _config = cfg; mKeyword = cfg.allocSmallCBuffer(ReaderConfig.DEFAULT_SMALL_BUFFER_LEN); } /** * Main bootstrapping method, which will try to open the underlying * input source, check its encoding, read xml declaration if * there is one, and finally create a scanner for actual parsing. */ public abstract XmlScanner bootstrap() throws XMLStreamException; /* /********************************************************************** /* Package methods, parsing /********************************************************************** */ /** * Method that will parse xml declaration, which at this point is * known to exist. */ protected void readXmlDeclaration() throws IOException, XMLStreamException { int c = getNextAfterWs(false); // First, version pseudo-attribute: if (c != 'v') { reportUnexpectedChar(c, ERR_XMLDECL_KW_VERSION); } else { // ok, should be version mDeclaredXmlVersion = readXmlVersion(); c = getWsOrChar('?'); } // Then, 'encoding' if (c == 'e') { mFoundEncoding = readXmlEncoding(); c = getWsOrChar('?'); } // Then, 'standalone' (for main doc) if (c == 's') { mStandalone = readXmlStandalone(); c = getWsOrChar('?'); } // And finally, need to have closing markers if (c != '?') { reportUnexpectedChar(c, ERR_XMLDECL_END_MARKER); } c = getNext(); if (c != '>') { reportUnexpectedChar(c, ERR_XMLDECL_END_MARKER); } } /** * @return Xml version declaration read */ private final int readXmlVersion() throws IOException, XMLStreamException { int c = checkKeyword(XmlConsts.XML_DECL_KW_VERSION); if (c != CHAR_NULL) { reportUnexpectedChar(c, XmlConsts.XML_DECL_KW_VERSION); } c = handleEq(XmlConsts.XML_DECL_KW_VERSION); int len = readQuotedValue(mKeyword, c); if (len 
== 3) { if (mKeyword[0] == '1' && mKeyword[1] == '.') { c = mKeyword[2]; if (c == '0') { return XmlConsts.XML_V_10; } if (c == '1') { return XmlConsts.XML_V_11; } } } // Nope; error. -1 indicates run off... String got; if (len < 0) { got = "'"+new String(mKeyword)+"[..]'"; } else if (len == 0) { got = ""; } else { got = "'"+new String(mKeyword, 0, len)+"'"; } reportPseudoAttrProblem(XmlConsts.XML_DECL_KW_VERSION, got, XmlConsts.XML_V_10_STR, XmlConsts.XML_V_11_STR); return XmlConsts.XML_V_UNKNOWN; // never gets here, but compiler needs it } private final String readXmlEncoding() throws IOException, XMLStreamException { int c = checkKeyword(XmlConsts.XML_DECL_KW_ENCODING); if (c != CHAR_NULL) { reportUnexpectedChar(c, XmlConsts.XML_DECL_KW_ENCODING); } c = handleEq(XmlConsts.XML_DECL_KW_ENCODING); int len = readQuotedValue(mKeyword, c); /* Hmmh. How about "too long" encodings? Maybe just truncate them, * for now? */ if (len == 0) { // let's still detect missing value... reportPseudoAttrProblem(XmlConsts.XML_DECL_KW_ENCODING, null, null, null); } if (len < 0) { // will be truncated... return new String(mKeyword); } return new String(mKeyword, 0, len); } private final String readXmlStandalone() throws IOException, XMLStreamException { int c = checkKeyword(XmlConsts.XML_DECL_KW_STANDALONE); if (c != CHAR_NULL) { reportUnexpectedChar(c, XmlConsts.XML_DECL_KW_STANDALONE); } c = handleEq(XmlConsts.XML_DECL_KW_STANDALONE); int len = readQuotedValue(mKeyword, c); if (len == 2) { if (mKeyword[0] == 'n' && mKeyword[1] == 'o') { return XmlConsts.XML_SA_NO; } } else if (len == 3) { if (mKeyword[0] == 'y' && mKeyword[1] == 'e' && mKeyword[2] == 's') { return XmlConsts.XML_SA_YES; } } // Nope; error. -1 indicates run off... 
String got; if (len < 0) { got = "'"+new String(mKeyword)+"[..]'"; } else if (len == 0) { got = ""; } else { got = "'"+new String(mKeyword, 0, len)+"'"; } reportPseudoAttrProblem(XmlConsts.XML_DECL_KW_STANDALONE, got, XmlConsts.XML_SA_YES, XmlConsts.XML_SA_NO); return got; // never gets here, but compiler can't figure it out } private final int handleEq(String attr) throws IOException, XMLStreamException { int c = getNextAfterWs(false); if (c != '=') { reportUnexpectedChar(c, ERR_XMLDECL_EXP_EQ+"'"+attr+"'"); } c = getNextAfterWs(false); if (c != '"' && c != '\'') { reportUnexpectedChar(c, ERR_XMLDECL_EXP_ATTRVAL+"'"+attr+"'"); } return c; } /** * Method that should get next character, which has to be either specified * character (usually end marker), OR, any character as long as there' * at least one space character before it. */ private final int getWsOrChar(int ok) throws IOException, XMLStreamException { int c = getNext(); if (c == ok) { return c; } if (c > XmlConsts.CHAR_SPACE) { reportUnexpectedChar(c, "; expected either '"+((char) ok)+"' or white space"); } if (c == XmlConsts.CHAR_LF || c == XmlConsts.CHAR_CR) { // Need to push it back to be processed properly pushback(); } return getNextAfterWs(false); } /* /********************************************************************** /* Abstract parsing methods for sub-classes to implement /********************************************************************** */ protected abstract void pushback(); protected abstract int getNext() throws IOException, XMLStreamException; protected abstract int getNextAfterWs(boolean reqWs) throws IOException, XMLStreamException; /** * @return First character that does not match expected, if any; * CHAR_NULL if match succeeded */ protected abstract int checkKeyword(String exp) throws IOException, XMLStreamException; protected abstract int readQuotedValue(char[] kw, int quoteChar) throws IOException, XMLStreamException; protected abstract Location getLocation(); /* 
/********************************************************************** /* Error reporting /********************************************************************** */ protected void reportXmlProblem(String msg) throws XMLStreamException { throw new WFCException(msg, getLocation()); } protected void reportNull() throws XMLStreamException { reportXmlProblem("Illegal null byte/char in input stream"); } protected void reportEof() throws XMLStreamException { reportXmlProblem("Unexpected end-of-input in xml declaration"); } protected void reportUnexpectedChar(int i, String msg) throws XMLStreamException { String excMsg; if (Character.isISOControl((char) i)) { excMsg = "Unexpected character (CTRL-CHAR, code "+i+")"+msg; } else { excMsg = "Unexpected character '"+((char) i)+"' (code "+i+")"+msg; } reportXmlProblem(excMsg); } protected final void reportPseudoAttrProblem(String attrName, String got, String expVal1, String expVal2) throws XMLStreamException { String expStr = (expVal1 == null) ? "" : ("; expected \""+expVal1+"\" or \""+expVal2+"\""); if (got == null || got.length() == 0) { reportXmlProblem("Missing XML pseudo-attribute '"+attrName+"' value"+expStr); } reportXmlProblem("Invalid XML pseudo-attribute '"+attrName+"' value "+got+expStr); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy