All downloads are free. The search and download functionality uses the official Maven repository.

com.ctc.wstx.dtd.MinimalDTDReader Maven / Gradle / Ivy

There is a newer version: 0.3.0
Show newest version
/* Woodstox XML processor
 *
 * Copyright (c) 2004- Tatu Saloranta, [email protected]
 *
 * Licensed under the License specified in file LICENSE, included with
 * the source code.
 * You may not use this file except in compliance with the License.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.ctc.wstx.dtd;

import javax.xml.stream.Location;
import javax.xml.stream.XMLStreamException;

import com.ctc.wstx.api.ReaderConfig;
import com.ctc.wstx.ent.EntityDecl;
import com.ctc.wstx.io.WstxInputData;
import com.ctc.wstx.io.WstxInputSource;
import com.ctc.wstx.sr.StreamScanner;

/**
 * Minimal DTD reader implementation that only knows how to skip
 * internal DTD subsets.
 */
public class MinimalDTDReader
    extends StreamScanner
{
    /*
    //////////////////////////////////////////////////
    // Configuration
    //////////////////////////////////////////////////
     */

    /**
     * True, when reading external subset, false when reading internal
     * subset.
     */
    final boolean mIsExternal;

    /*
    //////////////////////////////////////////////////
    // Life-cycle
    //////////////////////////////////////////////////
     */

    /**
     * Constructor used for reading/skipping internal subset.
     */
    private MinimalDTDReader(WstxInputSource input, ReaderConfig cfg)
    {
        this(input, cfg, false);
    }

    /**
     * Common initialization part of int/ext subset constructors.
     */
    protected MinimalDTDReader(WstxInputSource input, ReaderConfig cfg,
                               boolean isExt)
    {
        super(input, cfg, cfg.getDtdResolver());
        mIsExternal = isExt;
        /* And let's force expansion (matters mostly/only for undefined
         * entities)
         */
        mCfgReplaceEntities = true;
    }

    /**
     * Method that just skims
     * through structure of internal subset, but without doing any sort
     * of validation, or parsing of contents. Method may still throw an
     * exception, if skipping causes EOF or there's an I/O problem.
     *
     * @param srcData Link back to the input buffer shared with the owning
     *    stream reader.
     */
    public static void skipInternalSubset(WstxInputData srcData, WstxInputSource input,
                                          ReaderConfig cfg)
        throws XMLStreamException
    {
        MinimalDTDReader r = new MinimalDTDReader(input, cfg);
        // Need to read from same source as the master (owning stream reader)
        r.copyBufferStateFrom(srcData);
        try {
            r.skipInternalSubset();
        } finally {
            /* And then need to restore changes back to srcData (line nrs etc);
             * effectively means that we'll stop reading internal DTD subset,
             * if so.
             */
            srcData.copyBufferStateFrom(r);
        }
    }

    /*
    //////////////////////////////////////////////////
    // Abstract methods from StreamScanner
    //////////////////////////////////////////////////
     */

    /**
     * What DTD reader returns doesn't really matter, so let's just return
     * perceived start location (different from what stream readers actually
     * do)
     */
    public final Location getLocation()
    {
        return getStartLocation();
    }

    // @Override
    protected EntityDecl findEntity(String id, Object arg) {
        throwIllegalCall();
        return null; // never gets here but javac needs it
    }

    /**
     * This is a VC, not WFC, nothing to do when skipping through
     * DTD in non-supporting mode.
     */
    protected void handleUndeclaredEntity(String id)
        throws XMLStreamException
    {
        // nothing to do...
    }

    /**
     * Since improper entity/PE nesting is VC, not WFC, let's not
     * react to this failure at all when only skipping the DTD subset.
     */
    protected void handleIncompleteEntityProblem(WstxInputSource closing)
        throws XMLStreamException
    {
        // nothing to do...
    }

    protected char handleExpandedSurrogate(char first, char second)
    {
        // should we throw an exception?
        return first;
    }

    /*
    //////////////////////////////////////////////////
    // Internal API
    //////////////////////////////////////////////////
     */

    /**
     * Method that may need to be called by attribute default value
     * validation code, during parsing....
     *

* 03-Dec-2004, TSa: This is not particularly elegant: should be * able to pass the information some other way. But for now it * works and is necessary. */ public EntityDecl findEntity(String entName) { return null; } /* ////////////////////////////////////////////////// // Main-level skipping method(s) ////////////////////////////////////////////////// */ /** * Method that will skip through internal DTD subset, without doing * any parsing, except for trying to match end of subset properly. */ protected void skipInternalSubset() throws XMLStreamException { while (true) { int i = getNextAfterWS(); if (i < 0) { // Error for internal subset throwUnexpectedEOF(SUFFIX_IN_DTD_INTERNAL); } if (i == '%') { // parameter entity skipPE(); continue; } if (i == '<') { /* Let's determine type here, and call appropriate skip * methods. */ char c = getNextSkippingPEs(); if (c == '?') { // xml decl? /* Not sure if PIs are really allowed in DTDs, but let's * just allow them until proven otherwise. XML declaration * is legal in the beginning, anyhow */ skipPI(); } else if (c == '!') { // ignore/include, comment, declaration? c = getNextSkippingPEs(); if (c == '[') { /* Shouldn't really get these, as they are not allowed * in the internal subset? So let's just leave it * as is, and see what happens. :-) */ ; } else if (c == '-') { // plain comment skipComment(); } else if (c >= 'A' && c <= 'Z') { skipDeclaration(c); } else { /* Hmmh, let's not care too much; but we need to try * to match the closing gt-char nonetheless? */ skipDeclaration(c); } } else { /* Shouldn't fail (since we are to completely ignore * subset); let's just push it back and continue. */ --mInputPtr; } continue; } if (i == ']') { // Int. subset has no conditional sections, has to be the end... /* 18-Jul-2004, TSa: Let's just make sure it happened * in the main input source, not at external entity... */ if (mInput != mRootInput) { throwParseError("Encountered int. 
subset end marker ']]>' in an expanded entity; has to be at main level."); } // End of internal subset break; } throwUnexpectedChar(i, SUFFIX_IN_DTD_INTERNAL+"; expected a '<' to start a directive, or \"]>\" to end internal subset."); } } /* ////////////////////////////////////////////////// // Internal methods, input access: ////////////////////////////////////////////////// */ protected char dtdNextFromCurr() throws XMLStreamException { return (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextCharFromCurrent(getErrorMsg()); } protected char dtdNextChar() throws XMLStreamException { return (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextChar(getErrorMsg()); } protected char getNextSkippingPEs() throws XMLStreamException { while (true) { char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : getNextChar(getErrorMsg()); if (c != '%') { return c; } skipPE(); } } /* ////////////////////////////////////////////////// // Internal methods, skipping: ////////////////////////////////////////////////// */ private void skipPE() throws XMLStreamException { skipDTDName(); /* Should now get semicolon... let's try to find and skip it; but * if none found, let's not throw an exception -- we are just skipping * internal subset here. */ char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : dtdNextFromCurr(); if (c != ';') { --mInputPtr; } } protected void skipComment() throws XMLStreamException { skipCommentContent(); // Now, we may be getting end mark; first need second marker char:. char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : dtdNextFromCurr(); if (c != '>') { throwParseError("String '--' not allowed in comment (missing '>'?)"); } } protected void skipCommentContent() throws XMLStreamException { while (true) { char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : dtdNextFromCurr(); if (c == '-') { c = (mInputPtr < mInputEnd) ? 
mInputBuffer[mInputPtr++] : dtdNextFromCurr(); if (c == '-') { return; } } else if (c == '\n' || c == '\r') { skipCRLF(c); } } } protected void skipPI() throws XMLStreamException { while (true) { char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : dtdNextFromCurr(); if (c == '?') { do { c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : dtdNextFromCurr(); } while (c == '?'); if (c == '>') { break; } } if (c == '\n' || c == '\r') { skipCRLF(c); } } } private void skipDeclaration(char c) throws XMLStreamException { while (c != '>') { c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : dtdNextFromCurr(); if (c == '\n' || c == '\r') { skipCRLF(c); /* No need for specific handling for PE refs; they just have * identifier that'll get properly skipped. */ /* 17-Jul-2004, TSa: But we do need to properly handle literals; * it is possible to add '>' char in entity expansion values. */ } else if (c == '\'' || c == '"') { skipLiteral(c); } } } private void skipLiteral(char quoteChar) throws XMLStreamException { while (true) { char c = (mInputPtr < mInputEnd) ? mInputBuffer[mInputPtr++] : dtdNextFromCurr(); if (c == '\n' || c == '\r') { skipCRLF(c); } else if (c == quoteChar) { break; } /* No need for specific handling for PE refs, should be ignored * just ok (plus they need to properly nested in any case) */ } } private void skipDTDName() throws XMLStreamException { /*int len =*/ skipFullName(getNextChar(getErrorMsg())); /* Should we give an error about missing name? For now, * let's just exit. */ } /* ////////////////////////////////////////////////// // Internal methods, error handling: ////////////////////////////////////////////////// */ protected String getErrorMsg() { return mIsExternal ? SUFFIX_IN_DTD_EXTERNAL : SUFFIX_IN_DTD_INTERNAL; } protected void throwIllegalCall() throws Error { throw new IllegalStateException("Internal error: this method should never be called"); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy