All downloads are free. Search and download functionality uses the official Maven repository.

org.xmlpull.mxp1.MXParserNonValidating Maven / Gradle / Ivy

Go to download

XML Pull parser library developed by Extreme Computing Lab, Indiana University

The newest version!
/* -*-             c-basic-offset: 4; indent-tabs-mode: nil; -*-  //------100-columns-wide------>|*/
/*
 * Copyright (c) 2002-2004 Extreme! Lab, Indiana University. All rights reserved.
 *
 * This software is open source. See the bottom of this file for the licence.
 *
 * $Id: MXParserNonValidating.java,v 1.6 2004/03/02 09:14:41 aslom Exp $
 */

package org.xmlpull.mxp1;

import java.io.IOException;
import org.xmlpull.v1.XmlPullParserException;

/**
 * Extends the MXP parser to be a full non-validating XML 1.0 parser
 * (adds internal DTD parsing and support for the full XML 1.0 (or 1.1) character classes).
 *
 * @author Aleksander Slominski
 */

public class MXParserNonValidating extends MXParserCachingStrings
{
    private boolean processDocDecl;

    public MXParserNonValidating() {
        super();
    }

    /**
     * Sets a parser feature. {@code FEATURE_PROCESS_DOCDECL} is handled here and stored in
     * {@code processDocDecl}; every other feature name is handed to the superclass unchanged.
     *
     * @param name  feature identifier
     * @param state requested value of the feature
     * @throws XmlPullParserException if the DOCDECL feature is changed while {@code eventType}
     *         is no longer {@code START_DOCUMENT}, or if the superclass rejects another feature
     */
    public void setFeature(String name,
                           boolean state) throws XmlPullParserException
    {
        // anything that is not the DOCDECL switch is none of our business
        if(!FEATURE_PROCESS_DOCDECL.equals(name)) {
            super.setFeature(name, state);
            return;
        }

        // the switch may only be flipped before the first event has been produced
        if(eventType != START_DOCUMENT) {
            throw new XmlPullParserException(
                "process DOCDECL feature can only be changed before parsing", this, null);
        }
        processDocDecl = state;
    }

    /**
     * Reports the state of a parser feature.
     *
     * @param name feature identifier
     * @return the locally stored {@code processDocDecl} flag for
     *         {@code FEATURE_PROCESS_DOCDECL}, otherwise whatever the superclass reports
     */
    public boolean getFeature(String name)
    {
        return FEATURE_PROCESS_DOCDECL.equals(name)
            ? processDocDecl
            : super.getFeature(name);
    }


    // will need to override more() and processEntityRef ...
    /**
     * Returns the next input character. At present this is a pure pass-through to the
     * superclass implementation and adds no behaviour of its own.
     *
     * @return the character produced by {@code super.more()}
     * @throws IOException            propagated from the superclass
     * @throws XmlPullParserException propagated from the superclass
     */
    protected char more() throws IOException, XmlPullParserException {
        final char next = super.more();
        return next;
    }

    /**
     * Finds the replacement text of the entity whose name currently sits in
     * {@code buf} starting at {@code posStart}.
     * <p>
     * The entity tables are scanned from index {@code entityEnd - 1} down to 0, so the
     * highest-indexed matching entry wins. When {@code tokenize} is set, {@code text} is
     * updated to the matching {@code entityReplacement} entry as a side effect.
     *
     * @param entitNameLen number of characters of the name to compare
     * @return the matching {@code entityReplacementBuf} entry, or {@code null} if no entity
     *         with that name is registered
     */
    protected char[] lookuEntityReplacement(int entitNameLen)
        throws XmlPullParserException, IOException

    {
        if(allStringsInterned) {
            // names are interned, so an identity comparison is deliberately used below
            entityRefName = newString(buf, posStart, posEnd - posStart);
            int idx = entityEnd;
            while(--idx >= 0) {
                if(entityRefName == entityName[ idx ]) {
                    if(tokenize) {
                        text = entityReplacement[ idx ];
                    }
                    return entityReplacementBuf[ idx ];
                }
            }
            return null;
        }

        // no interning: pre-filter on hash and length, then compare character by character
        final int nameHash = fastHash(buf, posStart, posEnd - posStart);
        for (int idx = entityEnd - 1; idx >= 0; --idx) {
            if(nameHash != entityNameHash[ idx ]) {
                continue;
            }
            final char[] candidate = entityNameBuf[ idx ];
            if(candidate.length != entitNameLen) {
                continue;
            }
            boolean same = true;
            for (int k = 0; same && k < entitNameLen; k++) {
                same = buf[posStart + k] == candidate[k];
            }
            if(same) {
                if(tokenize) {
                    text = entityReplacement[ idx ];
                }
                return entityReplacementBuf[ idx ];
            }
        }
        return null;
    }


    /**
     * Parses the part of a document type declaration that follows the DOCTYPE keyword:
     * white space, the root element name, an optional ExternalID, an optional internal
     * subset in square brackets and the closing {@code '>'}.
     * <p>
     * On success {@code posEnd} is set to {@code pos - 1}. The {@code tokenize} flag is
     * restored to the value it had on entry, whether or not parsing succeeds.
     *
     * @throws XmlPullParserException if the declaration is not closed by {@code '>'};
     *         the helper methods called here may also throw
     * @throws IOException propagated from reading the input
     */
    protected void parseDocdecl()
        throws XmlPullParserException, IOException
    {
        // The flag is saved and restored so that code run while reading the DTD cannot
        // leave it changed.
        // NOTE(review): nothing visible in this method actually clears tokenize; confirm
        // whether it should be set to false for the duration of the declaration.
        final boolean oldTokenize = tokenize;
        try {
            // ASSUMPTION: the "<!DOCTYPE" keyword itself was consumed before this point,
            // since the next step demands white space followed by a Name.
            // [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
            //                      ('[' (markupdecl | DeclSep)* ']' S?)? '>'
            char ch = requireNextS();
            ch = readName(ch);   // root element name; yields the first character after it
            ch = skipS(ch);
            // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
            if(ch == 'S' || ch == 'P') {
                ch = processExternalId(ch);
                ch = skipS(ch);
            }
            if(ch == '[') {
                processInternalSubset();
                // The subset reader works on its own local character and returns right
                // after consuming ']', so fetch the following character here. Without
                // this the stale '[' would be tested against '>' and always fail.
                ch = more();
            }
            ch = skipS(ch);
            if(ch != '>') {
                throw new XmlPullParserException(
                    "expected > to finish <!DOCTYPE but got "+printable(ch), this, null);
            }
            posEnd = pos - 1;
        } finally {
            tokenize = oldTokenize;
        }
    }
    /**
     * Consumes an ExternalID and returns the first character that follows it.
     * <p>
     * The literals are only scanned, never resolved or fetched. If {@code ch} is neither
     * {@code 'S'} nor {@code 'P'} nothing is consumed and {@code ch} is returned unchanged.
     *
     * @param ch the character already read by the caller; {@code 'S'} starts a SYSTEM
     *           identifier and {@code 'P'} a PUBLIC identifier
     * @return the character following the closing quote of the system literal
     * @throws XmlPullParserException if the keyword is misspelt, a literal does not start
     *         with a quote, or a public identifier contains a character outside PubidChar
     * @throws IOException propagated from reading the input
     */
    protected char processExternalId(char ch)
        throws XmlPullParserException, IOException
    {
        // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
        // [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
        // [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
        // [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]

        final boolean publicId;
        if(ch == 'S') {
            publicId = false;
            requireExternalIdKeyword("SYSTEM");
        } else if(ch == 'P') {
            publicId = true;
            requireExternalIdKeyword("PUBLIC");
        } else {
            return ch;   // no ExternalID here; leave the input untouched
        }

        // presumably requireNextS() insists on white space and returns the first character
        // after it, as its use before readName() in parseDocdecl() suggests
        ch = requireNextS();
        if(publicId) {
            skipExternalIdLiteral(ch, true);
            ch = requireNextS();
        }
        skipExternalIdLiteral(ch, false);
        return more();
    }

    /** Checks that the input continues with the rest of keyword; its first letter was already read. */
    private void requireExternalIdKeyword(String keyword)
        throws XmlPullParserException, IOException
    {
        for (int i = 1; i < keyword.length(); i++) {
            final char c = more();
            if(c != keyword.charAt(i)) {
                throw new XmlPullParserException(
                    "expected "+keyword+" in ExternalID but got "+printable(c), this, null);
            }
        }
    }

    /** Scans a quoted literal opened by quote up to its closing quote, validating PubidChar if asked. */
    private void skipExternalIdLiteral(char quote, boolean pubid)
        throws XmlPullParserException, IOException
    {
        if(quote != '"' && quote != '\'') {
            throw new XmlPullParserException(
                "expected quoted literal in ExternalID but got "+printable(quote), this, null);
        }
        while(true) {
            // an unterminated literal ends only when more() itself reports end of input
            final char c = more();
            if(c == quote) {
                return;   // testing the quote first also covers (PubidChar - "'")
            }
            if(pubid && !isPubidChar(c)) {
                throw new XmlPullParserException(
                    "illegal character in public identifier "+printable(c), this, null);
            }
        }
    }

    /** Tells whether c matches production [13] PubidChar. */
    private static boolean isPubidChar(char c)
    {
        return c == ' ' || c == '\r' || c == '\n'
            || (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || "-'()+,./:=?;!*#@$_%".indexOf(c) >= 0;
    }

    /**
     * Reads the internal DTD subset up to and including its closing {@code ']'}.
     * <p>
     * White space between declarations is skipped, {@code '%'} is handed to
     * {@link #processPEReference()} and any other character to
     * {@link #processMarkupDecl(char)}.
     *
     * @throws XmlPullParserException propagated from the declaration handlers
     * @throws IOException propagated from reading the input
     */
    protected void processInternalSubset()
        throws XmlPullParserException, IOException
    {
        // [28]  ... (markupdecl | DeclSep)* ']'  // [WFC: External Subset]
        // [28a] DeclSep ::= PEReference | S       // [WFC: PE Between Declarations]

        // [69] PEReference ::= '%' Name ';'  //[WFC: No Recursion]  [WFC: In DTD]

        // presumably the caller has already consumed the opening '[', so this reads the
        // first character of the subset itself
        char ch = more();
        while(ch != ']') {
            if(isS(ch)) {
                // skipS() hands back the first non-white-space character, which is already
                // consumed; dispatch on it directly instead of reading another character,
                // otherwise the start of the next declaration (or the ']') would be lost
                ch = skipS(ch);
                continue;
            }
            if(ch == '%') {
                processPEReference();
            } else {
                processMarkupDecl(ch);
            }
            ch = more();
        }
    }

    /**
     * Placeholder for handling a parameter-entity reference inside the internal subset.
     * It is invoked by {@code processInternalSubset()} after a {@code '%'} has been read.
     * The body is empty: no input is consumed and nothing is expanded.
     */
    protected void processPEReference()
        throws XmlPullParserException, IOException
    {
        //TODO not implemented; [69] PEReference ::= '%' Name ';'
    }
    protected void processMarkupDecl(char ch)
        throws XmlPullParserException, IOException
    {
        // [29]  markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
        //                                                     [WFC: PEs in Internal Subset]


        //BIG SWITCH statement
        if(ch != '<') {
            throw new XmlPullParserException("expected < for markupdecl in DTD not "+printable(ch),
                                             this, null);
        }
        ch = more();
        if(ch == '?') {
            parsePI();
        } else if(ch == '!') {
            ch = more();
            if(ch == '-') {
                // note: if(tokenize == false) posStart/End is NOT changed!!!!
                parseComment();
            } else {
                ch = more();
                if(ch == 'A') {
                    processAttlistDecl(ch);  //A-TTLIST
                } else if(ch == 'E') {
                    ch = more();
                    if(ch == 'L') {
                        processElementDecl(ch);  //EL-EMENT
                    } else if(ch == 'N') {
                        processEntityDecl(ch);  // EN-TITY
                    } else {
                        throw new XmlPullParserException(
                            "expected ELEMENT or ENTITY after '
        //???? [VC: Unique Element Type Declaration]
        // [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
        // [47] children ::= (choice | seq) ('?' | '*' | '+')?
        // [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
        // [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
        // [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
        // [51] Mixed ::=  '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
        //             | '(' S? '#PCDATA' S? ')'

        //assert ch == 'L'
        ch = requireNextS();
        readName(ch);
        ch = requireNextS();
        // readContentSpec(ch);
    }

    /**
     * Placeholder for parsing an attribute-list declaration. It is invoked from
     * {@code processMarkupDecl} when the character {@code 'A'} has been seen.
     * The body is empty: no input is consumed.
     *
     * @param ch the character read by the caller (expected to be {@code 'A'})
     */
    protected void processAttlistDecl(char ch)
        throws XmlPullParserException, IOException
    {
        // [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
        // [53] AttDef ::= S Name S AttType S DefaultDecl
        // [54] AttType ::= StringType | TokenizedType | EnumeratedType
        // [55] StringType ::= 'CDATA'
        // [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN'
        //                        | 'NMTOKENS'
        // [57] EnumeratedType ::= NotationType | Enumeration
        // [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
        // [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
        // [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
        //                     [WFC: No < in Attribute Values]

        //assert ch == 'A'

    }


    /**
     * Placeholder for parsing an entity declaration. It is invoked from
     * {@code processMarkupDecl} once the characters {@code 'E'} and {@code 'N'} have been
     * seen. The body is empty: no input is consumed and no entity is registered.
     *
     * @param ch the character read by the caller (expected to be {@code 'N'})
     */
    protected void processEntityDecl(char ch)
        throws XmlPullParserException, IOException
    {

        // [70] EntityDecl ::= GEDecl | PEDecl
        // [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
        // [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
        // [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
        // [74] PEDef ::= EntityValue | ExternalID
        // [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral

        //[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
        //                     |  "'" ([^%&'] | PEReference | Reference)* "'"

        //assert ch == 'N'

    }

    /**
     * Placeholder for parsing a notation declaration.
     * The body is empty: no input is consumed.
     *
     * @param ch the character read by the caller (expected to be {@code 'N'})
     */
    protected void processNotationDecl(char ch)
        throws XmlPullParserException, IOException
    {

        // [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
        // [83] PublicID ::= 'PUBLIC' S PubidLiteral

        //assert ch == 'N'
    }



    /**
     * Reads an XML Name whose first character has already been fetched.
     *
     * @param ch the first character of the name
     * @return the first character that is not part of the name
     * @throws XmlPullParserException if {@code ch} is not a valid name start character
     * @throws IOException propagated from reading the input
     */
    protected char readName(char ch)
        throws XmlPullParserException, IOException
    {
        // reject anything that can NOT begin a name; the earlier test was inverted and
        // threw on every well-formed name
        if(!isNameStartChar(ch)) {
            throw new XmlPullParserException(
                "XML name must start with name start character not "+printable(ch), this, null);
        }
        while(isNameChar(ch)) {
            ch = more();
        }
        return ch;
    }
}


/*
 * Indiana University Extreme! Lab Software License, Version 1.2
 *
 * Copyright (c) 2002-2004 The Trustees of Indiana University.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * 1) All redistributions of source code must retain the above
 *    copyright notice, the list of authors in the original source
 *    code, this list of conditions and the disclaimer listed in this
 *    license;
 *
 * 2) All redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the disclaimer
 *    listed in this license in the documentation and/or other
 *    materials provided with the distribution;
 *
 * 3) Any documentation included with all redistributions must include
 *    the following acknowledgement:
 *
 *      "This product includes software developed by the Indiana
 *      University Extreme! Lab.  For further information please visit
 *      http://www.extreme.indiana.edu/"
 *
 *    Alternatively, this acknowledgment may appear in the software
 *    itself, and wherever such third-party acknowledgments normally
 *    appear.
 *
 * 4) The name "Indiana University" or "Indiana University
 *    Extreme! Lab" shall not be used to endorse or promote
 *    products derived from this software without prior written
 *    permission from Indiana University.  For written permission,
 *    please contact http://www.extreme.indiana.edu/.
 *
 * 5) Products derived from this software may not use "Indiana
 *    University" name nor may "Indiana University" appear in their name,
 *    without prior written permission of the Indiana University.
 *
 * Indiana University provides no reassurances that the source code
 * provided does not infringe the patent or any other intellectual
 * property rights of any other entity.  Indiana University disclaims any
 * liability to any recipient for claims brought by any other entity
 * based on infringement of intellectual property rights or otherwise.
 *
 * LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH
 * NO WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA
 * UNIVERSITY GIVES NO WARRANTIES AND MAKES NO REPRESENTATION THAT
 * SOFTWARE IS FREE OF INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR
 * OTHER PROPRIETARY RIGHTS.  INDIANA UNIVERSITY MAKES NO WARRANTIES THAT
 * SOFTWARE IS FREE FROM "BUGS", "VIRUSES", "TROJAN HORSES", "TRAP
 * DOORS", "WORMS", OR OTHER HARMFUL CODE.  LICENSEE ASSUMES THE ENTIRE
 * RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR ASSOCIATED MATERIALS,
 * AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION GENERATED USING
 * SOFTWARE.
 */





© 2015 - 2024 Weber Informatics LLC | Privacy Policy