org.xmlpull.mxp1.MXParserNonValidating Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xpp3 Show documentation
Show all versions of xpp3 Show documentation
XML Pull parser library developed by Extreme Computing Lab, Indiana University
The newest version!
/* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/
/*
* Copyright (c) 2002-2004 Extreme! Lab, Indiana University. All rights reserved.
*
* This software is open source. See the bottom of this file for the licence.
*
* $Id: MXParserNonValidating.java,v 1.6 2004/03/02 09:14:41 aslom Exp $
*/
package org.xmlpull.mxp1;
import java.io.IOException;
import org.xmlpull.v1.XmlPullParserException;
/**
* Extends the MXP parser to be a full non-validating XML 1.0 parser
* (adds internal DTD parsing and support for the full XML 1.0 (or 1.1) character classes).
*
* @author Aleksander Slominski
*/
public class MXParserNonValidating extends MXParserCachingStrings
{
private boolean processDocDecl;
public MXParserNonValidating() {
super();
}
/**
* This allows to change processing DOCDECL (controls if parser is non-validating).
*/
public void setFeature(String name,
boolean state) throws XmlPullParserException
{
if(FEATURE_PROCESS_DOCDECL.equals(name)) {
if(eventType != START_DOCUMENT) throw new XmlPullParserException(
"process DOCDECL feature can only be changed before parsing", this, null);
processDocDecl = state;
if(state == false) {
//
}
} else {
super.setFeature(name, state);
}
}
public boolean getFeature(String name)
{
if(FEATURE_PROCESS_DOCDECL.equals(name)) {
return processDocDecl;
} else {
return super.getFeature(name);
}
}
// will need to overwrite more() and processEntityRef ...
protected char more() throws IOException, XmlPullParserException {
return super.more();
}
protected char[] lookuEntityReplacement(int entitNameLen)
throws XmlPullParserException, IOException
{
if(!allStringsInterned) {
final int hash = fastHash(buf, posStart, posEnd - posStart);
LOOP:
for (int i = entityEnd - 1; i >= 0; --i)
{
if(hash == entityNameHash[ i ] && entitNameLen == entityNameBuf[ i ].length) {
final char[] entityBuf = entityNameBuf[ i ];
for (int j = 0; j < entitNameLen; j++)
{
if(buf[posStart + j] != entityBuf[j]) continue LOOP;
}
if(tokenize) text = entityReplacement[ i ];
return entityReplacementBuf[ i ];
}
}
} else {
entityRefName = newString(buf, posStart, posEnd - posStart);
for (int i = entityEnd - 1; i >= 0; --i)
{
// take advantage that interning for newStirng is enforced
if(entityRefName == entityName[ i ]) {
if(tokenize) text = entityReplacement[ i ];
return entityReplacementBuf[ i ];
}
}
}
return null;
}
protected void parseDocdecl()
throws XmlPullParserException, IOException
{
//make sure that tokenize flag is disabled temporarily!!!!
final boolean oldTokenize = tokenize;
try {
//ASSUMPTION: seen '
ch = requireNextS();
int nameStart = pos;
ch = readName(ch);
int nameEnd = pos;
ch = skipS(ch);
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
if(ch == 'S' || ch == 'P') {
ch = processExternalId(ch);
ch = skipS(ch);
}
if(ch == '[') {
processInternalSubset();
}
ch = skipS(ch);
if(ch != '>') {
throw new XmlPullParserException(
"expected > to finish <[DOCTYPE but got "+printable(ch), this, null);
}
posEnd = pos - 1;
} finally {
tokenize = oldTokenize;
}
}
protected char processExternalId(char ch)
throws XmlPullParserException, IOException
{
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
// [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
// [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
// [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
//TODO
return ch;
}
protected void processInternalSubset()
throws XmlPullParserException, IOException
{
// [28] ... (markupdecl | DeclSep)* ']' // [WFC: External Subset]
// [28a] DeclSep ::= PEReference | S // [WFC: PE Between Declarations]
// [69] PEReference ::= '%' Name ';' //[WFC: No Recursion] [WFC: In DTD]
while(true) {
char ch = more(); // firs ttime called it will skip initial "["
if(ch == ']') break;
if(ch == '%') {
processPEReference();
} else if(isS(ch)) {
ch = skipS(ch);
} else {
processMarkupDecl(ch);
}
}
}
protected void processPEReference()
throws XmlPullParserException, IOException
{
//TODO
}
protected void processMarkupDecl(char ch)
throws XmlPullParserException, IOException
{
// [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl | NotationDecl | PI | Comment
// [WFC: PEs in Internal Subset]
//BIG SWITCH statement
if(ch != '<') {
throw new XmlPullParserException("expected < for markupdecl in DTD not "+printable(ch),
this, null);
}
ch = more();
if(ch == '?') {
parsePI();
} else if(ch == '!') {
ch = more();
if(ch == '-') {
// note: if(tokenize == false) posStart/End is NOT changed!!!!
parseComment();
} else {
ch = more();
if(ch == 'A') {
processAttlistDecl(ch); //A-TTLIST
} else if(ch == 'E') {
ch = more();
if(ch == 'L') {
processElementDecl(ch); //EL-EMENT
} else if(ch == 'N') {
processEntityDecl(ch); // EN-TITY
} else {
throw new XmlPullParserException(
"expected ELEMENT or ENTITY after '
//???? [VC: Unique Element Type Declaration]
// [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
// [47] children ::= (choice | seq) ('?' | '*' | '+')?
// [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
// [49] choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
// [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
// [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
// | '(' S? '#PCDATA' S? ')'
//assert ch == 'L'
ch = requireNextS();
readName(ch);
ch = requireNextS();
// readContentSpec(ch);
}
protected void processAttlistDecl(char ch)
throws XmlPullParserException, IOException
{
// [52] AttlistDecl ::= ''
// [53] AttDef ::= S Name S AttType S DefaultDecl
// [54] AttType ::= StringType | TokenizedType | EnumeratedType
// [55] StringType ::= 'CDATA'
// [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' | 'ENTITIES' | 'NMTOKEN'
// | 'NMTOKENS'
// [57] EnumeratedType ::= NotationType | Enumeration
// [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
// [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
// [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
// [WFC: No < in Attribute Values]
//assert ch == 'A'
}
protected void processEntityDecl(char ch)
throws XmlPullParserException, IOException
{
// [70] EntityDecl ::= GEDecl | PEDecl
// [71] GEDecl ::= ''
// [72] PEDecl ::= ''
// [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
// [74] PEDef ::= EntityValue | ExternalID
// [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral
//[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
// | "'" ([^%&'] | PEReference | Reference)* "'"
//assert ch == 'N'
}
protected void processNotationDecl(char ch)
throws XmlPullParserException, IOException
{
// [82] NotationDecl ::= ''
// [83] PublicID ::= 'PUBLIC' S PubidLiteral
//assert ch == 'N'
}
protected char readName(char ch)
throws XmlPullParserException, IOException
{
if(isNameStartChar(ch)) {
throw new XmlPullParserException(
"XML name must start with name start character not "+printable(ch), this, null);
}
while(isNameChar(ch)) {
ch = more();
}
return ch;
}
}
/*
* Indiana University Extreme! Lab Software License, Version 1.2
*
* Copyright (c) 2002-2004 The Trustees of Indiana University.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* 1) All redistributions of source code must retain the above
* copyright notice, the list of authors in the original source
* code, this list of conditions and the disclaimer listed in this
* license;
*
* 2) All redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the disclaimer
* listed in this license in the documentation and/or other
* materials provided with the distribution;
*
* 3) Any documentation included with all redistributions must include
* the following acknowledgement:
*
* "This product includes software developed by the Indiana
* University Extreme! Lab. For further information please visit
* http://www.extreme.indiana.edu/"
*
* Alternatively, this acknowledgment may appear in the software
* itself, and wherever such third-party acknowledgments normally
* appear.
*
* 4) The name "Indiana University" or "Indiana University
* Extreme! Lab" shall not be used to endorse or promote
* products derived from this software without prior written
* permission from Indiana University. For written permission,
* please contact http://www.extreme.indiana.edu/.
*
* 5) Products derived from this software may not use "Indiana
* University" name nor may "Indiana University" appear in their name,
* without prior written permission of the Indiana University.
*
* Indiana University provides no reassurances that the source code
* provided does not infringe the patent or any other intellectual
* property rights of any other entity. Indiana University disclaims any
* liability to any recipient for claims brought by any other entity
* based on infringement of intellectual property rights or otherwise.
*
* LICENSEE UNDERSTANDS THAT SOFTWARE IS PROVIDED "AS IS" FOR WHICH
* NO WARRANTIES AS TO CAPABILITIES OR ACCURACY ARE MADE. INDIANA
* UNIVERSITY GIVES NO WARRANTIES AND MAKES NO REPRESENTATION THAT
* SOFTWARE IS FREE OF INFRINGEMENT OF THIRD PARTY PATENT, COPYRIGHT, OR
* OTHER PROPRIETARY RIGHTS. INDIANA UNIVERSITY MAKES NO WARRANTIES THAT
* SOFTWARE IS FREE FROM "BUGS", "VIRUSES", "TROJAN HORSES", "TRAP
* DOORS", "WORMS", OR OTHER HARMFUL CODE. LICENSEE ASSUMES THE ENTIRE
* RISK AS TO THE PERFORMANCE OF SOFTWARE AND/OR ASSOCIATED MATERIALS,
* AND TO THE PERFORMANCE AND VALIDITY OF INFORMATION GENERATED USING
* SOFTWARE.
*/