// org.wings.template.parser.SGMLTag Maven / Gradle / Ivy
/*
* Copyright (c) 1997-1999 The Java Apache Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the Java Apache
* Project for use in the Apache JServ servlet engine project
* (http://java.apache.org/)."
*
* 4. The names "Apache JServ", "Apache JServ Servlet Engine" and
* "Java Apache Project" must not be used to endorse or promote products
* derived from this software without prior written permission.
*
* 5. Products derived from this software may not be called "Apache JServ"
* nor may "Apache" nor "Apache JServ" appear in their names without
* prior written permission of the Java Apache Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the Java Apache
* Project for use in the Apache JServ servlet engine project
* (http://java.apache.org/)."
*
* THIS SOFTWARE IS PROVIDED BY THE JAVA APACHE PROJECT "AS IS" AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE JAVA APACHE PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Java Apache Group. For more information
* on the Java Apache Project and the Apache JServ Servlet Engine project,
* please see <http://java.apache.org/>.
*/
/*
* Copyright 2000,2005 wingS development team.
*
* This file is part of wingS (http://wingsframework.org).
*
* wingS is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* Please see COPYING for the complete licence.
*/
package org.wings.template.parser;
import java.io.IOException;
import java.io.Reader;
import java.util.*;
/**
* Convenient class for parsing SGML tokens from a page.
*
* This class is optimized for speed, not ease of use.
* (Though I'd contend it's fairly easy to use anyway!).
*
* Unlike earlier versions of this class, this one reads
* its content from a Reader to avoid reading
* the whole file into a String before parsing it.
* The Reader is required to support the mark()
* operation.
*
* Tags are only read far enough to find out what the tag name is.
* If you want to read the full tag, call parse(inputReader).
* This is done so that applications don't spend time processing
* tags about which they care little.
*
* Here's a sample piece of code which uses this class to read
* all SGML tags on a page:
*
* <pre>
* void showTags(PrintWriter out, Reader input)
* {
*     SGMLTag tag = new SGMLTag(input);
*     while (!tag.finished()) {
*         out.println ("tag: " + tag.toString());
*         tag = new SGMLTag (input);
*     }
* }
* </pre>
*
* @author Tim Williams
* @author Henner Zeller
*/
/*
* TODO: (hen)
* - read incomplete TAGs => checked=1
*/
public class SGMLTag {
/** The single-quote character ('). */
public final static char singleQuote = '\'';
/** The double-quote character ("). */
public final static char doubleQuote = '\"';
/**
* Name of this SGML tag, documented as being kept in uppercase format.
* NOTE(review): this comment used to state that the field is public for
* compatibility reasons, but it is declared private.
*/
private String name = null;
/**
* The token that closes this tag.
* Different for SSI and SGML tags.
*/
private String closeTag = null;
/**
* Number of characters skipped before the opening '<' of this tag.
* The visible part of searchStart() sets it to -1 when the end of the
* input is reached before a '<' is found.
*/
private int offset = 0;
// private stuff
// tag attributes ("mixed" presumably means original/mixed case -- TODO confirm)
private LinkedList attrs = null; // tag attributes (mixed)
// tag attribute values ("uc" presumably means keyed by upper-case name -- TODO confirm)
private LinkedHashMap values = null; // tag attribute values (uc)
// false makes toString() report "*MALFORMED TAG*" instead of the attributes
private boolean wellFormed = true; // looks good?
// NOTE(review): presumably set once the attributes have been read -- confirm
private boolean attr_ready = false;
// comment delimitation: COMMENT_START is "!--" (the leading '<' is not part
// of the constant), COMMENT_END is "-->"
static final String COMMENT_START = "!--", COMMENT_END = "-->";
// SSI directives start with COMMENT_START followed by '#' (i.e. "!--#")
// and end with the same token as a comment
static final String SSI_START = COMMENT_START + '#', SSI_END = COMMENT_END;
/**
 * Creates a new SGML tag reference, starting at the current location
 * of the Reader.
 * <p>
 * {@code searchStart(input)} is always run first. The attributes are
 * read immediately only when {@code parseIt} is true; otherwise only the
 * type of the tag is determined and the tag may not be well-formed. In
 * that case, call {@link #parse(Reader)} directly afterwards (without
 * reading any characters from the Reader in between) to get the
 * attributes.
 * <p>
 * Note that this constructor skips over any HTML-style comments,
 * as denoted by matched &lt;!-- ... --&gt; pairs.
 *
 * @param input   the Reader being parsed for SGML tags
 * @param parseIt whether the tag should be parsed fully, i.e. whether
 *                its attributes are read right away
 * @throws IOException if reading from {@code input} fails
 * @see #attributes
 */
public SGMLTag(Reader input, boolean parseIt) throws IOException {
    searchStart(input);
    if (parseIt) {
        readAttributes(input);
    }
}
/**
 * Creates a new SGML tag reference, starting at the current location
 * of the Reader, and reads all of its attributes.
 * <p>
 * Equivalent to {@code new SGMLTag(input, true)}.
 * <p>
 * Note that this constructor skips over any HTML-style comments,
 * as denoted by matched &lt;!-- ... --&gt; pairs.
 *
 * @param input the Reader being parsed for SGML tags
 * @throws IOException if reading from {@code input} fails
 * @see #attributes
 */
public SGMLTag(Reader input) throws IOException {
    this(input, true);
}
/**
 * Reads the attributes of this tag from the given Reader.
 * <p>
 * Intended to be called directly after constructing the tag with
 * {@code parseIt == false}, before any further characters are consumed
 * from the Reader.
 *
 * @param input the Reader the tag was created from
 * @throws IOException if reading from {@code input} fails
 */
public void parse(Reader input) throws IOException {
    readAttributes(input);
}
/**
* Skip over any HTML-style comments,
* as denoted by matched <-- ... --> pairs.
*
* @param input the reader being parsed for SGMLtags
*/
protected void searchStart(Reader input)
throws IOException {
int c = 0;
char buff[] = new char[8]; // must at least hold the length of COMMENT_(START|END)
// skipping over comments, find first tag
while (true) {
// find starting character of SGML tag
while (c >= 0 && c != '<') {
c = input.read();
offset++;
}
if (c == -1) {
offset = -1;
return;
} // EOF
offset--;
/* -- check if we just found a comment
* ".
// Like a word token, but includes the delimiter ">".
else if (c == '-') {
do {
token.append('-');
input.mark(1);
c = input.read();
} while (c >= 0 &&
!Character.isWhitespace((char) c) &&
!isDelimiter((char) c));
input.reset();
token.append((char) input.read());
}
// If we did not skip Whitespaces but actually got one
// this token is empty.
else if (!skipWhitespaces &&
Character.isWhitespace((char) c)) {
input.reset();
return null;
}
// word token or />
else {
do {
token.append((char) c);
input.mark(1);
c = input.read();
} while (c >= 0 &&
!Character.isWhitespace((char) c) &&
!isDelimiter((char) c));
if (token.length() == 1 && token.charAt(0) == '/')
token.append((char) c);
else
input.reset();
}
return token.toString();
}
/**
 * Consumes leading whitespace from the given Reader.
 * <p>
 * Characters are read one at a time; the position before each read is
 * marked so that the first non-whitespace character (or the end of the
 * stream) can be pushed back with {@code reset()}. The Reader therefore
 * has to support {@code mark()}.
 *
 * @param r the Reader to skip whitespace on
 * @return the number of whitespace characters that were skipped
 * @throws IOException if reading, marking or resetting the Reader fails
 */
public static int skipWhiteSpace(Reader r) throws IOException {
    int skipped = 0;
    while (true) {
        // remember the position so the terminating character can be un-read
        r.mark(1);
        final int ch = r.read();
        if (ch < 0 || !Character.isWhitespace((char) ch)) {
            break;
        }
        skipped++;
    }
    r.reset();
    return skipped;
}
/**
 * Returns the value of an attribute (parameter) setting in this SGML tag.
 * <p>
 * This is a plain delegate to {@code value(key, defaultValue)}.
 *
 * @param key name (uppercase) of the attribute for which to check
 * @param defaultValue value to return if the attribute is unset
 * @return value of that attribute, or the default if not defined
 * @deprecated use attributes() and value() instead
 * @see #attributes
 * @see #value
 */
@Deprecated
public String getAttribute(String key, String defaultValue) {
    return value(key, defaultValue);
}
/**
 * Returns the tag attributes and their values.
 * <p>
 * The returned map is the internal attribute-value map of this tag, not
 * a copy.
 *
 * @return parameter key / value pairs, or {@code null} if
 *         {@code isWellFormed()} reports that this tag is not well-formed
 * @deprecated use attributes() and value() instead
 * @see #attributes
 * @see #value
 */
@Deprecated
public Map getAttributes() {
    if (!isWellFormed()) {
        return null;
    }
    return values;
}
/**
 * Tells whether a character is one of the SGML delimiters
 * {@code '<'} and {@code '>'}, or the equals sign {@code '='}.
 *
 * @param c character in question
 * @return true if the character is {@code '<'}, {@code '='} or {@code '>'}
 */
private static boolean isDelimiter(char c) {
    switch (c) {
        case '<':
        case '=':
        case '>':
            return true;
        default:
            return false;
    }
}
/**
 * Renders this tag as a string.
 * <p>
 * The result has the form
 * {@code [SGMLTag NAME: (offset,---) KEY="value" ... ]}. The attribute
 * list is only printed when attributes are present and the tag is
 * well-formed; otherwise the marker {@code *MALFORMED TAG*} is printed
 * in its place.
 *
 * @return SGML tag as string, showing range and values
 */
@Override
public String toString() {
    StringBuilder str = new StringBuilder();
    str.append("[SGMLTag ").append(name).append(": (").append(offset).append(",---)");
    if (attrs != null && wellFormed) {
        // The iterator must be typed: with a raw Iterator, next() yields
        // Object and cannot be assigned to a String without a cast.
        Iterator<String> iter = attributes(true);
        while (iter.hasNext()) {
            String key = iter.next();
            str.append(' ').append(key).append("=\"").append(value(key, null)).append('"');
        }
    } else {
        str.append(" *MALFORMED TAG*");
    }
    str.append(" ]");
    return str.toString();
}
}