// com.day.cq.rewriter.htmlparser.TagTokenizer (Maven / Gradle / Ivy)
/*
* Copyright 1997-2008 Day Management AG
* Barfuesserplatz 6, 4001 Basel, Switzerland
* All Rights Reserved.
*
* This software is the confidential and proprietary information of
* Day Management AG, ("Confidential Information"). You shall not
* disclose such Confidential Information and shall use it only in
* accordance with the terms of the license agreement you entered into
* with Day.
*/
package com.day.cq.rewriter.htmlparser;
import java.io.CharArrayWriter;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
/**
 * Tokenizes a snippet of characters into a structured tag/attribute name list.
 * <p>
 * One call to {@link #tokenize(char[], int, int)} scans a single tag; the
 * result is then available through {@link #tagName()}, {@link #endTag()},
 * {@link #endSlash()} and {@link #attributes()}. The scan state is kept in
 * instance fields and is reset at the start of every {@code tokenize} call, so
 * an instance can be reused for consecutive tags but the accessors always
 * reflect the most recent call only.
 *
 * @deprecated This is replaced by the Apache Sling Html parsing.
 */
@Deprecated
class TagTokenizer {

    /** States of the scanner driven by {@link #tokenize(char[], int, int)}. */
    private enum State {
        /** Nothing seen yet; waiting for the opening {@code '<'}. */
        START,
        /** Directly after {@code '<'}. */
        TAG,
        /** Inside the tag name. */
        NAME,
        /** Between attributes. */
        INSIDE,
        /** Inside an attribute name. */
        ATTNAME,
        /** Whitespace seen after an attribute name. */
        AFTER_ATTNAME,
        /** After the {@code '='} of an attribute. */
        EQUAL,
        /** Inside an unquoted attribute value. */
        ATTVALUE,
        /** Inside a quoted string. */
        STRING,
        /** After a {@code '/'} that may close the tag. */
        ENDSLASH,
        /** After the closing {@code '>'}; remaining input is ignored. */
        END
    }

    /** Tag name buffer */
    private final CharArrayWriter tagName = new CharArrayWriter(30);

    /** Attribute name buffer */
    private final CharArrayWriter attName = new CharArrayWriter(30);

    /** Attribute value buffer */
    private final CharArrayWriter attValue = new CharArrayWriter(30);

    /** Internal property list */
    private final AttributeListImpl attributes = new AttributeListImpl();

    /** Quote character of the most recently opened quoted string */
    private char quoteChar = '"';

    /** Flag indicating whether the tag scanned is an end tag */
    private boolean endTag;

    /** Flag indicating whether an ending slash was parsed */
    private boolean endSlash;

    /** Temporary flag indicating if the pending attribute has a value */
    private boolean hasAttributeValue;

    /** Flag indicating if the case of attribute names is to be preserved */
    private boolean preserveCamelCase = false;

    /**
     * Scan the characters passed to this parser. Any result of a previous
     * call is discarded first.
     *
     * @param buf character buffer containing the tag
     * @param off index of the first character to scan
     * @param len number of characters to scan
     */
    public void tokenize(char[] buf, int off, int len) {
        reset();
        State state = State.START;
        // Nothing is interpreted after the closing '>', so stop once END is reached.
        for (int i = 0; i < len && state != State.END; i++) {
            final char c = buf[off + i];
            switch (state) {
                case START:
                    if (c == '<') {
                        state = State.TAG;
                    }
                    break;
                case TAG:
                    if (c == '/') {
                        endTag = true;
                        state = State.NAME;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (Character.isWhitespace(c)) {
                        state = State.INSIDE;
                    } else {
                        tagName.write(c);
                        state = State.NAME;
                    }
                    break;
                case NAME:
                    if (Character.isWhitespace(c)) {
                        state = State.INSIDE;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c == '>') {
                        state = State.END;
                    } else if (c == '/') {
                        state = State.ENDSLASH;
                    } else {
                        tagName.write(c);
                    }
                    break;
                case INSIDE:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (c == '/') {
                        attributeEnded();
                        state = State.ENDSLASH;
                    } else if (isQuote(c)) {
                        attributeValueStarted();
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c == '=') {
                        state = State.EQUAL;
                    } else if (!Character.isWhitespace(c)) {
                        attName.write(c);
                        state = State.ATTNAME;
                    }
                    break;
                case ATTNAME:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (c == '/') {
                        attributeEnded();
                        state = State.ENDSLASH;
                    } else if (c == '=') {
                        state = State.EQUAL;
                    } else if (isQuote(c)) {
                        // A quote without a preceding '=' does not mark the
                        // attribute as having a value.
                        quoteChar = c;
                        state = State.STRING;
                    } else if (Character.isWhitespace(c)) {
                        state = State.AFTER_ATTNAME;
                    } else {
                        attName.write(c);
                    }
                    break;
                case AFTER_ATTNAME:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (c == '/') {
                        attributeEnded();
                        state = State.ENDSLASH;
                    } else if (c == '=') {
                        state = State.EQUAL;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (!Character.isWhitespace(c)) {
                        // The previous name had no value; a new name starts here.
                        attributeEnded();
                        attName.write(c);
                        state = State.ATTNAME;
                    }
                    break;
                case EQUAL:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (isQuote(c)) {
                        attributeValueStarted();
                        quoteChar = c;
                        state = State.STRING;
                    } else if (!Character.isWhitespace(c)) {
                        attributeValueStarted();
                        attValue.write(c);
                        state = State.ATTVALUE;
                    }
                    break;
                case ATTVALUE:
                    if (Character.isWhitespace(c)) {
                        attributeEnded();
                        state = State.INSIDE;
                    } else if (isQuote(c)) {
                        attributeEnded();
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else {
                        attValue.write(c);
                    }
                    break;
                case STRING:
                    if (c == quoteChar) {
                        attributeEnded();
                        state = State.INSIDE;
                    } else {
                        attValue.write(c);
                    }
                    break;
                case ENDSLASH:
                    if (c == '>') {
                        endSlash = true;
                        state = State.END;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c != '/' && !Character.isWhitespace(c)) {
                        // The slash did not close the tag; treat c as the
                        // start of an attribute name.
                        attName.write(c);
                        state = State.ATTNAME;
                    } else {
                        state = State.INSIDE;
                    }
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Return a flag indicating whether the tag scanned was an end tag
     *
     * @return {@code true} if it was an end tag, otherwise {@code false}
     */
    public boolean endTag() {
        return endTag;
    }

    /**
     * Return a flag indicating whether an ending slash was scanned
     *
     * @return {@code true} if an ending slash was scanned, otherwise
     *         {@code false}
     */
    public boolean endSlash() {
        return endSlash;
    }

    /**
     * Return the tagname scanned
     *
     * @return tag name
     */
    public String tagName() {
        return tagName.toString();
    }

    /**
     * Return the list of attributes scanned
     *
     * @return list of attributes
     */
    public AttributeList attributes() {
        return attributes;
    }

    /**
     * Keep attribute names exactly as written instead of converting them to
     * lower case. Affects attributes scanned after this call.
     */
    public void setPreserveCamelCase() {
        preserveCamelCase = true;
    }

    /**
     * Reset the internal state of the tokenizer, including the attribute
     * buffers, so that nothing left over from a truncated or malformed tag can
     * leak into the next scan.
     */
    private void reset() {
        tagName.reset();
        attName.reset();
        attValue.reset();
        attributes.reset();
        hasAttributeValue = false;
        quoteChar = '"';
        endTag = false;
        endSlash = false;
    }

    /**
     * Tell whether the character opens a quoted string.
     *
     * @param c character to test
     * @return {@code true} for a double or single quote
     */
    private static boolean isQuote(char c) {
        return c == '"' || c == '\'';
    }

    /**
     * Invoked when an attribute ends. A pending attribute name is added to the
     * attribute list, with its value if one was started. The attribute buffers
     * are cleared in every case: a value collected without a name is dropped
     * rather than being prepended to the value of the following attribute.
     */
    private void attributeEnded() {
        if (attName.size() > 0) {
            String name = attName.toString();
            if (!preserveCamelCase) {
                // Locale.ROOT keeps the result independent of the default locale.
                name = name.toLowerCase(Locale.ROOT);
            }
            if (hasAttributeValue) {
                attributes.addAttribute(name, attValue.toString(), quoteChar);
            } else {
                attributes.addAttribute(name, quoteChar);
            }
        }
        attName.reset();
        attValue.reset();
        hasAttributeValue = false;
    }

    /**
     * Invoked when an attribute value starts
     */
    private void attributeValueStarted() {
        hasAttributeValue = true;
    }

    /**
     * Retransfers the tokenized tag data into html again
     * <p>
     * NOTE(review): the end-tag flag is not reflected in the output, i.e. a
     * scanned end tag is written with a leading {@code "<"} only - confirm
     * that callers use this for start tags only.
     *
     * @return the reassembled html string
     */
    public String toHtmlString() {
        StringBuilder html = new StringBuilder();
        html.append('<').append(tagName());
        Iterator<String> names = attributes().attributeNames();
        while (names.hasNext()) {
            String name = names.next();
            String quoted = attributes().getQuotedValue(name);
            html.append(' ').append(name);
            if (quoted != null) {
                html.append('=').append(quoted);
            }
        }
        if (endSlash) {
            html.append(" /");
        }
        html.append('>');
        return html.toString();
    }
}
/**
* Internal implementation of an AttributeList
*/
class AttributeListImpl implements AttributeList {
/**
* Internal Value class
*/
static class Value {
/**
* Create a new Value
instance
*/
public Value(char quoteChar, String value) {
this.quoteChar = quoteChar;
this.value = value;
}
/** Quote character */
public final char quoteChar;
/** Value itself */
public final String value;
/** String representation */
private String stringRep;
/**
* @see Object#toString()
*/
public String toString() {
if (stringRep == null) {
stringRep = quoteChar + value + quoteChar;
}
return stringRep;
}
}
/** Attribute/Value pair map with case insensitives names */
private final Map attributes = new LinkedHashMap();
/** Attribute names, case sensitive */
private final Set attributeNames = new LinkedHashSet();
/** Flag indicating whether this object was modified */
private boolean modified;
/**
* Add an attribute/value pair to this attribute list
*/
public void addAttribute(String name, String value, char quoteChar) {
attributes.put(name.toUpperCase(), new Value(quoteChar, value));
attributeNames.add(name);
}
/**
* Add an attribute/value pair to this attribute list
*/
public void addAttribute(String name, char quoteChar) {
attributes.put(name.toUpperCase(), null);
attributeNames.add(name);
}
/**
* Empty this attribute list
*/
public void reset() {
attributes.clear();
attributeNames.clear();
modified = false;
}
/**
* @see AttributeList#attributeCount
*/
public int attributeCount() {
return attributes.size();
}
/**
* @see AttributeList#attributeNames
*/
public Iterator attributeNames() {
return attributeNames.iterator();
}
/**
* @see AttributeList#containsAttribute(String)
*/
public boolean containsAttribute(String name) {
return attributes.containsKey(name.toUpperCase());
}
/**
* @see AttributeList#getValue(String)
*/
public String getValue(String name) {
Value value = getValueEx(name);
if (value != null) {
return value.value;
}
return null;
}
/**
* @see com.day.cq.rewriter.htmlparser.AttributeList#getQuoteChar(java.lang.String)
*/
public char getQuoteChar(String name) {
Value value = getValueEx(name);
if (value != null) {
return value.quoteChar;
}
return 0;
}
/**
* @see AttributeList#getQuotedValue(String)
*/
public String getQuotedValue(String name) {
Value value = getValueEx(name);
if (value != null) {
return value.toString();
}
return null;
}
/**
* @see AttributeList#setValue(String, String)
*/
public void setValue(String name, String value) {
if (value == null) {
removeValue(name);
} else {
Value old = getValueEx(name);
if (old == null) {
addAttribute(name, value, '"');
modified = true;
} else if (!old.value.equals(value)) {
addAttribute(name, value, old.quoteChar);
modified = true;
}
}
}
/**
* @see AttributeList#removeValue(String)
*/
public void removeValue(String name) {
attributeNames.remove(name);
attributes.remove(name.toUpperCase());
modified = true;
}
/**
* @see AttributeList#isModified
*/
public boolean isModified() {
return modified;
}
/**
* Return internal value structure
*/
protected Value getValueEx(String name) {
return attributes.get(name.toUpperCase());
}
}