All downloads are free. Search and download functionality uses the official Maven repository.

com.day.cq.rewriter.htmlparser.TagTokenizer Maven / Gradle / Ivy

/*
 * Copyright 1997-2008 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.rewriter.htmlparser;

import java.io.CharArrayWriter;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

/**
 * Tokenizes a snippet of characters into a structured tag/attribute name list.
 * <p>
 * One call to {@link #tokenize(char[], int, int)} scans a single tag; the
 * result is then available through {@link #tagName()}, {@link #endTag()},
 * {@link #endSlash()} and {@link #attributes()} until the next call.
 * <p>
 * Instances keep mutable scan state in fields and perform no synchronization.
 *
 * @deprecated This is replaced by the Apache Sling Html parsing.
 */
@Deprecated
class TagTokenizer {

    /** States of the tag scanner. */
    private enum State {
        /** Nothing seen yet, waiting for the opening '&lt;'. */
        START,
        /** Directly after '&lt;'. */
        TAG,
        /** Collecting the tag name. */
        NAME,
        /** Between attributes. */
        INSIDE,
        /** Collecting an attribute name. */
        ATTNAME,
        /** Whitespace seen after an attribute name. */
        AFTER_ATTNAME,
        /** Directly after '='. */
        EQUAL,
        /** Collecting an unquoted attribute value. */
        ATTVALUE,
        /** Collecting a quoted string. */
        STRING,
        /** Directly after a '/' that follows the tag name or an attribute. */
        ENDSLASH,
        /** The closing '&gt;' was seen. */
        END
    }

    /** Quote character assumed until a quote is seen in the current tag. */
    private static final char DEFAULT_QUOTE = '"';

    /** Tag name buffer */
    private final CharArrayWriter tagName = new CharArrayWriter(30);

    /** Attribute name buffer */
    private final CharArrayWriter attName = new CharArrayWriter(30);

    /** Attribute value buffer */
    private final CharArrayWriter attValue = new CharArrayWriter(30);

    /** Internal property list */
    private final AttributeListImpl attributes = new AttributeListImpl();

    /** Quote character most recently seen in the current tag */
    private char quoteChar = DEFAULT_QUOTE;

    /** Flag indicating whether the tag scanned is an end tag */
    private boolean endTag;

    /** Flag indicating whether an ending slash was parsed */
    private boolean endSlash;

    /** Temporary flag indicating if the pending attribute has a value */
    private boolean hasAttributeValue;

    /** Flag indicating if the case of attribute names is to be preserved */
    private boolean preserveCamelCase = false;

    /**
     * Scan characters passed to this parser. Any result of a previous scan is
     * discarded first. Characters before the first '&lt;' and after the
     * closing '&gt;' are ignored.
     *
     * @param buf character buffer containing the tag
     * @param off index of the first character to scan
     * @param len number of characters to scan
     */
    public void tokenize(char[] buf, int off, int len) {
        reset();

        State state = State.START;

        // Nothing after the closing '>' influences the result, so stop there.
        for (int i = 0; i < len && state != State.END; i++) {
            final char c = buf[off + i];

            switch (state) {
                case START:
                    if (c == '<') {
                        state = State.TAG;
                    }
                    break;
                case TAG:
                    if (c == '/') {
                        endTag = true;
                        state = State.NAME;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (Character.isWhitespace(c)) {
                        state = State.INSIDE;
                    } else {
                        tagName.write(c);
                        state = State.NAME;
                    }
                    break;
                case NAME:
                    if (Character.isWhitespace(c)) {
                        state = State.INSIDE;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c == '>') {
                        state = State.END;
                    } else if (c == '/') {
                        state = State.ENDSLASH;
                    } else {
                        tagName.write(c);
                    }
                    break;
                case INSIDE:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (c == '/') {
                        attributeEnded();
                        state = State.ENDSLASH;
                    } else if (isQuote(c)) {
                        attributeValueStarted();
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c == '=') {
                        state = State.EQUAL;
                    } else if (!Character.isWhitespace(c)) {
                        attName.write(c);
                        state = State.ATTNAME;
                    }
                    break;
                case ATTNAME:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (c == '/') {
                        attributeEnded();
                        state = State.ENDSLASH;
                    } else if (c == '=') {
                        state = State.EQUAL;
                    } else if (isQuote(c)) {
                        // No '=' was seen: the quoted text is consumed but the
                        // attribute is recorded without a value.
                        quoteChar = c;
                        state = State.STRING;
                    } else if (Character.isWhitespace(c)) {
                        state = State.AFTER_ATTNAME;
                    } else {
                        attName.write(c);
                    }
                    break;
                case AFTER_ATTNAME:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (c == '/') {
                        attributeEnded();
                        state = State.ENDSLASH;
                    } else if (c == '=') {
                        state = State.EQUAL;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (!Character.isWhitespace(c)) {
                        // A new name starts: the previous attribute had no value.
                        attributeEnded();
                        attName.write(c);
                        state = State.ATTNAME;
                    }
                    break;
                case EQUAL:
                    if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else if (isQuote(c)) {
                        attributeValueStarted();
                        quoteChar = c;
                        state = State.STRING;
                    } else if (!Character.isWhitespace(c)) {
                        attributeValueStarted();
                        attValue.write(c);
                        state = State.ATTVALUE;
                    }
                    break;
                case ATTVALUE:
                    if (Character.isWhitespace(c)) {
                        attributeEnded();
                        state = State.INSIDE;
                    } else if (isQuote(c)) {
                        attributeEnded();
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c == '>') {
                        attributeEnded();
                        state = State.END;
                    } else {
                        attValue.write(c);
                    }
                    break;
                case STRING:
                    if (c == quoteChar) {
                        attributeEnded();
                        state = State.INSIDE;
                    } else {
                        attValue.write(c);
                    }
                    break;
                case ENDSLASH:
                    if (c == '>') {
                        endSlash = true;
                        state = State.END;
                    } else if (isQuote(c)) {
                        quoteChar = c;
                        state = State.STRING;
                    } else if (c != '/' && !Character.isWhitespace(c)) {
                        attName.write(c);
                        state = State.ATTNAME;
                    } else {
                        state = State.INSIDE;
                    }
                    break;
                default:
                    // END is excluded by the loop condition.
                    break;
            }
        }
    }

    /**
     * Return a flag indicating whether the tag scanned was an end tag
     * @return true if it was an end tag, otherwise
     *         false
     */
    public boolean endTag() {
        return endTag;
    }

    /**
     * Return a flag indicating whether an ending slash was scanned
     * @return true if an ending slash was scanned, otherwise
     *         false
     */
    public boolean endSlash() {
        return endSlash;
    }

    /**
     * Return the tagname scanned
     * @return tag name
     */
    public String tagName() {
        return tagName.toString();
    }

    /**
     * Return the list of attributes scanned
     * @return list of attributes
     */
    public AttributeList attributes() {
        return attributes;
    }

    /**
     * Keep attribute names exactly as written instead of converting them to
     * lower case. The setting stays in effect for all following scans.
     */
    public void setPreserveCamelCase() {
        preserveCamelCase = true;
    }

    /**
     * Retransfers the tokenized tag data into html again. An end tag is
     * written with its leading slash, attributes are written in the order
     * reported by the attribute list, and a trailing " /" is added if an
     * ending slash was scanned.
     *
     * @return the reassembled html string
     */
    public String toHtmlString() {
        StringBuilder html = new StringBuilder();
        html.append('<');
        if (endTag) {
            html.append('/');
        }
        html.append(tagName());

        Iterator<String> names = attributes().attributeNames();
        while (names.hasNext()) {
            String name = names.next();
            String quoted = attributes().getQuotedValue(name);

            html.append(' ').append(name);
            if (quoted != null) {
                html.append('=').append(quoted);
            }
        }
        if (endSlash) {
            html.append(" /");
        }
        html.append('>');
        return html.toString();
    }

    /**
     * Tell whether the character opens or closes a quoted string.
     */
    private static boolean isQuote(char c) {
        return c == '"' || c == '\'';
    }

    /**
     * Reset the internal state of the tokenizer. Everything that belongs to
     * a single scan is cleared, so an unterminated tag cannot leak a pending
     * attribute or quote character into the next scan. The camel case
     * setting is configuration and is kept.
     */
    private void reset() {
        tagName.reset();
        attName.reset();
        attValue.reset();
        attributes.reset();
        quoteChar = DEFAULT_QUOTE;
        endTag = false;
        endSlash = false;
        hasAttributeValue = false;
    }

    /**
     * Invoked when an attribute ends. A pending attribute name is stored,
     * with its value if one was started. The value buffer and value flag are
     * cleared even when no name is pending, so that stray text such as a
     * quoted string without a name is not attached to the next attribute.
     */
    private void attributeEnded() {
        if (attName.size() > 0) {
            String name = attName.toString();
            if (!preserveCamelCase) {
                // Locale.ROOT keeps the mapping independent of the default locale.
                name = name.toLowerCase(Locale.ROOT);
            }
            if (hasAttributeValue) {
                attributes.addAttribute(name, attValue.toString(), quoteChar);
            } else {
                attributes.addAttribute(name, quoteChar);
            }
            attName.reset();
        }
        attValue.reset();
        hasAttributeValue = false;
    }

    /**
     * Invoked when an attribute value starts
     */
    private void attributeValueStarted() {
        hasAttributeValue = true;
    }
}

/**
 * Internal implementation of an AttributeList
 */
class AttributeListImpl implements AttributeList {

    /**
     * Internal Value class
     */
    static class Value {

        /**
         * Create a new Value instance
         */
        public Value(char quoteChar, String value) {
            this.quoteChar = quoteChar;
            this.value = value;
        }

        /** Quote character */
        public final char quoteChar;

        /** Value itself */
        public final String value;

        /** String representation */
        private String stringRep;

        /**
         * @see Object#toString()
         */
        public String toString() {
            if (stringRep == null) {
                stringRep = quoteChar + value + quoteChar;
            }
            return stringRep;
        }
    }

    /** Attribute/Value pair map with case insensitives names */
    private final Map attributes = new LinkedHashMap();

    /** Attribute names, case sensitive */
    private final Set attributeNames = new LinkedHashSet();

    /** Flag indicating whether this object was modified */
    private boolean modified;

    /**
     * Add an attribute/value pair to this attribute list
     */
    public void addAttribute(String name, String value, char quoteChar) {
        attributes.put(name.toUpperCase(), new Value(quoteChar, value));
        attributeNames.add(name);
    }

    /**
     * Add an attribute/value pair to this attribute list
     */
    public void addAttribute(String name, char quoteChar) {
        attributes.put(name.toUpperCase(), null);
        attributeNames.add(name);
    }

    /**
     * Empty this attribute list
     */
    public void reset() {
        attributes.clear();
        attributeNames.clear();
        modified = false;
    }

    /**
     * @see AttributeList#attributeCount
     */
    public int attributeCount() {
        return attributes.size();
    }

    /**
     * @see AttributeList#attributeNames
     */
    public Iterator attributeNames() {
        return attributeNames.iterator();
    }

    /**
     * @see AttributeList#containsAttribute(String)
     */
    public boolean containsAttribute(String name) {
        return attributes.containsKey(name.toUpperCase());
    }

    /**
     * @see AttributeList#getValue(String)
     */
    public String getValue(String name) {
        Value value = getValueEx(name);
        if (value != null) {
            return value.value;
        }
        return null;
    }

    /**
     * @see com.day.cq.rewriter.htmlparser.AttributeList#getQuoteChar(java.lang.String)
     */
    public char getQuoteChar(String name) {
        Value value = getValueEx(name);
        if (value != null) {
            return value.quoteChar;
        }
        return 0;
    }

    /**
     * @see AttributeList#getQuotedValue(String)
     */
    public String getQuotedValue(String name) {
        Value value = getValueEx(name);
        if (value != null) {
            return value.toString();
        }
        return null;
    }

    /**
     * @see AttributeList#setValue(String, String)
     */
    public void setValue(String name, String value) {
        if (value == null) {
            removeValue(name);
        } else {
            Value old = getValueEx(name);
            if (old == null) {
                addAttribute(name, value, '"');
                modified = true;
            } else if (!old.value.equals(value)) {
                addAttribute(name, value, old.quoteChar);
                modified = true;
            }
        }
    }

    /**
     * @see AttributeList#removeValue(String)
     */
    public void removeValue(String name) {
        attributeNames.remove(name);
        attributes.remove(name.toUpperCase());
        modified = true;
    }

    /**
     * @see AttributeList#isModified
     */
    public boolean isModified() {
        return modified;
    }

    /**
     * Return internal value structure
     */
    protected Value getValueEx(String name) {
        return attributes.get(name.toUpperCase());
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy