All downloads are free. The search and download functions use the official Maven repository.

org.htmlparser.Attribute Maven / Gradle / Ivy

// HTMLParser Library $Name: v1_5 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/Attribute.java,v $
// $Author: derrickoswald $
// $Date: 2005/05/15 11:49:03 $
// $Revision: 1.7 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser;

import java.io.Serializable;

/**
 * An attribute within a tag.
 * Holds the name, assignment string, value and quote character.
 * <p>
 * This class was made deliberately simple. Except for
 * {@link #setRawValue RawValue}, the properties are completely orthogonal,
 * that is: each property is independent of the others. This means you have
 * enough rope here to hang yourself, and it's very easy to create
 * malformed HTML. Where it's obvious, warnings and notes have been provided
 * in the setters javadocs, but it is up to you -- the programmer --
 * to ensure that the contents of the four fields will yield valid HTML
 * (if that's what you want).
 * <p>
 * Be especially mindful of quotes and assignment strings. These are handled
 * by the constructors where it's obvious, but in general, you need to set
 * them explicitly when building an attribute. For example to construct
 * the attribute <code>label="A multi word value."</code> you could use:
 * <pre>
 *     attribute = new Attribute ();
 *     attribute.setName ("label");
 *     attribute.setAssignment ("=");
 *     attribute.setValue ("A multi word value.");
 *     attribute.setQuote ('"');
 * </pre>
 * or
 * <pre>
 *     attribute = new Attribute ();
 *     attribute.setName ("label");
 *     attribute.setAssignment ("=");
 *     attribute.setRawValue ("A multi word value.");
 * </pre>
 * or
 * <pre>
 *     attribute = new Attribute ("label", "A multi word value.");
 * </pre>
 * Note that the assignment value and quoting need to be set separately when
 * building the attribute from scratch using the properties.
 *
 * <table border="1">
 *   <caption>Valid States for Attributes.</caption>
 *   <tr>
 *     <th>Description</th><th>toString()</th><th>Name</th>
 *     <th>Assignment</th><th>Value</th><th>Quote</th>
 *   </tr>
 *   <tr>
 *     <td>whitespace attribute</td><td>value</td><td>null</td>
 *     <td>null</td><td>"value"</td><td>0</td>
 *   </tr>
 *   <tr>
 *     <td>standalone attribute</td><td>name</td><td>"name"</td>
 *     <td>null</td><td>null</td><td>0</td>
 *   </tr>
 *   <tr>
 *     <td>empty attribute</td><td>name=</td><td>"name"</td>
 *     <td>"="</td><td>null</td><td>0</td>
 *   </tr>
 *   <tr>
 *     <td>empty single quoted attribute</td><td>name=''</td><td>"name"</td>
 *     <td>"="</td><td>null</td><td>'</td>
 *   </tr>
 *   <tr>
 *     <td>empty double quoted attribute</td><td>name=""</td><td>"name"</td>
 *     <td>"="</td><td>null</td><td>"</td>
 *   </tr>
 *   <tr>
 *     <td>naked attribute</td><td>name=value</td><td>"name"</td>
 *     <td>"="</td><td>"value"</td><td>0</td>
 *   </tr>
 *   <tr>
 *     <td>single quoted attribute</td><td>name='value'</td><td>"name"</td>
 *     <td>"="</td><td>"value"</td><td>'</td>
 *   </tr>
 *   <tr>
 *     <td>double quoted attribute</td><td>name="value"</td><td>"name"</td>
 *     <td>"="</td><td>"value"</td><td>"</td>
 *   </tr>
 * </table>
 * In words:
 * <ul>
 * <li>If Name is null, and Assignment is null, and Quote is zero,
 *     it's whitespace and Value has the whitespace text -- value</li>
 * <li>If Name is not null, and both Assignment and Value are null
 *     it's a standalone attribute -- name</li>
 * <li>If Name is not null, and Assignment is an equals sign, and Quote is
 *     zero it's an empty attribute -- name=</li>
 * <li>If Name is not null, and Assignment is an equals sign,
 *     and Value is "" or null, and Quote is '
 *     it's an empty single quoted attribute -- name=''</li>
 * <li>If Name is not null, and Assignment is an equals sign,
 *     and Value is "" or null, and Quote is "
 *     it's an empty double quoted attribute -- name=""</li>
 * <li>If Name is not null, and Assignment is an equals sign,
 *     and Value is something, and Quote is zero
 *     it's a naked attribute -- name=value</li>
 * <li>If Name is not null, and Assignment is an equals sign,
 *     and Value is something, and Quote is '
 *     it's a single quoted attribute -- name='value'</li>
 * <li>If Name is not null, and Assignment is an equals sign,
 *     and Value is something, and Quote is "
 *     it's a double quoted attribute -- name="value"</li>
 * <li>All other states are invalid HTML.</li>
 * </ul>
 * <p>
 * From the
 * <a href="http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2">
 * HTML 4.01 Specification, W3C Recommendation 24 December 1999</a>
 * http://www.w3.org/TR/html4/intro/sgmltut.html#h-3.2.2:
 * <blockquote>
 * <p>
 * <b>3.2.2 Attributes</b>
 * <p>
 * Elements may have associated properties, called attributes, which may
 * have values (by default, or set by authors or scripts). Attribute/value
 * pairs appear before the final "&gt;" of an element's start tag. Any number
 * of (legal) attribute value pairs, separated by spaces, may appear in an
 * element's start tag. They may appear in any order.
 * <p>
 * In this example, the id attribute is set for an H1 element:
 * <pre>
 * &lt;H1 id="section1"&gt;
 * This is an identified heading thanks to the id attribute
 * &lt;/H1&gt;
 * </pre>
 * By default, SGML requires that all attribute values be delimited using
 * either double quotation marks (ASCII decimal 34) or single quotation
 * marks (ASCII decimal 39). Single quote marks can be included within the
 * attribute value when the value is delimited by double quote marks, and
 * vice versa. Authors may also use numeric character references to
 * represent double quotes (&amp;#34;) and single quotes (&amp;#39;).
 * For doublequotes authors can also use the character entity reference
 * &amp;quot;.
 * <p>
 * In certain cases, authors may specify the value of an attribute without
 * any quotation marks. The attribute value may only contain letters
 * (a-z and A-Z), digits (0-9), hyphens (ASCII decimal 45),
 * periods (ASCII decimal 46), underscores (ASCII decimal 95),
 * and colons (ASCII decimal 58). We recommend using quotation marks even
 * when it is possible to eliminate them.
 * <p>
 * Attribute names are always case-insensitive.
 * <p>
 * Attribute values are generally case-insensitive. The definition of each
 * attribute in the reference manual indicates whether its value is
 * case-insensitive.
 * <p>
 * All the attributes defined by this specification are listed in the
 * attribute index.
 * </blockquote>
 */
public class Attribute implements Serializable {

    /**
     * The name of this attribute.
     * The part before the equals sign, or the stand-alone attribute.
     * This will be {@code null} if the attribute is whitespace.
     */
    protected String mName;

    /**
     * The assignment string of the attribute.
     * The equals sign.
     * This will be {@code null} if the attribute is a
     * stand-alone attribute.
     */
    protected String mAssignment;

    /**
     * The value of the attribute.
     * The part after the equals sign.
     * This will be {@code null} if the attribute is an empty or
     * stand-alone attribute.
     */
    protected String mValue;

    /**
     * The quote, if any, surrounding the value of the attribute.
     * This will be zero if there are no quotes around the value.
     */
    protected char mQuote;

    /**
     * Create an attribute with the name, assignment, value and quote given.
     * If the quote value is zero, assigns the value using {@link #setRawValue}
     * which sets the quote character to a proper value if necessary.
     * <p>
     * The fields are populated through the setters rather than by direct
     * assignment, so a subclass that overrides a setter sees the initial
     * values as well.
     * @param name The name of this attribute.
     * @param assignment The assignment string of this attribute.
     * @param value The value of this attribute.
     * @param quote The quote around the value of this attribute.
     */
    public Attribute(String name, String assignment, String value, char quote) {
        setName(name);
        setAssignment(assignment);
        if (0 == quote) {
            setRawValue(value);
        } else {
            setValue(value);
            setQuote(quote);
        }
    }

    /**
     * Create an attribute with the name, value and quote given.
     * Uses an equals sign as the assignment string if the value is not
     * {@code null} (and an empty string otherwise), and calls
     * {@link #setRawValue} to get the correct quoting if quote is zero.
     * @param name The name of this attribute.
     * @param value The value of this attribute.
     * @param quote The quote around the value of this attribute.
     */
    public Attribute(String name, String value, char quote) {
        this(name, (null == value ? "" : "="), value, quote);
    }

    /**
     * Create a whitespace attribute with the value given.
     * @param value The value of this attribute.
     * @throws IllegalArgumentException if the value contains other than
     * whitespace. To set a real value use {@link #Attribute(String,String)}.
     * @throws NullPointerException if the value is {@code null}.
     */
    public Attribute(String value) throws IllegalArgumentException {
        if (0 != value.trim().length()) {
            throw new IllegalArgumentException("non whitespace value");
        }
        setName(null);
        setAssignment(null);
        setValue(value);
        setQuote((char) 0);
    }

    /**
     * Create an attribute with the name and value given.
     * Uses an equals sign as the assignment string if the value is not
     * {@code null} (and an empty string otherwise), and calls
     * {@link #setRawValue} to get the correct quoting.
     * @param name The name of this attribute.
     * @param value The value of this attribute.
     */
    public Attribute(String name, String value) {
        this(name, (null == value ? "" : "="), value, (char) 0);
    }

    /**
     * Create an attribute with the name, assignment string and value given.
     * Calls {@link #setRawValue} to get the correct quoting.
     * @param name The name of this attribute.
     * @param assignment The assignment string of this attribute.
     * @param value The value of this attribute.
     */
    public Attribute(String name, String assignment, String value) {
        this(name, assignment, value, (char) 0);
    }

    /**
     * Create an empty attribute.
     * This will provide "" from the {@link #toString()} and
     * {@link #toString(StringBuilder)} methods.
     */
    public Attribute() {
        this(null, null, null, (char) 0);
    }

    /**
     * Get the name of this attribute.
     * The part before the equals sign, or the contents of the
     * stand-alone attribute.
     * @return The name, or {@code null} if it's just a whitespace
     * 'attribute'.
     */
    public String getName() {
        return mName;
    }

    /**
     * Append the name of this attribute to a buffer.
     * Nothing is appended when the name is {@code null}.
     * @param buffer The buffer to place the name in.
     * @see #getName()
     */
    public void getName(StringBuilder buffer) {
        if (null != mName) {
            buffer.append(mName);
        }
    }

    /**
     * Set the name of this attribute.
     * Set the part before the equals sign, or the contents of the
     * stand-alone attribute.
     * <em>WARNING:</em> Setting this to {@code null} can result in
     * malformed HTML if the assignment string is not {@code null}.
     * @param name The new name.
     */
    public void setName(String name) {
        mName = name;
    }

    /**
     * Get the assignment string of this attribute.
     * This is usually just an equals sign, but in poorly formed attributes it
     * can include whitespace on either or both sides of an equals sign.
     * @return The assignment string.
     */
    public String getAssignment() {
        return mAssignment;
    }

    /**
     * Append the assignment string of this attribute to a buffer.
     * Nothing is appended when the assignment string is {@code null}.
     * @param buffer The buffer to place the assignment string in.
     * @see #getAssignment()
     */
    public void getAssignment(StringBuilder buffer) {
        if (null != mAssignment) {
            buffer.append(mAssignment);
        }
    }

    /**
     * Set the assignment string of this attribute.
     * <em>WARNING:</em> Setting this property to other than an equals sign
     * or {@code null} will result in malformed HTML. In the case of a
     * {@code null}, the {@link #setValue value} should also be set to
     * {@code null}.
     * @param assignment The new assignment string.
     */
    public void setAssignment(String assignment) {
        mAssignment = assignment;
    }

    /**
     * Get the value of the attribute.
     * The part after the equals sign, or the text if it's just a whitespace
     * 'attribute'.
     * <em>NOTE:</em> This does not include any quotes that may have enclosed
     * the value when it was read. To get the un-stripped value use
     * {@link #getRawValue()}.
     * @return The value, or {@code null} if it's a stand-alone or
     * empty attribute, or the text if it's just a whitespace 'attribute'.
     */
    public String getValue() {
        return mValue;
    }

    /**
     * Append the value of the attribute to a buffer.
     * Nothing is appended when the value is {@code null}.
     * @param buffer The buffer to place the value in.
     * @see #getValue()
     */
    public void getValue(StringBuilder buffer) {
        if (null != mValue) {
            buffer.append(mValue);
        }
    }

    /**
     * Set the value of the attribute.
     * The part after the equals sign, or the text if it's a whitespace
     * 'attribute'.
     * <em>WARNING:</em> Setting this property to a value that needs to be
     * quoted without also setting the quote character will result in malformed
     * HTML.
     * @param value The new value.
     */
    public void setValue(String value) {
        mValue = value;
    }

    /**
     * Get the quote, if any, surrounding the value of the attribute.
     * @return Either ' or " if the attribute value was quoted, or zero
     * if there are no quotes around it.
     */
    public char getQuote() {
        return mQuote;
    }

    /**
     * Append the quote, if any, surrounding the value of the attribute
     * to a buffer. Nothing is appended when the quote is zero.
     * @param buffer The buffer to place the quote in.
     * @see #getQuote()
     */
    public void getQuote(StringBuilder buffer) {
        if (0 != mQuote) {
            buffer.append(mQuote);
        }
    }

    /**
     * Set the quote surrounding the value of the attribute.
     * <em>WARNING:</em> Setting this property to zero will result in malformed
     * HTML if the {@link #getValue() value} needs to be quoted (i.e. contains
     * whitespace).
     * @param quote The new quote value.
     */
    public void setQuote(char quote) {
        mQuote = quote;
    }

    /**
     * Get the raw value of the attribute.
     * The part after the equals sign, or the text if it's just a whitespace
     * 'attribute'. This includes the quotes around the value if any.
     * @return The value with its surrounding quotes, or {@code null} if the
     * attribute has no value (see {@link #isValued()}).
     */
    public String getRawValue() {
        if (!isValued()) {
            return null;
        }
        char quote = getQuote();
        if (0 == quote) {
            return getValue();
        }
        StringBuilder buffer = new StringBuilder();
        buffer.append(quote);
        getValue(buffer);
        buffer.append(quote);
        return buffer.toString();
    }

    /**
     * Append the raw value of the attribute to a buffer.
     * The part after the equals sign, or the text if it's just a whitespace
     * 'attribute'. This includes the quotes around the value if any.
     * Unlike {@link #getRawValue()}, a non-zero quote is appended (twice)
     * even when the value is {@code null}, which yields the
     * <code>name=''</code> and <code>name=""</code> forms.
     * @param buffer The buffer to append the attribute value to.
     * @see #getRawValue()
     */
    public void getRawValue(StringBuilder buffer) {
        getQuote(buffer);
        getValue(buffer);
        getQuote(buffer);
    }

    /**
     * Set the value of the attribute and the quote character.
     * If the value is {@code null} or pure whitespace, assign it 'as is' and
     * reset the quote character. If not, check for leading and trailing
     * double or single quotes, and if found use this as the quote character
     * and the inner contents of value as the real value.
     * Otherwise, examine the string to determine if quotes are needed
     * and an appropriate quote character if so. This may involve changing
     * double quotes within the string to character references.
     * @param value The new value.
     */
    public void setRawValue(String value) {
        char quote = 0;
        if ((null != value) && (0 != value.trim().length())) {
            if (isWrappedIn(value, '\'')) {
                quote = '\'';
                value = value.substring(1, value.length() - 1);
            } else if (isWrappedIn(value, '"')) {
                quote = '"';
                value = value.substring(1, value.length() - 1);
            } else {
                // One pass decides both whether quoting is needed and which
                // quote characters are still free to use as the delimiter.
                boolean needed = false;
                boolean singleFree = true;
                boolean doubleFree = true;
                for (int i = 0; i < value.length(); i++) {
                    char ch = value.charAt(i);
                    if ('\'' == ch) {
                        singleFree = false;
                        needed = true;
                    } else if ('"' == ch) {
                        doubleFree = false;
                        needed = true;
                    } else if (!isUnquotedCharacter(ch)) {
                        needed = true;
                    }
                }
                if (needed) {
                    if (doubleFree) {
                        quote = '"';
                    } else if (singleFree) {
                        quote = '\'';
                    } else {
                        // Both quote kinds occur in the value: delimit with
                        // double quotes and turn every embedded double quote
                        // into its character entity reference.
                        quote = '"';
                        value = value.replace("\"", "&quot;");
                    }
                }
            }
        }
        setValue(value);
        setQuote(quote);
    }

    /**
     * Test whether text both starts and ends with the given quote character,
     * using two distinct characters (so a lone quote does not count).
     */
    private static boolean isWrappedIn(String text, char quote) {
        int length = text.length();
        return (2 <= length)
            && (quote == text.charAt(0))
            && (quote == text.charAt(length - 1));
    }

    /**
     * Test whether a character may appear in a value without forcing quotes:
     * a hyphen, period, underscore, colon, or any letter or digit.
     */
    private static boolean isUnquotedCharacter(char ch) {
        return ('-' == ch) || ('.' == ch) || ('_' == ch) || (':' == ch)
            || Character.isLetterOrDigit(ch);
    }

    /**
     * Predicate to determine if this attribute is whitespace.
     * @return {@code true} if this attribute is whitespace (its name is
     * {@code null}), {@code false} if it is a real attribute.
     */
    public boolean isWhitespace() {
        return null == getName();
    }

    /**
     * Predicate to determine if this attribute has no equals sign (or value).
     * @return {@code true} if this attribute is a standalone attribute,
     * {@code false} if it has an assignment string or no name.
     */
    public boolean isStandAlone() {
        return (null != getName()) && (null == getAssignment());
    }

    /**
     * Predicate to determine if this attribute has an equals sign but no value.
     * @return {@code true} if this attribute is an empty attribute,
     * {@code false} if it has a value or no assignment string.
     */
    public boolean isEmpty() {
        return (null != getAssignment()) && (null == getValue());
    }

    /**
     * Predicate to determine if this attribute has a value.
     * @return {@code true} if this attribute has a value,
     * {@code false} if it is empty or standalone.
     */
    public boolean isValued() {
        return null != getValue();
    }

    /**
     * Get the length of the string value of this attribute.
     * @return The number of characters required to express this attribute:
     * the lengths of the non-null name, assignment and value, plus two when
     * a quote character is set.
     */
    public int getLength() {
        int length = 0;
        String name = getName();
        if (null != name) {
            length += name.length();
        }
        String assignment = getAssignment();
        if (null != assignment) {
            length += assignment.length();
        }
        String value = getValue();
        if (null != value) {
            length += value.length();
        }
        if (0 != getQuote()) {
            length += 2; // opening and closing quote
        }
        return length;
    }

    /**
     * Get a text representation of this attribute.
     * Suitable for insertion into a tag, the output is one of
     * the forms:
     * <pre>
     * value
     * name
     * name=
     * name=value
     * name='value'
     * name="value"
     * </pre>
     * @return A string that can be used within a tag.
     */
    @Override
    public String toString() {
        // Presize from getLength() to avoid extra StringBuilder allocations.
        StringBuilder text = new StringBuilder(getLength());
        toString(text);
        return text.toString();
    }

    /**
     * Append a text representation of this attribute to a buffer:
     * the name, then the assignment string, then the raw (quoted) value.
     * @param buffer The accumulator for placing the text into.
     * @see #toString()
     */
    public void toString(StringBuilder buffer) {
        getName(buffer);
        getAssignment(buffer);
        getRawValue(buffer);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy