![JAR search and dependency download from the Maven repository](/logo.png)
au.id.jericho.lib.html.Attribute Maven / Gradle / Ivy
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.3
// Copyright (C) 2006 Martin Jericho
// http://sourceforge.net/projects/jerichohtml/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package au.id.jericho.lib.html;
import java.io.*;
/**
* Represents a single attribute
* name/value segment within a {@link StartTag}.
*
* An instance of this class is a representation of a single attribute in the source document and is not modifiable.
* The {@link AttributesOutputSegment} class provides the means to add, delete or modify attributes and
* their values for inclusion in an {@link OutputDocument}.
*
* Obtained using the {@link Attributes#get(String key)} method.
*
* See also the XML 1.0 specification for attributes.
*
* @see Attributes
*/
public final class Attribute extends Segment {
// Attribute name in lower case, exactly as supplied to the constructor.
private final String key;
// Segment spanning the attribute name as it appears in the source document.
private final Segment nameSegment;
// Segment spanning the attribute value without any surrounding quotation marks;
// null when the attribute has no value.
private final Segment valueSegment;
// Segment spanning the attribute value including its quotation marks, if any;
// null when the attribute has no value.
private final Segment valueSegmentIncludingQuotes;
// Package-private string constants. The values look like lower-case names of
// common HTML attributes; their consumers are not visible in this part of the file.
static final String CHECKED="checked";
static final String CLASS="class";
static final String DISABLED="disabled";
static final String ID="id";
static final String MULTIPLE="multiple";
static final String NAME="name";
static final String SELECTED="selected";
static final String STYLE="style";
static final String TYPE="type";
static final String VALUE="value";
/**
* Constructs a new Attribute with no value part, called from Attributes class.
*
* Note that the resulting Attribute segment has the same span as the supplied nameSegment.
*
* @param source the {@link Source} document.
* @param key the name of this attribute in lower case.
* @param nameSegment the segment representing the name.
*/
Attribute(final Source source, final String key, final Segment nameSegment) {
    // A value-less attribute: hand both value segments to the full constructor as null.
    this(source, key, nameSegment, null, null);
}
/**
* Constructs a new Attribute, called from Attributes class.
*
* The resulting Attribute segment begins at the start of the nameSegment
* and finishes at the end of the valueSegmentIncludingQuotes. If this attribute
* has no value, it finishes at the end of the nameSegment.
*
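* If this attribute has no value, the <code>valueSegment</code> and
* <code>valueSegmentIncludingQuotes</code> parameters must both be <code>null</code>.
* The <code>valueSegmentIncludingQuotes</code> parameter must not be <code>null</code> if the
* <code>valueSegment</code> parameter is not <code>null</code>, and vice versa.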
*
* @param source the {@link Source} document.
* @param key the name of this attribute in lower case.
* @param nameSegment the segment spanning the name.
* @param valueSegment the segment spanning the value.
* @param valueSegmentIncludingQuotes the segment spanning the value, including quotation marks if any.
*/
Attribute(final Source source, final String key, final Segment nameSegment, final Segment valueSegment, final Segment valueSegmentIncludingQuotes) {
    // The attribute starts where the name starts. It ends at the end of the
    // (possibly quoted) value, or at the end of the name when no value is present.
    super(
        source,
        nameSegment.getBegin(),
        valueSegmentIncludingQuotes != null ? valueSegmentIncludingQuotes.getEnd() : nameSegment.getEnd()
    );
    this.valueSegmentIncludingQuotes = valueSegmentIncludingQuotes;
    this.valueSegment = valueSegment;
    this.nameSegment = nameSegment;
    this.key = key;
}
/**
* Returns the name of this attribute in lower case.
*
* This package treats all attribute names as case insensitive, consistent with
* HTML but not consistent with
* XHTML.
*
* @return the name of this attribute in lower case.
* @see #getName()
*/
public String getKey() {
    // The key is stored as supplied at construction time (lower-case attribute name).
    return this.key;
}
/**
* Returns the name of this attribute in original case.
*
* This is exactly equivalent to <code>{@link #getNameSegment()}.toString()</code>.
*
* @return the name of this attribute in original case.
* @see #getKey()
*/
public String getName() {
    // The original-case name is simply the text covered by the name segment.
    final Segment name = getNameSegment();
    return name.toString();
}
/**
* Returns the segment spanning the {@linkplain #getName() name} of this attribute.
* @return the segment spanning the {@linkplain #getName() name} of this attribute.
* @see #getName()
*/
public Segment getNameSegment() {
    // Plain accessor for the segment captured at construction time.
    return this.nameSegment;
}
/**
* Indicates whether this attribute has a value.
*
* This method also returns <code>true</code> if this attribute has been assigned a zero-length value.
*
* It only returns <code>false</code> if this attribute appears in
* minimized form.
*
* @return <code>true</code> if this attribute has a value, otherwise <code>false</code>.
*/
public boolean hasValue() {
    // A missing value is represented by a null value segment; an empty value still has a segment.
    return !(this.valueSegment == null);
}
/**
* Returns the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute,
* or <code>null</code> if it {@linkplain #hasValue() has no value}.
*
* This is equivalent to
* <code>{@link CharacterReference}.{@link CharacterReference#decode(CharSequence,boolean) decode}({@link #getValueSegment()},true)</code>.
*
* Note that before version 1.4.1 this method returned the raw value of the attribute as it appears in the source document,
* without {@linkplain CharacterReference#decode(CharSequence,boolean) decoding}.
*
* To obtain the raw value without decoding, use <code>{@link #getValueSegment()}.toString()</code>.
*
* Special attention should be given to attributes that contain URLs, such as the
* <code>href</code> attribute.
* When such an attribute contains a URL with parameters (as described in the
* form-urlencoded media type),
* the ampersand (<code>&amp;</code>) characters used to separate the parameters should be
* {@linkplain CharacterReference#encode(CharSequence) encoded} to prevent the parameter names from being
* unintentionally interpreted as {@linkplain CharacterEntityReference character entity references}.
* This requirement is explicitly stated in the
* HTML 4.01 specification section 5.3.2.
*
* For example, take the following element in the source document:
*
* <pre>&lt;a href="Report.jsp?chapt=2&amp;sect=3"&gt;next&lt;/a&gt;</pre>
*
* By default, calling
* <code>{@link Element#getAttributes() getAttributes()}.{@link Attributes#getValue(String) getValue}("href")</code>
* on this element returns the string
* "<code>Report.jsp?chapt=2&sect;=3</code>", since the text "<code>&amp;sect</code>" is interpreted as the rarely used
* character entity reference {@link CharacterEntityReference#_sect &sect;} (U+00A7), despite the fact that it is
* missing the {@linkplain CharacterReference#isTerminated() terminating semicolon} (<code>;</code>).
*
* Most browsers recognise unterminated character entity references
* in attribute values representing a codepoint of U+00FF or below, but ignore those representing codepoints above this value.
* One relatively popular browser only recognises those representing a codepoint of U+003E or below, meaning it would
* have interpreted the URL in the above example differently to most other browsers.
* Most browsers also use different rules depending on whether the unterminated character reference is inside or outside
* of an attribute value, with both of these possibilities further split into different rules for
* {@linkplain CharacterEntityReference character entity references},
* decimal character references, and
* hexadecimal character references.
*
* The behaviour of this library is determined by the current {@linkplain Config.CompatibilityMode compatibility mode} setting,
* which is determined by the {@link Config#CurrentCompatibilityMode} property.
*
* @return the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute, or <code>null</code> if it {@linkplain #hasValue() has no value}.
*/
public String getValue() {
    // Decoding is delegated entirely to CharacterReference; the value segment is
    // passed through as-is (it is null when the attribute has no value).
    final Segment rawValue = this.valueSegment;
    return CharacterReference.decode(rawValue, true);
}
/**
* Returns the segment spanning the {@linkplain #getValue() value} of this attribute, or <code>null</code> if it {@linkplain #hasValue() has no value}.
* @return the segment spanning the {@linkplain #getValue() value} of this attribute, or <code>null</code> if it {@linkplain #hasValue() has no value}.
* @see #getValue()
*/
public Segment getValueSegment() {
    // Plain accessor; null indicates that the attribute has no value.
    return this.valueSegment;
}
/**
* Returns the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any,
* or <code>null</code> if it {@linkplain #hasValue() has no value}.
*
* If the value is not enclosed by quotation marks, this is the same as the {@linkplain #getValueSegment() value segment}.
*
* @return the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any, or <code>null</code> if it {@linkplain #hasValue() has no value}.
*/
public Segment getValueSegmentIncludingQuotes() {
    // Plain accessor; null indicates that the attribute has no value.
    return this.valueSegmentIncludingQuotes;
}
/**
* Returns the character used to quote the value.
*
* The return value is either a double-quote (<code>"</code>), a single-quote (<code>'</code>), or a space.
*
* @return the character used to quote the value, or a space if the value is not quoted or this attribute has no value.
*/
public char getQuoteChar() {
    // When both fields refer to the very same segment (this includes both being null),
    // nothing surrounds the value, which is reported as a space.
    final boolean unquoted = (valueSegmentIncludingQuotes == valueSegment);
    if (!unquoted) {
        // The quote character is the first character of the span that includes the quotes.
        final int quotePosition = valueSegmentIncludingQuotes.getBegin();
        return source.charAt(quotePosition);
    }
    return ' ';
}
/**
* Returns a string representation of this object useful for debugging purposes.
* @return a string representation of this object useful for debugging purposes.
*/
public String getDebugInfo() {
    // Common prefix: key, the inherited segment debug info, then the name segment's debug info.
    final StringBuffer info = new StringBuffer();
    info.append(key);
    info.append(super.getDebugInfo());
    info.append(",name=");
    info.append(nameSegment.getDebugInfo());
    if (!hasValue()) {
        // Attribute without a value part.
        info.append(",NO VALUE\n");
        return info.toString();
    }
    // Value present: its debug info followed by the value text in double quotes.
    info.append(",value=");
    info.append(valueSegment.getDebugInfo());
    info.append('"');
    info.append(valueSegment);
    info.append("\"\n");
    return info.toString();
}
Tag appendTidy(final StringBuffer sb, Tag nextTag) {
sb.append(' ');
Util.appendTo(sb,nameSegment);
if (valueSegment!=null) {
sb.append("=\"");
while (nextTag!=null && nextTag.begin=valueSegment.end) {
appendTidyValue(sb,valueSegment);
} else {
int i=valueSegment.begin;
while (nextTag!=null && nextTag.beginvalueSegment.end) {
sb.append(new Segment(source,nextTag.begin,i=valueSegment.end));
break;
}
sb.append(nextTag);
i=nextTag.end;
nextTag=nextTag.findNextTag();
}
if (i