All Downloads are FREE. Search and download functionalities are using the official Maven repository.

src.java.net.htmlparser.jericho.Attribute Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a Java library allowing analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML.

There is a newer version: 3.4
Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.io.*;

/**
 * Represents a single attribute
 * name/value segment within a {@link StartTag}.
 * 

* An instance of this class is a representation of a single attribute in the source document and is not modifiable. * The {@link OutputDocument#replace(Attributes, Map)} and {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} methods * provide the means to add, delete or modify attributes and their values in an {@link OutputDocument}. *

* Obtained using the {@link Attributes#get(String key)} method. *

* See also the XML 1.0 specification for attributes. * * @see Attributes */ public final class Attribute extends Segment { private final String key; private final Segment nameSegment; private final Segment valueSegment; private final Segment valueSegmentIncludingQuotes; static final String CHECKED="checked"; static final String CLASS="class"; static final String DISABLED="disabled"; static final String ID="id"; static final String MULTIPLE="multiple"; static final String NAME="name"; static final String SELECTED="selected"; static final String STYLE="style"; static final String TYPE="type"; static final String VALUE="value"; /** * Constructs a new Attribute with no value part, called from Attributes class. *

* Note that the resulting Attribute segment has the same span as the supplied nameSegment. * * @param source the {@link Source} document. * @param key the name of this attribute in lower case. * @param nameSegment the segment representing the name. */ Attribute(final Source source, final String key, final Segment nameSegment) { this(source,key,nameSegment,null,null); } /** * Constructs a new Attribute, called from Attributes class. *

* The resulting Attribute segment begins at the start of the nameSegment * and finishes at the end of the valueSegmentIncludingQuotes. If this attribute * has no value, it finishes at the end of the nameSegment. *

* If this attribute has no value, the valueSegment and valueSegmentIncludingQuotes must be null. * The valueSegmentIncludingQuotes parameter must not be null if the valueSegment is not null, and vice versa. * * @param source the {@link Source} document. * @param key the name of this attribute in lower case. * @param nameSegment the segment spanning the name. * @param valueSegment the segment spanning the value. * @param valueSegmentIncludingQuotes the segment spanning the value, including quotation marks if any. */ Attribute(final Source source, final String key, final Segment nameSegment, final Segment valueSegment, final Segment valueSegmentIncludingQuotes) { super(source,nameSegment.getBegin(),(valueSegmentIncludingQuotes==null ? nameSegment.getEnd() : valueSegmentIncludingQuotes.getEnd())); this.key=key; this.nameSegment=nameSegment; this.valueSegment=valueSegment; this.valueSegmentIncludingQuotes=valueSegmentIncludingQuotes; } /** * Returns the name of this attribute in lower case. *

* This package treats all attribute names as case insensitive, consistent with * HTML but not consistent with * XHTML. * * @return the name of this attribute in lower case. * @see #getName() */ public String getKey() { return key; } /** * Returns the name of this attribute in original case. *

* This is exactly equivalent to {@link #getNameSegment()}.toString(). * * @return the name of this attribute in original case. * @see #getKey() */ public String getName() { return nameSegment.toString(); } /** * Returns the segment spanning the {@linkplain #getName() name} of this attribute. * @return the segment spanning the {@linkplain #getName() name} of this attribute. * @see #getName() */ public Segment getNameSegment() { return nameSegment; } /** * Indicates whether this attribute has a value. *

* This method also returns true if this attribute has been assigned a zero-length value. *

* It only returns false if this attribute appears in * minimized form. * * @return true if this attribute has a value, otherwise false. */ public boolean hasValue() { return valueSegment!=null; } /** * Returns the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute, * or null if it {@linkplain #hasValue() has no value}. *

* This is equivalent to {@link CharacterReference}.{@link CharacterReference#decode(CharSequence,boolean) decode}({@link #getValueSegment()},true). *

* Note that before version 1.4.1 this method returned the raw value of the attribute as it appears in the source document, * without {@linkplain CharacterReference#decode(CharSequence,boolean) decoding}. *

* To obtain the raw value without decoding, use {@link #getValueSegment()}.toString(). *

* Special attention should be given to attributes that contain URLs, such as the * href attribute. * When such an attribute contains a URL with parameters (as described in the * form-urlencoded media type), * the ampersand (&) characters used to separate the parameters should be * {@linkplain CharacterReference#encode(CharSequence) encoded} to prevent the parameter names from being * unintentionally interpreted as {@linkplain CharacterEntityReference character entity references}. * This requirement is explicitly stated in the * HTML 4.01 specification section 5.3.2. *

* For example, take the following element in the source document: *

<a href="Report.jsp?chapt=2&sect=3">next</a>
* By default, calling * {@link Element#getAttributes() getAttributes()}.{@link Attributes#getValue(String) getValue}("href") * on this element returns the string * "Report.jsp?chapt=2§=3", since the text "&sect" is interpreted as the rarely used * character entity reference {@link CharacterEntityReference#_sect &sect;} (U+00A7), despite the fact that it is * missing the {@linkplain CharacterReference#isTerminated() terminating semicolon} (;). *

* Most browsers recognise unterminated character entity references * in attribute values representing a codepoint of U+00FF or below, but ignore those representing codepoints above this value. * One relatively popular browser only recognises those representing a codepoint of U+003E or below, meaning it would * have interpreted the URL in the above example differently to most other browsers. * Most browsers also use different rules depending on whether the unterminated character reference is inside or outside * of an attribute value, with both of these possibilities further split into different rules for * {@linkplain CharacterEntityReference character entity references}, * decimal character references, and * hexadecimal character references. *

* The behaviour of this library is determined by the current {@linkplain Config.CompatibilityMode compatibility mode} setting, * which is determined by the static {@link Config#CurrentCompatibilityMode} property. * * @return the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute, or null if it {@linkplain #hasValue() has no value}. */ public String getValue() { return CharacterReference.decode(valueSegment,true); } /** * Returns the segment spanning the {@linkplain #getValue() value} of this attribute, or null if it {@linkplain #hasValue() has no value}. * @return the segment spanning the {@linkplain #getValue() value} of this attribute, or null if it {@linkplain #hasValue() has no value}. * @see #getValue() */ public Segment getValueSegment() { return valueSegment; } /** * Returns the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any, * or null if it {@linkplain #hasValue() has no value}. *

* If the value is not enclosed by quotation marks, this is the same as the {@linkplain #getValueSegment() value segment} * * @return the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any, or null if it {@linkplain #hasValue() has no value}. */ public Segment getValueSegmentIncludingQuotes() { return valueSegmentIncludingQuotes; } /** * Returns the character used to quote the value. *

* The return value is either a double-quote ("), a single-quote ('), or a space. * * @return the character used to quote the value, or a space if the value is not quoted or this attribute has no value. */ public char getQuoteChar() { if (valueSegment==valueSegmentIncludingQuotes) return ' '; // no quotes return source.charAt(valueSegmentIncludingQuotes.getBegin()); } /** * Returns a string representation of this object useful for debugging purposes. * @return a string representation of this object useful for debugging purposes. */ public String getDebugInfo() { final StringBuilder sb=new StringBuilder().append(key).append(super.getDebugInfo()).append(",name=").append(nameSegment.getDebugInfo()); if (hasValue()) sb.append(",value=").append(valueSegment.getDebugInfo()).append('"').append(valueSegment).append('"').append(Config.NewLine); else sb.append(",NO VALUE").append(Config.NewLine); return sb.toString(); } Tag appendTidy(final Appendable appendable, Tag nextTag) throws IOException { appendable.append(' ').append(nameSegment); if (valueSegment!=null) { appendable.append("=\""); while (nextTag!=null && nextTag.begin=valueSegment.end) { appendTidyValue(appendable,valueSegment); } else { int i=valueSegment.begin; while (nextTag!=null && nextTag.beginvalueSegment.end) { appendable.append(new Segment(source,nextTag.begin,i=valueSegment.end)); break; } appendable.append(nextTag); i=nextTag.end; nextTag=nextTag.getNextTag(); } if (i





© 2015 - 2024 Weber Informatics LLC | Privacy Policy