src.java.net.htmlparser.jericho.Attribute Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of jericho-html

Jericho HTML Parser is a Java library allowing analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML.

There is a newer version: 3.4

Show newest version

// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.io.*;

/**
 * Represents a single attribute
 * name/value segment within a {@link StartTag}.
 * <p>
 * An instance of this class is a representation of a single attribute in the source document and is not modifiable.
 * The {@link OutputDocument#replace(Attributes, Map)} and {@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)} methods
 * provide the means to add, delete or modify attributes and their values in an {@link OutputDocument}.
 * <p>
 * Obtained using the {@link Attributes#get(String key)} method.
 * <p>
 * See also the XML 1.0 specification for attributes.
 *
 * @see Attributes
 */
public final class Attribute extends Segment {
	// Name of this attribute in lower case, as returned by getKey().
	private final String key;
	// Segment spanning the attribute name as it appears in the source document.
	private final Segment nameSegment;
	// Segment spanning the attribute value; null when the attribute has no value.
	private final Segment valueSegment;
	// Segment spanning the attribute value including quotation marks, if any; null when the attribute has no value.
	private final Segment valueSegmentIncludingQuotes;

	// Package-private constants holding lower-case name strings.
	// NOTE(review): presumably these are attribute keys used by other classes in this package - confirm against their usages.
	static final String CHECKED="checked";
	static final String CLASS="class";
	static final String DISABLED="disabled";
	static final String ID="id";
	static final String MULTIPLE="multiple";
	static final String NAME="name";
	static final String SELECTED="selected";
	static final String STYLE="style";
	static final String TYPE="type";
	static final String VALUE="value";

	/**
	 * Creates an attribute that consists of a name only, with no value part; called from the Attributes class.
	 * <p>
	 * Delegates to the five-argument constructor with both value segments set to <code>null</code>,
	 * so the resulting attribute segment has the same span as the supplied <code>nameSegment</code>.
	 *
	 * @param source  the {@link Source} document.
	 * @param key  the name of this attribute in lower case.
	 * @param nameSegment  the segment representing the name.
	 */
	Attribute(final Source source, final String key, final Segment nameSegment) {
		this(source, key, nameSegment, null, null);
	}

	/**
	 * Creates an attribute from its name segment and its optional value segments; called from the Attributes class.
	 * <p>
	 * The attribute segment begins where <code>nameSegment</code> begins.
	 * It ends where <code>valueSegmentIncludingQuotes</code> ends, or where <code>nameSegment</code> ends
	 * if <code>valueSegmentIncludingQuotes</code> is <code>null</code>.
	 * <p>
	 * For an attribute without a value, both <code>valueSegment</code> and <code>valueSegmentIncludingQuotes</code> must be <code>null</code>.
	 * The <code>valueSegmentIncludingQuotes</code> parameter must not be <code>null</code> if <code>valueSegment</code> is not <code>null</code>, and vice versa.
	 * This constructor does not itself verify that rule.
	 *
	 * @param source  the {@link Source} document.
	 * @param key  the name of this attribute in lower case.
	 * @param nameSegment  the segment spanning the name.
	 * @param valueSegment  the segment spanning the value.
	 * @param valueSegmentIncludingQuotes  the segment spanning the value, including quotation marks if any.
	 */
	Attribute(final Source source, final String key, final Segment nameSegment, final Segment valueSegment, final Segment valueSegmentIncludingQuotes) {
		super(
			source,
			nameSegment.getBegin(),
			// The end of the span comes from the quoted value when there is one, otherwise from the name.
			valueSegmentIncludingQuotes!=null ? valueSegmentIncludingQuotes.getEnd() : nameSegment.getEnd()
		);
		this.valueSegmentIncludingQuotes=valueSegmentIncludingQuotes;
		this.valueSegment=valueSegment;
		this.nameSegment=nameSegment;
		this.key=key;
	}

	/**
	 * Returns the lower-case name of this attribute.
	 * <p>
	 * This package treats all attribute names as case insensitive, consistent with
	 * HTML but not consistent with
	 * XHTML.
	 *
	 * @return the name of this attribute in lower case.
	 * @see #getName()
	 */
	public String getKey() {
		return this.key;
	}

	/**
	 * Returns the name of this attribute exactly as it is written in the source document (original case).
	 * <p>
	 * The result is the string form of the {@linkplain #getNameSegment() name segment}.
	 *
	 * @return the name of this attribute in original case.
	 * @see #getKey()
	 */
	public String getName() {
		final Segment name=this.nameSegment;
		return name.toString();
	}

	/**
	 * Returns the segment that spans the {@linkplain #getName() name} of this attribute.
	 *
	 * @return the segment spanning the {@linkplain #getName() name} of this attribute.
	 * @see #getName()
	 */
	public Segment getNameSegment() {
		return this.nameSegment;
	}

	/**
	 * Indicates whether this attribute has a value.
	 * <p>
	 * An attribute that has been assigned a zero-length value still counts as having a value.
	 * The result is <code>false</code> only if this attribute appears in minimized form.
	 *
	 * @return <code>true</code> if this attribute has a value, otherwise <code>false</code>.
	 */
	public boolean hasValue() {
		// A missing value is represented by a null value segment.
		if (valueSegment==null) return false;
		return true;
	}

	/**
	 * Returns the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute,
	 * or null if it {@linkplain #hasValue() has no value}.
	 * 

	 * This is equivalent to {@link CharacterReference}.{@link CharacterReference#decode(CharSequence,boolean) decode}({@link #getValueSegment()},true).
	 * 

	 * Note that before version 1.4.1 this method returned the raw value of the attribute as it appears in the source document,
	 * without {@linkplain CharacterReference#decode(CharSequence,boolean) decoding}.
	 * 

	 * To obtain the raw value without decoding, use {@link #getValueSegment()}.toString().
	 * 

	 * Special attention should be given to attributes that contain URLs, such as the
	 * href attribute.
	 * When such an attribute contains a URL with parameters (as described in the
	 * form-urlencoded media type),
	 * the ampersand (&) characters used to separate the parameters should be
	 * {@linkplain CharacterReference#encode(CharSequence) encoded} to prevent the parameter names from being
	 * unintentionally interpreted as {@linkplain CharacterEntityReference character entity references}.
	 * This requirement is explicitly stated in the 
	 * HTML 4.01 specification section 5.3.2.
	 * 

	 * For example, take the following element in the source document:
	 * <pre>&lt;a href="Report.jsp?chapt=2&amp;sect=3"&gt;next&lt;/a&gt;</pre>
	 * By default, calling
	 * {@link Element#getAttributes() getAttributes()}.{@link Attributes#getValue(String) getValue}("href")
	 * on this element returns the string
	 * "Report.jsp?chapt=2§=3", since the text "&amp;sect" is interpreted as the rarely used
	 * character entity reference {@link CharacterEntityReference#_sect &amp;sect;} (U+00A7), despite the fact that it is
	 * missing the {@linkplain CharacterReference#isTerminated() terminating semicolon} (;).
	 * 
	 * Most browsers recognise unterminated character entity references
	 * in attribute values representing a codepoint of U+00FF or below, but ignore those representing codepoints above this value.
 	 * One relatively popular browser only recognises those representing a codepoint of U+003E or below, meaning it would
 	 * have interpreted the URL in the above example differently to most other browsers.
	 * Most browsers also use different rules depending on whether the unterminated character reference is inside or outside
	 * of an attribute value, with both of these possibilities further split into different rules for
	 * {@linkplain CharacterEntityReference character entity references},
	 * decimal character references, and
	 * hexadecimal character references.
	 * 

	 * The behaviour of this library is determined by the current {@linkplain Config.CompatibilityMode compatibility mode} setting,
	 * which is determined by the static {@link Config#CurrentCompatibilityMode} property.
	 *
	 * @return the {@linkplain CharacterReference#decode(CharSequence,boolean) decoded} value of this attribute, or null if it {@linkplain #hasValue() has no value}.
	 */
	public String getValue() {
		// rawValue is null when this attribute has no value; the documented result is then null.
		final Segment rawValue=valueSegment;
		// NOTE(review): the flag is always true here; presumably it marks the text as an attribute value - confirm against CharacterReference.decode(CharSequence,boolean).
		return CharacterReference.decode(rawValue,true);
	}

	/**
	 * Returns the segment that spans the {@linkplain #getValue() value} of this attribute.
	 *
	 * @return the segment spanning the {@linkplain #getValue() value} of this attribute, or <code>null</code> if it {@linkplain #hasValue() has no value}.
	 * @see #getValue()
	 */
	public Segment getValueSegment() {
		return this.valueSegment;
	}

	/**
	 * Returns the segment that spans the {@linkplain #getValue() value} of this attribute together with its
	 * quotation marks, if any.
	 * <p>
	 * If the value is not enclosed by quotation marks, this is the same as the {@linkplain #getValueSegment() value segment}.
	 *
	 * @return the segment spanning the {@linkplain #getValue() value} of this attribute, including quotation marks if any, or <code>null</code> if it {@linkplain #hasValue() has no value}.
	 */
	public Segment getValueSegmentIncludingQuotes() {
		return this.valueSegmentIncludingQuotes;
	}

	/**
	 * Returns the character used to quote the value.
	 * <p>
	 * The return value is either a double-quote ("), a single-quote ('), or a space.
	 * A space is returned when the value segment and the value segment including quotes are the
	 * same reference, which includes the case where both are <code>null</code>.
	 *
	 * @return the character used to quote the value, or a space if the value is not quoted or this attribute has no value.
	 */
	public char getQuoteChar() {
		final boolean quoted=valueSegmentIncludingQuotes!=valueSegment;
		if (quoted) {
			// The opening quote is the first character of the quoted span.
			final int quotePosition=valueSegmentIncludingQuotes.getBegin();
			return source.charAt(quotePosition);
		}
		return ' '; // no quotes
	}

	/**
	 * Builds a string describing this attribute for debugging purposes.
	 * <p>
	 * The text consists of the key, the debug info of the enclosing segment, the debug info of the name segment,
	 * and then either the debug info and quoted text of the value segment or the marker <code>NO VALUE</code>,
	 * followed by {@link Config#NewLine}.
	 *
	 * @return a string representation of this object useful for debugging purposes.
	 */
	public String getDebugInfo() {
		final StringBuilder debugInfo=new StringBuilder();
		debugInfo.append(key);
		debugInfo.append(super.getDebugInfo());
		debugInfo.append(",name=").append(nameSegment.getDebugInfo());
		if (!hasValue()) {
			debugInfo.append(",NO VALUE");
		} else {
			debugInfo.append(",value=").append(valueSegment.getDebugInfo());
			debugInfo.append('"').append(valueSegment).append('"');
		}
		// Both branches end with the configured newline.
		debugInfo.append(Config.NewLine);
		return debugInfo.toString();
	}

	Tag appendTidy(final Appendable appendable, Tag nextTag) throws IOException {
		appendable.append(' ').append(nameSegment);
		if (valueSegment!=null) {
			appendable.append("=\"");
			while (nextTag!=null && nextTag.begin=valueSegment.end) {
				appendTidyValue(appendable,valueSegment);
			} else {
				int i=valueSegment.begin;
				while (nextTag!=null && nextTag.beginvalueSegment.end) {
						appendable.append(new Segment(source,nextTag.begin,i=valueSegment.end));
						break;
					}
					appendable.append(nextTag);
					i=nextTag.end;
					nextTag=nextTag.getNextTag();
				}
				if (i

    

    

    
                
 
            
    
                
 
            

    
        
            
                Related Artifacts
                
                     mysql-connector-java mysql
 facebook-messenger com.github.codedrinker
 selenium-java org.seleniumhq.selenium
 instagram-java com.github.sola92
 gson com.google.code.gson
 poi org.apache.poi
 httpclient org.apache.httpcomponents
 json org.json
 facebook-java-api com.google.code.facebook-java-api
 poi-ooxml org.apache.poi
 jackson-databind com.fasterxml.jackson.core
 junit junit
 primefaces org.primefaces
 ojdbc7 com.github.noraui
 jfoenix com.jfoenix
 testng org.testng
 json-simple com.googlecode.json-simple
 selenium-server org.seleniumhq.selenium
 itextpdf com.itextpdf
 spring-core org.springframework
                
            
        
        
            
                Related Groups
                
                     org.springframework
 org.apache.poi
 org.hibernate
 org.springframework.boot
 com.fasterxml.jackson.core
 com.itextpdf
 org.seleniumhq.selenium
 mysql
 org.finos.legend.engine
 org.apache.httpcomponents
 org.apache.logging.log4j
 org.openjfx
 org.apache.commons
 org.json
 com.google.guava
 com.google.zxing
 net.sf.jasperreports
 javax.xml.bind
 ojdbc
 com.google.code.facebook-java-api