src.au.id.jericho.lib.html.AttributesOutputSegment Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jericho-html
Jericho HTML Parser is a simple but powerful Java library allowing analysis and manipulation of parts of an HTML document, including some common server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.
There is a newer version: 2.3
Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 1.5
// Copyright (C) 2004 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package au.id.jericho.lib.html;

import java.util.*;
import java.io.*;

/**
 * Implements an {@link IOutputSegment} whose content is a list of attribute name/value pairs.
 * <p>
 * This output segment is designed to replace the original {@link Attributes} segment in the source,
 * providing a simple means of adding, modifying and removing attributes.
 * <p>
 * Each instance of this class contains a <code>java.util.Map</code> of name/value pairs which can either be
 * specified directly in the constructor or initialised to the same entries as the source {@link Attributes}
 * specified in the constructor.
 * This map can be accessed via the {@link #getMap()} method, and its entries modified as required before output.
 * <p>
 * Keys in the map must be <code>String</code> objects, and values must implement the <code>CharSequence</code> interface.
 * <p>
 * An attribute with no value is represented by a map entry with a <code>null</code> value.
 * <p>
 * Attribute values are stored unencoded in the map, and are automatically
 * {@linkplain CharacterReference#encode(CharSequence) encoded} if necessary during output.
 * <p>
 * The use of invalid characters in attribute names will result in unspecified behaviour.
 * <p>
 * Note that methods in the <code>Attributes</code> class treat attribute names as case insensitive,
 * whereas the <code>Map</code> will treat them as case sensitive.
 * <p>
 * Example of usage:
 * <pre>
 *  Source source=new Source(htmlDocument);
 *  Attributes bodyAttributes
 *    =source.findNextStartTag(0,Tag.BODY).getAttributes();
 *  AttributesOutputSegment bodyAttributesOutputSegment
 *    =new AttributesOutputSegment(bodyAttributes,true);
 *  bodyAttributesOutputSegment.getMap().put("bgcolor","green");
 *  OutputDocument outputDocument=new OutputDocument(source);
 *  outputDocument.add(bodyAttributesOutputSegment);
 *  String htmlDocumentWithGreenBackground=outputDocument.toString();
 * </pre>
 *
 * @see OutputDocument
 * @see Attributes
 */
public class AttributesOutputSegment implements IOutputSegment {
	// Span copied from the source Attributes at construction; never reassigned afterwards.
	private final int begin;
	private final int end;
	// The map handed to (or created for) the constructor. The reference is fixed, but the
	// same instance is returned by getMap(), so its entries may be changed before output.
	private final Map map;

	/**
	 * Constructs a new <code>AttributesOutputSegment</code> with the same span and initial name/value entries
	 * as the specified source {@link Attributes}.
	 * <p>
	 * Specifying a value of <code>true</code> in the <code>convertNamesToLowerCase</code> argument
	 * causes all attribute names to be converted to lower case in the map.
	 * This simplifies the process of finding/updating specific attributes since map keys are case sensitive.
	 * <p>
	 * Attribute values are automatically {@linkplain CharacterReference#decode(CharSequence) decoded} before
	 * being loaded into the map.
	 * <p>
	 * Calling this constructor with the following code:
	 * <pre>new AttributesOutputSegment(attributes, convertNamesToLowerCase)</pre>
	 * is logically equivalent to calling:
	 * <pre>new AttributesOutputSegment(attributes, attributes.populateMap(new LinkedHashMap(), convertNamesToLowerCase))</pre>
	 * <p>
	 * The use of <code>LinkedHashMap</code> to implement the map ensures (probably unnecessarily) that
	 * existing attributes are output in the same order as they appear in the source document, and new
	 * attributes are output in the same order as they are added.
	 *
	 * @param attributes  the <code>Attributes</code> defining the span and initial name/value entries of the new <code>AttributesOutputSegment</code>.
	 * @param convertNamesToLowerCase  specifies whether all attribute names are converted to lower case in the map.
	 * @throws NullPointerException  if <code>attributes</code> is <code>null</code>, because the initial map is obtained from it before any argument check runs.
	 * @see #AttributesOutputSegment(Attributes,Map)
	 */
	public AttributesOutputSegment(Attributes attributes, boolean convertNamesToLowerCase) {
		this(attributes,attributes.getMap(convertNamesToLowerCase));
	}

	/**
	 * Constructs a new <code>AttributesOutputSegment</code> with the same span
	 * as the specified source {@link Attributes}, using the specified <code>Map</code> to
	 * store the entries.
	 * <p>
	 * This constructor might be used if the <code>Map</code> containing the new attribute values
	 * should not be preloaded with the same entries as the source attributes, or a map implementation
	 * other than <code>LinkedHashMap</code> is required.
	 * <p>
	 * The map is stored by reference, not copied.
	 *
	 * @param attributes  the <code>Attributes</code> defining the span of the new <code>AttributesOutputSegment</code>.
	 * @param map  the <code>Map</code> containing the name/value entries.
	 * @throws IllegalArgumentException  if either argument is <code>null</code>.
	 * @see #AttributesOutputSegment(Attributes, boolean convertNamesToLowerCase)
	 */
	public AttributesOutputSegment(Attributes attributes, Map map) {
		// Checked separately so the message identifies the offending argument.
		if (attributes==null) throw new IllegalArgumentException("attributes must not be null");
		if (map==null) throw new IllegalArgumentException("map must not be null");
		begin=attributes.getBegin();
		end=attributes.getEnd();
		this.map=map;
	}

	/**
	 * Returns the begin position of this segment, as reported by the source {@link Attributes} at construction.
	 * @return the begin position of this segment.
	 */
	public int getBegin() {
		return begin;
	}

	/**
	 * Returns the end position of this segment, as reported by the source {@link Attributes} at construction.
	 * @return the end position of this segment.
	 */
	public int getEnd() {
		return end;
	}

	/**
	 * Returns the <code>Map</code> containing the name/value entries to be output.
	 * <p>
	 * The returned object is the map held by this segment, not a copy, so changes made to it
	 * are reflected in subsequent output.
	 *
	 * @return the <code>Map</code> containing the name/value entries to be output.
	 */
	public Map getMap() {
		return map;
	}

	/**
	 * Outputs the contents of the {@linkplain #getMap() map} as HTML attribute name/value pairs to the specified <code>Writer</code>.
	 * <p>
	 * Each attribute is preceded by a single space, and all values are
	 * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes.
	 *
	 * @param writer  the <code>Writer</code> to which the output is to be sent.
	 * @throws IOException  if an I/O exception occurs.
	 * @see Attributes#generateHTML(Map attributesMap)
	 */
	public void output(Writer writer) throws IOException {
		Attributes.appendHTML(writer,map);
	}

	/**
	 * Returns the HTML generated from the current contents of the {@linkplain #getMap() map}.
	 *
	 * @return the result of {@link Attributes#generateHTML(Map attributesMap)} for the current map.
	 */
	public String toString() {
		return Attributes.generateHTML(map);
	}

	/**
	 * Returns a string for debugging purposes, consisting of the span in the form
	 * <code>(begin,end):</code> followed by the same text that {@link #output(Writer)} writes.
	 *
	 * @return the span and generated output of this segment as a single string.
	 */
	public String getDebugInfo() {
		StringWriter stringWriter=new StringWriter();
		stringWriter.getBuffer().append('(').append(begin).append(',').append(end).append("):");
		try {
			output(stringWriter);
		} catch (IOException ignored) {
			// IOException never occurs in StringWriter
		}
		return stringWriter.toString();
	}
}