// src.au.id.jericho.lib.html.AttributesOutputSegment Maven / Gradle / Ivy
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 1.5
// Copyright (C) 2004 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
package au.id.jericho.lib.html;
import java.util.*;
import java.io.*;
/**
 * Implements an {@link IOutputSegment} whose content is a list of attribute name/value pairs.
 * <p>
 * This output segment is designed to replace the original {@link Attributes} segment in the source,
 * providing a simple means of adding, modifying and removing attributes.
 * <p>
 * Each instance of this class contains a <code>java.util.Map</code> of name/value pairs which can either be
 * specified directly in the constructor or initialised to the same entries as the source {@link Attributes}
 * specified in the constructor.
 * This map can be accessed via the {@link #getMap()} method, and its entries modified as required before output.
 * <p>
 * Keys in the map must be <code>String</code> objects, and values must implement the
 * <code>CharSequence</code> interface.
 * <p>
 * An attribute with no value is represented by a map entry with a <code>null</code> value.
 * <p>
 * Attribute values are stored unencoded in the map, and are automatically
 * {@linkplain CharacterReference#encode(CharSequence) encoded} if necessary during output.
 * <p>
 * The use of invalid characters in attribute names will result in unspecified behaviour.
 * <p>
 * Note that methods in the <code>Attributes</code> class treat attribute names as case insensitive,
 * whereas the <code>Map</code> will treat them as case sensitive.
 * <p>
 * Example of Usage:
 * <pre>
 *  Source source=new Source(htmlDocument);
 *  Attributes bodyAttributes
 *    =source.findNextStartTag(0,Tag.BODY).getAttributes();
 *  AttributesOutputSegment bodyAttributesOutputSegment
 *    =new AttributesOutputSegment(bodyAttributes,true);
 *  bodyAttributesOutputSegment.getMap().put("bgcolor","green");
 *  OutputDocument outputDocument=new OutputDocument(source);
 *  outputDocument.add(bodyAttributesOutputSegment);
 *  String htmlDocumentWithGreenBackground=outputDocument.toString();
 * </pre>
 *
 * @see OutputDocument
 * @see Attributes
 */
public class AttributesOutputSegment implements IOutputSegment {
	// All three fields are assigned exactly once, in the (Attributes, Map) constructor.
	private final int begin;
	private final int end;
	private final Map map;

	/**
	 * Constructs a new <code>AttributesOutputSegment</code> with the same span and initial
	 * name/value entries as the specified source {@link Attributes}.
	 * <p>
	 * Specifying a value of <code>true</code> in the <code>convertNamesToLowerCase</code> argument
	 * causes all attribute names to be converted to lower case in the map.
	 * This simplifies the process of finding/updating specific attributes since map keys are case sensitive.
	 * <p>
	 * Attribute values are automatically {@linkplain CharacterReference#decode(CharSequence) decoded} before
	 * being loaded into the map.
	 * <p>
	 * The initial map is obtained from <code>attributes.getMap(convertNamesToLowerCase)</code>.
	 * Calling this constructor with the following code:
	 * <blockquote><code>new AttributesOutputSegment(attributes, convertNamesToLowerCase)</code></blockquote>
	 * is logically equivalent to calling:
	 * <blockquote><code>new AttributesOutputSegment(attributes, attributes.populateMap(new LinkedHashMap(), convertNamesToLowerCase))</code></blockquote>
	 * <p>
	 * The use of <code>LinkedHashMap</code> to implement the map ensures (probably unnecessarily) that
	 * existing attributes are output in the same order as they appear in the source document, and new
	 * attributes are output in the same order as they are added.
	 *
	 * @param attributes  the <code>Attributes</code> defining the span and initial name/value entries of the new <code>AttributesOutputSegment</code>.
	 * @param convertNamesToLowerCase  specifies whether all attribute names are converted to lower case in the map.
	 * @throws IllegalArgumentException if <code>attributes</code> is <code>null</code>, or if the map obtained from it is <code>null</code>.
	 * @see #AttributesOutputSegment(Attributes,Map)
	 */
	public AttributesOutputSegment(Attributes attributes, boolean convertNamesToLowerCase) {
		// The lookup goes through a static helper so that a null argument is rejected with the
		// same exception type as in the (Attributes, Map) constructor, rather than failing with
		// a bare NullPointerException while the arguments of this(...) are being evaluated.
		this(attributes, loadSourceMap(attributes, convertNamesToLowerCase));
	}

	/**
	 * Constructs a new <code>AttributesOutputSegment</code> with the same span
	 * as the specified source {@link Attributes}, using the specified <code>Map</code> to
	 * store the entries.
	 * <p>
	 * This constructor might be used if the <code>Map</code> containing the new attribute values
	 * should not be preloaded with the same entries as the source attributes, or a map implementation
	 * other than <code>LinkedHashMap</code> is required.
	 * <p>
	 * The map is stored by reference, not copied, so later changes made to it by the caller
	 * are visible through {@link #getMap()} and in the output.
	 *
	 * @param attributes  the <code>Attributes</code> defining the span of the new <code>AttributesOutputSegment</code>.
	 * @param map  the <code>Map</code> containing the name/value entries.
	 * @throws IllegalArgumentException if either argument is <code>null</code>.
	 * @see #AttributesOutputSegment(Attributes, boolean convertNamesToLowerCase)
	 */
	public AttributesOutputSegment(Attributes attributes, Map map) {
		if (attributes==null) throw new IllegalArgumentException("attributes argument must not be null");
		if (map==null) throw new IllegalArgumentException("map argument must not be null");
		begin=attributes.getBegin();
		end=attributes.getEnd();
		this.map=map;
	}

	/**
	 * Returns the begin position of this segment, as reported by the source {@link Attributes}
	 * at construction time.
	 * @return the begin position of this segment.
	 */
	public int getBegin() {
		return begin;
	}

	/**
	 * Returns the end position of this segment, as reported by the source {@link Attributes}
	 * at construction time.
	 * @return the end position of this segment.
	 */
	public int getEnd() {
		return end;
	}

	/**
	 * Returns the <code>Map</code> containing the name/value entries to be output.
	 * <p>
	 * The returned object is the live map held by this segment, not a copy.
	 *
	 * @return the <code>Map</code> containing the name/value entries to be output.
	 */
	public Map getMap() {
		return map;
	}

	/**
	 * Outputs the contents of the {@linkplain #getMap() map} as HTML attribute name/value pairs
	 * to the specified <code>Writer</code>.
	 * <p>
	 * Each attribute is preceded by a single space, and all values are
	 * {@linkplain CharacterReference#encode(CharSequence) encoded} and enclosed in double quotes.
	 *
	 * @param writer  the <code>Writer</code> to which the output is to be sent.
	 * @throws IOException if an I/O exception occurs.
	 * @see Attributes#generateHTML(Map attributesMap)
	 */
	public void output(Writer writer) throws IOException {
		Attributes.appendHTML(writer,map);
	}

	/**
	 * Returns the HTML generated from the {@linkplain #getMap() map} by
	 * {@link Attributes#generateHTML(Map attributesMap)}.
	 * @return the HTML attribute text for the current map entries.
	 */
	public String toString() {
		return Attributes.generateHTML(map);
	}

	/**
	 * Returns a string of the form <code>(begin,end):</code> followed by the text
	 * produced by {@link #output(Writer)}.
	 * @return a string describing the span and content of this segment.
	 */
	public String getDebugInfo() {
		StringWriter stringWriter=new StringWriter();
		stringWriter.getBuffer().append('(').append(begin).append(',').append(end).append("):");
		try {
			output(stringWriter);
		} catch (IOException ignored) {
			// Deliberately ignored: a StringWriter never raises IOException, and this method
			// is a best-effort diagnostic aid that should not fail.
		}
		return stringWriter.toString();
	}

	/**
	 * Fetches the initial name/value map from the source attributes, rejecting a
	 * <code>null</code> source before it is dereferenced.
	 */
	private static Map loadSourceMap(Attributes attributes, boolean convertNamesToLowerCase) {
		if (attributes==null) throw new IllegalArgumentException("attributes argument must not be null");
		return attributes.getMap(convertNamesToLowerCase);
	}
}