// src.java.net.htmlparser.jericho.Renderer Maven / Gradle / Ivy
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.util.*;
import java.io.*;
/**
 * Performs a simple rendering of HTML markup into text.
 * <p>
 * This provides a human readable version of the segment content that is modelled on the way
 * Mozilla Thunderbird and other email clients provide an automatic conversion of
 * HTML content to text in their alternative MIME encoding of emails.
 * <p>
 * The output using default settings complies with the "text/plain; format=flowed" (DelSp=No) protocol described in
 * RFC3676.
 * <p>
 * Many properties are available to customise the output, possibly the most significant of which is {@link #setMaxLineLength(int) MaxLineLength}.
 * See the individual property descriptions for details.
 * <p>
 * Use one of the following methods to obtain the output:
 * <ul>
 *  <li>{@link #writeTo(Writer)}</li>
 *  <li>{@link #appendTo(Appendable)}</li>
 *  <li>{@link #toString()}</li>
 *  <li>{@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}</li>
 * </ul>
 * <p>
 * The rendering of some constructs, especially tables, is very rudimentary.
 * No attempt is made to render nested tables properly, except to ensure that all of the text content is included in the output.
 * <p>
 * Rendering an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically.
 * <p>
 * Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions.
 * <p>
 * To extract pure text without any rendering of the markup, use the {@link TextExtractor} class instead.
 */
public class Renderer implements CharStreamSource {
// Segment whose markup is rendered; assigned once in the constructor.
private final Segment rootSegment;
// Column at which output lines are wrapped (see setMaxLineLength).
private int maxLineLength=76;
// Line terminator written to the output; a null value is replaced by the source's best guess inside getNewLine().
private String newLine="\r\n";
// Whether the URL of each hyperlink is appended to the rendered link text.
private boolean includeHyperlinkURLs=true;
// Whether decoration characters are placed around font style / phrase elements.
private boolean decorateFontStyles=false;
// Initialised from the static Config.ConvertNonBreakingSpaces value at the time this instance is created.
private boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces;
// Indent size for indented blocks other than list items.
private int blockIndentSize=4;
// Indent size for list items; the bullet or number is written inside this indent.
private int listIndentSize=6;
// Bullet characters for unordered lists, selected by list nesting depth.
private char[] listBullets=new char[] {'*','o','+','#'};
// Separator written between table cells (a space followed by a tab by default).
private String tableCellSeparator=" \t";
/**
 * Constructs a new <code>Renderer</code> based on the specified {@link Segment}.
 *
 * @param segment  the segment containing the HTML to be rendered.
 * @see Segment#getRenderer()
 */
public Renderer(final Segment segment) {
	this.rootSegment=segment;
}
// Documentation inherited from CharStreamSource
public void writeTo(final Writer writer) throws IOException {
	// Render into the writer, then push through anything it may have buffered.
	this.appendTo(writer);
	writer.flush();
}
// Documentation inherited from CharStreamSource
public void appendTo(final Appendable appendable) throws IOException {
	// Every property is read through its getter, so the Processor works on a
	// snapshot of the current settings for the duration of this rendering.
	final Processor processor=new Processor(
		this,
		rootSegment,
		getMaxLineLength(),
		getNewLine(),
		getIncludeHyperlinkURLs(),
		getDecorateFontStyles(),
		getConvertNonBreakingSpaces(),
		getBlockIndentSize(),
		getListIndentSize(),
		getListBullets(),
		getTableCellSeparator()
	);
	processor.appendTo(appendable);
}
// Documentation inherited from CharStreamSource
public long getEstimatedMaximumOutputLength() {
	// The length of the source segment is used as the estimate.
	final long estimate=rootSegment.length();
	return estimate;
}
// Documentation inherited from CharStreamSource
public String toString() {
	final String rendered=CharStreamSourceUtil.toString(this);
	return rendered;
}
/**
 * Sets the column at which lines are to be wrapped.
 * <p>
 * Lines that would otherwise exceed this length are wrapped onto a new line at a word boundary.
 * <p>
 * A line may still exceed this length if it consists of a single word, where the length of the word plus the line indent exceeds the maximum length.
 * In this case the line is wrapped immediately after the end of the word.
 * <p>
 * The default value is <code>76</code>, which reflects the maximum line length for sending
 * email data specified in RFC2049 section 3.5.
 *
 * @param maxLineLength  the column at which lines are to be wrapped.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getMaxLineLength()
 */
public Renderer setMaxLineLength(final int maxLineLength) {
	this.maxLineLength = maxLineLength;
	return this;
}
/**
 * Returns the column at which lines are to be wrapped.
 * <p>
 * See the {@link #setMaxLineLength(int)} method for a full description of this property.
 *
 * @return the column at which lines are to be wrapped.
 */
public int getMaxLineLength() {
	return this.maxLineLength;
}
/**
 * Sets the string to be used to represent a newline in the output.
 * <p>
 * The default value is <code>"\r\n"</code> (CR+LF) regardless of the platform on which the library is running.
 * This is so that the default configuration produces valid
 * MIME plain/text output, which mandates the use of CR+LF for line breaks.
 * <p>
 * Specifying a <code>null</code> argument causes the output to use same new line string as is used in the source document, which is
 * determined via the {@link Source#getNewLine()} method.
 * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document,
 * or using the value from the static {@link Config#NewLine} property.
 *
 * @param newLine  the string to be used to represent a newline in the output, may be <code>null</code>.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getNewLine()
 */
public Renderer setNewLine(final String newLine) {
	this.newLine = newLine;
	return this;
}
/**
 * Returns the string to be used to represent a newline in the output.
 * <p>
 * See the {@link #setNewLine(String)} method for a full description of this property.
 *
 * @return the string to be used to represent a newline in the output.
 */
public String getNewLine() {
	if (newLine!=null) return newLine;
	// No explicit value: take the source's best guess and remember it in the property.
	newLine=rootSegment.source.getBestGuessNewLine();
	return newLine;
}
/**
 * Sets whether hyperlink URL's are included in the output.
 * <p>
 * The default value is <code>true</code>.
 * <p>
 * When this property is <code>true</code>, the URL of each hyperlink is included in the output as determined by the implementation of the
 * {@link #renderHyperlinkURL(StartTag)} method.
 * <dl>
 *  <dt>Example:</dt>
 *  <dd>
 *   Assuming the default implementation of {@link #renderHyperlinkURL(StartTag)}, when this property is <code>true</code>, the following HTML:
 *   <blockquote><code>&lt;a href="http://jericho.htmlparser.net/"&gt;Jericho HTML Parser&lt;/a&gt;</code></blockquote>
 *   produces the following output:
 *   <blockquote><code>Jericho HTML Parser &lt;http://jericho.htmlparser.net/&gt;</code></blockquote>
 *  </dd>
 * </dl>
 *
 * @param includeHyperlinkURLs  specifies whether hyperlink URL's are included in the output.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getIncludeHyperlinkURLs()
 */
public Renderer setIncludeHyperlinkURLs(final boolean includeHyperlinkURLs) {
	this.includeHyperlinkURLs = includeHyperlinkURLs;
	return this;
}
/**
 * Indicates whether hyperlink URL's are included in the output.
 * <p>
 * See the {@link #setIncludeHyperlinkURLs(boolean)} method for a full description of this property.
 *
 * @return <code>true</code> if hyperlink URL's are included in the output, otherwise <code>false</code>.
 */
public boolean getIncludeHyperlinkURLs() {
	return this.includeHyperlinkURLs;
}
/**
 * Renders the hyperlink URL from the specified {@link StartTag}.
 * <p>
 * A return value of <code>null</code> indicates that the hyperlink URL should not be rendered at all.
 * <p>
 * The default implementation of this method returns <code>null</code> if the <code>href</code> attribute of the specified start tag
 * is '<code>#</code>', starts with "<code>javascript:</code>" (compared without regard to case, since URI scheme names are case-insensitive), or is missing.
 * In all other cases it returns the value of the <code>href</code> attribute enclosed in angle brackets.
 * <p>
 * See the documentation of the {@link #setIncludeHyperlinkURLs(boolean)} method for an example of how a hyperlink is rendered by the default implementation.
 * <p>
 * This method can be overridden in a subclass to customise the rendering of hyperlink URLs.
 * <p>
 * Rendering of hyperlink URLs can be disabled completely without overriding this method by setting the
 * {@link #setIncludeHyperlinkURLs(boolean) IncludeHyperlinkURLs} property to <code>false</code>.
 * <dl>
 *  <dt>Example:</dt>
 *  <dd>
 *   To render hyperlink URLs without the enclosing angle brackets:
 *   <pre>
 * Renderer renderer=new Renderer(segment) {
 *     public String renderHyperlinkURL(StartTag startTag) {
 *         String href=startTag.getAttributeValue("href");
 *         if (href==null || href.equals("#") || href.regionMatches(true,0,"javascript:",0,11)) return null;
 *         return href;
 *     }
 * };
 * String renderedSegment=renderer.toString();
 *   </pre>
 *  </dd>
 * </dl>
 *
 * @param startTag  the start tag of the hyperlink element, must not be <code>null</code>.
 * @return The rendered hyperlink URL from the specified {@link StartTag}, or <code>null</code> if the hyperlink URL should not be rendered.
 */
public String renderHyperlinkURL(final StartTag startTag) {
	final String href=startTag.getAttributeValue("href");
	if (href==null || href.equals("#")) return null;
	// Scheme names are case-insensitive, so "JavaScript:" and "JAVASCRIPT:" must be
	// suppressed just like "javascript:". regionMatches returns false when href is
	// shorter than the scheme prefix, so no separate length check is needed.
	final String scriptScheme="javascript:";
	if (href.regionMatches(true,0,scriptScheme,0,scriptScheme.length())) return null;
	return "<"+href+">";
}
/**
 * Sets whether decoration characters are to be included around the content of some
 * font style elements and
 * phrase elements.
 * <p>
 * The default value is <code>false</code>.
 * <p>
 * Below is a table summarising the decorated elements.
 * <table border="1" cellpadding="2">
 *  <tr><th>Elements</th><th>Character</th><th>Example Output</th></tr>
 *  <tr><td>{@link HTMLElementName#B B} and {@link HTMLElementName#STRONG STRONG}</td><td><code>*</code></td><td><code>*bold text*</code></td></tr>
 *  <tr><td>{@link HTMLElementName#I I} and {@link HTMLElementName#EM EM}</td><td><code>/</code></td><td><code>/italic text/</code></td></tr>
 *  <tr><td>{@link HTMLElementName#U U}</td><td><code>_</code></td><td><code>_underlined text_</code></td></tr>
 *  <tr><td>{@link HTMLElementName#CODE CODE}</td><td><code>|</code></td><td><code>|code|</code></td></tr>
 * </table>
 *
 * @param decorateFontStyles  specifies whether decoration characters are to be included around the content of some font style elements.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getDecorateFontStyles()
 */
public Renderer setDecorateFontStyles(final boolean decorateFontStyles) {
	this.decorateFontStyles = decorateFontStyles;
	return this;
}
/**
 * Indicates whether decoration characters are to be included around the content of some
 * font style elements and
 * phrase elements.
 * <p>
 * See the {@link #setDecorateFontStyles(boolean)} method for a full description of this property.
 *
 * @return <code>true</code> if decoration characters are to be included around the content of some font style elements, otherwise <code>false</code>.
 */
public boolean getDecorateFontStyles() {
	return this.decorateFontStyles;
}
/**
 * Sets whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
 * <p>
 * The default value is that of the static {@link Config#ConvertNonBreakingSpaces} property at the time the <code>Renderer</code> is instantiated.
 *
 * @param convertNonBreakingSpaces  specifies whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getConvertNonBreakingSpaces()
 */
public Renderer setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces) {
	this.convertNonBreakingSpaces = convertNonBreakingSpaces;
	return this;
}
/**
 * Indicates whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
 * <p>
 * See the {@link #setConvertNonBreakingSpaces(boolean)} method for a full description of this property.
 *
 * @return <code>true</code> if non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces, otherwise <code>false</code>.
 */
public boolean getConvertNonBreakingSpaces() {
	return this.convertNonBreakingSpaces;
}
/**
 * Sets the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
 * <p>
 * At present this applies to {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE} and {@link HTMLElementName#DD DD} elements.
 * <p>
 * The default value is <code>4</code>.
 *
 * @param blockIndentSize  the size of the indent.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getBlockIndentSize()
 */
public Renderer setBlockIndentSize(final int blockIndentSize) {
	this.blockIndentSize = blockIndentSize;
	return this;
}
/**
 * Returns the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
 * <p>
 * See the {@link #setBlockIndentSize(int)} method for a full description of this property.
 *
 * @return the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
 */
public int getBlockIndentSize() {
	return this.blockIndentSize;
}
/**
 * Sets the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
 * <p>
 * The default value is <code>6</code>.
 * <p>
 * This applies to {@link HTMLElementName#LI LI} elements inside both {@link HTMLElementName#UL UL} and {@link HTMLElementName#OL OL} elements.
 * <p>
 * The bullet or number of the list item is included as part of the indent.
 *
 * @param listIndentSize  the size of the indent.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getListIndentSize()
 */
public Renderer setListIndentSize(final int listIndentSize) {
	this.listIndentSize = listIndentSize;
	return this;
}
/**
 * Returns the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
 * <p>
 * See the {@link #setListIndentSize(int)} method for a full description of this property.
 *
 * @return the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
 */
public int getListIndentSize() {
	return this.listIndentSize;
}
/**
 * Sets the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
 * <p>
 * The values in the default array are <code>*</code>, <code>o</code>, <code>+</code> and <code>#</code>.
 * <p>
 * If the nesting of rendered lists goes deeper than the length of this array, the bullet characters start repeating from the first in the array.
 * <p>
 * WARNING: If any of the characters in the default array are modified, this will affect all other instances of this class using the default array.
 *
 * @param listBullets  an array of characters to be used as bullets, must have at least one entry.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @throws IllegalArgumentException if the argument is <code>null</code> or an empty array.
 * @see #getListBullets()
 */
public Renderer setListBullets(final char[] listBullets) {
	final boolean hasAtLeastOneBullet=listBullets!=null && listBullets.length>0;
	if (!hasAtLeastOneBullet) {
		throw new IllegalArgumentException("listBullets argument must be an array of at least one character");
	}
	// The caller's array is stored as-is; no copy is taken.
	this.listBullets=listBullets;
	return this;
}
/**
 * Returns the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
 * <p>
 * See the {@link #setListBullets(char[])} method for a full description of this property.
 *
 * @return the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
 */
public char[] getListBullets() {
	return this.listBullets;
}
/**
 * Sets the string that is to separate table cells.
 * <p>
 * The default value is <code>" \t"</code> (a space followed by a tab).
 *
 * @param tableCellSeparator  the string that is to separate table cells.
 * @return this <code>Renderer</code> instance, allowing multiple property setting methods to be chained in a single statement.
 * @see #getTableCellSeparator()
 */
public Renderer setTableCellSeparator(final String tableCellSeparator) {
	this.tableCellSeparator = tableCellSeparator;
	return this;
}
/**
 * Returns the string that is to separate table cells.
 * <p>
 * See the {@link #setTableCellSeparator(String)} method for a full description of this property.
 *
 * @return the string that is to separate table cells.
 */
public String getTableCellSeparator() {
	return this.tableCellSeparator;
}
/** This class does the actual work, but is first passed final copies of all the parameters for efficiency. */
private static final class Processor {
// --- Immutable configuration, copied from the constructor arguments ---
// The Renderer that created this processor.
private final Renderer renderer;
// The segment being rendered.
private final Segment rootSegment;
// The source document of rootSegment (rootSegment.source).
private final Source source;
private final int maxLineLength;
private final String newLine;
private final boolean includeHyperlinkURLs;
private final boolean decorateFontStyles;
private final boolean convertNonBreakingSpaces;
private final int blockIndentSize;
private final int listIndentSize;
private final char[] listBullets;
private final String tableCellSeparator;
// --- Mutable rendering state ---
// Output target; assigned at the start of appendTo(Appendable).
private Appendable appendable;
private int renderedIndex; // keeps track of where rendering is up to in case of overlapping elements
// When true, the append methods write the line indent before any other output.
private boolean atStartOfLine;
// Current output column: advanced by the append methods, set to the indent width once the indent has been written.
private int col;
private int blockIndentLevel;
// Nesting depth of lists; incremented and decremented by ListElementHandler around the list content.
private int listIndentLevel;
private int blockVerticalMargin; // minimum number of blank lines to output at the current block boundary, or NO_MARGIN (-1) if we are not currently at a block boundary.
// True while inside a PRE element (set by PRE_ElementHandler).
private boolean preformatted;
private boolean lastCharWhiteSpace;
// Set by LI_ElementHandler just before the list item content is rendered.
private boolean ignoreInitialWhitespace;
// True when the next indent written must include the list bullet or number; cleared once that indent has been written.
private boolean bullet;
// Number of the current item in an ordered list, or UNORDERED_LIST inside an unordered list.
private int listBulletNumber;
// Value of blockVerticalMargin meaning "not currently at a block boundary".
private static final int NO_MARGIN=-1;
// Value of listBulletNumber meaning "the enclosing list is unordered".
private static final int UNORDERED_LIST=-1;
// Lookup table from HTML element name to the handler that renders elements of that name.
// It is filled once by the static initialiser below and the reference is never reassigned,
// so it is declared final; the type parameters replace the raw Map/HashMap so that
// lookups yield an ElementHandler without an unchecked cast.
private static final Map<String,ElementHandler> ELEMENT_HANDLERS=new HashMap<String,ElementHandler>();
static {
	ELEMENT_HANDLERS.put(HTMLElementName.A,A_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.ADDRESS,StandardBlockElementHandler.INSTANCE_0_0);
	ELEMENT_HANDLERS.put(HTMLElementName.APPLET,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.B,FontStyleElementHandler.INSTANCE_B);
	ELEMENT_HANDLERS.put(HTMLElementName.BLOCKQUOTE,StandardBlockElementHandler.INSTANCE_1_1_INDENT);
	ELEMENT_HANDLERS.put(HTMLElementName.BR,BR_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.BUTTON,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.CAPTION,StandardBlockElementHandler.INSTANCE_0_0);
	ELEMENT_HANDLERS.put(HTMLElementName.CENTER,StandardBlockElementHandler.INSTANCE_1_1);
	ELEMENT_HANDLERS.put(HTMLElementName.CODE,FontStyleElementHandler.INSTANCE_CODE);
	ELEMENT_HANDLERS.put(HTMLElementName.DD,StandardBlockElementHandler.INSTANCE_0_0_INDENT);
	ELEMENT_HANDLERS.put(HTMLElementName.DIR,ListElementHandler.INSTANCE_UL);
	ELEMENT_HANDLERS.put(HTMLElementName.DIV,StandardBlockElementHandler.INSTANCE_0_0);
	ELEMENT_HANDLERS.put(HTMLElementName.DT,StandardBlockElementHandler.INSTANCE_0_0);
	ELEMENT_HANDLERS.put(HTMLElementName.EM,FontStyleElementHandler.INSTANCE_I);
	ELEMENT_HANDLERS.put(HTMLElementName.FIELDSET,StandardBlockElementHandler.INSTANCE_1_1);
	ELEMENT_HANDLERS.put(HTMLElementName.FORM,StandardBlockElementHandler.INSTANCE_1_1);
	ELEMENT_HANDLERS.put(HTMLElementName.H1,StandardBlockElementHandler.INSTANCE_2_1);
	ELEMENT_HANDLERS.put(HTMLElementName.H2,StandardBlockElementHandler.INSTANCE_2_1);
	ELEMENT_HANDLERS.put(HTMLElementName.H3,StandardBlockElementHandler.INSTANCE_2_1);
	ELEMENT_HANDLERS.put(HTMLElementName.H4,StandardBlockElementHandler.INSTANCE_2_1);
	ELEMENT_HANDLERS.put(HTMLElementName.H5,StandardBlockElementHandler.INSTANCE_2_1);
	ELEMENT_HANDLERS.put(HTMLElementName.H6,StandardBlockElementHandler.INSTANCE_2_1);
	ELEMENT_HANDLERS.put(HTMLElementName.HEAD,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.HR,HR_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.I,FontStyleElementHandler.INSTANCE_I);
	ELEMENT_HANDLERS.put(HTMLElementName.LEGEND,StandardBlockElementHandler.INSTANCE_0_0);
	ELEMENT_HANDLERS.put(HTMLElementName.LI,LI_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.MENU,ListElementHandler.INSTANCE_UL);
	ELEMENT_HANDLERS.put(HTMLElementName.MAP,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.NOFRAMES,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.NOSCRIPT,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.OL,ListElementHandler.INSTANCE_OL);
	ELEMENT_HANDLERS.put(HTMLElementName.P,StandardBlockElementHandler.INSTANCE_1_1);
	ELEMENT_HANDLERS.put(HTMLElementName.PRE,PRE_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.SCRIPT,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.SELECT,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.STRONG,FontStyleElementHandler.INSTANCE_B);
	ELEMENT_HANDLERS.put(HTMLElementName.STYLE,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.TEXTAREA,RemoveElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.TD,TD_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.TH,TD_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.TR,TR_ElementHandler.INSTANCE);
	ELEMENT_HANDLERS.put(HTMLElementName.U,FontStyleElementHandler.INSTANCE_U);
	ELEMENT_HANDLERS.put(HTMLElementName.UL,ListElementHandler.INSTANCE_UL);
}
/**
 * Creates a processor holding a fixed copy of every rendering setting.
 * The source document is taken from the root segment.
 */
public Processor(final Renderer renderer, final Segment rootSegment, final int maxLineLength, final String newLine, final boolean includeHyperlinkURLs, final boolean decorateFontStyles, final boolean convertNonBreakingSpaces, final int blockIndentSize, final int listIndentSize, final char[] listBullets, final String tableCellSeparator) {
	// what is rendered
	this.renderer=renderer;
	this.rootSegment=rootSegment;
	this.source=rootSegment.source;
	// line handling
	this.maxLineLength=maxLineLength;
	this.newLine=newLine;
	// content options
	this.includeHyperlinkURLs=includeHyperlinkURLs;
	this.decorateFontStyles=decorateFontStyles;
	this.convertNonBreakingSpaces=convertNonBreakingSpaces;
	// layout options
	this.blockIndentSize=blockIndentSize;
	this.listIndentSize=listIndentSize;
	this.listBullets=listBullets;
	this.tableCellSeparator=tableCellSeparator;
}
/**
 * Renders the root segment into the given appendable.
 * The rendering state is reset first, so each call starts from a clean state.
 */
public void appendTo(final Appendable appendable) throws IOException {
	reset();
	this.appendable=appendable;
	appendSegmentProcessingChildElements(
		rootSegment.begin,
		rootSegment.end,
		rootSegment.getChildElements()
	);
}
/** Restores the mutable rendering state to its initial values. */
private void reset() {
	// position tracking
	renderedIndex=0;
	col=0;
	atStartOfLine=true;
	// indentation and block state
	blockIndentLevel=0;
	listIndentLevel=0;
	blockVerticalMargin=NO_MARGIN;
	bullet=false;
	// whitespace handling
	preformatted=false;
	ignoreInitialWhitespace=false;
	lastCharWhiteSpace=false;
}
/**
 * Renders the content of the given element, starting no earlier than the position
 * already rendered. Does nothing if the element is empty or its content has
 * already been passed.
 */
private void appendElementContent(final Element element) throws IOException {
	final int stop=element.getContentEnd();
	if (element.isEmpty()) return;
	if (renderedIndex>=stop) return;
	final int contentStart=element.getStartTag().end;
	// Skip anything already rendered (possible with overlapping elements).
	final int start=(renderedIndex>contentStart) ? renderedIndex : contentStart;
	appendSegmentProcessingChildElements(start,stop,element.getChildElements());
}
// NOTE(review): this method does not compile as shown. childElements is declared as a raw List
// but iterated as Element, "if (index=end)" is an assignment where a comparison is needed, and
// "tag" is never declared. It looks as though text between '<' and '>' characters was lost when
// this file was extracted, fusing the start of this method with the end of a later one.
// TODO: restore this span from the upstream source before relying on it.
private void appendSegmentProcessingChildElements(final int begin, final int end, final List childElements) throws IOException {
int index=begin;
for (Element childElement : childElements) {
if (index>=childElement.end) continue;
if (index=end) break;
appendSegment(index,tag.begin);
index=tag.end;
}
appendSegment(index,end);
}
// NOTE(review): this span does not compile as shown. Several statements are truncated mid-expression
// (for example "if (begin=end) return;", "if (renderedIndex=renderedIndex;" and "for (int i=0; i=renderedIndex;"),
// and the braces do not balance. It appears to be appendSegment fused with the bodies of
// appendPreformattedSegment and appendNonPreformattedSegment, presumably because text between '<' and '>'
// characters was lost during extraction. TODO: restore this span from the upstream source.
private void appendSegment(int begin, final int end) throws IOException {
assert begin<=end;
if (begin=end) return;
try {
if (preformatted)
appendPreformattedSegment(begin,end);
else
appendNonPreformattedSegment(begin,end);
} finally {
if (renderedIndex=renderedIndex;
if (isStartOfBlock()) appendBlockVerticalMargin();
final String text=CharacterReference.decode(source.subSequence(begin,end),false,convertNonBreakingSpaces);
for (int i=0; i=renderedIndex;
final String text=CharacterReference.decodeCollapseWhiteSpace(source.subSequence(begin,end),convertNonBreakingSpaces);
if (text.length()==0) {
if (!ignoreInitialWhitespace) lastCharWhiteSpace=true;
return;
}
if (isStartOfBlock()) {
appendBlockVerticalMargin();
} else if (lastCharWhiteSpace || (Segment.isWhiteSpace(source.charAt(begin)) && !ignoreInitialWhitespace)) {
append(' ');
}
int textIndex=0;
int i=0;
lastCharWhiteSpace=ignoreInitialWhitespace=false;
while (true) {
for (; i" or "From ".
if (i+1') continue;
if (i+6=maxLineLength) {
if (lastCharWhiteSpace && (blockIndentLevel|listIndentLevel)==0) append(' ');
startNewLine(0);
} else if (lastCharWhiteSpace) {
append(' ');
}
append(text,textIndex,i);
if (i==text.length()) break;
lastCharWhiteSpace=true;
textIndex=++i;
}
lastCharWhiteSpace=Segment.isWhiteSpace(source.charAt(end-1));
}
/** Indicates whether a block boundary is pending, i.e. a vertical margin has been set and not yet output. */
private boolean isStartOfBlock() {
	final boolean noMarginPending=(blockVerticalMargin==NO_MARGIN);
	return !noMarginPending;
}
/**
 * Starts a new line using the pending block vertical margin, then clears the margin.
 * Must only be called while a block boundary is pending.
 */
private void appendBlockVerticalMargin() throws IOException {
	final int pendingMargin=blockVerticalMargin;
	assert pendingMargin!=NO_MARGIN;
	startNewLine(pendingMargin);
	// The margin is consumed only after the new line has been started.
	blockVerticalMargin=NO_MARGIN;
}
// NOTE(review): this span does not compile as shown. The statement beginning "if (blockVerticalMargin0; i--)"
// is truncated, and the remainder reads like the tail of the indent-writing method (appendIndent is called
// by the append methods, but its declaration is not visible). Presumably text between '<' and '>' characters
// was lost during extraction, fusing blockBoundary with the methods that followed it.
// TODO: restore this span from the upstream source.
private void blockBoundary(final int verticalMargin) throws IOException {
// Set a block boundary with the given vertical margin. The vertical margin is the minimum number of blank lines to output between the blocks.
// This method can be called multiple times at a block boundary, and the next textual output will output the number of blank lines determined by the
// maximum vertical margin of all the method calls.
if (blockVerticalMargin0; i--) appendable.append(' ');
if (bullet) {
for (int i=(listIndentLevel-1)*listIndentSize; i>0; i--) appendable.append(' ');
if (listBulletNumber==UNORDERED_LIST) {
for (int i=listIndentSize-2; i>0; i--) appendable.append(' ');
appendable.append(listBullets[(listIndentLevel-1)%listBullets.length]).append(' ');
} else {
String bulletNumberString=Integer.toString(listBulletNumber);
for (int i=listIndentSize-bulletNumberString.length()-2; i>0; i--) appendable.append(' ');
appendable.append(bulletNumberString).append(". ");
}
bullet=false;
} else {
for (int i=listIndentLevel*listIndentSize; i>0; i--) appendable.append(' ');
}
col=blockIndentLevel*blockIndentSize+listIndentLevel*listIndentSize;
atStartOfLine=false;
}
/**
 * Appends a single character to the output, writing the line indent first if at the start of a line.
 * Advances the current column by one.
 *
 * @return this processor, so that calls can be chained.
 */
private Processor append(final char ch) throws IOException {
	if (atStartOfLine) {
		appendIndent();
	}
	appendable.append(ch);
	col+=1;
	return this;
}
/**
 * Appends a string to the output, writing the line indent first if at the start of a line.
 * Advances the current column by the length of the string.
 *
 * @return this processor, so that calls can be chained.
 */
private Processor append(final String text) throws IOException {
	if (atStartOfLine) {
		appendIndent();
	}
	appendable.append(text);
	col=col+text.length();
	return this;
}
// NOTE(review): this span does not compile as shown. The loop header "for (int i=begin; i=x.maxLineLength) {"
// is truncated, and the remaining statements use "x" and "renderedHyperlinkURL", neither of which is declared
// here; they read like the tail of a hyperlink element handler. The ElementHandler declaration that the
// handler classes below implement is also not visible. Presumably text between '<' and '>' characters was
// lost during extraction. TODO: restore this span from the upstream source.
private void append(final CharSequence text, final int begin, final int end) throws IOException {
if (atStartOfLine) appendIndent();
for (int i=begin; i=x.maxLineLength) {
x.startNewLine(0);
} else {
x.append(' ');
}
x.append(renderedHyperlinkURL);
x.lastCharWhiteSpace=true;
}
}
/** Handler for BR elements: emits a line break and then marks a block boundary with no vertical margin. */
private static class BR_ElementHandler implements ElementHandler {
	public static final ElementHandler INSTANCE=new BR_ElementHandler();

	public void process(final Processor x, final Element element) throws IOException {
		x.newLine();
		x.blockBoundary(0);
	}
}
/** Handler for HR elements: outputs a row of hyphens as a block of its own. */
private static class HR_ElementHandler implements ElementHandler {
	public static final ElementHandler INSTANCE=new HR_ElementHandler();

	// Number of '-' characters making up the rule.
	private static final int RULE_WIDTH=72;

	public void process(final Processor x, final Element element) throws IOException {
		// Open a block boundary and flush it at once so the rule starts on a line of its own.
		x.blockBoundary(0);
		x.appendBlockVerticalMargin();
		for (int remaining=RULE_WIDTH; remaining>0; remaining--) {
			x.append('-');
		}
		x.blockBoundary(0);
	}
}
/**
 * Handler for list container elements.
 * While the list content is rendered, the list indent level is one deeper and the bullet
 * number is replaced by this handler's initial value; both are put back afterwards.
 */
private static class ListElementHandler implements ElementHandler {
	// Ordered lists start counting from 0; the LI handler increments before each item.
	public static final ElementHandler INSTANCE_OL=new ListElementHandler(0);
	public static final ElementHandler INSTANCE_UL=new ListElementHandler(UNORDERED_LIST);

	private final int initialListBulletNumber;

	public ListElementHandler(int initialListBulletNumber) {
		this.initialListBulletNumber=initialListBulletNumber;
	}

	public void process(final Processor x, final Element element) throws IOException {
		x.blockBoundary(0);

		// Remember the enclosing list's numbering so nested lists do not disturb it.
		final int enclosingListBulletNumber=x.listBulletNumber;
		x.listBulletNumber=initialListBulletNumber;
		x.listIndentLevel+=1;

		x.appendElementContent(element);

		x.listIndentLevel-=1;
		x.listBulletNumber=enclosingListBulletNumber;

		x.blockBoundary(0);
	}
}
/**
 * Handler for LI elements.
 * Advances the item number in an ordered list, writes the bullet indent on a new line,
 * and then renders the item content with initial whitespace ignored.
 */
private static class LI_ElementHandler implements ElementHandler {
	public static final ElementHandler INSTANCE=new LI_ElementHandler();

	public void process(final Processor x, final Element element) throws IOException {
		final boolean insideOrderedList=(x.listBulletNumber!=UNORDERED_LIST);
		if (insideOrderedList) {
			x.listBulletNumber+=1;
		}
		// Request a bullet, then force the new line and its indent out immediately.
		x.bullet=true;
		x.blockBoundary(0);
		x.appendBlockVerticalMargin();
		x.appendIndent();

		x.ignoreInitialWhitespace=true;
		x.appendElementContent(element);

		x.bullet=false;
		x.blockBoundary(0);
	}
}
/**
 * Handler for PRE elements: renders the content in preformatted mode,
 * with a block boundary of vertical margin 1 on either side.
 */
private static class PRE_ElementHandler implements ElementHandler {
	public static final ElementHandler INSTANCE=new PRE_ElementHandler();

	public void process(final Processor x, final Element element) throws IOException {
		x.blockBoundary(1);

		final boolean wasPreformatted=x.preformatted; // should always be false
		x.preformatted=true;
		x.appendElementContent(element);
		x.preformatted=wasPreformatted;

		x.blockBoundary(1);
	}
}
/**
 * Handler for table cells (registered for both TD and TH).
 * Writes the table cell separator before the cell content unless a block boundary is pending.
 */
private static class TD_ElementHandler implements ElementHandler {
	public static final ElementHandler INSTANCE=new TD_ElementHandler();

	public void process(final Processor x, final Element element) throws IOException {
		final boolean atBlockStart=x.isStartOfBlock();
		if (!atBlockStart) {
			x.append(x.tableCellSeparator);
		}
		x.lastCharWhiteSpace=false;
		x.appendElementContent(element);
	}
}
/** Handler for TR elements: renders the row content between two block boundaries with no vertical margin. */
private static class TR_ElementHandler implements ElementHandler {
	public static final ElementHandler INSTANCE=new TR_ElementHandler();

	public void process(final Processor x, final Element element) throws IOException {
		final int noMargin=0;
		x.blockBoundary(noMargin);
		x.appendElementContent(element);
		x.blockBoundary(noMargin);
	}
}
}
}