// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// src.java.net.htmlparser.jericho.Renderer Maven / Gradle / Ivy
//
// There is a newer version: 5.0.111
// Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;
import java.io.*;

/**
 * Performs a simple rendering of HTML markup into text.
 * <p>
 * This provides a human readable version of the segment content that is modelled on the way
 * Mozilla Thunderbird and other email clients provide an automatic conversion of
 * HTML content to text in their alternative MIME encoding of emails.
 * <p>
 * The output using default settings complies with the "text/plain; format=flowed" (DelSp=No) protocol described in
 * RFC3676.
 * <p>
 * Many properties are available to customise the output, possibly the most significant of which being {@link #setMaxLineLength(int) MaxLineLength}.
 * See the individual property descriptions for details.
 * <p>
 * Use one of the following methods to obtain the output:
 * <ul>
 *  <li>{@link #writeTo(Writer)}</li>
 *  <li>{@link #appendTo(Appendable)}</li>
 *  <li>{@link #toString()}</li>
 *  <li>{@link CharStreamSourceUtil#getReader(CharStreamSource) CharStreamSourceUtil.getReader(this)}</li>
 * </ul>
 * <p>
 * The rendering of some constructs, especially tables, is very rudimentary.
 * No attempt is made to render nested tables properly, except to ensure that all of the text content is included in the output.
 * <p>
 * Rendering an entire {@link Source} object performs a {@linkplain Source#fullSequentialParse() full sequential parse} automatically.
 * <p>
 * Any aspect of the algorithm not specifically mentioned here is subject to change without notice in future versions.
 * <p>
* To extract pure text without any rendering of the markup, use the {@link TextExtractor} class instead. */ public class Renderer implements CharStreamSource { private final Segment rootSegment; private int maxLineLength=76; private String newLine="\r\n"; private boolean includeHyperlinkURLs=true; private boolean decorateFontStyles=false; private boolean convertNonBreakingSpaces=Config.ConvertNonBreakingSpaces; private int blockIndentSize=4; private int listIndentSize=6; private char[] listBullets=new char[] {'*','o','+','#'}; private String tableCellSeparator=" \t"; /** * Constructs a new Renderer based on the specified {@link Segment}. * @param segment the segment containing the HTML to be rendered. * @see Segment#getRenderer() */ public Renderer(final Segment segment) { rootSegment=segment; } // Documentation inherited from CharStreamSource public void writeTo(final Writer writer) throws IOException { appendTo(writer); writer.flush(); } // Documentation inherited from CharStreamSource public void appendTo(final Appendable appendable) throws IOException { new Processor(this,rootSegment,getMaxLineLength(),getNewLine(),getIncludeHyperlinkURLs(),getDecorateFontStyles(),getConvertNonBreakingSpaces(),getBlockIndentSize(),getListIndentSize(),getListBullets(),getTableCellSeparator()).appendTo(appendable); } // Documentation inherited from CharStreamSource public long getEstimatedMaximumOutputLength() { return rootSegment.length(); } // Documentation inherited from CharStreamSource public String toString() { return CharStreamSourceUtil.toString(this); } /** * Sets the column at which lines are to be wrapped. *

* Lines that would otherwise exceed this length are wrapped onto a new line at a word boundary. *

* A Line may still exceed this length if it consists of a single word, where the length of the word plus the line indent exceeds the maximum length. * In this case the line is wrapped immediately after the end of the word. *

* The default value is 76, which reflects the maximum line length for sending
* email data specified in RFC2049 section 3.5.
*
* @param maxLineLength  the column at which lines are to be wrapped.
* @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
* @see #getMaxLineLength()
*/
    public Renderer setMaxLineLength(final int maxLineLength) {
        this.maxLineLength = maxLineLength;
        return this;
    }

    /**
     * Returns the column at which lines are to be wrapped.
     *

* See the {@link #setMaxLineLength(int)} method for a full description of this property.
*
* @return the column at which lines are to be wrapped.
*/
    public int getMaxLineLength() {
        return this.maxLineLength;
    }

    /**
     * Sets the string to be used to represent a newline in the output.
     *

* The default value is "\r\n" (CR+LF) regardless of the platform on which the library is running. * This is so that the default configuration produces valid * MIME plain/text output, which mandates the use of CR+LF for line breaks. *

* Specifying a null argument causes the output to use same new line string as is used in the source document, which is * determined via the {@link Source#getNewLine()} method. * If the source document does not contain any new lines, a "best guess" is made by either taking the new line string of a previously parsed document, * or using the value from the static {@link Config#NewLine} property. * * @param newLine the string to be used to represent a newline in the output, may be null. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getNewLine() */ public Renderer setNewLine(final String newLine) { this.newLine=newLine; return this; } /** * Returns the string to be used to represent a newline in the output. *

* See the {@link #setNewLine(String)} method for a full description of this property.
*
* @return the string to be used to represent a newline in the output.
*/
    public String getNewLine() {
        if (newLine != null) {
            return newLine;
        }
        // No explicit value configured: resolve it from the source and keep the result in the field.
        newLine = rootSegment.source.getBestGuessNewLine();
        return newLine;
    }

    /**
     * Sets whether hyperlink URL's are included in the output.
     *

* The default value is {@code true}.
* <p>
* When this property is {@code true}, the URL of each hyperlink is included in the output as determined by the implementation of the
* {@link #renderHyperlinkURL(StartTag)} method.
* <p>
* <dl>
*  <dt>Example:</dt>
*  <dd>
*   Assuming the default implementation of {@link #renderHyperlinkURL(StartTag)}, when this property is {@code true}, the following HTML:
*   <blockquote><code>&lt;a href="http://jericho.htmlparser.net/"&gt;Jericho HTML Parser&lt;/a&gt;</code></blockquote>
*   produces the following output:
*   <blockquote><code>Jericho HTML Parser &lt;http://jericho.htmlparser.net/&gt;</code></blockquote>
*  </dd>
* </dl>
*
* @param includeHyperlinkURLs  specifies whether hyperlink URL's are included in the output.
* @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
* @see #getIncludeHyperlinkURLs()
*/
    public Renderer setIncludeHyperlinkURLs(final boolean includeHyperlinkURLs) {
        this.includeHyperlinkURLs = includeHyperlinkURLs;
        return this;
    }

    /**
     * Indicates whether hyperlink URL's are included in the output.
     *

* See the {@link #setIncludeHyperlinkURLs(boolean)} method for a full description of this property.
*
* @return {@code true} if hyperlink URL's are included in the output, otherwise {@code false}.
*/
    public boolean getIncludeHyperlinkURLs() {
        return this.includeHyperlinkURLs;
    }

    /**
     * Renders the hyperlink URL from the specified {@link StartTag}.
     *

* A return value of {@code null} indicates that the hyperlink URL should not be rendered at all.
* <p>
* The default implementation of this method returns {@code null} if the {@code href} attribute of the specified start tag
* is '#', starts with "javascript:", or is missing.
* In all other cases it returns the value of the {@code href} attribute enclosed in angle brackets.
* <p>
* See the documentation of the {@link #setIncludeHyperlinkURLs(boolean)} method for an example of how a hyperlink is rendered by the default implementation.
* <p>
* This method can be overridden in a subclass to customise the rendering of hyperlink URLs.
* <p>
* Rendering of hyperlink URLs can be disabled completely without overriding this method by setting the
* {@link #setIncludeHyperlinkURLs(boolean) IncludeHyperlinkURLs} property to {@code false}.
* <p>
* <dl>
*  <dt>Example:</dt>
*  <dd>
*   To render hyperlink URLs without the enclosing angle brackets:
*   <pre>
*   Renderer renderer=new Renderer(segment) {
*       public String renderHyperlinkURL(StartTag startTag) {
*           String href=startTag.getAttributeValue("href");
*           if (href==null || href.equals("#") || href.startsWith("javascript:")) return null;
*           return href;
*       }
*   };
*   String renderedSegment=renderer.toString();
*   </pre>
*  </dd>
* </dl>
*
* @param startTag  the start tag of the hyperlink element, must not be {@code null}.
* @return The rendered hyperlink URL from the specified {@link StartTag}, or {@code null} if the hyperlink URL should not be rendered.
*/
    public String renderHyperlinkURL(final StartTag startTag) {
        final String href=startTag.getAttributeValue("href");
        // Missing href or a bare fragment placeholder: nothing worth rendering.
        if (href==null || href.equals("#")) return null;
        // URI scheme names are case-insensitive, so "JavaScript:" and "JAVASCRIPT:" must be
        // suppressed just like "javascript:". regionMatches returns false if href is too short.
        final String scriptScheme="javascript:";
        if (href.regionMatches(true,0,scriptScheme,0,scriptScheme.length())) return null;
        return '<'+href+'>';
    }

    /**
     * Sets whether decoration characters are to be included around the content of some
     * font style elements and
     * phrase elements.
     *

* The default value is false. *

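* Below is a table summarising the decorated elements.
* <table border="1" cellspacing="0">
*  <tr><th>Elements</th><th>Character</th><th>Example Output</th></tr>
*  <tr><td>{@link HTMLElementName#B B} and {@link HTMLElementName#STRONG STRONG}</td><td><code>*</code></td><td><code>*bold text*</code></td></tr>
*  <tr><td>{@link HTMLElementName#I I} and {@link HTMLElementName#EM EM}</td><td><code>/</code></td><td><code>/italic text/</code></td></tr>
*  <tr><td>{@link HTMLElementName#U U}</td><td><code>_</code></td><td><code>_underlined text_</code></td></tr>
*  <tr><td>{@link HTMLElementName#CODE CODE}</td><td><code>|</code></td><td><code>|code|</code></td></tr>
* </table>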
*
* @param decorateFontStyles  specifies whether decoration characters are to be included around the content of some font style elements.
* @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
* @see #getDecorateFontStyles()
*/
    public Renderer setDecorateFontStyles(final boolean decorateFontStyles) {
        this.decorateFontStyles = decorateFontStyles;
        return this;
    }

    /**
     * Indicates whether decoration characters are to be included around the content of some
     * font style elements and
     * phrase elements.
     *

* See the {@link #setDecorateFontStyles(boolean)} method for a full description of this property.
*
* @return {@code true} if decoration characters are to be included around the content of some font style elements, otherwise {@code false}.
*/
    public boolean getDecorateFontStyles() {
        return this.decorateFontStyles;
    }

    /**
     * Sets whether non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces.
     *

* The default value is that of the static {@link Config#ConvertNonBreakingSpaces} property at the time the Renderer is instantiated. * * @param convertNonBreakingSpaces specifies whether non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character entity references are converted to spaces. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getConvertNonBreakingSpaces() */ public Renderer setConvertNonBreakingSpaces(boolean convertNonBreakingSpaces) { this.convertNonBreakingSpaces=convertNonBreakingSpaces; return this; } /** * Indicates whether non-breaking space ({@link CharacterEntityReference#_nbsp &nbsp;}) character entity references are converted to spaces. *

* See the {@link #setConvertNonBreakingSpaces(boolean)} method for a full description of this property.
*
* @return {@code true} if non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character entity references are converted to spaces, otherwise {@code false}.
*/
    public boolean getConvertNonBreakingSpaces() {
        return this.convertNonBreakingSpaces;
    }

    /**
     * Sets the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
     *

* At present this applies to {@link HTMLElementName#BLOCKQUOTE BLOCKQUOTE} and {@link HTMLElementName#DD DD} elements. *

* The default value is 4.
*
* @param blockIndentSize  the size of the indent.
* @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
* @see #getBlockIndentSize()
*/
    public Renderer setBlockIndentSize(final int blockIndentSize) {
        this.blockIndentSize = blockIndentSize;
        return this;
    }

    /**
     * Returns the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
     *

* See the {@link #setBlockIndentSize(int)} method for a full description of this property.
*
* @return the size of the indent to be used for anything other than {@link HTMLElementName#LI LI} elements.
*/
    public int getBlockIndentSize() {
        return this.blockIndentSize;
    }

    /**
     * Sets the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
     *

* The default value is 6. *

* This applies to {@link HTMLElementName#LI LI} elements inside both {@link HTMLElementName#UL UL} and {@link HTMLElementName#OL OL} elements. *

* The bullet or number of the list item is included as part of the indent.
*
* @param listIndentSize  the size of the indent.
* @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
* @see #getListIndentSize()
*/
    public Renderer setListIndentSize(final int listIndentSize) {
        this.listIndentSize = listIndentSize;
        return this;
    }

    /**
     * Returns the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
     *

* See the {@link #setListIndentSize(int)} method for a full description of this property.
*
* @return the size of the indent to be used for {@link HTMLElementName#LI LI} elements.
*/
    public int getListIndentSize() {
        return this.listIndentSize;
    }

    /**
     * Sets the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
     *

* The values in the default array are *, o, + and #. *

* If the nesting of rendered lists goes deeper than the length of this array, the bullet characters start repeating from the first in the array. *

* WARNING: If any of the characters in the default array are modified, this will affect all other instances of this class using the default array. * * @param listBullets an array of characters to be used as bullets, must have at least one entry. * @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement. * @see #getListBullets() */ public Renderer setListBullets(final char[] listBullets) { if (listBullets==null || listBullets.length==0) throw new IllegalArgumentException("listBullets argument must be an array of at least one character"); this.listBullets=listBullets; return this; } /** * Returns the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements. *

* See the {@link #setListBullets(char[])} method for a full description of this property.
*
* @return the bullet characters to use for list items inside {@link HTMLElementName#UL UL} elements.
*/
    public char[] getListBullets() {
        return this.listBullets;
    }

    /**
     * Sets the string that is to separate table cells.
     *

* The default value is " \t" (a space followed by a tab).
*
* @param tableCellSeparator  the string that is to separate table cells.
* @return this Renderer instance, allowing multiple property setting methods to be chained in a single statement.
* @see #getTableCellSeparator()
*/
    public Renderer setTableCellSeparator(final String tableCellSeparator) {
        this.tableCellSeparator = tableCellSeparator;
        return this;
    }

    /**
     * Returns the string that is to separate table cells.
     *

* See the {@link #setTableCellSeparator(String)} method for a full description of this property. * * @return the string that is to separate table cells. */ public String getTableCellSeparator() { return tableCellSeparator; } /** This class does the actual work, but is first passed final copies of all the parameters for efficiency. */ private static final class Processor { private final Renderer renderer; private final Segment rootSegment; private final Source source; private final int maxLineLength; private final String newLine; private final boolean includeHyperlinkURLs; private final boolean decorateFontStyles; private final boolean convertNonBreakingSpaces; private final int blockIndentSize; private final int listIndentSize; private final char[] listBullets; private final String tableCellSeparator; private Appendable appendable; private int renderedIndex; // keeps track of where rendering is up to in case of overlapping elements private boolean atStartOfLine; private int col; private int blockIndentLevel; private int listIndentLevel; private int blockVerticalMargin; // minimum number of blank lines to output at the current block boundary, or NO_MARGIN (-1) if we are not currently at a block boundary. 
private boolean preformatted; private boolean lastCharWhiteSpace; private boolean ignoreInitialWhitespace; private boolean bullet; private int listBulletNumber; private static final int NO_MARGIN=-1; private static final int UNORDERED_LIST=-1; private static Map ELEMENT_HANDLERS=new HashMap(); static { ELEMENT_HANDLERS.put(HTMLElementName.A,A_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.ADDRESS,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.APPLET,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.B,FontStyleElementHandler.INSTANCE_B); ELEMENT_HANDLERS.put(HTMLElementName.BLOCKQUOTE,StandardBlockElementHandler.INSTANCE_1_1_INDENT); ELEMENT_HANDLERS.put(HTMLElementName.BR,BR_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.BUTTON,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.CAPTION,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.CENTER,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.CODE,FontStyleElementHandler.INSTANCE_CODE); ELEMENT_HANDLERS.put(HTMLElementName.DD,StandardBlockElementHandler.INSTANCE_0_0_INDENT); ELEMENT_HANDLERS.put(HTMLElementName.DIR,ListElementHandler.INSTANCE_UL); ELEMENT_HANDLERS.put(HTMLElementName.DIV,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.DT,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.EM,FontStyleElementHandler.INSTANCE_I); ELEMENT_HANDLERS.put(HTMLElementName.FIELDSET,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.FORM,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.H1,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H2,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H3,StandardBlockElementHandler.INSTANCE_2_1); 
ELEMENT_HANDLERS.put(HTMLElementName.H4,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H5,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.H6,StandardBlockElementHandler.INSTANCE_2_1); ELEMENT_HANDLERS.put(HTMLElementName.HEAD,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.HR,HR_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.I,FontStyleElementHandler.INSTANCE_I); ELEMENT_HANDLERS.put(HTMLElementName.LEGEND,StandardBlockElementHandler.INSTANCE_0_0); ELEMENT_HANDLERS.put(HTMLElementName.LI,LI_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.MENU,ListElementHandler.INSTANCE_UL); ELEMENT_HANDLERS.put(HTMLElementName.MAP,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.NOFRAMES,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.NOSCRIPT,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.OL,ListElementHandler.INSTANCE_OL); ELEMENT_HANDLERS.put(HTMLElementName.P,StandardBlockElementHandler.INSTANCE_1_1); ELEMENT_HANDLERS.put(HTMLElementName.PRE,PRE_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.SCRIPT,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.SELECT,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.STRONG,FontStyleElementHandler.INSTANCE_B); ELEMENT_HANDLERS.put(HTMLElementName.STYLE,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TEXTAREA,RemoveElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TD,TD_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TH,TD_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.TR,TR_ElementHandler.INSTANCE); ELEMENT_HANDLERS.put(HTMLElementName.U,FontStyleElementHandler.INSTANCE_U); ELEMENT_HANDLERS.put(HTMLElementName.UL,ListElementHandler.INSTANCE_UL); } public Processor(final Renderer renderer, final Segment rootSegment, final int maxLineLength, final 
String newLine, final boolean includeHyperlinkURLs, final boolean decorateFontStyles, final boolean convertNonBreakingSpaces, final int blockIndentSize, final int listIndentSize, final char[] listBullets, final String tableCellSeparator) { this.renderer=renderer; this.rootSegment=rootSegment; source=rootSegment.source; this.maxLineLength=maxLineLength; this.newLine=newLine; this.includeHyperlinkURLs=includeHyperlinkURLs; this.decorateFontStyles=decorateFontStyles; this.convertNonBreakingSpaces=convertNonBreakingSpaces; this.blockIndentSize=blockIndentSize; this.listIndentSize=listIndentSize; this.listBullets=listBullets; this.tableCellSeparator=tableCellSeparator; } public void appendTo(final Appendable appendable) throws IOException { reset(); this.appendable=appendable; appendSegmentProcessingChildElements(rootSegment.begin,rootSegment.end,rootSegment.getChildElements()); } private void reset() { renderedIndex=0; atStartOfLine=true; col=0; blockIndentLevel=0; listIndentLevel=0; blockVerticalMargin=NO_MARGIN; preformatted=false; lastCharWhiteSpace=ignoreInitialWhitespace=false; bullet=false; } private void appendElementContent(final Element element) throws IOException { final int contentEnd=element.getContentEnd(); if (element.isEmpty() || renderedIndex>=contentEnd) return; final int contentBegin=element.getStartTag().end; appendSegmentProcessingChildElements(Math.max(renderedIndex,contentBegin),contentEnd,element.getChildElements()); } private void appendSegmentProcessingChildElements(final int begin, final int end, final List childElements) throws IOException { int index=begin; for (Element childElement : childElements) { if (index>=childElement.end) continue; if (index=end) break; appendSegment(index,tag.begin); index=tag.end; } appendSegment(index,end); } private void appendSegment(int begin, final int end) throws IOException { assert begin<=end; if (begin=end) return; try { if (preformatted) appendPreformattedSegment(begin,end); else 
appendNonPreformattedSegment(begin,end); } finally { if (renderedIndex=renderedIndex; if (isStartOfBlock()) appendBlockVerticalMargin(); final String text=CharacterReference.decode(source.subSequence(begin,end),false,convertNonBreakingSpaces); for (int i=0; i=renderedIndex; final String text=CharacterReference.decodeCollapseWhiteSpace(source.subSequence(begin,end),convertNonBreakingSpaces); if (text.length()==0) { if (!ignoreInitialWhitespace) lastCharWhiteSpace=true; return; } if (isStartOfBlock()) { appendBlockVerticalMargin(); } else if (lastCharWhiteSpace || (Segment.isWhiteSpace(source.charAt(begin)) && !ignoreInitialWhitespace)) { append(' '); } int textIndex=0; int i=0; lastCharWhiteSpace=ignoreInitialWhitespace=false; while (true) { for (; i" or "From ". if (i+1') continue; if (i+6=maxLineLength) { if (lastCharWhiteSpace && (blockIndentLevel|listIndentLevel)==0) append(' '); startNewLine(0); } else if (lastCharWhiteSpace) { append(' '); } append(text,textIndex,i); if (i==text.length()) break; lastCharWhiteSpace=true; textIndex=++i; } lastCharWhiteSpace=Segment.isWhiteSpace(source.charAt(end-1)); } private boolean isStartOfBlock() { return blockVerticalMargin!=NO_MARGIN; } private void appendBlockVerticalMargin() throws IOException { assert blockVerticalMargin!=NO_MARGIN; startNewLine(blockVerticalMargin); blockVerticalMargin=NO_MARGIN; } private void blockBoundary(final int verticalMargin) throws IOException { // Set a block boundary with the given vertical margin. The vertical margin is the minimum number of blank lines to output between the blocks. // This method can be called multiple times at a block boundary, and the next textual output will output the number of blank lines determined by the // maximum vertical margin of all the method calls. 
if (blockVerticalMargin0; i--) appendable.append(' '); if (bullet) { for (int i=(listIndentLevel-1)*listIndentSize; i>0; i--) appendable.append(' '); if (listBulletNumber==UNORDERED_LIST) { for (int i=listIndentSize-2; i>0; i--) appendable.append(' '); appendable.append(listBullets[(listIndentLevel-1)%listBullets.length]).append(' '); } else { String bulletNumberString=Integer.toString(listBulletNumber); for (int i=listIndentSize-bulletNumberString.length()-2; i>0; i--) appendable.append(' '); appendable.append(bulletNumberString).append(". "); } bullet=false; } else { for (int i=listIndentLevel*listIndentSize; i>0; i--) appendable.append(' '); } col=blockIndentLevel*blockIndentSize+listIndentLevel*listIndentSize; atStartOfLine=false; } private Processor append(final char ch) throws IOException { if (atStartOfLine) appendIndent(); appendable.append(ch); col++; return this; } private Processor append(final String text) throws IOException { if (atStartOfLine) appendIndent(); appendable.append(text); col+=text.length(); return this; } private void append(final CharSequence text, final int begin, final int end) throws IOException { if (atStartOfLine) appendIndent(); for (int i=begin; i=x.maxLineLength) { x.startNewLine(0); } else { x.append(' '); } x.append(renderedHyperlinkURL); x.lastCharWhiteSpace=true; } } private static class BR_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new BR_ElementHandler(); public void process(Processor x, Element element) throws IOException { x.newLine(); x.blockBoundary(0); } } private static class HR_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new HR_ElementHandler(); public void process(Processor x, Element element) throws IOException { x.blockBoundary(0); x.appendBlockVerticalMargin(); for (int i=0; i<72; i++) x.append('-'); x.blockBoundary(0); } } private static class ListElementHandler implements ElementHandler { public static final ElementHandler 
INSTANCE_OL=new ListElementHandler(0); public static final ElementHandler INSTANCE_UL=new ListElementHandler(UNORDERED_LIST); private final int initialListBulletNumber; public ListElementHandler(int initialListBulletNumber) { this.initialListBulletNumber=initialListBulletNumber; } public void process(Processor x, Element element) throws IOException { x.blockBoundary(0); int oldListBulletNumber=x.listBulletNumber; x.listBulletNumber=initialListBulletNumber; x.listIndentLevel++; x.appendElementContent(element); x.listIndentLevel--; x.listBulletNumber=oldListBulletNumber; x.blockBoundary(0); } } private static class LI_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new LI_ElementHandler(); public void process(Processor x, Element element) throws IOException { if (x.listBulletNumber!=UNORDERED_LIST) x.listBulletNumber++; x.bullet=true; x.blockBoundary(0); x.appendBlockVerticalMargin(); x.appendIndent(); x.ignoreInitialWhitespace=true; x.appendElementContent(element); x.bullet=false; x.blockBoundary(0); } } private static class PRE_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new PRE_ElementHandler(); public void process(Processor x, Element element) throws IOException { x.blockBoundary(1); boolean oldPreformatted=x.preformatted; // should always be false x.preformatted=true; x.appendElementContent(element); x.preformatted=oldPreformatted; x.blockBoundary(1); } } private static class TD_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new TD_ElementHandler(); public void process(Processor x, Element element) throws IOException { if (!x.isStartOfBlock()) x.append(x.tableCellSeparator); x.lastCharWhiteSpace=false; x.appendElementContent(element); } } private static class TR_ElementHandler implements ElementHandler { public static final ElementHandler INSTANCE=new TR_ElementHandler(); public void process(Processor x, Element element) throws IOException { 
x.blockBoundary(0); x.appendElementContent(element); x.blockBoundary(0); } } } }





// © 2015 - 2025 Weber Informatics LLC | Privacy Policy