All downloads are free. Search and download functionality uses the official Maven repository.

org.htmlunit.html.serializer.HtmlSerializerVisibleText Maven / Gradle / Ivy

Go to download

XLT (Xceptance LoadTest) is an extensive load and performance test tool developed and maintained by Xceptance.

The newest version!
/*
 * Copyright (c) 2002-2024 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.htmlunit.html.serializer;

import static org.htmlunit.css.CssStyleSheet.BLOCK;

import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.htmlunit.Page;
import org.htmlunit.SgmlPage;
import org.htmlunit.WebWindow;
import org.htmlunit.css.ComputedCssStyleDeclaration;
import org.htmlunit.css.StyleAttributes.Definition;
import org.htmlunit.html.DomComment;
import org.htmlunit.html.DomElement;
import org.htmlunit.html.DomNode;
import org.htmlunit.html.DomText;
import org.htmlunit.html.HtmlApplet;
import org.htmlunit.html.HtmlBody;
import org.htmlunit.html.HtmlBreak;
import org.htmlunit.html.HtmlCheckBoxInput;
import org.htmlunit.html.HtmlDetails;
import org.htmlunit.html.HtmlHiddenInput;
import org.htmlunit.html.HtmlInlineFrame;
import org.htmlunit.html.HtmlInput;
import org.htmlunit.html.HtmlMenu;
import org.htmlunit.html.HtmlNoFrames;
import org.htmlunit.html.HtmlNoScript;
import org.htmlunit.html.HtmlOption;
import org.htmlunit.html.HtmlOrderedList;
import org.htmlunit.html.HtmlPreformattedText;
import org.htmlunit.html.HtmlRadioButtonInput;
import org.htmlunit.html.HtmlResetInput;
import org.htmlunit.html.HtmlScript;
import org.htmlunit.html.HtmlSelect;
import org.htmlunit.html.HtmlStyle;
import org.htmlunit.html.HtmlSubmitInput;
import org.htmlunit.html.HtmlSummary;
import org.htmlunit.html.HtmlTable;
import org.htmlunit.html.HtmlTableCell;
import org.htmlunit.html.HtmlTableFooter;
import org.htmlunit.html.HtmlTableHeader;
import org.htmlunit.html.HtmlTableRow;
import org.htmlunit.html.HtmlTextArea;
import org.htmlunit.html.HtmlTitle;
import org.htmlunit.html.HtmlUnorderedList;
import org.htmlunit.html.TableRowGroup;

/**
 * Special serializer to generate the output we need
 * at least for selenium WebElement#getText().
 * 

This is also used from estimations by ComputedCSSStyleDeclaration.

* * @author Ronald Brill * @author cd alexndr */ public class HtmlSerializerVisibleText { /** * Converts an HTML node to text. * @param node a node * @return the text representation according to the setting of this serializer */ public String asText(final DomNode node) { if (node instanceof HtmlBreak) { return ""; } final HtmlSerializerTextBuilder builder = new HtmlSerializerTextBuilder(); appendNode(builder, node, whiteSpaceStyle(node, Mode.WHITE_SPACE_NORMAL)); return builder.getText(); } /** * Iterate over all Children and call appendNode() for every. * * @param builder the StringBuilder to add to * @param node the node to process * @param mode the {@link Mode} to use for processing */ protected void appendChildren(final HtmlSerializerTextBuilder builder, final DomNode node, final Mode mode) { for (final DomNode child : node.getChildren()) { appendNode(builder, child, updateWhiteSpaceStyle(node, mode)); } } /** * The core distribution method call the different appendXXX * methods depending on the type of the given node. 
* * @param builder the StringBuilder to add to * @param node the node to process * @param mode the {@link Mode} to use for processing */ protected void appendNode(final HtmlSerializerTextBuilder builder, final DomNode node, final Mode mode) { if (node instanceof DomText) { appendText(builder, (DomText) node, mode); } else if (node instanceof DomComment) { appendComment(builder, (DomComment) node, mode); } else if (node instanceof HtmlApplet && node.getPage().getWebClient().getOptions().isAppletEnabled()) { appendApplet(builder, (HtmlApplet) node, mode); } else if (node instanceof HtmlBreak) { appendBreak(builder, (HtmlBreak) node, mode); } else if (node instanceof HtmlHiddenInput) { appendHiddenInput(builder, (HtmlHiddenInput) node, mode); } else if (node instanceof HtmlScript) { appendScript(builder, (HtmlScript) node, mode); } else if (node instanceof HtmlStyle) { appendStyle(builder, (HtmlStyle) node, mode); } else if (node instanceof HtmlNoFrames) { appendNoFrames(builder, (HtmlNoFrames) node, mode); } else if (node instanceof HtmlTextArea) { appendTextArea(builder, (HtmlTextArea) node, mode); } else if (node instanceof HtmlTitle) { appendTitle(builder, (HtmlTitle) node, mode); } else if (node instanceof HtmlTableRow) { appendTableRow(builder, (HtmlTableRow) node, mode); } else if (node instanceof HtmlSelect) { appendSelect(builder, (HtmlSelect) node, mode); } else if (node instanceof HtmlOption) { appendOption(builder, (HtmlOption) node, mode); } else if (node instanceof HtmlSubmitInput) { appendSubmitInput(builder, (HtmlSubmitInput) node, mode); } else if (node instanceof HtmlResetInput) { appendResetInput(builder, (HtmlResetInput) node, mode); } else if (node instanceof HtmlCheckBoxInput) { appendCheckBoxInput(builder, (HtmlCheckBoxInput) node, mode); } else if (node instanceof HtmlRadioButtonInput) { appendRadioButtonInput(builder, (HtmlRadioButtonInput) node, mode); } else if (node instanceof HtmlInput) { // nothing } else if (node instanceof HtmlTable) { 
appendTable(builder, (HtmlTable) node, mode); } else if (node instanceof HtmlOrderedList) { appendOrderedList(builder, (HtmlOrderedList) node, mode); } else if (node instanceof HtmlUnorderedList) { appendUnorderedList(builder, (HtmlUnorderedList) node, mode); } else if (node instanceof HtmlPreformattedText) { appendPreformattedText(builder, (HtmlPreformattedText) node, mode); } else if (node instanceof HtmlInlineFrame) { appendInlineFrame(builder, (HtmlInlineFrame) node, mode); } else if (node instanceof HtmlMenu) { appendMenu(builder, (HtmlMenu) node, mode); } else if (node instanceof HtmlDetails) { appendDetails(builder, (HtmlDetails) node, mode); } else if (node instanceof HtmlNoScript && node.getPage().getWebClient().isJavaScriptEnabled()) { appendNoScript(builder, (HtmlNoScript) node, mode); } else { appendDomNode(builder, node, mode); } } /** * Process {@link DomNode}. * * @param builder the StringBuilder to add to * @param domNode the target to process * @param mode the {@link Mode} to use for processing */ protected void appendDomNode(final HtmlSerializerTextBuilder builder, final DomNode domNode, final Mode mode) { final boolean block; if (domNode instanceof HtmlBody) { block = false; } else if (domNode instanceof DomElement) { final WebWindow window = domNode.getPage().getEnclosingWindow(); final String display = window.getComputedStyle((DomElement) domNode, null).getDisplay(); block = BLOCK.equals(display); } else { block = false; } if (block) { builder.appendBlockSeparator(); } appendChildren(builder, domNode, mode); if (block) { builder.appendBlockSeparator(); } } /** * Process {@link HtmlHiddenInput}. * * @param builder the StringBuilder to add to * @param htmlHiddenInput the target to process * @param mode the {@link Mode} to use for processing */ protected void appendHiddenInput(final HtmlSerializerTextBuilder builder, final HtmlHiddenInput htmlHiddenInput, final Mode mode) { // nothing to do } /** * Process {@link HtmlScript}. 
* * @param builder the StringBuilder to add to * @param htmlScript the target to process * @param mode the {@link Mode} to use for processing */ protected void appendScript(final HtmlSerializerTextBuilder builder, final HtmlScript htmlScript, final Mode mode) { // nothing to do } /** * Process {@link HtmlStyle}. * * @param builder the StringBuilder to add to * @param htmlStyle the target to process * @param mode the {@link Mode} to use for processing */ protected void appendStyle(final HtmlSerializerTextBuilder builder, final HtmlStyle htmlStyle, final Mode mode) { // nothing to do } /** * Process {@link HtmlNoScript}. * * @param builder the StringBuilder to add to * @param htmlNoScript the target to process * @param mode the {@link Mode} to use for processing */ protected void appendNoScript(final HtmlSerializerTextBuilder builder, final HtmlNoScript htmlNoScript, final Mode mode) { // nothing to do } /** * Process {@link HtmlNoFrames}. * * @param builder the StringBuilder to add to * @param htmlNoFrames the target to process * @param mode the {@link Mode} to use for processing */ protected void appendNoFrames(final HtmlSerializerTextBuilder builder, final HtmlNoFrames htmlNoFrames, final Mode mode) { // nothing to do } /** * Process {@link HtmlSubmitInput}. * * @param builder the StringBuilder to add to * @param htmlSubmitInput the target to process * @param mode the {@link Mode} to use for processing */ protected void appendSubmitInput(final HtmlSerializerTextBuilder builder, final HtmlSubmitInput htmlSubmitInput, final Mode mode) { // nothing to do } /** * Process {@link HtmlInput}. * * @param builder the StringBuilder to add to * @param htmlInput the target to process * @param mode the {@link Mode} to use for processing */ protected void appendInput(final HtmlSerializerTextBuilder builder, final HtmlInput htmlInput, final Mode mode) { builder.append(htmlInput.getValueAttribute(), mode); } /** * Process {@link HtmlResetInput}. 
* * @param builder the StringBuilder to add to * @param htmlResetInput the target to process * @param mode the {@link Mode} to use for processing */ protected void appendResetInput(final HtmlSerializerTextBuilder builder, final HtmlResetInput htmlResetInput, final Mode mode) { // nothing to do } /** * Process {@link HtmlMenu}. * @param builder the StringBuilder to add to * @param htmlMenu the target to process * @param mode the {@link Mode} to use for processing */ protected void appendMenu(final HtmlSerializerTextBuilder builder, final HtmlMenu htmlMenu, final Mode mode) { builder.appendBlockSeparator(); boolean first = true; for (final DomNode item : htmlMenu.getChildren()) { if (!first) { builder.appendBlockSeparator(); } first = false; appendNode(builder, item, mode); } builder.appendBlockSeparator(); } /** * Process {@link HtmlDetails}. * @param builder the StringBuilder to add to * @param htmlDetails the target to process * @param mode the {@link Mode} to use for processing */ protected void appendDetails(final HtmlSerializerTextBuilder builder, final HtmlDetails htmlDetails, final Mode mode) { if (htmlDetails.isOpen()) { appendChildren(builder, htmlDetails, mode); return; } for (final DomNode child : htmlDetails.getChildren()) { if (child instanceof HtmlSummary) { appendNode(builder, child, mode); } } } /** * Process {@link HtmlTitle}. * @param builder the StringBuilder to add to * @param htmlTitle the target to process * @param mode the {@link Mode} to use for processing */ protected void appendTitle(final HtmlSerializerTextBuilder builder, final HtmlTitle htmlTitle, final Mode mode) { // nothing to do } /** * Process {@link HtmlTableRow}. 
* * @param builder the StringBuilder to add to * @param htmlTableRow the target to process * @param mode the {@link Mode} to use for processing */ protected void appendTableRow(final HtmlSerializerTextBuilder builder, final HtmlTableRow htmlTableRow, final Mode mode) { boolean first = true; for (final HtmlTableCell cell : htmlTableRow.getCells()) { if (!first) { builder.appendBlank(); } else { first = false; } appendChildren(builder, cell, mode); // trim? } } /** * Check domNode visibility. * @param domNode the node to check * @return true or false */ protected boolean isDisplayed(final DomNode domNode) { return domNode.isDisplayed(); } /** * Process {@link HtmlTextArea}. * * @param builder the StringBuilder to add to * @param htmlTextArea the target to process * @param mode the {@link Mode} to use for processing */ protected void appendTextArea(final HtmlSerializerTextBuilder builder, final HtmlTextArea htmlTextArea, final Mode mode) { if (isDisplayed(htmlTextArea)) { builder.append(htmlTextArea.getDefaultValue(), whiteSpaceStyle(htmlTextArea, Mode.PRE)); builder.trimRight(Mode.PRE); } } /** * Process {@link HtmlTable}. 
* * @param builder the StringBuilder to add to * @param htmlTable the target to process * @param mode the {@link Mode} to use for processing */ protected void appendTable(final HtmlSerializerTextBuilder builder, final HtmlTable htmlTable, final Mode mode) { builder.appendBlockSeparator(); final String caption = htmlTable.getCaptionText(); if (caption != null) { builder.append(caption, mode); builder.appendBlockSeparator(); } boolean first = true; // first thead has to be displayed first and first tfoot has to be displayed last final HtmlTableHeader tableHeader = htmlTable.getHeader(); if (tableHeader != null) { first = appendTableRows(builder, mode, tableHeader.getRows(), true, null, null); } final HtmlTableFooter tableFooter = htmlTable.getFooter(); final List tableRows = htmlTable.getRows(); first = appendTableRows(builder, mode, tableRows, first, tableHeader, tableFooter); if (tableFooter != null) { first = appendTableRows(builder, mode, tableFooter.getRows(), first, null, null); } else if (tableRows.isEmpty()) { final DomNode firstChild = htmlTable.getFirstChild(); if (firstChild != null) { appendNode(builder, firstChild, mode); } } builder.appendBlockSeparator(); } /** * Process {@link HtmlTableRow}. 
* * @param builder the StringBuilder to add to * @param mode the {@link Mode} to use for processing * @param rows the rows * @param first if true this is the first one * @param skipParent1 skip row if the parent is this * @param skipParent2 skip row if the parent is this * @return true if this was the first one */ protected boolean appendTableRows(final HtmlSerializerTextBuilder builder, final Mode mode, final List rows, boolean first, final TableRowGroup skipParent1, final TableRowGroup skipParent2) { for (final HtmlTableRow row : rows) { if (row.getParentNode() == skipParent1 || row.getParentNode() == skipParent2) { continue; } if (!first) { builder.appendBlockSeparator(); } first = false; appendTableRow(builder, row, mode); } return first; } /** * Process {@link HtmlSelect}. * * @param builder the StringBuilder to add to * @param htmlSelect the target to process * @param mode the {@link Mode} to use for processing */ protected void appendSelect(final HtmlSerializerTextBuilder builder, final HtmlSelect htmlSelect, final Mode mode) { builder.appendBlockSeparator(); boolean leadingNlPending = false; final Mode selectMode = whiteSpaceStyle(htmlSelect, mode); for (final DomNode item : htmlSelect.getChildren()) { if (leadingNlPending) { builder.appendBlockSeparator(); leadingNlPending = false; } builder.resetContentAdded(); appendNode(builder, item, whiteSpaceStyle(item, selectMode)); if (!leadingNlPending && builder.contentAdded_) { leadingNlPending = true; } } builder.appendBlockSeparator(); } /** * Process {@link HtmlSelect}. * * @param builder the StringBuilder to add to * @param htmlOption the target to process * @param mode the {@link Mode} to use for processing */ protected void appendOption(final HtmlSerializerTextBuilder builder, final HtmlOption htmlOption, final Mode mode) { builder.ignoreHtmlBreaks(); appendChildren(builder, htmlOption, mode); builder.processHtmlBreaks(); } /** * Process {@link HtmlOrderedList}. 
* * @param builder the StringBuilder to add to * @param htmlOrderedList the OL element * @param mode the {@link Mode} to use for processing */ protected void appendOrderedList(final HtmlSerializerTextBuilder builder, final HtmlOrderedList htmlOrderedList, final Mode mode) { builder.appendBlockSeparator(); boolean leadingNlPending = false; final Mode olMode = whiteSpaceStyle(htmlOrderedList, mode); for (final DomNode item : htmlOrderedList.getChildren()) { if (leadingNlPending) { builder.appendBlockSeparator(); leadingNlPending = false; } builder.resetContentAdded(); appendNode(builder, item, whiteSpaceStyle(item, olMode)); if (!leadingNlPending && builder.contentAdded_) { leadingNlPending = true; } } builder.appendBlockSeparator(); } /** * Process {@link HtmlUnorderedList}. * @param builder the StringBuilder to add to * @param htmlUnorderedList the target to process * @param mode the {@link Mode} to use for processing */ protected void appendUnorderedList(final HtmlSerializerTextBuilder builder, final HtmlUnorderedList htmlUnorderedList, final Mode mode) { builder.appendBlockSeparator(); boolean leadingNlPending = false; final Mode ulMode = whiteSpaceStyle(htmlUnorderedList, mode); for (final DomNode item : htmlUnorderedList.getChildren()) { if (leadingNlPending) { builder.appendBlockSeparator(); leadingNlPending = false; } builder.resetContentAdded(); appendNode(builder, item, whiteSpaceStyle(item, ulMode)); if (!leadingNlPending && builder.contentAdded_) { leadingNlPending = true; } } builder.appendBlockSeparator(); } /** * Process {@link HtmlPreformattedText}. 
* * @param builder the StringBuilder to add to * @param htmlPreformattedText the target to process * @param mode the {@link Mode} to use for processing */ protected void appendPreformattedText(final HtmlSerializerTextBuilder builder, final HtmlPreformattedText htmlPreformattedText, final Mode mode) { if (isDisplayed(htmlPreformattedText)) { builder.appendBlockSeparator(); appendChildren(builder, htmlPreformattedText, whiteSpaceStyle(htmlPreformattedText, Mode.PRE)); builder.appendBlockSeparator(); } } /** * Process {@link HtmlInlineFrame}. * * @param builder the StringBuilder to add to * @param htmlInlineFrame the target to process * @param mode the {@link Mode} to use for processing */ protected void appendInlineFrame(final HtmlSerializerTextBuilder builder, final HtmlInlineFrame htmlInlineFrame, final Mode mode) { if (isDisplayed(htmlInlineFrame)) { builder.appendBlockSeparator(); final Page page = htmlInlineFrame.getEnclosedPage(); if (page instanceof SgmlPage) { builder.append(((SgmlPage) page).asNormalizedText(), mode); } builder.appendBlockSeparator(); } } /** * Process {@link DomText}. * * @param builder the StringBuilder to add to * @param domText the target to process * @param mode the {@link Mode} to use for processing */ protected void appendText(final HtmlSerializerTextBuilder builder, final DomText domText, final Mode mode) { final DomNode parent = domText.getParentNode(); if (parent instanceof HtmlTitle || parent instanceof HtmlScript) { builder.append(domText.getData(), Mode.WHITE_SPACE_PRE_LINE); } if (parent == null || parent instanceof HtmlTitle || parent instanceof HtmlScript || isDisplayed(parent)) { builder.append(domText.getData(), mode); } } /** * Process {@link DomComment}. 
* * @param builder the StringBuilder to add to * @param domComment the target to process * @param mode the {@link Mode} to use for processing */ protected void appendComment(final HtmlSerializerTextBuilder builder, final DomComment domComment, final Mode mode) { // nothing to do } /** * Process {@link HtmlApplet}. * * @param builder the StringBuilder to add to * @param htmlApplet the target to process * @param mode the {@link Mode} to use for processing */ protected void appendApplet(final HtmlSerializerTextBuilder builder, final HtmlApplet htmlApplet, final Mode mode) { // nothing to do } /** * Process {@link HtmlBreak}. * * @param builder the StringBuilder to add to * @param htmlBreak the target to process * @param mode the {@link Mode} to use for processing */ protected void appendBreak(final HtmlSerializerTextBuilder builder, final HtmlBreak htmlBreak, final Mode mode) { builder.appendBreak(mode); } /** * Process {@link HtmlCheckBoxInput}. * * @param builder the StringBuilder to add to * @param htmlCheckBoxInput the target to process * @param mode the {@link Mode} to use for processing */ protected void appendCheckBoxInput(final HtmlSerializerTextBuilder builder, final HtmlCheckBoxInput htmlCheckBoxInput, final Mode mode) { // nothing to do } /** * Process {@link HtmlRadioButtonInput}. 
* * @param builder the StringBuilder to add to * @param htmlRadioButtonInput the target to process * @param mode the {@link Mode} to use for processing */ protected void appendRadioButtonInput(final HtmlSerializerTextBuilder builder, final HtmlRadioButtonInput htmlRadioButtonInput, final Mode mode) { // nothing to do } protected Mode whiteSpaceStyle(final DomNode domNode, final Mode defaultMode) { final Page page = domNode.getPage(); if (page != null) { final WebWindow window = page.getEnclosingWindow(); if (window.getWebClient().getOptions().isCssEnabled()) { DomNode node = domNode; while (node != null) { if (node instanceof DomElement) { final ComputedCssStyleDeclaration style = window.getComputedStyle((DomElement) node, null); final String value = style.getStyleAttribute(Definition.WHITE_SPACE, false); if (StringUtils.isNoneEmpty(value)) { if ("normal".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_NORMAL; } if ("nowrap".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_NORMAL; } if ("pre".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_PRE; } if ("pre-wrap".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_PRE; } if ("pre-line".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_PRE_LINE; } } } node = node.getParentNode(); } } } return defaultMode; } protected Mode updateWhiteSpaceStyle(final DomNode domNode, final Mode defaultMode) { final Page page = domNode.getPage(); if (page != null) { final WebWindow window = page.getEnclosingWindow(); if (window.getWebClient().getOptions().isCssEnabled()) { if (domNode instanceof DomElement) { final ComputedCssStyleDeclaration style = window.getComputedStyle((DomElement) domNode, null); final String value = style.getStyleAttribute(Definition.WHITE_SPACE, false); if (StringUtils.isNoneEmpty(value)) { if ("normal".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_NORMAL; } if ("nowrap".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_NORMAL; } if ("pre".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_PRE; } 
if ("pre-wrap".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_PRE; } if ("pre-line".equalsIgnoreCase(value)) { return Mode.WHITE_SPACE_PRE_LINE; } } } } } return defaultMode; } /** Mode. */ protected enum Mode { /** * The mode for the pre tag. */ PRE, /** * Sequences of white space are collapsed. Newline characters * in the source are handled the same as other white space. * Lines are broken as necessary to fill line boxes. */ WHITE_SPACE_NORMAL, /** * Sequences of white space are preserved. Lines are only broken * at newline characters in the source and at
elements. */ WHITE_SPACE_PRE, /** * Sequences of white space are collapsed. Lines are broken * at newline characters, at
, and as necessary * to fill line boxes. */ WHITE_SPACE_PRE_LINE } protected static class HtmlSerializerTextBuilder { private enum State { DEFAULT, EMPTY, BLANK_AT_END, BLANK_AT_END_AFTER_NEWLINE, NEWLINE_AT_END, BREAK_AT_END, BLOCK_SEPARATOR_AT_END } private State state_; private final StringBuilder builder_; private int trimRightPos_; private boolean contentAdded_; private boolean ignoreHtmlBreaks_; public HtmlSerializerTextBuilder() { builder_ = new StringBuilder(); state_ = State.EMPTY; trimRightPos_ = 0; } // see https://drafts.csswg.org/css-text-3/#white-space public void append(final String content, final Mode mode) { int length = content.length(); if (length == 0) { return; } length--; int i = -1; for (char c : content.toCharArray()) { i++; // handle \r if (c == '\r') { if (length != i) { continue; } c = '\n'; } if (c == '\n') { if (mode == Mode.WHITE_SPACE_PRE) { switch (state_) { case EMPTY: case BLOCK_SEPARATOR_AT_END: break; default: builder_.append('\n'); state_ = State.NEWLINE_AT_END; trimRightPos_ = builder_.length(); break; } continue; } if (mode == Mode.PRE) { builder_.append('\n'); state_ = State.NEWLINE_AT_END; trimRightPos_ = builder_.length(); continue; } if (mode == Mode.WHITE_SPACE_PRE_LINE) { switch (state_) { case EMPTY: case BLOCK_SEPARATOR_AT_END: break; default: builder_.append('\n'); state_ = State.NEWLINE_AT_END; trimRightPos_ = builder_.length(); break; } continue; } switch (state_) { case EMPTY: case BLANK_AT_END: case BLANK_AT_END_AFTER_NEWLINE: case BLOCK_SEPARATOR_AT_END: case NEWLINE_AT_END: case BREAK_AT_END: break; default: builder_.append(' '); state_ = State.BLANK_AT_END; break; } continue; } if (c == ' ' || c == '\t' || c == '\f') { if (mode == Mode.WHITE_SPACE_PRE || mode == Mode.PRE) { appendBlank(); continue; } if (mode == Mode.WHITE_SPACE_PRE_LINE) { switch (state_) { case EMPTY: case BLANK_AT_END: case BLANK_AT_END_AFTER_NEWLINE: case BREAK_AT_END: break; default: builder_.append(' '); state_ = State.BLANK_AT_END; 
break; } continue; } switch (state_) { case EMPTY: case BLANK_AT_END: case BLANK_AT_END_AFTER_NEWLINE: case BLOCK_SEPARATOR_AT_END: case NEWLINE_AT_END: case BREAK_AT_END: break; default: builder_.append(' '); state_ = State.BLANK_AT_END; break; } continue; } if (c == (char) 160) { appendBlank(); if (mode == Mode.WHITE_SPACE_NORMAL || mode == Mode.WHITE_SPACE_PRE_LINE) { state_ = State.DEFAULT; } continue; } builder_.append(c); state_ = State.DEFAULT; trimRightPos_ = builder_.length(); contentAdded_ = true; } } public void appendBlockSeparator() { switch (state_) { case EMPTY: break; case BLANK_AT_END: builder_.setLength(trimRightPos_); if (builder_.length() == 0) { state_ = State.EMPTY; } else { builder_.append('\n'); state_ = State.BLOCK_SEPARATOR_AT_END; } break; case BLANK_AT_END_AFTER_NEWLINE: builder_.setLength(trimRightPos_ - 1); if (builder_.length() == 0) { state_ = State.EMPTY; } else { builder_.append('\n'); state_ = State.BLOCK_SEPARATOR_AT_END; } break; case BLOCK_SEPARATOR_AT_END: break; case NEWLINE_AT_END: case BREAK_AT_END: builder_.setLength(builder_.length() - 1); trimRightPos_ = trimRightPos_ - 1; if (builder_.length() == 0) { state_ = State.EMPTY; } else { builder_.append('\n'); state_ = State.BLOCK_SEPARATOR_AT_END; } break; default: builder_.append('\n'); state_ = State.BLOCK_SEPARATOR_AT_END; break; } } public void appendBreak(final Mode mode) { if (ignoreHtmlBreaks_) { return; } builder_.setLength(trimRightPos_); builder_.append('\n'); state_ = State.BREAK_AT_END; trimRightPos_ = builder_.length(); } public void appendBlank() { builder_.append(' '); state_ = State.BLANK_AT_END; trimRightPos_ = builder_.length(); } public void trimRight(final Mode mode) { if (mode == Mode.PRE) { switch (state_) { case BLOCK_SEPARATOR_AT_END: case NEWLINE_AT_END: case BREAK_AT_END: if (trimRightPos_ == builder_.length()) { trimRightPos_--; } break; default: break; } } builder_.setLength(trimRightPos_); state_ = State.DEFAULT; if (builder_.length() == 0) { 
state_ = State.EMPTY; } } public boolean wasContentAdded() { return contentAdded_; } public void resetContentAdded() { contentAdded_ = false; } public void ignoreHtmlBreaks() { ignoreHtmlBreaks_ = true; } public void processHtmlBreaks() { ignoreHtmlBreaks_ = false; } public String getText() { return builder_.substring(0, trimRightPos_); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy