All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.htmlunit.cyberneko.xerces.dom.TextImpl Maven / Gradle / Ivy

/*
 * Copyright (c) 2017-2024 Ronald Brill
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.htmlunit.cyberneko.xerces.dom;

import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

/**
 * Text nodes hold the non-markup, non-Entity content of an Element or
 * Attribute.
 * 

* When a document is first made available to the DOM, there is only one Text * object for each block of adjacent plain-text. Users (ie, applications) may * create multiple adjacent Texts during editing -- see * {@link org.w3c.dom.Element#normalize} for discussion. *

* Note that CDATASection is a subclass of Text. This is conceptually valid, * since they're really just two different ways of quoting characters when * they're written out as part of an XML stream. */ public class TextImpl extends CharacterDataImpl implements Text { // Factory constructor. public TextImpl(final CoreDocumentImpl ownerDoc, final String data) { super(ownerDoc, data); } /** * {@inheritDoc} * * A short integer indicating what type of node this is. The named constants for * this value are defined in the org.w3c.dom.Node interface. */ @Override public short getNodeType() { return Node.TEXT_NODE; } /** * {@inheritDoc} */ @Override public String getNodeName() { return "#text"; } // NON-DOM: Set whether this Text is ignorable whitespace. public void setIgnorableWhitespace(final boolean ignore) { if (needsSyncData()) { synchronizeData(); } isIgnorableWhitespace(ignore); } /** * {@inheritDoc} * * DOM L3 Core CR - Experimental *

* Returns whether this text node contains element content whitespace, often * abusively called "ignorable whitespace". The text node is determined to * contain whitespace in element content during the load of the document or if * validation occurs while using Document.normalizeDocument(). */ @Override public boolean isElementContentWhitespace() { // REVISIT: is this implemenation correct? if (needsSyncData()) { synchronizeData(); } return internalIsIgnorableWhitespace(); } /** * {@inheritDoc} * * DOM Level 3 WD - Experimental. Returns all text of Text nodes * logically-adjacent text nodes to this node, concatenated in document order. */ @Override public String getWholeText() { if (needsSyncData()) { synchronizeData(); } final StringBuilder builder = new StringBuilder(); if (data_ != null && data_.length() != 0) { builder.append(data_); } // concatenate text of logically adjacent text nodes to the left of this node in // the tree getWholeTextBackward(this.getPreviousSibling(), builder, this.getParentNode()); final String temp = builder.toString(); // clear buffer builder.setLength(0); // concatenate text of logically adjacent text nodes to the right of this node // in the tree getWholeTextForward(this.getNextSibling(), builder, this.getParentNode()); return temp + builder; } /** * internal method taking a StringBuffer in parameter and inserts the text * content at the start of the buffer * * @param builder string buffer * @throws DOMException on error */ protected void insertTextContent(final StringBuilder builder) throws DOMException { final String content = getNodeValue(); if (content != null) { builder.insert(0, content); } } /** * Concatenates the text of all logically-adjacent text nodes to the right of * this node * * @param node the node * @param builder the {@link StringBuilder} * @param parent the parent * @return true - if execution was stopped because the type of node other than * EntityRef, Text, CDATA is encountered, otherwise return false */ private boolean getWholeTextForward(Node node, final StringBuilder builder, final Node parent) { // boolean to indicate whether node is a child of an entity reference boolean inEntRef = false; if (parent != null) { inEntRef = parent.getNodeType() == Node.ENTITY_REFERENCE_NODE; } while (node != null) { final short type = node.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { if (getWholeTextForward(node.getFirstChild(), builder, node)) { return true; } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { ((NodeImpl) node).getTextContent(builder); } else { return true; } node = node.getNextSibling(); } // if the parent node is an entity reference node, must // check nodes to the right of the parent entity reference node for logically // adjacent // text nodes if (inEntRef) { getWholeTextForward(parent.getNextSibling(), builder, parent.getParentNode()); return true; } return false; } /** * Concatenates the text of all logically-adjacent text nodes to the left of the * node * * @param node the node * @param builder the {@link StringBuilder} * @param parent the parent * @return true - if execution was stopped because the type of node other than * EntityRef, Text, CDATA is encountered, otherwise return false */ private boolean getWholeTextBackward(Node node, final StringBuilder builder, final Node parent) { // boolean to indicate whether node is a child of an entity reference boolean inEntRef = false; if (parent != null) { inEntRef = parent.getNodeType() == Node.ENTITY_REFERENCE_NODE; } while (node != null) { final short type = node.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { if (getWholeTextBackward(node.getLastChild(), builder, node)) { return true; } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { ((TextImpl) node).insertTextContent(builder); } else { return true; } node = node.getPreviousSibling(); } // if the parent node is an entity reference node, must // check nodes to the left of the parent entity reference node for logically // adjacent // text nodes if (inEntRef) { getWholeTextBackward(parent.getPreviousSibling(), builder, parent.getParentNode()); return true; } return false; } /** * {@inheritDoc} * * Replaces the text of the current node and all logically-adjacent text nodes * with the specified text. All logically-adjacent text nodes are removed * including the current node unless it was the recipient of the replacement * text. * * @param content The content of the replacing Text node. * @return text - The Text node created with the specified content. */ @Override public Text replaceWholeText(final String content) throws DOMException { if (needsSyncData()) { synchronizeData(); } // if the content is null final Node parent = this.getParentNode(); if (content == null || content.length() == 0) { // remove current node if (parent != null) { // check if node in the tree parent.removeChild(this); } return null; } // make sure we can make the replacement if (ownerDocument().errorChecking) { // make sure we can make the replacement if (!canModifyPrev(this) || !canModifyNext(this)) { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, DOMMessageFormatter .formatMessage(DOMMessageFormatter.DOM_DOMAIN, "NO_MODIFICATION_ALLOWED_ERR", null)); } } // replace the text node final Text currentNode; this.setData(content); currentNode = this; // check logically-adjacent text nodes Node prev = currentNode.getPreviousSibling(); while (prev != null) { // If the logically-adjacent next node can be removed // remove it. A logically adjacent node can be removed if // it is a Text or CDATASection node or an EntityReference with // Text and CDATA only children. if ((prev.getNodeType() == Node.TEXT_NODE) || (prev.getNodeType() == Node.CDATA_SECTION_NODE) || (prev.getNodeType() == Node.ENTITY_REFERENCE_NODE && hasTextOnlyChildren(prev))) { parent.removeChild(prev); prev = currentNode; } else { break; } prev = prev.getPreviousSibling(); } // check logically-adjacent text nodes Node next = currentNode.getNextSibling(); while (next != null) { // If the logically-adjacent next node can be removed // remove it. A logically adjacent node can be removed if // it is a Text or CDATASection node or an EntityReference with // Text and CDATA only children. if ((next.getNodeType() == Node.TEXT_NODE) || (next.getNodeType() == Node.CDATA_SECTION_NODE) || (next.getNodeType() == Node.ENTITY_REFERENCE_NODE && hasTextOnlyChildren(next))) { parent.removeChild(next); next = currentNode; } else { break; } next = next.getNextSibling(); } return currentNode; } /** * If any EntityReference to be removed has descendants that are not * EntityReference, Text, or CDATASection nodes, the replaceWholeText method * must fail before performing any modification of the document, raising a * DOMException with the code NO_MODIFICATION_ALLOWED_ERR. Traverse previous * siblings of the node to be replaced. If a previous sibling is an * EntityReference node, get it's last child. If the last child was a Text or * CDATASection node and its previous siblings are neither a replaceable * EntityReference or Text or CDATASection nodes, return false. IF the last * child was neither Text nor CDATASection nor a replaceable EntityReference * Node, then return true. If the last child was a Text or CDATASection node any * its previous sibling was not or was an EntityReference that did not contain * only Text or CDATASection nodes, return false. Check this recursively for * EntityReference nodes. * * @param node the node * @return true - can replace text false - can't replace exception must be * raised */ private boolean canModifyPrev(final Node node) { boolean textLastChild = false; Node prev = node.getPreviousSibling(); while (prev != null) { final short type = prev.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { // If the previous sibling was entityreference // check if its content is replaceable Node lastChild = prev.getLastChild(); // if the entity reference has no children // return false if (lastChild == null) { return false; } // The replacement text of the entity reference should // be either only text,cadatsections or replaceable entity // reference nodes or the last child should be neither of these while (lastChild != null) { final short lType = lastChild.getNodeType(); if (lType == Node.TEXT_NODE || lType == Node.CDATA_SECTION_NODE) { textLastChild = true; } else if (lType == Node.ENTITY_REFERENCE_NODE) { if (!canModifyPrev(lastChild)) { return false; } // If the EntityReference child contains // only text, or non-text or ends with a // non-text node. textLastChild = true; } else { // If the last child was replaceable and others are not // Text or CDataSection or replaceable EntityRef nodes // return false. return !textLastChild; } lastChild = lastChild.getPreviousSibling(); } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { // If the previous sibling was text or cdatasection move to next } else { // If the previous sibling was anything but text or // cdatasection or an entity reference, stop search and // return true return true; } prev = prev.getPreviousSibling(); } return true; } /** * If any EntityReference to be removed has descendants that are not * EntityReference, Text, or CDATASection nodes, the replaceWholeText method * must fail before performing any modification of the document, raising a * DOMException with the code NO_MODIFICATION_ALLOWED_ERR. Traverse previous * siblings of the node to be replaced. If a previous sibling is an * EntityReference node, get it's last child. If the first child was a Text or * CDATASection node and its next siblings are neither a replaceable * EntityReference or Text or CDATASection nodes, return false. IF the first * child was neither Text nor CDATASection nor a replaceable EntityReference * Node, then return true. If the first child was a Text or CDATASection node * any its next sibling was not or was an EntityReference that did not contain * only Text or CDATASection nodes, return false. Check this recursively for * EntityReference nodes. * * @param node the node * @return true - can replace text false - can't replace exception must be * raised */ private boolean canModifyNext(final Node node) { boolean textFirstChild = false; Node next = node.getNextSibling(); while (next != null) { final short type = next.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { // If the previous sibling was entityreference // check if its content is replaceable Node firstChild = next.getFirstChild(); // if the entity reference has no children // return false if (firstChild == null) { return false; } // The replacement text of the entity reference should // be either only text,cadatsections or replaceable entity // reference nodes or the last child should be neither of these while (firstChild != null) { final short lType = firstChild.getNodeType(); if (lType == Node.TEXT_NODE || lType == Node.CDATA_SECTION_NODE) { textFirstChild = true; } else if (lType == Node.ENTITY_REFERENCE_NODE) { if (!canModifyNext(firstChild)) { return false; } // If the EntityReference child contains // only text, or non-text or ends with a // non-text node. textFirstChild = true; } else { // If the first child was replaceable text and next // children are not, then return false return !textFirstChild; } firstChild = firstChild.getNextSibling(); } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { // If the previous sibling was text or cdatasection move to next } else { // If the next sibling was anything but text or // cdatasection or an entity reference, stop search and // return true return true; } next = next.getNextSibling(); } return true; } /** * Check if an EntityReference node has Text Only child nodes * * @param node the node * @return true - Contains text only children */ private boolean hasTextOnlyChildren(final Node node) { Node child = node; if (child == null) { return false; } child = child.getFirstChild(); while (child != null) { final int type = child.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { return hasTextOnlyChildren(child); } if (type != Node.TEXT_NODE && type != Node.CDATA_SECTION_NODE) { return false; } child = child.getNextSibling(); } return true; } // NON-DOM: Returns whether this Text is ignorable whitespace. public boolean isIgnorableWhitespace() { if (needsSyncData()) { synchronizeData(); } return internalIsIgnorableWhitespace(); } /** * {@inheritDoc} * * Break a text node into two sibling nodes. (Note that if the current node has * no parent, they won't wind up as "siblings" -- they'll both be orphans.) * * @param offset The offset at which to split. If offset is at the end of the * available data, the second node will be empty. * * @return A reference to the new node (containing data after the offset point). * The original node will contain data up to that point. * * @throws DOMException INDEX_SIZE_ERR if offset is < 0 or > length. * * @throws DOMException NO_MODIFICATION_ALLOWED_ERR if node is read-only. */ @Override public Text splitText(final int offset) throws DOMException { if (needsSyncData()) { synchronizeData(); } if (offset < 0 || offset > data_.length()) { throw new DOMException(DOMException.INDEX_SIZE_ERR, DOMMessageFormatter.formatMessage(DOMMessageFormatter.DOM_DOMAIN, "INDEX_SIZE_ERR", null)); } // split text into two separate nodes final Text newText = getOwnerDocument().createTextNode(data_.substring(offset)); setNodeValue(data_.substring(0, offset)); // insert new text node final Node parentNode = getParentNode(); if (parentNode != null) { parentNode.insertBefore(newText, nextSibling); } return newText; } // NON-DOM (used by DOMParser): Reset data for the node. public void replaceData(final String value) { data_ = value; } // NON-DOM (used by DOMParser: Sets data to empty string. // Returns the value the data was set to. public String removeData() { final String olddata = data_; data_ = ""; return olddata; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy