All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sourceforge.htmlunit.xerces.dom.TextImpl Maven / Gradle / Ivy

Go to download

HtmlUnit adaptation of NekoHtml. It has the same functionality but exposing HTMLElements to be overridden.

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.sourceforge.htmlunit.xerces.dom;

import org.w3c.dom.DOMException;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

/**
 * Text nodes hold the non-markup, non-Entity content of
 * an Element or Attribute.
 * 

* When a document is first made available to the DOM, there is only * one Text object for each block of adjacent plain-text. Users (ie, * applications) may create multiple adjacent Texts during editing -- * see {@link org.w3c.dom.Element#normalize} for discussion. *

* Note that CDATASection is a subclass of Text. This is conceptually * valid, since they're really just two different ways of quoting * characters when they're written out as part of an XML stream. */ public class TextImpl extends CharacterDataImpl implements Text { // Factory constructor. public TextImpl(CoreDocumentImpl ownerDoc, String data) { super(ownerDoc, data); } /** * {@inheritDoc} * * A short integer indicating what type of node this is. The named * constants for this value are defined in the org.w3c.dom.Node interface. */ @Override public short getNodeType() { return Node.TEXT_NODE; } /** * {@inheritDoc} */ @Override public String getNodeName() { return "#text"; } // NON-DOM: Set whether this Text is ignorable whitespace. public void setIgnorableWhitespace(boolean ignore) { if (needsSyncData()) { synchronizeData(); } isIgnorableWhitespace(ignore); } /** * {@inheritDoc} * * DOM L3 Core CR - Experimental *

* Returns whether this text node contains * element content whitespace, often abusively called "ignorable whitespace". * The text node is determined to contain whitespace in element content * during the load of the document or if validation occurs while using * Document.normalizeDocument(). */ @Override public boolean isElementContentWhitespace() { // REVISIT: is this implemenation correct? if (needsSyncData()) { synchronizeData(); } return internalIsIgnorableWhitespace(); } /** * {@inheritDoc} * * DOM Level 3 WD - Experimental. * Returns all text of Text nodes logically-adjacent text * nodes to this node, concatenated in document order. */ @Override public String getWholeText(){ if (needsSyncData()) { synchronizeData(); } StringBuffer buffer = new StringBuffer(); if (data != null && data.length() != 0) { buffer.append(data); } // concatenate text of logically adjacent text nodes to the left of this node in the tree getWholeTextBackward(this.getPreviousSibling(), buffer, this.getParentNode()); String temp = buffer.toString(); // clear buffer buffer.setLength(0); // concatenate text of logically adjacent text nodes to the right of this node in the tree getWholeTextForward(this.getNextSibling(), buffer, this.getParentNode()); return temp + buffer; } /** * internal method taking a StringBuffer in parameter and inserts the * text content at the start of the buffer * * @param buf string buffer * @throws DOMException on error */ protected void insertTextContent(StringBuffer buf) throws DOMException { String content = getNodeValue(); if (content != null) { buf.insert(0, content); } } /** * Concatenates the text of all logically-adjacent text nodes to the * right of this node * @param node the node * @param buffer the buffer * @param parent the parent * @return true - if execution was stopped because the type of node * other than EntityRef, Text, CDATA is encountered, otherwise * return false */ private boolean getWholeTextForward(Node node, StringBuffer buffer, Node parent){ // boolean to indicate whether node is a child of an entity reference boolean inEntRef = false; if (parent!=null) { inEntRef = parent.getNodeType()==Node.ENTITY_REFERENCE_NODE; } while (node != null) { short type = node.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { if (getWholeTextForward(node.getFirstChild(), buffer, node)){ return true; } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { ((NodeImpl)node).getTextContent(buffer); } else { return true; } node = node.getNextSibling(); } // if the parent node is an entity reference node, must // check nodes to the right of the parent entity reference node for logically adjacent // text nodes if (inEntRef) { getWholeTextForward(parent.getNextSibling(), buffer, parent.getParentNode()); return true; } return false; } /** * Concatenates the text of all logically-adjacent text nodes to the left of * the node * @param node the node * @param buffer the buffer * @param parent the parent * @return true - if execution was stopped because the type of node * other than EntityRef, Text, CDATA is encountered, otherwise * return false */ private boolean getWholeTextBackward(Node node, StringBuffer buffer, Node parent){ // boolean to indicate whether node is a child of an entity reference boolean inEntRef = false; if (parent!=null) { inEntRef = parent.getNodeType()==Node.ENTITY_REFERENCE_NODE; } while (node != null) { short type = node.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { if (getWholeTextBackward(node.getLastChild(), buffer, node)){ return true; } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { ((TextImpl)node).insertTextContent(buffer); } else { return true; } node = node.getPreviousSibling(); } // if the parent node is an entity reference node, must // check nodes to the left of the parent entity reference node for logically adjacent // text nodes if (inEntRef) { getWholeTextBackward(parent.getPreviousSibling(), buffer, parent.getParentNode()); return true; } return false; } /** * {@inheritDoc} * * Replaces the text of the current node and all logically-adjacent text * nodes with the specified text. All logically-adjacent text nodes are * removed including the current node unless it was the recipient of the * replacement text. * * @param content * The content of the replacing Text node. * @return text - The Text node created with the specified content. */ @Override public Text replaceWholeText(String content) throws DOMException { if (needsSyncData()) { synchronizeData(); } //if the content is null Node parent = this.getParentNode(); if (content == null || content.length() == 0) { // remove current node if (parent != null) { // check if node in the tree parent.removeChild(this); } return null; } // make sure we can make the replacement if (ownerDocument().errorChecking) { // make sure we can make the replacement if (!canModifyPrev(this) || !canModifyNext(this)) { throw new DOMException(DOMException.NO_MODIFICATION_ALLOWED_ERR, DOMMessageFormatter.formatMessage( DOMMessageFormatter.DOM_DOMAIN, "NO_MODIFICATION_ALLOWED_ERR", null)); } } //replace the text node Text currentNode; this.setData(content); currentNode = this; //check logically-adjacent text nodes Node prev = currentNode.getPreviousSibling(); while (prev != null) { //If the logically-adjacent next node can be removed //remove it. A logically adjacent node can be removed if //it is a Text or CDATASection node or an EntityReference with //Text and CDATA only children. if ((prev.getNodeType() == Node.TEXT_NODE) || (prev.getNodeType() == Node.CDATA_SECTION_NODE) || (prev.getNodeType() == Node.ENTITY_REFERENCE_NODE && hasTextOnlyChildren(prev))) { parent.removeChild(prev); prev = currentNode; } else { break; } prev = prev.getPreviousSibling(); } //check logically-adjacent text nodes Node next = currentNode.getNextSibling(); while (next != null) { //If the logically-adjacent next node can be removed //remove it. A logically adjacent node can be removed if //it is a Text or CDATASection node or an EntityReference with //Text and CDATA only children. if ((next.getNodeType() == Node.TEXT_NODE) || (next.getNodeType() == Node.CDATA_SECTION_NODE) || (next.getNodeType() == Node.ENTITY_REFERENCE_NODE && hasTextOnlyChildren(next))) { parent.removeChild(next); next = currentNode; } else { break; } next = next.getNextSibling(); } return currentNode; } /** * If any EntityReference to be removed has descendants that are not * EntityReference, Text, or CDATASection nodes, the replaceWholeText method * must fail before performing any modification of the document, raising a * DOMException with the code NO_MODIFICATION_ALLOWED_ERR. Traverse previous * siblings of the node to be replaced. If a previous sibling is an * EntityReference node, get it's last child. If the last child was a Text * or CDATASection node and its previous siblings are neither a replaceable * EntityReference or Text or CDATASection nodes, return false. IF the last * child was neither Text nor CDATASection nor a replaceable EntityReference * Node, then return true. If the last child was a Text or CDATASection node * any its previous sibling was not or was an EntityReference that did not * contain only Text or CDATASection nodes, return false. Check this * recursively for EntityReference nodes. * * @param node the node * @return true - can replace text false - can't replace exception must be * raised */ private boolean canModifyPrev(Node node) { boolean textLastChild = false; Node prev = node.getPreviousSibling(); while (prev != null) { short type = prev.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { //If the previous sibling was entityreference //check if its content is replaceable Node lastChild = prev.getLastChild(); //if the entity reference has no children //return false if (lastChild == null) { return false; } //The replacement text of the entity reference should //be either only text,cadatsections or replaceable entity //reference nodes or the last child should be neither of these while (lastChild != null) { short lType = lastChild.getNodeType(); if (lType == Node.TEXT_NODE || lType == Node.CDATA_SECTION_NODE) { textLastChild = true; } else if (lType == Node.ENTITY_REFERENCE_NODE) { if (!canModifyPrev(lastChild)) { return false; } //If the EntityReference child contains //only text, or non-text or ends with a //non-text node. textLastChild = true; } else { //If the last child was replaceable and others are not //Text or CDataSection or replaceable EntityRef nodes //return false. return !textLastChild; } lastChild = lastChild.getPreviousSibling(); } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { //If the previous sibling was text or cdatasection move to next } else { //If the previous sibling was anything but text or //cdatasection or an entity reference, stop search and //return true return true; } prev = prev.getPreviousSibling(); } return true; } /** * If any EntityReference to be removed has descendants that are not * EntityReference, Text, or CDATASection nodes, the replaceWholeText method * must fail before performing any modification of the document, raising a * DOMException with the code NO_MODIFICATION_ALLOWED_ERR. Traverse previous * siblings of the node to be replaced. If a previous sibling is an * EntityReference node, get it's last child. If the first child was a Text * or CDATASection node and its next siblings are neither a replaceable * EntityReference or Text or CDATASection nodes, return false. IF the first * child was neither Text nor CDATASection nor a replaceable EntityReference * Node, then return true. If the first child was a Text or CDATASection * node any its next sibling was not or was an EntityReference that did not * contain only Text or CDATASection nodes, return false. Check this * recursively for EntityReference nodes. * * @param node the node * @return true - can replace text false - can't replace exception must be * raised */ private boolean canModifyNext(Node node) { boolean textFirstChild = false; Node next = node.getNextSibling(); while (next != null) { short type = next.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { //If the previous sibling was entityreference //check if its content is replaceable Node firstChild = next.getFirstChild(); //if the entity reference has no children //return false if (firstChild == null) { return false; } //The replacement text of the entity reference should //be either only text,cadatsections or replaceable entity //reference nodes or the last child should be neither of these while (firstChild != null) { short lType = firstChild.getNodeType(); if (lType == Node.TEXT_NODE || lType == Node.CDATA_SECTION_NODE) { textFirstChild = true; } else if (lType == Node.ENTITY_REFERENCE_NODE) { if (!canModifyNext(firstChild)) { return false; } //If the EntityReference child contains //only text, or non-text or ends with a //non-text node. textFirstChild = true; } else { //If the first child was replaceable text and next //children are not, then return false return !textFirstChild; } firstChild = firstChild.getNextSibling(); } } else if (type == Node.TEXT_NODE || type == Node.CDATA_SECTION_NODE) { //If the previous sibling was text or cdatasection move to next } else { //If the next sibling was anything but text or //cdatasection or an entity reference, stop search and //return true return true; } next = next.getNextSibling(); } return true; } /** * Check if an EntityReference node has Text Only child nodes * * @param node the node * @return true - Contains text only children */ private boolean hasTextOnlyChildren(Node node) { Node child = node; if (child == null) { return false; } child = child.getFirstChild(); while (child != null) { int type = child.getNodeType(); if (type == Node.ENTITY_REFERENCE_NODE) { return hasTextOnlyChildren(child); } if (type != Node.TEXT_NODE && type != Node.CDATA_SECTION_NODE) { return false; } child = child.getNextSibling(); } return true; } // NON-DOM: Returns whether this Text is ignorable whitespace. public boolean isIgnorableWhitespace() { if (needsSyncData()) { synchronizeData(); } return internalIsIgnorableWhitespace(); } /** * {@inheritDoc} * * Break a text node into two sibling nodes. (Note that if the current node * has no parent, they won't wind up as "siblings" -- they'll both be * orphans.) * * @param offset * The offset at which to split. If offset is at the end of the * available data, the second node will be empty. * * @return A reference to the new node (containing data after the offset * point). The original node will contain data up to that point. * * @throws DOMException INDEX_SIZE_ERR * if offset is < 0 or > length. * * @throws DOMException NO_MODIFICATION_ALLOWED_ERR if node is read-only. */ @Override public Text splitText(int offset) throws DOMException { if (needsSyncData()) { synchronizeData(); } if (offset < 0 || offset > data.length() ) { throw new DOMException(DOMException.INDEX_SIZE_ERR, DOMMessageFormatter.formatMessage(DOMMessageFormatter.DOM_DOMAIN, "INDEX_SIZE_ERR", null)); } // split text into two separate nodes Text newText = getOwnerDocument().createTextNode(data.substring(offset)); setNodeValue(data.substring(0, offset)); // insert new text node Node parentNode = getParentNode(); if (parentNode != null) { parentNode.insertBefore(newText, nextSibling); } return newText; } // NON-DOM (used by DOMParser): Reset data for the node. public void replaceData (String value){ data = value; } // NON-DOM (used by DOMParser: Sets data to empty string. // Returns the value the data was set to. public String removeData (){ String olddata=data; data = ""; return olddata; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy