// org.w3c.tidy.Node Maven / Gradle / Ivy
/*
* Copyright 2010 Alibaba Group Holding Limited.
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* @(#)Node.java 1.11 2000/08/16
*
*/
package org.w3c.tidy;
/**
* Node (c) 1998-2000 (W3C) MIT, INRIA, Keio University See Tidy.java for the
* copyright notice. Derived from HTML Tidy Release 4 Aug
* 2000
*
* @author Dave Raggett
* @author Andy Quick (translation to Java)
* @version 1.11, 2000/08/16 Tidy Release 4 Aug 2000
*/
/*
* Used for elements and text nodes element name is null for text nodes start
* and end are offsets into lexbuf which contains the textual content of all
* elements in the parse tree. parent and content allow traversal of the parse
* tree in any direction. attributes are represented as a linked list of AttVal
* nodes which hold the strings for attribute/value pairs.
*/
public class Node {
/* Node type codes; one of these values is stored in the type field. */
public static final short RootNode = 0;
public static final short DocTypeTag = 1;
public static final short CommentTag = 2;
public static final short ProcInsTag = 3;
public static final short TextNode = 4;
public static final short StartTag = 5;
public static final short EndTag = 6;
public static final short StartEndTag = 7;
public static final short CDATATag = 8;
public static final short SectionTag = 9;
public static final short AspTag = 10;
public static final short JsteTag = 11;
public static final short PhpTag = 12;
/* Tree links: enclosing node, previous/next sibling, and last child. */
protected Node parent;
protected Node prev;
protected Node next;
protected Node last;
protected int start; /* start of span onto text array */
protected int end; /* end of span onto text array */
protected byte[] textarray; /* the text array */
protected short type; /*
* TextNode, StartTag, EndTag etc.
*/
protected boolean closed; /*
* true if closed by explicit end tag
*/
protected boolean implicit; /* true if inferred */
protected boolean linebreak; /*
* true if followed by a line break
*/
protected Dict was; /* old tag when it was changed */
protected Dict tag; /* tag's dictionary definition */
protected String element; /* name (null for text nodes) */
protected AttVal attributes; /* head of the linked list of attribute/value pairs */
protected Node content; /* first child node */
/**
 * Creates an empty text node: type {@link #TextNode}, no text array, and a
 * zero-length span.
 */
public Node() {
this(TextNode, null, 0, 0);
}
/**
 * Creates an unlinked node of the given type whose text is the span
 * {@code [start, end)} of {@code textarray}.
 *
 * @param type one of the node type constants of this class
 * @param textarray buffer holding the node text; stored by reference, not copied
 * @param start offset of the first byte of the span
 * @param end offset just past the last byte of the span
 */
public Node(short type, byte[] textarray, int start, int end) {
    this.type = type;
    this.textarray = textarray;
    this.start = start;
    this.end = end;
    // Every other field (tree links, flags, tag, element name, attributes)
    // keeps its Java default of null / false for a freshly created object.
}
public Node(short type, byte[] textarray, int start, int end, String element, TagTable tt) {
this.parent = null;
this.prev = null;
this.next = null;
this.last = null;
this.start = start;
this.end = end;
this.textarray = textarray;
this.type = type;
this.closed = false;
this.implicit = false;
this.linebreak = false;
this.was = null;
this.tag = null;
this.element = element;
this.attributes = null;
this.content = null;
if (type == StartTag || type == StartEndTag || type == EndTag) {
tt.findTag(this);
}
}
/* used to clone heading nodes when split by an <hr>
*/
@Override
protected Object clone() {
    final Node copy = new Node();

    // Scalar state and shared references are carried over as-is.
    copy.parent = this.parent;
    copy.type = this.type;
    copy.closed = this.closed;
    copy.implicit = this.implicit;
    copy.linebreak = this.linebreak;
    copy.was = this.was;
    copy.tag = this.tag;
    copy.element = this.element;

    // The text span is copied into a private array that starts at offset 0.
    if (this.textarray != null) {
        final int length = this.end - this.start;
        copy.textarray = new byte[length];
        copy.start = 0;
        copy.end = length;
        if (length > 0) {
            System.arraycopy(this.textarray, this.start, copy.textarray, 0, length);
        }
    }

    // Attributes are cloned; siblings and children are not copied.
    if (this.attributes != null) {
        copy.attributes = (AttVal) this.attributes.clone();
    }
    return copy;
}
/**
 * Finds the first attribute of this node whose name equals {@code name}.
 *
 * @param name attribute name to look for; a null name matches nothing
 * @return the matching attribute, or null when there is none
 */
public AttVal getAttrByName(String name) {
    AttVal current = this.attributes;
    while (current != null) {
        if (name != null && current.attribute != null && current.attribute.equals(name)) {
            return current;
        }
        current = current.next;
    }
    return null;
}
/* default method for checking an element's attributes */
public void checkAttributes(Lexer lexer) {
    // Walk the attribute list and let each attribute check itself against this node.
    AttVal current = this.attributes;
    while (current != null) {
        current.checkAttribute(lexer, this);
        current = current.next;
    }
}
/**
 * Runs the uniqueness check on every attribute of this node, skipping
 * attributes whose {@code asp} or {@code php} field is set.
 *
 * @param lexer lexer passed through to each attribute's check
 */
public void checkUniqueAttributes(Lexer lexer) {
    AttVal current = this.attributes;
    while (current != null) {
        final boolean isPlain = current.asp == null && current.php == null;
        if (isPlain) {
            current.checkUniqueAttribute(lexer, this);
        }
        current = current.next;
    }
}
/**
 * Appends a new double-quoted attribute to the end of this node's attribute
 * list, resolving its dictionary entry from the default attribute table.
 *
 * @param name attribute name
 * @param value attribute value
 */
public void addAttribute(String name, String value) {
    final AttVal created = new AttVal(null, null, null, null, '"', name, value);
    created.dict = AttributeTable.getDefaultAttributeTable().findAttribute(created);

    AttVal tail = this.attributes;
    if (tail == null) {
        this.attributes = created;
        return;
    }
    // Advance to the last existing attribute and hang the new one off it.
    while (tail.next != null) {
        tail = tail.next;
    }
    tail.next = created;
}
/* remove attribute from node then free it */
public void removeAttribute(AttVal attr) {
    AttVal previous = null;
    AttVal current = this.attributes;
    // Matching is by identity; the whole list is traversed.
    while (current != null) {
        final AttVal following = current.next;
        if (current != attr) {
            previous = current;
        } else if (previous == null) {
            // Removing the head of the list.
            this.attributes = following;
        } else {
            previous.next = following;
        }
        current = following;
    }
}
/* find doctype element */
public Node findDocType() {
    // Only the direct children of this node are searched.
    Node candidate = this.content;
    while (candidate != null && candidate.type != DocTypeTag) {
        candidate = candidate.next;
    }
    return candidate;
}
/**
 * Unlinks the first doctype child of this node, if there is one.
 *
 * The neighbouring siblings are re-linked around the removed node, and both
 * the parent's first-child and last-child pointers are updated so the parent
 * never keeps a reference to the removed node.
 */
public void discardDocType() {
    final Node node = findDocType();
    if (node == null) {
        return;
    }
    if (node.prev != null) {
        node.prev.next = node.next;
    } else {
        node.parent.content = node.next;
    }
    if (node.next != null) {
        node.next.prev = node.prev;
    } else if (node.parent != null && node.parent.last == node) {
        // The doctype was the last child: the parent's tail pointer must move
        // back, otherwise it would keep pointing at the discarded node.
        node.parent.last = node.prev;
    }
    node.next = null;
}
/* remove node from markup tree and discard it */
public static Node discardElement(Node element) {
    if (element == null) {
        return null;
    }
    // Remember the following sibling before the node is unlinked.
    final Node following = element.next;
    removeNode(element);
    return following;
}
/* insert node into markup tree */
public static void insertNodeAtStart(Node element, Node node) {
    node.parent = element;
    final Node oldFirst = element.content;
    if (oldFirst != null) {
        oldFirst.prev = node; // AQ added 13 Apr 2000
    } else {
        // The element had no children, so the new node is also its last child.
        element.last = node;
    }
    node.next = oldFirst;
    node.prev = null;
    element.content = node;
}
/* insert node into markup tree */
public static void insertNodeAtEnd(Node element, Node node) {
    node.parent = element;
    final Node oldLast = element.last;
    node.prev = oldLast;
    if (oldLast == null) {
        // The element had no children, so the new node is also its first child.
        element.content = node;
    } else {
        oldLast.next = node;
    }
    element.last = node;
}
/*
* insert node into markup tree in place of element, which is moved to become
* the child of the node
*/
public static void insertNodeAsParent(Node element, Node node) {
    final Node parent = element.parent;

    // The element becomes the only child of the new node.
    node.content = element;
    node.last = element;
    node.parent = parent;
    element.parent = node;

    // The new node takes over the element's slot in the parent's child list.
    if (parent.content == element) {
        parent.content = node;
    }
    if (parent.last == element) {
        parent.last = node;
    }

    // Take over the link to the previous sibling.
    final Node before = element.prev;
    node.prev = before;
    element.prev = null;
    if (before != null) {
        before.next = node;
    }

    // Take over the link to the next sibling.
    final Node after = element.next;
    node.next = after;
    element.next = null;
    if (after != null) {
        after.prev = node;
    }
}
/* insert node into markup tree before element */
/**
 * Links {@code node} into the sibling list immediately before {@code element}.
 *
 * @param element existing node that will follow the inserted node
 * @param node node to insert; its parent, prev and next links are overwritten
 */
public static void insertNodeBeforeElement(Node element, Node node) {
    final Node parent = element.parent;
    node.parent = parent;
    node.next = element;
    node.prev = element.prev;
    element.prev = node;
    if (node.prev != null) {
        node.prev.next = node;
    }
    // An element with no parent has no first-child pointer to fix up; guard
    // against null the same way insertNodeAfterElement does.
    if (parent != null && parent.content == element) {
        parent.content = node;
    }
}
/* insert node into markup tree after element */
public static void insertNodeAfterElement(Node element, Node node) {
    final Node parent = element.parent;
    node.parent = parent;

    // AQ - 13Jan2000 fix for parent == null
    final boolean elementIsLastChild = parent != null && parent.last == element;
    if (elementIsLastChild) {
        parent.last = node;
    } else {
        final Node following = element.next;
        node.next = following;
        // AQ - 13Jan2000 fix for node.next == null
        if (following != null) {
            following.prev = node;
        }
    }
    element.next = node;
    node.prev = element;
}
/**
 * Discards {@code element} when the lexer says it can be pruned; otherwise, if
 * it is a childless paragraph, converts it to a line break and inserts a
 * second line break after it.
 *
 * @param lexer lexer supplying the tag table, the prune decision and inferred tags
 * @param element node to examine
 */
public static void trimEmptyElement(Lexer lexer, Node element) {
    final TagTable tt = lexer.configuration.tt;

    if (lexer.canPrune(element)) {
        // Text nodes are dropped silently; anything else is reported first.
        if (element.type != TextNode) {
            Report.warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
        }
        discardElement(element);
        return;
    }

    final boolean isEmptyParagraph = element.tag == tt.tagP && element.content == null;
    if (isEmptyParagraph) {
        /* replace <p></p> by <br><br> to preserve formatting */
        final Node lineBreak = lexer.inferredTag("br");
        Node.coerceNode(lexer, element, tt.tagBr);
        Node.insertNodeAfterElement(element, lineBreak);
    }
}
/*
* This maps <em>hello </em><strong>world</strong> to <em>hello</em>
* <strong>world</strong>. If last child of element is a text node then trim
* trailing white space character moving it to after element's end tag.
*/
/**
 * Drops one trailing space byte from the text node {@code last}.
 *
 * Nothing happens unless {@code last} is a non-empty text node. The byte that
 * is examined is read from {@code lexer.lexbuf}, not from
 * {@code last.textarray}.
 *
 * @param lexer lexer supplying the tag table and the buffer, and receiving the
 *        {@code insertspace} flag
 * @param element element whose tag decides how the space is treated
 * @param last candidate text node; may be null
 */
public static void trimTrailingSpace(Lexer lexer, Node element, Node last) {
byte c;
TagTable tt = lexer.configuration.tt;
if (last != null && last.type == Node.TextNode && last.end > last.start) {
c = lexer.lexbuf[last.end - 1];
/*
 * NOTE(review): c is a signed byte (-128..127), so "c == 160" can never be
 * true and only the plain space test is live. Presumably 160 was meant to
 * match a non-breaking space; confirm the intended behaviour before changing
 * this, because enabling it would alter what gets trimmed.
 */
if (c == 160 || c == (byte) ' ') {
/* take care with <td>&nbsp;</td> */
if (element.tag == tt.tagTd || element.tag == tt.tagTh) {
/* in a table cell, never shrink the text down to nothing */
if (last.end > last.start + 1) {
last.end -= 1;
}
} else {
last.end -= 1;
/* inline elements that are not form fields ask the lexer to re-insert a space */
if ((element.tag.model & Dict.CM_INLINE) != 0 && !((element.tag.model & Dict.CM_FIELD) != 0)) {
lexer.insertspace = true;
}
/* if empty string then delete from parse tree */
if (last.start == last.end) {
trimEmptyElement(lexer, last);
}
}
}
}
}
/*
* This maps <p>hello<em> world</em> to <p>hello <em>world</em>. Trims
* initial space, by moving it before the start tag, or if this element is
* the first in parent's content, then by discarding the space
*/
/**
 * Removes one leading space byte from the text node {@code text}.
 *
 * When {@code element} is an inline, non-field element that is not the first
 * child of its parent, the space is preserved in front of the element: it is
 * appended to a preceding text sibling, or a new one-byte text node is linked
 * in before the element.
 *
 * @param lexer lexer used to allocate the new text node
 * @param element element whose content starts with {@code text}
 * @param text text node to trim; left untouched unless it is a non-empty text
 *        node beginning with a space
 */
public static void trimInitialSpace(Lexer lexer, Node element, Node text) {
    Node prev, node;
    // GLP: Local fix to Bug 119789. Remove this comment when parser.c is updated.
    // 31-Oct-00.
    // The span is checked for being non-empty BEFORE its first byte is read, so
    // an empty span is never indexed (and cannot run past the array).
    if (text.type == TextNode && text.start < text.end && text.textarray[text.start] == (byte) ' ') {
        if ((element.tag.model & Dict.CM_INLINE) != 0 && !((element.tag.model & Dict.CM_FIELD) != 0)
                && element.parent.content != element) {
            prev = element.prev;
            if (prev != null && prev.type == TextNode) {
                // Give the preceding text a trailing space unless it already has one.
                if (prev.textarray[prev.end - 1] != (byte) ' ') {
                    prev.textarray[prev.end++] = (byte) ' ';
                }
                ++element.start;
            } else /* create new node */ {
                node = lexer.newNode();
                // Local fix for bug 228486 (GLP). This handles the case
                // where we need to create a preceeding text node but there are
                // no "slots" in textarray that we can steal from the current
                // element. Therefore, we create a new textarray containing
                // just the blank. When Tidy is fixed, this should be removed.
                if (element.start >= element.end) {
                    node.start = 0;
                    node.end = 1;
                    node.textarray = new byte[1];
                } else {
                    node.start = element.start++;
                    node.end = element.start;
                    node.textarray = element.textarray;
                }
                node.textarray[node.start] = (byte) ' ';
                node.prev = prev;
                if (prev != null) {
                    prev.next = node;
                }
                node.next = element;
                element.prev = node;
                node.parent = element.parent;
            }
        }
        /* discard the space in current node */
        ++text.start;
    }
}
/*
* Move initial and trailing space out. This routine maps: hello<em>
* world</em> to hello <em>world</em> and <em>hello </em><strong>world</strong>
* to <em>hello</em> <strong>world</strong>
*/
public static void trimSpaces(Lexer lexer, Node element) {
    final TagTable tt = lexer.configuration.tt;

    // Leading space: first child must be text, and <pre> content is left alone.
    final Node first = element.content;
    if (first != null && first.type == Node.TextNode && element.tag != tt.tagPre) {
        trimInitialSpace(lexer, element, first);
    }

    // Trailing space: the last child is read only after the leading trim has run.
    final Node trailing = element.last;
    if (trailing != null && trailing.type == Node.TextNode) {
        trimTrailingSpace(lexer, element, trailing);
    }
}
/**
 * Tells whether any ancestor of this node carries the given tag definition.
 *
 * @param tag dictionary entry to look for, compared by identity
 * @return true if some ancestor's tag is {@code tag}; the node itself is not tested
 */
public boolean isDescendantOf(Dict tag) {
    Node ancestor = this.parent;
    while (ancestor != null) {
        if (ancestor.tag == tag) {
            return true;
        }
        ancestor = ancestor.parent;
    }
    return false;
}
/*
* the doctype has been found after other tags, and needs moving to before
* the html element
*/
public static void insertDocType(Lexer lexer, Node element, Node doctype) {
    final TagTable tt = lexer.configuration.tt;
    Report.warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);

    // Climb from the reporting element up to the html element.
    Node html = element;
    while (html.tag != tt.tagHtml) {
        html = html.parent;
    }
    insertNodeBeforeElement(html, doctype);
}
/**
 * Finds the body element: the first body child of the first html child of
 * this node.
 *
 * @param tt tag table supplying the html and body tag definitions
 * @return the body node, or null if either the html or the body element is missing
 */
public Node findBody(TagTable tt) {
    Node html;
    for (html = this.content; html != null; html = html.next) {
        if (html.tag == tt.tagHtml) {
            break;
        }
    }
    if (html == null) {
        return null;
    }

    Node body;
    for (body = html.content; body != null; body = body.next) {
        if (body.tag == tt.tagBody) {
            break;
        }
    }
    return body;
}
/**
 * @return true if this node's type is StartTag or StartEndTag
 */
public boolean isElement() {
    return this.type == StartTag || this.type == StartEndTag;
}
/*
* unexpected content in table row is moved to just before the table in
* accordance with Netscape and IE. This code assumes that node hasn't been
* inserted into the row.
*/
public static void moveBeforeTable(Node row, Node node, TagTable tt) {
    /* first find the table element */
    Node table = row.parent;
    while (table != null && table.tag != tt.tagTable) {
        table = table.parent;
    }
    if (table == null) {
        // No enclosing table: the node is left where it is.
        return;
    }

    // Link the node in as the sibling immediately before the table.
    if (table.parent.content == table) {
        table.parent.content = node;
    }
    node.prev = table.prev;
    node.next = table;
    table.prev = node;
    node.parent = table.parent;
    if (node.prev != null) {
        node.prev.next = node;
    }
}
/*
* if a table row is empty then insert an empty cell this practice is
* consistent with browser behavior and avoids potential problems with row
* spanning cells
*/
public static void fixEmptyRow(Lexer lexer, Node row) {
    if (row.content != null) {
        return;
    }
    // The row has no children: add an inferred <td> and report it.
    final Node cell = lexer.inferredTag("td");
    insertNodeAtEnd(row, cell);
    Report.warning(lexer, row, cell, Report.MISSING_STARTTAG);
}
/**
 * Turns {@code node} into an implicit start tag for {@code tag}, remembering
 * its previous tag in {@code was}, after reporting an OBSOLETE_ELEMENT warning.
 *
 * @param lexer lexer used to build the replacement node shown in the warning
 * @param node node to convert in place
 * @param tag dictionary entry of the new tag
 */
public static void coerceNode(Lexer lexer, Node node, Dict tag) {
    // The inferred node is used only as the "replacement" argument of the warning.
    final Node replacement = lexer.inferredTag(tag.name);
    Report.warning(lexer, node, replacement, Report.OBSOLETE_ELEMENT);

    node.was = node.tag;
    node.tag = tag;
    node.element = tag.name;
    node.type = StartTag;
    node.implicit = true;
}
/* extract a node and its children from a markup tree */
public static void removeNode(Node node) {
    final Node before = node.prev;
    final Node after = node.next;
    final Node parent = node.parent;

    // Close the gap in the sibling list.
    if (before != null) {
        before.next = after;
    }
    if (after != null) {
        after.prev = before;
    }

    // Fix the parent's first-child / last-child pointers if they referred to node.
    if (parent != null) {
        if (parent.content == node) {
            parent.content = after;
        }
        if (parent.last == node) {
            parent.last = before;
        }
    }

    // Detach the node itself; its own children stay attached to it.
    node.next = null;
    node.prev = null;
    node.parent = null;
}
/**
 * Appends {@code node} to {@code element} when it is a comment, processing
 * instruction, CDATA, section, ASP, JSTE or PHP node.
 *
 * @param element node that receives the child
 * @param node candidate child
 * @return true if the node was appended, false if its type is none of the above
 */
public static boolean insertMisc(Node element, Node node) {
    switch (node.type) {
    case CommentTag:
    case ProcInsTag:
    case CDATATag:
    case SectionTag:
    case AspTag:
    case JsteTag:
    case PhpTag:
        insertNodeAtEnd(element, node);
        return true;
    default:
        return false;
    }
}
/*
* used to determine how attributes without values should be printed this
* was introduced to deal with user defined tags e.g. Cold Fusion
*/
public static boolean isNewNode(Node node) {
    // A missing node or a node with no tag definition counts as "new".
    if (node == null || node.tag == null) {
        return true;
    }
    return (node.tag.model & Dict.CM_NEW) != 0;
}
/**
 * @return true if this node has exactly one child
 */
public boolean hasOneChild() {
    final Node first = this.content;
    if (first == null) {
        return false;
    }
    return first.next == null;
}
/* find html element */
public Node findHTML(TagTable tt) {
    // Only the direct children of this node are searched.
    Node candidate = this.content;
    while (candidate != null && candidate.tag != tt.tagHtml) {
        candidate = candidate.next;
    }
    return candidate;
}
/**
 * Finds the head element among the children of this node's html element.
 *
 * @param tt tag table supplying the html and head tag definitions
 * @return the head node, or null if either the html or the head element is missing
 */
public Node findHEAD(TagTable tt) {
    final Node html = this.findHTML(tt);
    if (html == null) {
        return null;
    }
    Node child = html.content;
    while (child != null && child.tag != tt.tagHead) {
        child = child.next;
    }
    return child;
}
/**
 * Verifies that the links of this node and, recursively, of all its
 * descendants are mutually consistent.
 *
 * @return false as soon as a sibling link is not mirrored, a first or last
 *         child is not recorded in its parent, or a node is missing from its
 *         parent's child list; true otherwise
 */
public boolean checkNodeIntegrity() {
    // Sibling links must point back at this node.
    if (this.prev != null && this.prev.next != this) {
        return false;
    }
    if (this.next != null && this.next.prev != this) {
        return false;
    }

    if (this.parent != null) {
        // A node without a previous/next sibling must be the parent's first/last child.
        if (this.prev == null && this.parent.content != this) {
            return false;
        }
        if (this.next == null && this.parent.last != this) {
            return false;
        }
        // The node must be reachable from the parent's child list.
        boolean listed = false;
        Node sibling = this.parent.content;
        while (sibling != null && !listed) {
            listed = (sibling == this);
            sibling = sibling.next;
        }
        if (!listed) {
            return false;
        }
    }

    // Recurse into the children.
    Node child = this.content;
    while (child != null) {
        if (!child.checkNodeIntegrity()) {
            return false;
        }
        child = child.next;
    }
    return true;
}
/*
* Add class="foo" to node
*/
/**
 * Adds {@code classname} to the node's {@code class} attribute, creating the
 * attribute when the node does not have one.
 *
 * @param node node to modify
 * @param classname class name to add
 */
public static void addClass(Node node, String classname) {
    final AttVal classattr = node.getAttrByName("class");
    if (classattr == null) {
        /* create new class attribute */
        node.addAttribute("class", classname);
        return;
    }
    if (classattr.value == null || "".equals(classattr.value)) {
        // A class attribute with no value yet: store the name directly, so the
        // result is neither the literal text "null" nor padded with a space.
        classattr.value = classname;
    } else {
        /* append class name after a space */
        classattr.value = classattr.value + " " + classname;
    }
}
/* --------------------- DEBUG -------------------------- */
/* Display names indexed by node type code; one entry per type constant, 0 (RootNode) to 12 (PhpTag). */
private static final String[] nodeTypeString = { "RootNode", "DocTypeTag", "CommentTag", "ProcInsTag", "TextNode",
        "StartTag", "EndTag", "StartEndTag", "CDATATag", "SectionTag", "AspTag", "JsteTag", "PhpTag" };

/**
 * Renders this node and all of its following siblings, each with its children
 * nested inside, as a comma-separated list of bracketed entries.
 *
 * @return debug representation of the node list starting at this node
 */
@Override
public String toString() {
    final StringBuilder s = new StringBuilder();
    for (Node n = this; n != null; n = n.next) {
        s.append("[Node type=");
        // Guard the lookup so an unexpected type code cannot throw while debugging.
        if (n.type >= 0 && n.type < nodeTypeString.length) {
            s.append(nodeTypeString[n.type]);
        } else {
            s.append("Unknown(").append(n.type).append(')');
        }
        s.append(",element=");
        s.append(n.element != null ? n.element : "null");
        if (n.type == TextNode || n.type == CommentTag || n.type == ProcInsTag) {
            s.append(",text=");
            if (n.textarray != null && n.start <= n.end) {
                s.append('"');
                s.append(Lexer.getString(n.textarray, n.start, n.end - n.start));
                s.append('"');
            } else {
                s.append("null");
            }
        }
        s.append(",content=");
        // The child's toString() also covers that child's following siblings.
        s.append(n.content != null ? n.content.toString() : "null");
        s.append(']');
        if (n.next != null) {
            s.append(',');
        }
    }
    return s.toString();
}
/* --------------------- END DEBUG ---------------------- */
/* --------------------- DOM ---------------------------- */
/* DOM wrapper for this node; created on first request by getAdapter(). */
protected org.w3c.dom.Node adapter = null;

/**
 * Returns the DOM wrapper for this node, creating it on the first call. The
 * wrapper class is chosen from the node type at that time and then reused.
 *
 * @return the DOM adapter of this node
 */
protected org.w3c.dom.Node getAdapter() {
    if (adapter != null) {
        return adapter;
    }
    switch (this.type) {
    case RootNode:
        adapter = new DOMDocumentImpl(this);
        break;
    case DocTypeTag:
        adapter = new DOMDocumentTypeImpl(this);
        break;
    case StartTag:
    case StartEndTag:
        adapter = new DOMElementImpl(this);
        break;
    case TextNode:
        adapter = new DOMTextImpl(this);
        break;
    case CommentTag:
        adapter = new DOMCommentImpl(this);
        break;
    case CDATATag:
        adapter = new DOMCDATASectionImpl(this);
        break;
    case ProcInsTag:
        adapter = new DOMProcessingInstructionImpl(this);
        break;
    default:
        // Every remaining node type gets the generic wrapper.
        adapter = new DOMNodeImpl(this);
        break;
    }
    return adapter;
}
/**
 * Copies this node via {@link #clone()} and, when requested, its whole subtree.
 *
 * @param deep true to also copy all descendants and attach them to the copy
 * @return the copy of this node
 */
protected Node cloneNode(boolean deep) {
    final Node copy = (Node) this.clone();
    if (!deep) {
        return copy;
    }
    // Copy each child recursively and append it in the original order.
    for (Node child = this.content; child != null; child = child.next) {
        insertNodeAtEnd(copy, child.cloneNode(true));
    }
    return copy;
}
/**
 * Overwrites the type code of this node.
 *
 * @param newType the new node type code
 */
protected void setType(short newType) {
this.type = newType;
}
/* --------------------- END DOM ------------------------ */
}