All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xmlcml.cml.html.HtmlElement Maven / Gradle / Ivy

/**
 *    Copyright 2011 Peter Murray-Rust et al.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

    package org.xmlcml.cml.html;

import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import nu.xom.Attribute;
import nu.xom.Element;
import nu.xom.Node;
import nu.xom.Nodes;

import org.apache.log4j.Logger;
import org.xmlcml.cml.base.CMLConstants;
import org.xmlcml.cml.base.CMLUtil;

/*
 Legend: O = Optional, F = Forbidden, E = Empty, D = Deprecated, L = Loose DTD, F (in the DTD column) = Frameset DTD
Name 	Start Tag 	End Tag 	Empty 	Depr. 	DTD 	Description
A 	  	  	  	  	  	anchor
ABBR 	  	  	  	  	  	abbreviated form (e.g., WWW, HTTP, etc.)
ACRONYM 	  	  	  	  	  	 
ADDRESS 	  	  	  	  	  	information on author
APPLET 	  	  	  	D 	L 	Java applet
AREA 	  	F 	E 	  	  	client-side image map area
B 	  	  	  	  	  	bold text style
BASE 	  	F 	E 	  	  	document base URI
BASEFONT 	  	F 	E 	D 	L 	base font size
BDO 	  	  	  	  	  	I18N BiDi over-ride
BIG 	  	  	  	  	  	large text style
BLOCKQUOTE 	  	  	  	  	  	long quotation
BODY 	O 	O 	  	  	  	document body
BR 	  	F 	E 	  	  	forced line break
BUTTON 	  	  	  	  	  	push button
CAPTION 	  	  	  	  	  	table caption
CENTER 	  	  	  	D 	L 	shorthand for DIV align=center
CITE 	  	  	  	  	  	citation
CODE 	  	  	  	  	  	computer code fragment
COL 	  	F 	E 	  	  	table column
COLGROUP 	  	O 	  	  	  	table column group
DD 	  	O 	  	  	  	definition description
DEL 	  	  	  	  	  	deleted text
DFN 	  	  	  	  	  	instance definition
DIR 	  	  	  	D 	L 	directory list
DIV 	  	  	  	  	  	generic language/style container
DL 	  	  	  	  	  	definition list
DT 	  	O 	  	  	  	definition term
EM 	  	  	  	  	  	emphasis
FIELDSET 	  	  	  	  	  	form control group
FONT 	  	  	  	D 	L 	local change to font
FORM 	  	  	  	  	  	interactive form
FRAME 	  	F 	E 	  	F 	subwindow
FRAMESET 	  	  	  	  	F 	window subdivision
H1 	  	  	  	  	  	heading
H2 	  	  	  	  	  	heading
H3 	  	  	  	  	  	heading
H4 	  	  	  	  	  	heading
H5 	  	  	  	  	  	heading
H6 	  	  	  	  	  	heading
HEAD 	O 	O 	  	  	  	document head
HR 	  	F 	E 	  	  	horizontal rule
HTML 	O 	O 	  	  	  	document root element
I 	  	  	  	  	  	italic text style
IFRAME 	  	  	  	  	L 	inline subwindow
IMG 	  	F 	E 	  	  	Embedded image
INPUT 	  	F 	E 	  	  	form control
INS 	  	  	  	  	  	inserted text
ISINDEX 	  	F 	E 	D 	L 	single line prompt
KBD 	  	  	  	  	  	text to be entered by the user
LABEL 	  	  	  	  	  	form field label text
LEGEND 	  	  	  	  	  	fieldset legend
LI 	  	O 	  	  	  	list item
LINK 	  	F 	E 	  	  	a media-independent link
MAP 	  	  	  	  	  	client-side image map
MENU 	  	  	  	D 	L 	menu list
META 	  	F 	E 	  	  	generic metainformation
NOFRAMES 	  	  	  	  	F 	alternate content container for non frame-based rendering
NOSCRIPT 	  	  	  	  	  	alternate content container for non script-based rendering
OBJECT 	  	  	  	  	  	generic embedded object
OL 	  	  	  	  	  	ordered list
OPTGROUP 	  	  	  	  	  	option group
OPTION 	  	O 	  	  	  	selectable choice
P 	  	O 	  	  	  	paragraph
PARAM 	  	F 	E 	  	  	named property value
PRE 	  	  	  	  	  	preformatted text
Q 	  	  	  	  	  	short inline quotation
S 	  	  	  	D 	L 	strike-through text style
SAMP 	  	  	  	  	  	sample program output, scripts, etc.
SCRIPT 	  	  	  	  	  	script statements
SELECT 	  	  	  	  	  	option selector
SMALL 	  	  	  	  	  	small text style
SPAN 	  	  	  	  	  	generic language/style container
STRIKE 	  	  	  	D 	L 	strike-through text
STRONG 	  	  	  	  	  	strong emphasis
STYLE 	  	  	  	  	  	style info
SUB 	  	  	  	  	  	subscript
SUP 	  	  	  	  	  	superscript
TABLE 	  	  	  	  	  	 
TBODY 	O 	O 	  	  	  	table body
TD 	  	O 	  	  	  	table data cell
TEXTAREA 	  	  	  	  	  	multi-line text field
TFOOT 	  	O 	  	  	  	table footer
TH 	  	O 	  	  	  	table header cell
THEAD 	  	O 	  	  	  	table header
TITLE 	  	  	  	  	  	document title
TR 	  	O 	  	  	  	table row
TT 	  	  	  	  	  	teletype or monospaced text style
U 	  	  	  	D 	L 	underlined text style
UL 	  	  	  	  	  	unordered list
VAR 	  	  	  	  	  	instance of a variable or program argument

 */
/**
 * Base class for lightweight generic HTML elements.
 * <p>
 * The instance methods perform no checking, i.e. an element can take any
 * name or attributes. Every element is created in the namespace given by
 * {@code XHTML_NS}.
 *
 * @author pm286
 */
public abstract class HtmlElement extends Element implements CMLConstants {
	private static final String ID = "id";
	private static final Logger LOG = Logger.getLogger(HtmlElement.class);

	/** Upper-case names of the HTML tags accepted by {@link #create(Element)}. */
	public static String[] tags = {
		"A", "ABBR", "ACRONYM", "ADDRESS", "APPLET", "AREA",
		"B", "BASE", "BASEFONT", "BDO", "BIG", "BLOCKQUOTE", "BODY", "BR", "BUTTON",
		"CAPTION", "CENTER", "CITE", "CODE", "COL", "COLGROUP",
		"DD", "DEL", "DFN", "DIR", "DIV", "DL", "DT",
		"EM",
		"FIELDSET", "FONT", "FORM", "FRAME", "FRAMESET",
		"H1", "H2", "H3", "H4", "H5", "H6", "HEAD", "HR", "HTML",
		"I", "IFRAME", "IMG", "INPUT", "INS", "ISINDEX",
		"KBD",
		"LABEL", "LEGEND", "LI", "LINK",
		"MAP", "MENU", "META",
		"NOFRAMES", "NOSCRIPT",
		"OBJECT", "OL", "OPTGROUP", "OPTION",
		"P", "PARAM", "PRE",
		"Q",
		"S", "SAMP", "SCRIPT", "SELECT", "SMALL", "SPAN", "STRIKE", "STRONG", "STYLE", "SUB", "SUP",
		"TABLE", "TBODY", "TD", "TEXTAREA", "TFOOT", "TH", "THEAD", "TITLE", "TR", "TT",
		"U", "UL",
		"VAR",
	};

	/** The entries of {@link #tags} collected into a set for membership tests. */
	public static Set<String> TAGSET;
	static {
		TAGSET = new HashSet<String>();
		for (String tag : tags) {
			TAGSET.add(tag);
		}
	}

	/** Named targets; not referenced anywhere else in this class. */
	public enum Target {
		bottom,
		menu,
		separate;
	}

	/**
	 * Creates an element with the given local name in the {@code XHTML_NS}
	 * namespace.
	 *
	 * @param name local name of the element
	 */
	public HtmlElement(String name) {
		super(name, XHTML_NS);
	}

	/**
	 * Recursively converts a generic XOM element into the matching
	 * {@code HtmlElement} subclass.
	 * <p>
	 * Attributes are copied with {@code CMLUtil.copyAttributes}; child
	 * elements are converted recursively and all other child nodes are
	 * appended as copies. The tag match is case-insensitive and
	 * locale-independent.
	 *
	 * @param element element to convert; its local name selects the subclass
	 * @return a new tree of {@code HtmlElement}s mirroring {@code element}
	 * @throws RuntimeException if the local name of {@code element} or of any
	 *         descendant element is not a known HTML tag
	 */
	public static HtmlElement create(Element element) {
		HtmlElement htmlElement = createEmptyElement(element.getLocalName());
		CMLUtil.copyAttributes(element, htmlElement);
		for (int i = 0; i < element.getChildCount(); i++) {
			Node child = element.getChild(i);
			if (child instanceof Element) {
				htmlElement.appendChild(HtmlElement.create((Element) child));
			} else {
				// text, comments, etc. are detached copies so the source tree is untouched
				htmlElement.appendChild(child.copy());
			}
		}
		return htmlElement;
	}

	/** Instantiates the childless, attribute-less element class for a tag name. */
	private static HtmlElement createEmptyElement(String tag) {
		if (HtmlA.TAG.equalsIgnoreCase(tag)) {
			return new HtmlA();
		}
		if (HtmlB.TAG.equalsIgnoreCase(tag)) {
			return new HtmlB();
		}
		if (HtmlBig.TAG.equalsIgnoreCase(tag)) {
			return new HtmlBig();
		}
		if (HtmlBody.TAG.equalsIgnoreCase(tag)) {
			return new HtmlBody();
		}
		if (HtmlBr.TAG.equalsIgnoreCase(tag)) {
			return new HtmlBr();
		}
		if (HtmlCaption.TAG.equalsIgnoreCase(tag)) {
			return new HtmlCaption();
		}
		if (HtmlDiv.TAG.equalsIgnoreCase(tag)) {
			return new HtmlDiv();
		}
		if (HtmlFrame.TAG.equalsIgnoreCase(tag)) {
			return new HtmlFrame();
		}
		if (HtmlFrameset.TAG.equalsIgnoreCase(tag)) {
			return new HtmlFrameset();
		}
		if (HtmlH1.TAG.equalsIgnoreCase(tag)) {
			return new HtmlH1();
		}
		if (HtmlH2.TAG.equalsIgnoreCase(tag)) {
			return new HtmlH2();
		}
		if (HtmlH3.TAG.equalsIgnoreCase(tag)) {
			return new HtmlH3();
		}
		if (HtmlHead.TAG.equalsIgnoreCase(tag)) {
			return new HtmlHead();
		}
		if (HtmlHr.TAG.equalsIgnoreCase(tag)) {
			return new HtmlHr();
		}
		if (HtmlHtml.TAG.equalsIgnoreCase(tag)) {
			return new HtmlHtml();
		}
		if (HtmlI.TAG.equalsIgnoreCase(tag)) {
			return new HtmlI();
		}
		if (HtmlImg.TAG.equalsIgnoreCase(tag)) {
			return new HtmlImg();
		}
		if (HtmlLi.TAG.equalsIgnoreCase(tag)) {
			return new HtmlLi();
		}
		if (HtmlLink.TAG.equalsIgnoreCase(tag)) {
			return new HtmlLink();
		}
		if (HtmlMeta.TAG.equalsIgnoreCase(tag)) {
			return new HtmlMeta();
		}
		if (HtmlOl.TAG.equalsIgnoreCase(tag)) {
			return new HtmlOl();
		}
		if (HtmlP.TAG.equalsIgnoreCase(tag)) {
			return new HtmlP();
		}
		if (HtmlS.TAG.equalsIgnoreCase(tag)) {
			return new HtmlS();
		}
		if (HtmlSmall.TAG.equalsIgnoreCase(tag)) {
			return new HtmlSmall();
		}
		if (HtmlSpan.TAG.equalsIgnoreCase(tag)) {
			return new HtmlSpan();
		}
		if (HtmlStrong.TAG.equalsIgnoreCase(tag)) {
			return new HtmlStrong();
		}
		if (HtmlStyle.TAG.equalsIgnoreCase(tag)) {
			return new HtmlStyle();
		}
		if (HtmlSub.TAG.equalsIgnoreCase(tag)) {
			return new HtmlSub();
		}
		if (HtmlSup.TAG.equalsIgnoreCase(tag)) {
			return new HtmlSup();
		}
		if (HtmlTable.TAG.equalsIgnoreCase(tag)) {
			return new HtmlTable();
		}
		if (HtmlTbody.TAG.equalsIgnoreCase(tag)) {
			return new HtmlTbody();
		}
		if (HtmlTfoot.TAG.equalsIgnoreCase(tag)) {
			return new HtmlTfoot();
		}
		if (HtmlThead.TAG.equalsIgnoreCase(tag)) {
			return new HtmlThead();
		}
		if (HtmlTd.TAG.equalsIgnoreCase(tag)) {
			return new HtmlTd();
		}
		if (HtmlTh.TAG.equalsIgnoreCase(tag)) {
			return new HtmlTh();
		}
		if (HtmlTr.TAG.equalsIgnoreCase(tag)) {
			return new HtmlTr();
		}
		if (HtmlTt.TAG.equalsIgnoreCase(tag)) {
			return new HtmlTt();
		}
		if (HtmlUl.TAG.equalsIgnoreCase(tag)) {
			return new HtmlUl();
		}
		// Locale.ROOT: default-locale case mapping (e.g. Turkish dotted/dotless i)
		// would otherwise make valid tags such as "title" miss the TAGSET lookup.
		if (TAGSET.contains(tag.toUpperCase(Locale.ROOT))) {
			return new HtmlGeneric(tag.toLowerCase(Locale.ROOT));
		}
		throw new RuntimeException("Unknown html tag "+tag);
	}

	/**
	 * Adds an attribute without a namespace to this element.
	 *
	 * @param name attribute name
	 * @param value attribute value
	 */
	public void setAttribute(String name, String value) {
		this.addAttribute(new Attribute(name, value));
	}

	/**
	 * Appends text to this element. Existing children are kept; use
	 * {@link #setValue(String)} to replace them.
	 *
	 * @param content text to append
	 */
	public void setContent(String content) {
		this.appendChild(content);
	}

	/**
	 * Sets the {@code class} attribute.
	 *
	 * @param value attribute value
	 */
	public void setClass(String value) {
		this.setAttribute("class", value);
	}

	/**
	 * Sets the {@code id} attribute.
	 *
	 * @param value attribute value
	 */
	public void setId(String value) {
		this.setAttribute(ID, value);
	}

	/**
	 * Sets the {@code name} attribute.
	 *
	 * @param value attribute value
	 */
	public void setName(String value) {
		this.setAttribute("name", value);
	}

	/**
	 * Writes this element to a stream by delegating to
	 * {@code CMLUtil.debug(this, os, 1)}.
	 * NOTE(review): the meaning of the trailing {@code 1} (presumably an
	 * indent width) is defined in CMLUtil and not visible here.
	 *
	 * @param os stream to write to
	 * @throws IOException declared for failures while writing
	 */
	public void output(OutputStream os) throws IOException {
		CMLUtil.debug(this, os, 1);
	}

	/**
	 * Emits this element for debugging by delegating to
	 * {@code CMLUtil.debug(this, msg)}.
	 *
	 * @param msg message passed through to the debug helper
	 */
	public void debug(String msg) {
		CMLUtil.debug(this, msg);
	}

	/**
	 * Replaces all children of this element with a single text value.
	 *
	 * @param value new text content
	 */
	public void setValue(String value) {
		this.removeChildren();
		this.appendChild(value);
	}

	/**
	 * Returns the value of the {@code id} attribute.
	 *
	 * @return the attribute value as reported by {@code getAttributeValue}
	 */
	public String getId() {
		return this.getAttributeValue(ID);
	}

	/**
	 * Finds elements below {@code root} whose local name equals {@code tag}
	 * (lower-cased, namespace ignored).
	 * <p>
	 * NOTE(review): despite the method name, the XPath {@code .//*} matches
	 * descendants only and never {@code root} itself; confirm whether
	 * {@code root} was meant to be included.
	 *
	 * @param root element to search from
	 * @param tag tag name; lower-cased before matching
	 * @return matching elements in the order returned by the query, possibly empty
	 * @throws ClassCastException if a matching node is not an {@code HtmlElement}
	 */
	public static List<HtmlElement> getSelfOrDescendants(HtmlElement root, String tag) {
		return queryByLocalName(root, ".//*", tag);
	}

	/**
	 * Returns the single element found by
	 * {@link #getSelfOrDescendants(HtmlElement, String)}.
	 *
	 * @param root element to search from
	 * @param tag tag name; lower-cased before matching
	 * @return the only match, or {@code null} if there are zero or several matches
	 */
	public static HtmlElement getSingleSelfOrDescendant(HtmlElement root, String tag) {
		return singleOrNull(getSelfOrDescendants(root, tag));
	}

	/**
	 * Finds direct child elements of {@code root} whose local name equals
	 * {@code tag} (lower-cased, namespace ignored).
	 *
	 * @param root parent element
	 * @param tag tag name; lower-cased before matching
	 * @return matching children in the order returned by the query, possibly empty
	 * @throws ClassCastException if a matching node is not an {@code HtmlElement}
	 */
	public static List<HtmlElement> getChildElements(HtmlElement root, String tag) {
		return queryByLocalName(root, "./*", tag);
	}

	/**
	 * Returns the single element found by
	 * {@link #getChildElements(HtmlElement, String)}.
	 *
	 * @param root parent element
	 * @param tag tag name; lower-cased before matching
	 * @return the only match, or {@code null} if there are zero or several matches
	 */
	public static HtmlElement getSingleChildElement(HtmlElement root, String tag) {
		return singleOrNull(getChildElements(root, tag));
	}

	/** Runs {@code pathPrefix[local-name()='tag']} on root and casts each hit to HtmlElement. */
	private static List<HtmlElement> queryByLocalName(HtmlElement root, String pathPrefix, String tag) {
		String localName = tag.toLowerCase(Locale.ROOT);
		Nodes nodes = root.query(pathPrefix + "[local-name()='" + localName + "']");
		List<HtmlElement> elements = new ArrayList<HtmlElement>(nodes.size());
		for (int i = 0; i < nodes.size(); i++) {
			elements.add((HtmlElement) nodes.get(i));
		}
		return elements;
	}

	/** Returns the sole entry of the list, or null when its size is not exactly one. */
	private static HtmlElement singleOrNull(List<HtmlElement> elements) {
		return (elements.size() != 1) ? null : elements.get(0);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy