All downloads are free. The search and download functionality uses the official Maven repository.

src.au.id.jericho.lib.html.Element Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a simple but powerful Java library allowing analysis and manipulation of parts of an HTML document, including some common server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.

There is a newer version: 2.3
Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 1.5
// Copyright (C) 2004 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package au.id.jericho.lib.html;

import java.util.*;

/**
 * Represents an HTML element,
 * which encompasses the {@link StartTag}, an optional {@link EndTag} and all content in between.
 * <p>
 * If the start tag has no corresponding end tag:
 * <ul>
 *  <li>
 *   If the end tag is {@linkplain StartTag#isEndTagOptional() optional}, the end of the element occurs at the
 *   start of the next tag that implicitly terminates this type of element.
 *  <li>
 *   If the end tag is {@linkplain StartTag#isEndTagForbidden() forbidden}, the element spans only the start tag.
 *  <li>
 *   If the end tag is {@linkplain StartTag#isEndTagRequired() required}, the source HTML is invalid and the
 *   element spans only the start tag.
 *   No attempt is made by this library to determine how user agents might interpret invalid HTML.
 * </ul>
 * Note that this behaviour has changed since version 1.0, which treated optional end tags the same as required end tags.
 * <p>
 * Created using the {@link Segment#findAllElements(String name)} or {@link StartTag#getElement()} method.
 * <p>
 * See also the XML 1.0 specification for elements.
 *
 * @see StartTag
 */
public final class Element extends Segment {
    private StartTag startTag;
    private EndTag endTag=null;

    /**
     * Creates an element beginning at the start of the given start tag.
     * <p>
     * When an end tag is supplied the element ends where that end tag ends, otherwise it ends where the
     * start tag ends. A zero-length end tag still determines the end position of the element,
     * but is not retained: {@link #getEndTag()} returns <code>null</code> in that case.
     *
     * @param source  the source document, passed on to the {@link Segment} constructor.
     * @param startTag  the start tag of the element.
     * @param endTag  the end tag of the element, or <code>null</code> if there is none.
     */
    Element(Source source, StartTag startTag, EndTag endTag) {
        super(source, startTag.begin, computeElementEnd(startTag, endTag));
        this.startTag = startTag;
        if (endTag != null && endTag.length() != 0) {
            this.endTag = endTag;
        } else {
            this.endTag = null;
        }
    }

    /**
     * Works out the end position of the element: the end of the end tag if one was supplied,
     * otherwise the end of the start tag.
     */
    private static int computeElementEnd(StartTag startTag, EndTag endTag) {
        if (endTag == null) {
            return startTag.end;
        }
        return endTag.end;
    }

    /**
     * Returns the {@linkplain #getContent() content} text of the element.
     * @return the content text of the element, or <code>null</code> if the element is {@linkplain #isEmpty() empty}.
     */
    public String getContentText() {
        if (isEmpty()) {
            return null;
        }
        return source.subSequence(startTag.end, getContentEnd()).toString();
    }

    /**
     * Returns the segment representing the content of the element.
     * <p>
     * This segment spans between the end of the start tag and the start of the end tag.
     * If the end tag is not present, the content reaches to the end of the element.
     * <p>
     * The returned segment is newly created with every call to this method.
     * <p>
     * Note that before version 1.5 this method returned <code>null</code> if the element was
     * {@linkplain #isEmpty() empty}, whereas now a zero-length segment is returned.
     *
     * @return the segment representing the content of the element, guaranteed not <code>null</code>.
     */
    public Segment getContent() {
        int contentBegin = startTag.end;
        int contentEnd = getContentEnd();
        return new Segment(source, contentBegin, contentEnd);
    }

    /**
     * Returns the start tag of the element.
     * @return the start tag of the element.
     */
    public StartTag getStartTag() {
        return startTag;
    }

    /**
     * Returns the end tag of the element.
     * <p>
     * If the element has no end tag this method returns <code>null</code>.
     *
     * @return the end tag of the element, or <code>null</code> if the element has no end tag.
     */
    public EndTag getEndTag() {
        return endTag;
    }

    /**
     * Returns the {@linkplain StartTag#getName() name} of the StartTag of this element.
     * @return the name of the StartTag of this element.
     */
    public String getName() {
        return startTag.getName();
    }

    /**
     * Indicates whether the element is empty.
     * <p>
     * The representation of an empty element is either a start tag immediately followed by an end tag,
     * or an {@linkplain StartTag#isEmptyElementTag() empty-element tag}.
     *
     * @return <code>true</code> if the element is empty, otherwise <code>false</code>.
     */
    public boolean isEmpty() {
        // Empty means the content ends exactly where the start tag ends.
        return getContentEnd() == startTag.end;
    }

    /**
     * Indicates whether the element is an empty element tag.
     * This is signified by the characters "/&gt;" at the end of the start tag and the absence of an end tag.
     * Note that not every {@linkplain #isEmpty() empty} element is an empty element tag.
     *
     * @return <code>true</code> if the element is an empty element tag, otherwise <code>false</code>.
     * @see #isEmpty()
     */
    public boolean isEmptyElementTag() {
        return startTag.isEmptyElementTag();
    }

    /**
     * Indicates whether an element with the given name is a
     * block element according to the HTML 4.01 Transitional DTD.
     * <p>
     * A brief description of the difference between block and inline elements is given in the HTML 4.01
     * Specification section 7.5.3.
     *
     * @param name  the element name to test.
     * @return <code>true</code> if an element with the given name is a block element, otherwise <code>false</code>.
     */
    public static boolean isBlock(String name) {
        return Tag.isBlock(name);
    }

    /**
     * Indicates whether an element with the given name is an
     * inline element according to the HTML 4.01 Transitional DTD.
     * <p>
     * A brief description of the difference between block and inline elements is given in the HTML 4.01
     * Specification section 7.5.3.
     *
     * @param name  the element name to test.
     * @return <code>true</code> if an element with the given name is an inline element, otherwise <code>false</code>.
     */
    public static boolean isInline(String name) {
        return Tag.isInline(name);
    }

    /**
     * Returns the attributes specified in this element's start tag.
     * <p>
     * This is equivalent to {@link StartTag#getAttributes() getStartTag().getAttributes()}
     *
     * @return the attributes specified in this element's start tag.
     * @see StartTag#getAttributes()
     */
    public Attributes getAttributes() {
        return startTag.getAttributes();
    }

    /**
     * Returns the {@link FormControl} defined by this element.
     * @return the {@link FormControl} defined by this element, or <code>null</code> if it is not a control.
     */
    public FormControl getFormControl() {
        return FormControl.construct(this);
    }

    /**
     * Returns a description of this element intended for debugging.
     * <p>
     * The text consists of the word "Element", the debug info of the underlying segment,
     * the debug info of the start tag, and then either the debug info of the end tag or
     * the text "(no end tag)".
     *
     * @return a description of this element intended for debugging.
     */
    public String getDebugInfo() {
        String prefix = "Element " + super.getDebugInfo() + ": " + startTag.getDebugInfo() + "-";
        if (endTag == null) {
            return prefix + "(no end tag)";
        }
        return prefix + endTag.getDebugInfo();
    }

    /**
     * Returns the position at which the content of the element ends:
     * the beginning of the end tag if there is one, otherwise the end of the element.
     *
     * @return the end position of the element's content.
     */
    protected int getContentEnd() {
        if (endTag == null) {
            return end;
        }
        return endTag.begin;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy