// org.htmlunit.SgmlPage - Maven / Gradle / Ivy
//
// Go to download
// Show more of this group. Show more artifacts with this name.
// Show all versions of htmlunit. Show documentation.
// A headless browser intended for use in testing web-based applications.
// The newest version!
/*
 * Copyright (c) 2002-2024 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.htmlunit;

import java.net.URL;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

import org.htmlunit.html.AbstractDomNodeList;
import org.htmlunit.html.DomAttr;
import org.htmlunit.html.DomCDataSection;
import org.htmlunit.html.DomComment;
import org.htmlunit.html.DomDocumentFragment;
import org.htmlunit.html.DomElement;
import org.htmlunit.html.DomNode;
import org.htmlunit.html.DomNodeIterator;
import org.htmlunit.html.DomNodeList;
import org.htmlunit.html.DomText;
import org.htmlunit.util.UrlUtils;
import org.w3c.dom.CDATASection;
import org.w3c.dom.Comment;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.Text;
import org.w3c.dom.traversal.NodeFilter;

/**
 * Base class for pages backed by a Standard Generalized Markup Language (SGML)
 * style document, e.g. HTML and XML.
 * <p>
 * An instance is the document node of its DOM tree: it reports
 * {@link #DOCUMENT_NODE} as node type, {@code "#document"} as node name and
 * returns itself from {@link #getPage()}.
 *
 * @author Ahmed Ashour
 * @author Ronald Brill
 */
public abstract class SgmlPage extends DomNode implements Page, Document {

    private DocumentType documentType_;
    private final WebResponse webResponse_;
    private WebWindow enclosingWindow_;
    private final WebClient webClient_;
    private boolean printing_;

    /**
     * Creates an instance of SgmlPage.
     *
     * @param webResponse the web response that was used to create this page
     * @param webWindow the window that this page is being loaded into; must not be
     *        {@code null} because the {@link WebClient} is taken from it
     */
    public SgmlPage(final WebResponse webResponse, final WebWindow webWindow) {
        super(null);
        webResponse_ = webResponse;
        enclosingWindow_ = webWindow;
        webClient_ = webWindow.getWebClient();
    }

    /**
     * {@inheritDoc}
     * <p>
     * The web response is only cleaned up if the client's cache holds no cached
     * response for the request of this page. A page without a web response has
     * nothing to clean up.
     */
    @Override
    public void cleanUp() {
        // getUrl() treats a missing response as a valid state, so do the same here
        if (webResponse_ == null) {
            return;
        }
        if (getWebClient().getCache().getCachedResponse(webResponse_.getWebRequest()) == null) {
            webResponse_.cleanUp();
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public WebResponse getWebResponse() {
        return webResponse_;
    }

    /**
     * Gets the name for the current node.
     * @return the node name, always {@code "#document"}
     */
    @Override
    public String getNodeName() {
        return "#document";
    }

    /**
     * Gets the type of the current node.
     * @return the node type, always {@link #DOCUMENT_NODE}
     */
    @Override
    public short getNodeType() {
        return DOCUMENT_NODE;
    }

    /**
     * Returns the window that this page is sitting inside.
     *
     * @return the enclosing frame or null if this page isn't inside a frame
     */
    @Override
    public WebWindow getEnclosingWindow() {
        return enclosingWindow_;
    }

    /**
     * Sets the window that contains this page.
     *
     * @param window the new frame or null if this page is being removed from a frame
     */
    public void setEnclosingWindow(final WebWindow window) {
        enclosingWindow_ = window;
    }

    /**
     * Returns the WebClient that originally loaded this page.
     *
     * @return the WebClient that originally loaded this page
     */
    public WebClient getWebClient() {
        return webClient_;
    }

    /**
     * Creates an empty {@link DomDocumentFragment} object.
     * @return a newly created {@link DomDocumentFragment}
     */
    @Override
    public DomDocumentFragment createDocumentFragment() {
        return new DomDocumentFragment(this);
    }

    /**
     * Returns the document type.
     * @return the document type, or {@code null} if none has been set
     */
    @Override
    public final DocumentType getDoctype() {
        return documentType_;
    }

    /**
     * Sets the document type.
     * @param type the document type
     */
    protected void setDocumentType(final DocumentType type) {
        documentType_ = type;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public SgmlPage getPage() {
        return this;
    }

    /**
     * Returns the encoding.
     * @return the encoding
     */
    public abstract Charset getCharset();

    /**
     * Returns the document element, i.e. the first child of this page
     * that is a {@link DomElement}.
     * @return the document element, or {@code null} if this page has no element child
     */
    @Override
    public DomElement getDocumentElement() {
        DomNode childNode = getFirstChild();
        while (childNode != null && !(childNode instanceof DomElement)) {
            childNode = childNode.getNextSibling();
        }
        return (DomElement) childNode;
    }

    /**
     * Creates a clone of this instance.
     * @return a clone of this instance
     * @throws IllegalStateException if the superclass reports that cloning is not supported
     */
    @Override
    protected SgmlPage clone() {
        try {
            return (SgmlPage) super.clone();
        }
        catch (final CloneNotSupportedException e) {
            throw new IllegalStateException("Clone not supported", e);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * Returns the XML of the document element, or an empty string
     * if this page has no document element.
     */
    @Override
    public String asXml() {
        final DomElement documentElement = getDocumentElement();
        if (documentElement == null) {
            return "";
        }
        return documentElement.asXml();
    }

    /**
     * Returns {@code true} if this page has case-sensitive tag names, {@code false} otherwise. In general,
     * XML has case-sensitive tag names, and HTML doesn't. This is especially important during XPath matching.
     * @return {@code true} if this page has case-sensitive tag names, {@code false} otherwise
     */
    public abstract boolean hasCaseSensitiveTagNames();

    /**
     * {@inheritDoc}
     * The current implementation just {@link DomNode#normalize()}s the document element;
     * a page without a document element is left untouched.
     */
    @Override
    public void normalizeDocument() {
        final DomElement documentElement = getDocumentElement();
        if (documentElement != null) {
            documentElement.normalize();
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public String getCanonicalXPath() {
        return "/";
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public DomAttr createAttribute(final String name) {
        return new DomAttr(getPage(), null, name, "", false);
    }

    /**
     * Returns the URL of this page.
     * @return the URL of the request that produced this page, or
     *         {@link UrlUtils#URL_ABOUT_BLANK} if this page has no web response
     */
    @Override
    public URL getUrl() {
        final WebResponse wr = getWebResponse();
        if (null == wr) {
            return UrlUtils.URL_ABOUT_BLANK;
        }
        // reuse the checked instance instead of asking for the response a second time
        return wr.getWebRequest().getUrl();
    }

    /**
     * {@inheritDoc}
     * <p>
     * This base implementation always returns {@code false}.
     */
    @Override
    public boolean isHtmlPage() {
        return false;
    }

    /**
     * {@inheritDoc}
     * <p>
     * The tag name {@code "*"} matches every element. Otherwise the local name of each
     * descendant element is compared with {@code tagName}; the comparison ignores case
     * unless {@link #hasCaseSensitiveTagNames()} returns {@code true}.
     */
    @Override
    public DomNodeList<DomElement> getElementsByTagName(final String tagName) {
        return new AbstractDomNodeList<DomElement>(this) {
            @Override
            protected List<DomElement> provideElements() {
                final List<DomElement> res = new ArrayList<>();
                final boolean matchAll = "*".equals(tagName);
                final boolean caseSensitive = hasCaseSensitiveTagNames();
                for (final DomElement elem : getDomElementDescendants()) {
                    if (matchAll) {
                        res.add(elem);
                        continue;
                    }
                    final String localName = elem.getLocalName();
                    if (localName.equals(tagName)
                            || (!caseSensitive && localName.equalsIgnoreCase(tagName))) {
                        res.add(elem);
                    }
                }
                return res;
            }
        };
    }

    /**
     * {@inheritDoc}
     * <p>
     * The value {@code "*"} acts as a wildcard for {@code namespaceURI} and for
     * {@code localName}. Otherwise both values have to match the namespace URI and the
     * local name of a descendant element; {@code null} only matches {@code null}.
     * The comparison ignores case unless {@link #hasCaseSensitiveTagNames()}
     * returns {@code true}.
     */
    @Override
    public DomNodeList<DomElement> getElementsByTagNameNS(final String namespaceURI, final String localName) {
        return new AbstractDomNodeList<DomElement>(this) {
            @Override
            protected List<DomElement> provideElements() {
                final List<DomElement> res = new ArrayList<>();
                final Comparator<String> comparator;

                // nullsFirst makes the comparison null-safe: null is only equal to null
                if (hasCaseSensitiveTagNames()) {
                    comparator = Comparator.nullsFirst(String::compareTo);
                }
                else {
                    comparator = Comparator.nullsFirst(String::compareToIgnoreCase);
                }

                final boolean anyNamespace = "*".equals(namespaceURI);
                final boolean anyLocalName = "*".equals(localName);

                for (final DomElement elem : getDomElementDescendants()) {
                    final boolean namespaceMatches = anyNamespace
                            || comparator.compare(namespaceURI, elem.getNamespaceURI()) == 0;
                    // compare against the requested local name, not the element's own one
                    if (namespaceMatches
                            && (anyLocalName || comparator.compare(localName, elem.getLocalName()) == 0)) {
                        res.add(elem);
                    }
                }
                return res;
            }
        };
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public CDATASection createCDATASection(final String data) {
        return new DomCDataSection(this, data);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Text createTextNode(final String data) {
        return new DomText(this, data);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Comment createComment(final String data) {
        return new DomComment(this, data);
    }

    /**
     * Create a new NodeIterator over the subtree rooted at the
     * specified node.
     * <p>
     * This method performs no validation of its own; the arguments are handed
     * unchanged to the {@link DomNodeIterator} constructor after casting
     * {@code root} to {@link DomNode}.
     *
     * @param root The node which will be iterated together with its
     *   children. The NodeIterator is initially positioned
     *   just before this node. The whatToShow flags and the
     *   filter, if any, are not considered when setting this position. The
     *   root must not be null and has to be a {@link DomNode}.
     * @param whatToShow This flag specifies which node types may appear in
     *   the logical view of the tree presented by the
     *   NodeIterator. See the description of
     *   NodeFilter for the set of possible SHOW_
     *   values. These flags can be combined using OR.
     * @param filter The NodeFilter to be used with this
     *   NodeIterator, or null to indicate no
     *   filter.
     * @param entityReferenceExpansion The value of this flag determines
     *   whether entity reference nodes are expanded.
     * @return The newly created NodeIterator.
     * @exception DOMException
     *   NOT_SUPPORTED_ERR: the DOM contract for a null root; this method
     *   does not raise it itself.
     */
    public DomNodeIterator createNodeIterator(final Node root, final int whatToShow, final NodeFilter filter,
            final boolean entityReferenceExpansion) throws DOMException {
        return new DomNodeIterator((DomNode) root, whatToShow, filter, entityReferenceExpansion);
    }

    /**
     * Returns the content type of this page.
     * @return the content type of this page
     */
    public abstract String getContentType();

    /**
     * INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.<br>
     *
     * Clears the computed styles.
     */
    public void clearComputedStyles() {
        // nothing to do here, overridden in HtmlPage
    }

    /**
     * INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.<br>
     *
     * Clears the computed styles for a specific {@link Element}.
     * @param element the element to clear its cache
     */
    public void clearComputedStyles(final DomElement element) {
        // nothing to do here, overridden in HtmlPage
    }

    /**
     * INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.<br>
     *
     * Clears the computed styles for a specific {@link Element}
     * and all parent elements.
     * @param element the element to clear its cache
     */
    public void clearComputedStylesUpToRoot(final DomElement element) {
        // nothing to do here, overridden in HtmlPage
    }

    /**
     * @return whether or not this is currently printing
     */
    public boolean isPrinting() {
        return printing_;
    }

    /**
     * Sets the printing state and clears the computed styles afterwards.
     * @param printing the printing state to set
     */
    public void setPrinting(final boolean printing) {
        printing_ = printing;
        clearComputedStyles();
    }
}