All downloads are FREE. The search and download functionality uses the official Maven repository.

jodd.lagarto.dom.LagartoDomBuilderConfig Maven / Gradle / Ivy

Go to download

Web Data Extractor - Extract data from common web formats such as HTML, XML, and JSON.

The newest version!
// Copyright (c) 2003-present, Jodd Team (http://jodd.org)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package jodd.lagarto.dom;

import jodd.lagarto.LagartoParserConfig;

/**
 * Configuration for {@link LagartoDOMBuilder}; adds DOM-building
 * options on top of the inherited {@link LagartoParserConfig}.
 * <p>
 * Every setter returns this instance so that calls can be chained, with
 * one exception: {@link #setLagartoHtmlRenderer(LagartoHtmlRenderer)}
 * returns nothing.
 */
public class LagartoDomBuilderConfig extends LagartoParserConfig {

    // ---------------------------------------------------------------- whitespaces & comments

    /** Off by default. */
    protected boolean ignoreWhitespacesBetweenTags;

    /** Off by default. */
    protected boolean ignoreComments;

    /**
     * Tells if whitespaces between open/closed tags are ignored.
     */
    public boolean isIgnoreWhitespacesBetweenTags() {
        return this.ignoreWhitespacesBetweenTags;
    }

    /**
     * Defines whether whitespaces between open/closed tags are ignored.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setIgnoreWhitespacesBetweenTags(final boolean ignoreWhitespacesBetweenTags) {
        this.ignoreWhitespacesBetweenTags = ignoreWhitespacesBetweenTags;
        return this;
    }

    /**
     * Tells if comments are left out of the DOM tree.
     */
    public boolean isIgnoreComments() {
        return this.ignoreComments;
    }

    /**
     * Defines whether comments are left out of the DOM tree.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setIgnoreComments(final boolean ignoreComments) {
        this.ignoreComments = ignoreComments;
        return this;
    }

    // ---------------------------------------------------------------- void tags

    /** On by default. */
    protected boolean enabledVoidTags = true;

    /** Off by default. */
    protected boolean selfCloseVoidTags;

    /**
     * Tells if void tags are in use.
     */
    public boolean isEnabledVoidTags() {
        return this.enabledVoidTags;
    }

    /**
     * Turns the usage of void tags on or off.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setEnabledVoidTags(final boolean enabledVoidTags) {
        this.enabledVoidTags = enabledVoidTags;
        return this;
    }

    /**
     * Tells if void tags are self closed.
     */
    public boolean isSelfCloseVoidTags() {
        return this.selfCloseVoidTags;
    }

    /**
     * Defines whether void tags are self closed.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setSelfCloseVoidTags(final boolean selfCloseVoidTags) {
        this.selfCloseVoidTags = selfCloseVoidTags;
        return this;
    }

    // ---------------------------------------------------------------- errors

    /** Off by default. */
    protected boolean collectErrors;

    /**
     * Tells if errors are collected during parsing.
     */
    public boolean isCollectErrors() {
        return this.collectErrors;
    }

    /**
     * Turns the collection of errors during parsing on or off.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setCollectErrors(final boolean collectErrors) {
        this.collectErrors = collectErrors;
        return this;
    }

    // ---------------------------------------------------------------- conditional comments

    /**
     * Defaults to 10.
     * NOTE(review): presumably the IE version that conditional comments
     * are evaluated against; confirm against the DOM builder.
     */
    protected float condCommentIEVersion = 10;

    /**
     * Returns the configured conditional-comment IE version.
     */
    public float getCondCommentIEVersion() {
        return this.condCommentIEVersion;
    }

    /**
     * Sets the conditional-comment IE version.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setCondCommentIEVersion(final float condCommentIEVersion) {
        this.condCommentIEVersion = condCommentIEVersion;
        return this;
    }

    // ---------------------------------------------------------------- tree fixing

    /** Off by default. */
    protected boolean impliedEndTags;

    /** Off by default. */
    protected boolean useFosterRules;

    /** Off by default. */
    protected boolean unclosedTagAsOrphanCheck;

    /**
     * Tells if implied end tags are enabled.
     */
    public boolean isImpliedEndTags() {
        return this.impliedEndTags;
    }

    /**
     * Turns implied end tags for certain tags on or off.
     * Enabling this flag costs a bit of performance, so consider
     * keeping it off when processing 'straight' HTML that closes
     * its tags explicitly.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setImpliedEndTags(final boolean impliedEndTags) {
        this.impliedEndTags = impliedEndTags;
        return this;
    }

    /**
     * Tells if {@link HtmlFosterRules foster rules} are applied.
     */
    public boolean isUseFosterRules() {
        return this.useFosterRules;
    }

    /**
     * Defines whether {@link HtmlFosterRules foster rules} are applied.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setUseFosterRules(final boolean useFosterRules) {
        this.useFosterRules = useFosterRules;
        return this;
    }

    /**
     * Returns the state of the 'unclosed tag as orphan' check flag.
     */
    public boolean isUnclosedTagAsOrphanCheck() {
        return this.unclosedTagAsOrphanCheck;
    }

    /**
     * Sets the 'unclosed tag as orphan' check flag. What the check does
     * is decided by the code that reads this flag, not by this class.
     *
     * @return this configuration, for chaining
     */
    public LagartoDomBuilderConfig setUnclosedTagAsOrphanCheck(final boolean unclosedTagAsOrphanCheck) {
        this.unclosedTagAsOrphanCheck = unclosedTagAsOrphanCheck;
        return this;
    }

    // ---------------------------------------------------------------- renderer

    /** A fresh {@link LagartoHtmlRenderer} unless replaced via the setter. */
    protected LagartoHtmlRenderer lagartoHtmlRenderer = new LagartoHtmlRenderer();

    /**
     * Returns the configured HTML renderer.
     */
    public LagartoHtmlRenderer getLagartoHtmlRenderer() {
        return this.lagartoHtmlRenderer;
    }

    /**
     * Replaces the HTML renderer. The value is stored as given, without
     * any validation. Unlike the other setters of this class, this one
     * does not return the configuration and therefore cannot be chained.
     */
    public void setLagartoHtmlRenderer(final LagartoHtmlRenderer lagartoHtmlRenderer) {
        this.lagartoHtmlRenderer = lagartoHtmlRenderer;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy