jodd.lagarto.dom.LagartoDomBuilderConfig Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of extractor Show documentation
Show all versions of extractor Show documentation
Web Data Extractor - Extract data from common web formats, like HTML, XML, and JSON.
The newest version!
// Copyright (c) 2003-present, Jodd Team (http://jodd.org)
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package jodd.lagarto.dom;
import jodd.lagarto.LagartoParserConfig;
/**
 * Additional configuration for {@link LagartoDOMBuilder}
 * based on {@link LagartoParserConfig}.
 * <p>
 * Every setter returns this instance, so configuration calls can be chained.
 */
public class LagartoDomBuilderConfig extends LagartoParserConfig {

	/** Whether whitespaces between open/closed tags are ignored. */
	protected boolean ignoreWhitespacesBetweenTags;
	/** Whether comments are ignored in the DOM tree. */
	protected boolean ignoreComments;
	/** Whether void tags should be self closed. */
	protected boolean selfCloseVoidTags;
	/** Whether errors are collected during parsing. */
	protected boolean collectErrors;
	/** IE version for conditional comments (presumably the version they are evaluated against); defaults to 10. */
	protected float condCommentIEVersion = 10;
	/** Whether void tags are used; enabled by default. */
	protected boolean enabledVoidTags = true;
	/** Whether implied end tags are enabled for certain tags. */
	protected boolean impliedEndTags;
	/** Whether {@link HtmlFosterRules foster rules} should be used. */
	protected boolean useFosterRules;
	/** Flag for the "unclosed tag as orphan" check. */
	protected boolean unclosedTagAsOrphanCheck;
	/** HTML renderer held by this configuration; a fresh instance by default. */
	protected LagartoHtmlRenderer lagartoHtmlRenderer = new LagartoHtmlRenderer();

	// ---------------------------------------------------------------- access

	/**
	 * Returns the current value of the "unclosed tag as orphan" check flag.
	 */
	public boolean isUnclosedTagAsOrphanCheck() {
		return unclosedTagAsOrphanCheck;
	}

	/**
	 * Sets the "unclosed tag as orphan" check flag.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setUnclosedTagAsOrphanCheck(boolean unclosedTagAsOrphanCheck) {
		this.unclosedTagAsOrphanCheck = unclosedTagAsOrphanCheck;
		return this;
	}

	/**
	 * Returns <code>true</code> if {@link HtmlFosterRules foster rules}
	 * should be used.
	 */
	public boolean isUseFosterRules() {
		return useFosterRules;
	}

	/**
	 * Specifies if {@link HtmlFosterRules foster rules} should be used.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setUseFosterRules(boolean useFosterRules) {
		this.useFosterRules = useFosterRules;
		return this;
	}

	/**
	 * Returns <code>true</code> if whitespaces between open/closed tags
	 * are ignored.
	 */
	public boolean isIgnoreWhitespacesBetweenTags() {
		return ignoreWhitespacesBetweenTags;
	}

	/**
	 * Specifies if whitespaces between open/closed tags should be ignored.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setIgnoreWhitespacesBetweenTags(boolean ignoreWhitespacesBetweenTags) {
		this.ignoreWhitespacesBetweenTags = ignoreWhitespacesBetweenTags;
		return this;
	}

	/**
	 * Returns <code>true</code> if comments are ignored in the DOM tree.
	 */
	public boolean isIgnoreComments() {
		return ignoreComments;
	}

	/**
	 * Specifies if comments should be ignored in DOM tree.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setIgnoreComments(boolean ignoreComments) {
		this.ignoreComments = ignoreComments;
		return this;
	}

	/**
	 * Returns <code>true</code> if usage of void tags is enabled
	 * (which is the default).
	 */
	public boolean isEnabledVoidTags() {
		return enabledVoidTags;
	}

	/**
	 * Enables usage of void tags.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setEnabledVoidTags(boolean enabledVoidTags) {
		this.enabledVoidTags = enabledVoidTags;
		return this;
	}

	/**
	 * Returns <code>true</code> if void tags should be self closed.
	 */
	public boolean isSelfCloseVoidTags() {
		return selfCloseVoidTags;
	}

	/**
	 * Specifies if void tags should be self closed.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setSelfCloseVoidTags(boolean selfCloseVoidTags) {
		this.selfCloseVoidTags = selfCloseVoidTags;
		return this;
	}

	/**
	 * Returns <code>true</code> if errors are collected during parsing.
	 */
	public boolean isCollectErrors() {
		return collectErrors;
	}

	/**
	 * Enables error collection during parsing.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setCollectErrors(boolean collectErrors) {
		this.collectErrors = collectErrors;
		return this;
	}

	/**
	 * Returns the configured IE version for conditional comments.
	 * The default value is 10.
	 */
	public float getCondCommentIEVersion() {
		return condCommentIEVersion;
	}

	/**
	 * Sets the IE version for conditional comments (presumably the
	 * Internet Explorer version they are evaluated against).
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setCondCommentIEVersion(float condCommentIEVersion) {
		this.condCommentIEVersion = condCommentIEVersion;
		return this;
	}

	/**
	 * Returns <code>true</code> if implied end tags are enabled.
	 */
	public boolean isImpliedEndTags() {
		return impliedEndTags;
	}

	/**
	 * Enables implied end tags for certain tags.
	 * This flag reduces performance a bit, so if you
	 * are dealing with 'straight' HTML that uses closing
	 * tags, consider switching this flag off.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setImpliedEndTags(boolean impliedEndTags) {
		this.impliedEndTags = impliedEndTags;
		return this;
	}

	/**
	 * Returns the {@link LagartoHtmlRenderer} held by this configuration.
	 */
	public LagartoHtmlRenderer getLagartoHtmlRenderer() {
		return lagartoHtmlRenderer;
	}

	/**
	 * Replaces the {@link LagartoHtmlRenderer} held by this configuration.
	 * The value is stored as given; no null check is performed.
	 * <p>
	 * Returns this instance (previously <code>void</code>) so that this setter
	 * chains like every other setter of this class; callers that ignore
	 * the return value are unaffected at the source level.
	 *
	 * @return this configuration, for chaining
	 */
	public LagartoDomBuilderConfig setLagartoHtmlRenderer(LagartoHtmlRenderer lagartoHtmlRenderer) {
		this.lagartoHtmlRenderer = lagartoHtmlRenderer;
		return this;
	}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy