All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.attoparser.config.ParseConfiguration Maven / Gradle / Ivy

There is a newer version: 2.0.7.RELEASE
Show newest version
/*
 * =============================================================================
 * 
 *   Copyright (c) 2012-2022, The ATTOPARSER team (https://www.attoparser.org)
 * 
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 * 
 *       https://www.apache.org/licenses/LICENSE-2.0
 * 
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 * 
 * =============================================================================
 */
package org.attoparser.config;

import java.io.Serializable;






/**
 * <p>
 *   Models a series of parsing configurations that can be applied during document parsing
 *   by {@link org.attoparser.MarkupParser} and its variants
 *   {@link org.attoparser.simple.SimpleMarkupParser} and {@link org.attoparser.dom.DOMMarkupParser}.
 * </p>
 * <p>
 *   Among others, the parameters that can be configured are:
 * </p>
 * <ul>
 *   <li>The parsing mode: XML or HTML.</li>
 *   <li>Whether to expect XML-well-formed code or not.</li>
 *   <li>Whether to perform automatic tag balancing or not.</li>
 *   <li>Whether we will allow parsing of markup fragments or just entire documents.</li>
 * </ul>
 * <p>
 *   The {@link #htmlConfiguration()} and {@link #xmlConfiguration()} static methods act as starting points
 *   for configuration. Once one of these pre-initialized configurations has been created, it can be
 *   fine-tuned for the user's needs.
 * </p>
 * <p>
 *   Note these configuration objects are <strong>mutable</strong>, so they should not be modified once they
 *   have been passed to a parser in order to initialize it.
 * </p>
 * <p>
 *   Instances of this class can be cloned, so creating a variant of an already-tuned configuration
 *   is easy.
 * </p>
 *
 * @author Daniel Fern&aacute;ndez
 *
 * @since 2.0.0
 *
 */
public final class ParseConfiguration implements Serializable, Cloneable {


    /**
     * <p>
     *   Enumeration representing the possible actions to be taken with regard to element balancing:
     * </p>
     * <ul>
     *   <li>{@link #NO_BALANCING}: Do not perform element balancing checks at all. Events will be
     *       reported as they appear. There is no guarantee that a DOM tree can be built from the
     *       fired events though.</li>
     *   <li>{@link #REQUIRE_BALANCED}: Require that elements are already correctly balanced in markup,
     *       throwing an exception if not. Note that when in HTML mode, this does not require the
     *       specification of optional tags such as {@code <tbody>}. Also note that this
     *       will automatically consider the
     *       {@link ParseConfiguration#setNoUnmatchedCloseElementsRequired(boolean)} flag to be set
     *       to {@code true}.</li>
     *   <li>{@link #AUTO_OPEN_CLOSE}: Auto open and close elements, which includes both those elements that,
     *       according to the HTML spec (when in HTML mode) have optional start or end tags (see
     *       http://www.w3.org/html/wg/drafts/html/master/syntax.html#optional-tags)
     *       and those that simply are unclosed at the moment a parent element needs to be closed (so their
     *       closing is forced). As an example of optional tags, the HTML5 spec
     *       establishes that {@code <html>}, {@code <body>} and {@code <tbody>} are optional, and
     *       that an {@code <li>} will close any currently
     *       open {@code <li>} elements. This is not really
     *       ill-formed code, but something allowed by the spec. All of these will be
     *       reported as <em>auto-*</em> events by the parser.</li>
     *   <li>{@link #AUTO_CLOSE}: Equivalent to {@link #AUTO_OPEN_CLOSE} but not performing any auto-open
     *       operations, so that processing of HTML fragments is possible (no {@code <html>} or
     *       {@code <body>} elements are automatically added).</li>
     * </ul>
     * <p>
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * </p>
     */
    public enum ElementBalancing {
        NO_BALANCING, REQUIRE_BALANCED, AUTO_OPEN_CLOSE, AUTO_CLOSE
    }


    private static final long serialVersionUID = 5191449744126332911L;

    // Cannot make public because they are mutable: callers only ever receive clones of these.
    private static final ParseConfiguration DEFAULT_HTML_PARSE_CONFIGURATION = buildDefaultHtmlConfiguration();
    private static final ParseConfiguration DEFAULT_XML_PARSE_CONFIGURATION = buildDefaultXmlConfiguration();


    private ParsingMode mode = ParsingMode.XML;
    private boolean caseSensitive = true;
    private boolean textSplittable = false;
    private ElementBalancing elementBalancing = ElementBalancing.NO_BALANCING;
    private boolean noUnmatchedCloseElementsRequired = false;
    private boolean xmlWellFormedAttributeValuesRequired = false;
    private boolean uniqueAttributesInElementRequired = false;
    private PrologParseConfiguration prologParseConfiguration = new PrologParseConfiguration();
    private UniqueRootElementPresence uniqueRootElementPresence = UniqueRootElementPresence.DEPENDS_ON_PROLOG_DOCTYPE;


    /*
     * Builds the template object cloned by htmlConfiguration().
     */
    private static ParseConfiguration buildDefaultHtmlConfiguration() {

        final ParseConfiguration html = new ParseConfiguration();
        html.setMode(ParsingMode.HTML);
        html.setTextSplittable(false);
        html.setElementBalancing(ElementBalancing.AUTO_CLOSE);
        html.setNoUnmatchedCloseElementsRequired(false);
        html.setUniqueAttributesInElementRequired(false);
        html.setXmlWellFormedAttributeValuesRequired(false);
        html.setUniqueRootElementPresence(UniqueRootElementPresence.NOT_VALIDATED);

        final PrologParseConfiguration prolog = html.getPrologParseConfiguration();
        prolog.setValidateProlog(false);
        prolog.setPrologPresence(PrologPresence.ALLOWED);
        prolog.setXmlDeclarationPresence(PrologPresence.ALLOWED);
        prolog.setDoctypePresence(PrologPresence.ALLOWED);
        prolog.setRequireDoctypeKeywordsUpperCase(false);

        return html;

    }


    /*
     * Builds the template object cloned by xmlConfiguration().
     */
    private static ParseConfiguration buildDefaultXmlConfiguration() {

        final ParseConfiguration xml = new ParseConfiguration();
        xml.setMode(ParsingMode.XML);
        xml.setTextSplittable(false);
        xml.setElementBalancing(ElementBalancing.REQUIRE_BALANCED);
        xml.setNoUnmatchedCloseElementsRequired(true);
        xml.setUniqueAttributesInElementRequired(true);
        xml.setXmlWellFormedAttributeValuesRequired(true);
        xml.setUniqueRootElementPresence(UniqueRootElementPresence.DEPENDS_ON_PROLOG_DOCTYPE);

        final PrologParseConfiguration prolog = xml.getPrologParseConfiguration();
        prolog.setValidateProlog(true);
        prolog.setPrologPresence(PrologPresence.ALLOWED);
        prolog.setXmlDeclarationPresence(PrologPresence.ALLOWED);
        prolog.setDoctypePresence(PrologPresence.ALLOWED);
        prolog.setRequireDoctypeKeywordsUpperCase(true);

        return xml;

    }


    /*
     * Clones a template, translating the checked exception (which cannot happen because this
     * class implements Cloneable) into an IllegalStateException.
     */
    private static ParseConfiguration copyOf(final ParseConfiguration template) {
        try {
            return template.clone();
        } catch (final CloneNotSupportedException e) {
            // Will never be thrown
            throw new IllegalStateException(e);
        }
    }


    /**
     * <p>
     *   Return an instance of {@link org.attoparser.config.ParseConfiguration} containing a valid configuration
     *   set for most HTML scenarios.
     * </p>
     * <ul>
     *   <li>Mode: {@link org.attoparser.config.ParseConfiguration.ParsingMode#HTML}</li>
     *   <li>Text splittable: {@code false}</li>
     *   <li>Element balancing: {@link org.attoparser.config.ParseConfiguration.ElementBalancing#AUTO_CLOSE}</li>
     *   <li>No unmatched close elements required: {@code false}</li>
     *   <li>Unique attributes in elements required: {@code false}</li>
     *   <li>Xml-well-formed attribute values required: {@code false}</li>
     *   <li>Unique root element presence: {@link org.attoparser.config.ParseConfiguration.UniqueRootElementPresence#NOT_VALIDATED}</li>
     *   <li>Validate Prolog: {@code false}</li>
     *   <li>Prolog presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}</li>
     *   <li>XML Declaration presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}</li>
     *   <li>DOCTYPE presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}</li>
     *   <li>Require DOCTYPE keyword to be uppercase: {@code false}</li>
     * </ul>
     * <p>
     *   Every call returns a new, independent object that can be freely modified.
     * </p>
     *
     * @return a valid default configuration object for HTML parsing.
     */
    public static ParseConfiguration htmlConfiguration() {
        return copyOf(DEFAULT_HTML_PARSE_CONFIGURATION);
    }


    /**
     * <p>
     *   Return an instance of {@link org.attoparser.config.ParseConfiguration} containing a valid configuration
     *   set for most XML scenarios.
     * </p>
     * <ul>
     *   <li>Mode: {@link org.attoparser.config.ParseConfiguration.ParsingMode#XML}</li>
     *   <li>Text splittable: {@code false}</li>
     *   <li>Element balancing: {@link org.attoparser.config.ParseConfiguration.ElementBalancing#REQUIRE_BALANCED}</li>
     *   <li>No unmatched close elements required: {@code true}</li>
     *   <li>Unique attributes in elements required: {@code true}</li>
     *   <li>Xml-well-formed attribute values required: {@code true}</li>
     *   <li>Unique root element presence: {@link org.attoparser.config.ParseConfiguration.UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}</li>
     *   <li>Validate Prolog: {@code true}</li>
     *   <li>Prolog presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}</li>
     *   <li>XML Declaration presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}</li>
     *   <li>DOCTYPE presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}</li>
     *   <li>Require DOCTYPE keyword to be uppercase: {@code true}</li>
     * </ul>
     * <p>
     *   Every call returns a new, independent object that can be freely modified.
     * </p>
     *
     * @return a valid default configuration object for XML parsing.
     */
    public static ParseConfiguration xmlConfiguration() {
        return copyOf(DEFAULT_XML_PARSE_CONFIGURATION);
    }


    /*
     * No need to make this public. Instances of ParseConfiguration should be created from the static
     * factory methods for XML and HTML config.
     */
    private ParseConfiguration() {
    }


    /**
     * <p>
     *   Return the parsing mode to be used. Can be {@link ParsingMode#XML} or {@link ParsingMode#HTML}.
     * </p>
     * <p>
     *   Depending on the selected mode parsers will behave differently, given HTML has some specific
     *   rules which are not XML-compatible (like void elements which might appear unclosed like
     *   {@code <meta>}).
     * </p>
     *
     * @return the parsing mode to be used.
     */
    public ParsingMode getMode() {
        return this.mode;
    }


    /**
     * <p>
     *   Specify the parsing mode to be used. Can be {@link ParsingMode#XML} or {@link ParsingMode#HTML}.
     * </p>
     * <p>
     *   Depending on the selected mode parsers will behave differently, given HTML has some specific
     *   rules which are not XML-compatible (like void elements which might appear unclosed like
     *   {@code <meta>}).
     * </p>
     * <p>
     *   Selecting {@link ParsingMode#HTML} also switches {@link #isCaseSensitive()} to {@code false}.
     * </p>
     *
     * @param mode the parsing mode to be used.
     */
    public void setMode(final ParsingMode mode) {
        this.mode = mode;
        if (this.mode == ParsingMode.HTML) {
            // We can never use HTML parsing in case-sensitive mode
            this.caseSensitive = false;
        }
    }


    /**
     * <p>
     *   Returns whether validations performed on the parsed document should be
     *   case sensitive or not (e.g. attribute names, document root element name, element
     *   open vs close elements, etc.)
     * </p>
     * <p>
     *   HTML requires this parameter to be {@code false}. Default for XML is {@code true}.
     * </p>
     *
     * @return whether validations should be case sensitive or not.
     */
    public boolean isCaseSensitive() {
        return this.caseSensitive;
    }


    /**
     * <p>
     *   Specify whether validations performed on the parsed document should be
     *   case sensitive or not (e.g. attribute names, document root element name, element
     *   open vs close elements, etc.)
     * </p>
     * <p>
     *   HTML requires this parameter to be {@code false}. Default for XML is {@code true}.
     * </p>
     *
     * @param caseSensitive whether validations should be case sensitive or not.
     * @throws IllegalArgumentException if {@code true} is specified while the mode is
     *         {@link ParsingMode#HTML}.
     */
    public void setCaseSensitive(final boolean caseSensitive) {
        if (caseSensitive && this.mode == ParsingMode.HTML) {
            throw new IllegalArgumentException(
                    "Cannot set parser as case-sensitive for HTML mode. Use XML mode instead.");
        }
        this.caseSensitive = caseSensitive;
    }


    /**
     * <p>
     *   Returns whether text fragments in markup can be split in more than one text node, if it
     *   occupies more than an entire buffer in size.
     * </p>
     * <p>
     *   Default is {@code false}.
     * </p>
     *
     * @return whether text fragments can be split or not.
     */
    public boolean isTextSplittable() {
        return this.textSplittable;
    }


    /**
     * <p>
     *   Specify whether text fragments in markup can be split in more than one text node, if it
     *   occupies more than an entire buffer in size.
     * </p>
     * <p>
     *   Default is {@code false}.
     * </p>
     *
     * @param textSplittable whether text fragments can be split or not.
     */
    public void setTextSplittable(final boolean textSplittable) {
        this.textSplittable = textSplittable;
    }


    /**
     * <p>
     *   Returns the level of element balancing required at the document being parsed,
     *   enabling auto-closing of elements if needed.
     * </p>
     * <p>
     *   Possible values are {@link ElementBalancing#NO_BALANCING},
     *   {@link ElementBalancing#REQUIRE_BALANCED}, {@link ElementBalancing#AUTO_OPEN_CLOSE} and
     *   {@link ElementBalancing#AUTO_CLOSE}. See {@link ElementBalancing} for the detailed
     *   meaning of each one.
     * </p>
     *
     * @return the level of element balancing.
     */
    public ElementBalancing getElementBalancing() {
        return this.elementBalancing;
    }


    /**
     * <p>
     *   Specify the level of element balancing required at the document being parsed,
     *   enabling auto-closing of elements if needed.
     * </p>
     * <p>
     *   Possible values are {@link ElementBalancing#NO_BALANCING},
     *   {@link ElementBalancing#REQUIRE_BALANCED}, {@link ElementBalancing#AUTO_OPEN_CLOSE} and
     *   {@link ElementBalancing#AUTO_CLOSE}. See {@link ElementBalancing} for the detailed
     *   meaning of each one.
     * </p>
     *
     * @param elementBalancing the level of element balancing.
     */
    public void setElementBalancing(final ElementBalancing elementBalancing) {
        this.elementBalancing = elementBalancing;
    }


    /**
     * <p>
     *   Returns the {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration} object determining the
     *   way in which prolog (XML Declaration, DOCTYPE) will be dealt with during parsing.
     * </p>
     * <p>
     *   The returned object is the live one held by this configuration (not a copy), so changes
     *   made to it modify this configuration.
     * </p>
     *
     * @return the configuration object.
     */
    public PrologParseConfiguration getPrologParseConfiguration() {
        return this.prologParseConfiguration;
    }


    /**
     * <p>
     *   Returns whether unmatched close elements (those not matching any equivalent open elements) are
     *   allowed or not.
     * </p>
     *
     * @return whether unmatched close elements will be allowed ({@code false}) or not ({@code true}).
     */
    public boolean isNoUnmatchedCloseElementsRequired() {
        return this.noUnmatchedCloseElementsRequired;
    }


    /**
     * <p>
     *   Specify whether unmatched close elements (those not matching any equivalent open elements) are
     *   allowed or not.
     * </p>
     *
     * @param noUnmatchedCloseElementsRequired whether unmatched close elements will be allowed
     *        ({@code false}) or not ({@code true}).
     */
    public void setNoUnmatchedCloseElementsRequired(
            final boolean noUnmatchedCloseElementsRequired) {
        this.noUnmatchedCloseElementsRequired = noUnmatchedCloseElementsRequired;
    }


    /**
     * <p>
     *   Returns whether element attributes will be required to be well-formed from the XML
     *   standpoint. This means:
     * </p>
     * <ul>
     *   <li>Attributes should always have a value.</li>
     *   <li>Attribute values should be surrounded by double-quotes.</li>
     * </ul>
     *
     * @return whether attributes should be XML-well-formed or not.
     */
    public boolean isXmlWellFormedAttributeValuesRequired() {
        return this.xmlWellFormedAttributeValuesRequired;
    }


    /**
     * <p>
     *   Specify whether element attributes will be required to be well-formed from the XML
     *   standpoint. This means:
     * </p>
     * <ul>
     *   <li>Attributes should always have a value.</li>
     *   <li>Attribute values should be surrounded by double-quotes.</li>
     * </ul>
     *
     * @param xmlWellFormedAttributeValuesRequired whether attributes should be XML-well-formed or not.
     */
    public void setXmlWellFormedAttributeValuesRequired(
            final boolean xmlWellFormedAttributeValuesRequired) {
        this.xmlWellFormedAttributeValuesRequired = xmlWellFormedAttributeValuesRequired;
    }


    /**
     * <p>
     *   Returns whether attributes should never appear duplicated in elements.
     * </p>
     *
     * @return whether attributes should never appear duplicated in elements.
     */
    public boolean isUniqueAttributesInElementRequired() {
        return this.uniqueAttributesInElementRequired;
    }


    /**
     * <p>
     *   Specify whether attributes should never appear duplicated in elements.
     * </p>
     *
     * @param uniqueAttributesInElementRequired whether attributes should never appear duplicated in elements.
     */
    public void setUniqueAttributesInElementRequired(final boolean uniqueAttributesInElementRequired) {
        this.uniqueAttributesInElementRequired = uniqueAttributesInElementRequired;
    }


    /**
     * <p>
     *   This value determines whether it will be required that the document has a unique
     *   root element.
     * </p>
     * <p>
     *   If set to {@link UniqueRootElementPresence#REQUIRED_ALWAYS}, then a document with
     *   more than one elements at the root level will never be considered valid. And if
     *   {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()} is true
     *   and there is a DOCTYPE clause, it will be checked that the root name established at the
     *   DOCTYPE clause is the same as the document's element root.
     * </p>
     * <p>
     *   If set to {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}, then:
     * </p>
     * <ul>
     *   <li>If {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()}
     *       is false, multiple document root elements will be allowed.</li>
     *   <li>If {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()}
     *       is true:
     *       <ul>
     *         <li>If there is a DOCTYPE clause, a unique element root will be required,
     *             and its name will be checked against the name specified at the DOCTYPE
     *             clause.</li>
     *         <li>If there is no DOCTYPE clause (even if it is forbidden), multiple
     *             document root elements will be allowed.</li>
     *       </ul>
     *   </li>
     * </ul>
     * <p>
     *   If set to {@link UniqueRootElementPresence#NOT_VALIDATED}, then nothing will be checked
     *   regarding the name of the root element/s.
     * </p>
     * <p>
     *   Default value is {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}.
     * </p>
     *
     * @return the configuration value for validating the presence of a unique root element.
     */
    public UniqueRootElementPresence getUniqueRootElementPresence() {
        return this.uniqueRootElementPresence;
    }


    /**
     * <p>
     *   Specify whether it will be required that the document has a unique root element.
     * </p>
     * <p>
     *   See {@link #getUniqueRootElementPresence()} for a detailed description of the behaviour
     *   selected by each of {@link UniqueRootElementPresence#REQUIRED_ALWAYS},
     *   {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE} and
     *   {@link UniqueRootElementPresence#NOT_VALIDATED}.
     * </p>
     * <p>
     *   Default value is {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}.
     * </p>
     *
     * @param uniqueRootElementPresence the configuration value for validating the presence of a unique root element.
     * @throws IllegalArgumentException if the specified value is {@code null}.
     */
    public void setUniqueRootElementPresence(final UniqueRootElementPresence uniqueRootElementPresence) {
        validateNotNull(uniqueRootElementPresence, "The \"unique root element presence\" configuration value cannot be null");
        this.uniqueRootElementPresence = uniqueRootElementPresence;
    }


    /**
     * <p>
     *   Creates an independent copy of this configuration, including an independent copy of its
     *   {@link PrologParseConfiguration}.
     * </p>
     *
     * @return the copy.
     * @throws CloneNotSupportedException declared for compatibility with {@link Object#clone()}.
     */
    @Override
    public ParseConfiguration clone() throws CloneNotSupportedException {
        final ParseConfiguration conf = (ParseConfiguration) super.clone();
        // super.clone() has already copied every boolean and enum field; only the mutable
        // nested configuration object has to be duplicated so both instances do not share it.
        conf.prologParseConfiguration = this.prologParseConfiguration.clone();
        return conf;
    }


    /**
     * <p>
     *   Enumeration used for determining the parsing mode, which will affect the parser's behaviour.
     *   Values are {@link #XML} and {@link #HTML}.
     * </p>
     * <p>
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * </p>
     */
    public enum ParsingMode {
        HTML, XML
    }


    /**
     * <p>
     *   Enumeration used for determining whether an element in the document prolog (DOCTYPE, XML Declaration) or
     *   the prolog itself should be allowed, required or even forbidden.
     * </p>
     * <p>
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * </p>
     */
    public enum PrologPresence {

        REQUIRED(true, false, false),
        ALLOWED(false, true, false),
        FORBIDDEN(false, false, true);

        private final boolean required;
        private final boolean allowed;
        private final boolean forbidden;

        PrologPresence(final boolean required, final boolean allowed, final boolean forbidden) {
            this.required = required;
            this.allowed = allowed;
            this.forbidden = forbidden;
        }

        public boolean isRequired() {
            return this.required;
        }

        public boolean isAllowed() {
            return this.allowed;
        }

        public boolean isForbidden() {
            return this.forbidden;
        }

    }


    /**
     * <p>
     *   Enumeration used for determining the behaviour the parser should have with respect to the presence and
     *   number of root elements in the parsed document.
     * </p>
     * <p>
     *   Root elements are the elements that appear at the root of the document (e.g. {@code <html>} in
     *   complete HTML documents). This enumeration allows requiring that the root element is unique always,
     *   requiring it only if a document prolog (XML Declaration or DOCTYPE) is present, or not validating
     *   this at all.
     * </p>
     * <p>
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * </p>
     */
    public enum UniqueRootElementPresence {

        REQUIRED_ALWAYS(true, false),
        DEPENDS_ON_PROLOG_DOCTYPE(false, true),
        NOT_VALIDATED(false, false);

        private final boolean requiredAlways;
        private final boolean dependsOnPrologDoctype;

        UniqueRootElementPresence(final boolean requiredAlways, final boolean dependsOnPrologDoctype) {
            this.requiredAlways = requiredAlways;
            this.dependsOnPrologDoctype = dependsOnPrologDoctype;
        }

        public boolean isRequiredAlways() {
            return this.requiredAlways;
        }

        public boolean isDependsOnPrologDoctype() {
            return this.dependsOnPrologDoctype;
        }

    }


    /**
     * <p>
     *   Class encapsulating the configuration parameters used for parsing
     *   and validating the "prolog" section of a markup document. The prolog
     *   is the section of an XML/HTML document containing the XML declaration
     *   and the DOCTYPE clause (if these exist).
     * </p>
     * <p>
     *   If {@code validateProlog} is set to {@code false}, all other parameters
     *   should be ignored.
     * </p>
     * <p>
     *   If {@code validateProlog} is {@code true}, then the rest of the parameters
     *   will be considered.
     * </p>
     * <p>
     *   Not all combinations of values of the {@link #getPrologPresence()},
     *   {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()}
     *   are considered valid. See {@link #validateConfiguration()} for details.
     * </p>
     *
     * @author Daniel Fern&aacute;ndez
     *
     * @since 2.0.0
     */
    public static class PrologParseConfiguration implements Serializable, Cloneable {

        private static final long serialVersionUID = -4291053503740751549L;

        private boolean validateProlog = false;
        private PrologPresence prologPresence = PrologPresence.ALLOWED;
        private PrologPresence xmlDeclarationPresence = PrologPresence.ALLOWED;
        private PrologPresence doctypePresence = PrologPresence.ALLOWED;
        private boolean requireDoctypeKeywordsUpperCase = true;


        /**
         * <p>
         *   Creates a {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration} instance with
         *   a default configuration.
         * </p>
         * <p>
         *   Default values are:
         * </p>
         * <ul>
         *   <li>{@link #isValidateProlog()} = {@code false}</li>
         *   <li>{@link #getPrologPresence()} = {@link PrologPresence#ALLOWED}</li>
         *   <li>{@link #getXmlDeclarationPresence()} = {@link PrologPresence#ALLOWED}</li>
         *   <li>{@link #getDoctypePresence()} = {@link PrologPresence#ALLOWED}</li>
         *   <li>{@link #isRequireDoctypeKeywordsUpperCase()} = {@code true}</li>
         * </ul>
         */
        protected PrologParseConfiguration() {
        }


        /**
         * <p>
         *   This flag indicates whether the document's prolog should be validated
         *   at all or not.
         * </p>
         * <p>
         *   If not validated, prolog-specific structures (XML Declaration
         *   and DOCTYPE) will be allowed to appear anywhere in the document.
         *   All other configuration parameters in this object will be ignored.
         * </p>
         * <p>
         *   If validated, prolog-specific structures will only be allowed to
         *   appear (under the conditions established in this object) at the
         *   beginning of the document, before the element root. Or if
         *   {@link #getPrologPresence()} is set to {@link PrologPresence#FORBIDDEN},
         *   it will be validated that such structures do not appear at all.
         * </p>
         * <p>
         *   Also, if validated and a DOCTYPE is present, it will be checked
         *   that there is only one root element in the document and its name
         *   matches the root element name in the DOCTYPE clause.
         * </p>
         * <p>
         *   Default value is {@code false}.
         * </p>
         *
         * @return whether prolog is to be validated or not.
         */
        public boolean isValidateProlog() {
            return this.validateProlog;
        }


        /**
         * <p>
         *   Specify whether the document's prolog should be validated at all or not.
         *   See {@link #isValidateProlog()}.
         * </p>
         *
         * @param validateProlog whether prolog is to be validated or not.
         */
        public void setValidateProlog(final boolean validateProlog) {
            this.validateProlog = validateProlog;
        }


        /**
         * <p>
         *   This flag indicates the level of presence desired for the prolog
         *   in the document, in case {@link #isValidateProlog()} has been set
         *   to {@code true}.
         * </p>
         *
         * @return the level of presence desired for the prolog.
         */
        public PrologPresence getPrologPresence() {
            return this.prologPresence;
        }


        /**
         * <p>
         *   Specify the level of presence desired for the prolog in the document.
         * </p>
         *
         * @param prologPresence the level of presence desired for the prolog.
         * @throws IllegalArgumentException if the specified value is {@code null}.
         */
        public void setPrologPresence(final PrologPresence prologPresence) {
            // Validate the incoming argument (not the current field, which is never null).
            validateNotNull(prologPresence, "Prolog presence cannot be null");
            this.prologPresence = prologPresence;
        }


        /**
         * <p>
         *   This flag indicates the level of presence desired for the XML Declaration
         *   (a part of the prolog) in the document, in case {@link #isValidateProlog()}
         *   has been set to {@code true}.
         * </p>
         *
         * @return the level of presence desired for the XML Declaration.
         */
        public PrologPresence getXmlDeclarationPresence() {
            return this.xmlDeclarationPresence;
        }


        /**
         * <p>
         *   Specify the level of presence desired for the XML Declaration in the document.
         * </p>
         *
         * @param xmlDeclarationPresence the level of presence desired for the XML Declaration.
         * @throws IllegalArgumentException if the specified value is {@code null}.
         */
        public void setXmlDeclarationPresence(final PrologPresence xmlDeclarationPresence) {
            // Validate the incoming argument (not the current field, which is never null).
            validateNotNull(xmlDeclarationPresence, "XML Declaration presence cannot be null");
            this.xmlDeclarationPresence = xmlDeclarationPresence;
        }


        /**
         * <p>
         *   This flag indicates the level of presence desired for the DOCTYPE clause
         *   (a part of the prolog) in the document, in case {@link #isValidateProlog()}
         *   has been set to {@code true}.
         * </p>
         *
         * @return the level of presence desired for the DOCTYPE clause.
         */
        public PrologPresence getDoctypePresence() {
            return this.doctypePresence;
        }


        /**
         * <p>
         *   Specify the level of presence desired for the DOCTYPE clause in the document.
         * </p>
         *
         * @param doctypePresence the level of presence desired for the DOCTYPE clause.
         * @throws IllegalArgumentException if the specified value is {@code null}.
         */
        public void setDoctypePresence(final PrologPresence doctypePresence) {
            // Validate the incoming argument (not the current field, which is never null).
            validateNotNull(doctypePresence, "DOCTYPE presence cannot be null");
            this.doctypePresence = doctypePresence;
        }


        /**
         * <p>
         *   This configuration parameter allows to check that all keywords in
         *   a DOCTYPE clause ('DOCTYPE', 'SYSTEM', 'PUBLIC') are in upper-case as
         *   required by the XML specification (and not by the HTML5 one, for example).
         * </p>
         * <p>
         *   Default value is {@code true}, but it will apply only if
         *   {@link #isValidateProlog()} is {@code true}.
         * </p>
         *
         * @return whether keywords in the DOCTYPE clause will be forced to be
         *         in upper-case.
         */
        public boolean isRequireDoctypeKeywordsUpperCase() {
            return this.requireDoctypeKeywordsUpperCase;
        }


        /**
         * <p>
         *   Specify whether all keywords in a DOCTYPE clause must be in upper-case.
         *   See {@link #isRequireDoctypeKeywordsUpperCase()}.
         * </p>
         *
         * @param requireDoctypeKeywordsUpperCase whether keywords in the DOCTYPE clause will be
         *        forced to be in upper-case.
         */
        public void setRequireDoctypeKeywordsUpperCase(final boolean requireDoctypeKeywordsUpperCase) {
            this.requireDoctypeKeywordsUpperCase = requireDoctypeKeywordsUpperCase;
        }


        /**
         * <p>
         *   Checks that the combination of values in the {@link #getPrologPresence()},
         *   {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()}
         *   parameters makes sense. Nothing is checked when {@link #isValidateProlog()}
         *   is {@code false}.
         * </p>
         * <ol>
         *   <li>If {@link #getPrologPresence()} is {@link PrologPresence#FORBIDDEN}, then
         *       {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()} must
         *       be {@link PrologPresence#FORBIDDEN} too.</li>
         *   <li>Else if at least one of {@link #getXmlDeclarationPresence()} or
         *       {@link #getDoctypePresence()} is {@link PrologPresence#REQUIRED}, the
         *       configuration is considered valid.</li>
         *   <li>Else if {@link #getPrologPresence()} is {@link PrologPresence#ALLOWED},
         *       the configuration is considered valid as long as not both
         *       {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()}
         *       are {@link PrologPresence#FORBIDDEN}.</li>
         * </ol>
         *
         * @throws IllegalArgumentException if the combination of values is not correct.
         */
        public void validateConfiguration() {

            /*
             * 1. PROLOG:   REQUIRED, ALLOWED, FORBIDDEN
             * 2. XMLDECL:  REQUIRED, ALLOWED, FORBIDDEN
             * 3. DOCTYPE:  REQUIRED, ALLOWED, FORBIDDEN
             *
             * VALID:     (RRR, RRA, RRF, RAR, RFR, ARR, ARA, ARF, AAR, AAA, AAF, AFR, AFA, FFF)
             * NOT VALID: (RAA, RAF, RFA, RFF, AFF, FRR, FRA, FRF, FAR, FAA, FAF, FFR, FFA)
             *
             * FORMULA:
             *   IF (* = F__) -> RET (* = _FF)
             *   IF (* = _R_ OR * = __R) -> RET TRUE
             *   IF (* = A__) -> RET (* != _FF)
             *   RET FALSE
             */

            if (!this.validateProlog) {
                // There's nothing to check here!
                return;
            }

            final boolean bothPartsForbidden =
                    this.xmlDeclarationPresence == PrologPresence.FORBIDDEN
                            && this.doctypePresence == PrologPresence.FORBIDDEN;
            final boolean anyPartRequired =
                    this.xmlDeclarationPresence == PrologPresence.REQUIRED
                            || this.doctypePresence == PrologPresence.REQUIRED;

            final boolean valid;
            if (this.prologPresence == PrologPresence.FORBIDDEN) {
                valid = bothPartsForbidden;
            } else if (anyPartRequired) {
                valid = true;
            } else {
                valid = this.prologPresence == PrologPresence.ALLOWED && !bothPartsForbidden;
            }

            if (!valid) {
                throw new IllegalArgumentException(
                        "Prolog parsing configuration is not valid: " +
                        "Prolog presence: " + this.prologPresence + ", " +
                        "XML Declaration presence: " + this.xmlDeclarationPresence + ", " +
                        "DOCTYPE presence: " + this.doctypePresence);
            }

        }


        /**
         * <p>
         *   Creates a copy of this object. All fields are booleans or enum constants, so the
         *   field-by-field copy made by {@link Object#clone()} is already fully independent.
         * </p>
         *
         * @return the copy.
         * @throws CloneNotSupportedException declared for compatibility with {@link Object#clone()}.
         */
        @Override
        public PrologParseConfiguration clone() throws CloneNotSupportedException {
            return (PrologParseConfiguration) super.clone();
        }

    }


    /*
     * Throws IllegalArgumentException with the given message if obj is null.
     */
    private static void validateNotNull(final Object obj, final String message) {
        if (obj == null) {
            throw new IllegalArgumentException(message);
        }
    }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy