org.attoparser.config.ParseConfiguration Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of attoparser Show documentation
Powerful, fast and easy to use HTML and XML parser for Java
There is a newer version: 2.0.7.RELEASE
/*
 * =============================================================================
 * 
 *   Copyright (c) 2012-2022, The ATTOPARSER team (https://www.attoparser.org)
 * 
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 * 
 *       https://www.apache.org/licenses/LICENSE-2.0
 * 
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 * 
 * =============================================================================
 */
package org.attoparser.config;

import java.io.Serializable;






/**
 * 
 *   Models a series of parsing configurations that can be applied during document parsing
 *   by {@link org.attoparser.MarkupParser} and its variants
 *   {@link org.attoparser.simple.SimpleMarkupParser} and {@link org.attoparser.dom.DOMMarkupParser}.
 * 
 * 
 *   Among others, the parameters that can be configured are:
 * 
 * 
 *   The parsing mode: XML or HTML.
 *   Whether to expect XML-well-formed code or not.
 *   Whether to perform automatic tag balancing or not.
 *   Whether we will allow parsing of markup fragments or just entire documents.
 * 
 * 
 *   The {@link #htmlConfiguration()} and {@link #xmlConfiguration()} static methods act as starting points
 *   for configuration. Once one of these pre-initialized configurations has been created, it can be
 *   fine-tuned for the user's needs.
 * 
 * 
 *   Note these configuration objects are mutable, so they should not be modified once they
 *   have been passed to a parser in order to initialize it.
 * 
 * 
 *   Instances of this class can be cloned, so creating a variant of an already-tuned configuration
 *   is easy.
 * 
 * 
 * 
 * @author Daniel Fernández
 * 
 * @since 2.0.0
 *
 */
public final class ParseConfiguration implements Serializable, Cloneable {


    /**
     * 
     *   Enumeration representing the possible actions to be taken with regard to element balancing:
     * 
     * 
     *   {@link #NO_BALANCING}: Do not perform element balancing checks at all. Events will be
     *       reported as they appear. There is no guarantee that a DOM tree can be built from the
     *       fired events though.
     *   {@link #REQUIRE_BALANCED}: Require that elements are already correctly balanced in markup,
     *       throwing an exception if not. Note that when in HTML mode, this does not require the
     *       specification of optional tags such as <tbody>. Also note that this
     *       will automatically consider the
     *       {@link #setNoUnmatchedCloseElementsRequired(boolean)} flag to be set to true.
     *   {@link #AUTO_OPEN_CLOSE}: Auto open and close elements, which includes both those elements that,
     *       according to the HTML spec (when in HTML mode) have optional start or end tags (see
     *       http://www.w3.org/html/wg/drafts/html/master/syntax.html#optional-tags)
     *       and those that simply are unclosed at the moment a parent element needs to be closed (so their closing
     *       is forced). As an example of optional tags, the HTML5 spec
     *       establishes that <html>, <body> and <tbody> are optional, and
     *       that an <li> will close any currently
     *       open <li> elements. This is not really
     *       ill-formed code, but something allowed by the spec. All of these will be
     *       reported as auto-* events by the parser.
     *   {@link #AUTO_CLOSE}: Equivalent to {@link #AUTO_OPEN_CLOSE} but not performing any auto-open
     *       operations, so that processing of HTML fragments is possible (no <html> or
     *       <body> elements are automatically added).
     * 
     * 
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * 
     */
    public static enum ElementBalancing {

        // No balancing checks at all: events are reported as they appear.
        NO_BALANCING,

        // Markup must already be balanced, otherwise parsing fails.
        REQUIRE_BALANCED,

        // Elements are automatically opened and closed where needed.
        AUTO_OPEN_CLOSE,

        // Like AUTO_OPEN_CLOSE, but without any auto-open operations.
        AUTO_CLOSE

    }



    // Explicit serialization version identifier (this class implements Serializable).
    private static final long serialVersionUID = 5191449744126332911L;

    // Cannot make public because they are mutable
    // Both are populated once in the static initializer below; the static factory
    // methods hand out clones of them rather than the instances themselves.
    private static final ParseConfiguration DEFAULT_HTML_PARSE_CONFIGURATION;
    private static final ParseConfiguration DEFAULT_XML_PARSE_CONFIGURATION;


    // Parsing mode. setMode(...) forces caseSensitive to false when HTML is selected.
    private ParsingMode mode = ParsingMode.XML;
    private boolean caseSensitive = true;

    // Whether text can be reported split into more than one text node.
    private boolean textSplittable = false;
    
    // Level of element balancing applied during parsing.
    private ElementBalancing elementBalancing = ElementBalancing.NO_BALANCING;

    // Validation flags; all of them are off by default.
    private boolean noUnmatchedCloseElementsRequired = false;
    private boolean xmlWellFormedAttributeValuesRequired = false;
    private boolean uniqueAttributesInElementRequired = false;
 
    // Prolog (XML Declaration, DOCTYPE) settings; mutable, and deep-copied by clone().
    private PrologParseConfiguration prologParseConfiguration = new PrologParseConfiguration();
    private UniqueRootElementPresence uniqueRootElementPresence = UniqueRootElementPresence.DEPENDS_ON_PROLOG_DOCTYPE;





    static {

        DEFAULT_HTML_PARSE_CONFIGURATION = new ParseConfiguration();
        DEFAULT_HTML_PARSE_CONFIGURATION.setMode(ParsingMode.HTML);
        DEFAULT_HTML_PARSE_CONFIGURATION.setTextSplittable(false);
        DEFAULT_HTML_PARSE_CONFIGURATION.setElementBalancing(ElementBalancing.AUTO_CLOSE);
        DEFAULT_HTML_PARSE_CONFIGURATION.setNoUnmatchedCloseElementsRequired(false);
        DEFAULT_HTML_PARSE_CONFIGURATION.setUniqueAttributesInElementRequired(false);
        DEFAULT_HTML_PARSE_CONFIGURATION.setXmlWellFormedAttributeValuesRequired(false);
        DEFAULT_HTML_PARSE_CONFIGURATION.setUniqueRootElementPresence(UniqueRootElementPresence.NOT_VALIDATED);
        DEFAULT_HTML_PARSE_CONFIGURATION.getPrologParseConfiguration().setValidateProlog(false);
        DEFAULT_HTML_PARSE_CONFIGURATION.getPrologParseConfiguration().setPrologPresence(PrologPresence.ALLOWED);
        DEFAULT_HTML_PARSE_CONFIGURATION.getPrologParseConfiguration().setXmlDeclarationPresence(PrologPresence.ALLOWED);
        DEFAULT_HTML_PARSE_CONFIGURATION.getPrologParseConfiguration().setDoctypePresence(PrologPresence.ALLOWED);
        DEFAULT_HTML_PARSE_CONFIGURATION.getPrologParseConfiguration().setRequireDoctypeKeywordsUpperCase(false);


        DEFAULT_XML_PARSE_CONFIGURATION = new ParseConfiguration();
        DEFAULT_XML_PARSE_CONFIGURATION.setMode(ParsingMode.XML);
        DEFAULT_XML_PARSE_CONFIGURATION.setTextSplittable(false);
        DEFAULT_XML_PARSE_CONFIGURATION.setElementBalancing(ElementBalancing.REQUIRE_BALANCED);
        DEFAULT_XML_PARSE_CONFIGURATION.setNoUnmatchedCloseElementsRequired(true);
        DEFAULT_XML_PARSE_CONFIGURATION.setUniqueAttributesInElementRequired(true);
        DEFAULT_XML_PARSE_CONFIGURATION.setXmlWellFormedAttributeValuesRequired(true);
        DEFAULT_XML_PARSE_CONFIGURATION.setUniqueRootElementPresence(UniqueRootElementPresence.DEPENDS_ON_PROLOG_DOCTYPE);
        DEFAULT_XML_PARSE_CONFIGURATION.getPrologParseConfiguration().setValidateProlog(true);
        DEFAULT_XML_PARSE_CONFIGURATION.getPrologParseConfiguration().setPrologPresence(PrologPresence.ALLOWED);
        DEFAULT_XML_PARSE_CONFIGURATION.getPrologParseConfiguration().setXmlDeclarationPresence(PrologPresence.ALLOWED);
        DEFAULT_XML_PARSE_CONFIGURATION.getPrologParseConfiguration().setDoctypePresence(PrologPresence.ALLOWED);
        DEFAULT_XML_PARSE_CONFIGURATION.getPrologParseConfiguration().setRequireDoctypeKeywordsUpperCase(true);

    }


    /**
     * 
     *   Return an instance of {@link org.attoparser.config.ParseConfiguration} containing a valid configuration
     *   set for most HTML scenarios.
     * 
     * 
     *     Mode: {@link org.attoparser.config.ParseConfiguration.ParsingMode#HTML}
     *     Text splittable: false
     *     Element balancing: {@link org.attoparser.config.ParseConfiguration.ElementBalancing#AUTO_CLOSE}
     *     No unmatched close elements required: false
     *     Unique attributes in elements required: false
     *     Xml-well-formed attribute values required: false
     *     Unique root element presence: {@link org.attoparser.config.ParseConfiguration.UniqueRootElementPresence#NOT_VALIDATED}
     *     Validate Prolog: false
     * 
     *
     * @return a valid default configuration object for HTML parsing.
     */
    public static ParseConfiguration htmlConfiguration() {
        final ParseConfiguration copy;
        try {
            // Hand out a clone so that callers never touch the shared default instance.
            copy = DEFAULT_HTML_PARSE_CONFIGURATION.clone();
        } catch (final CloneNotSupportedException e) {
            // Will never be thrown
            throw new IllegalStateException(e);
        }
        return copy;
    }



    /**
     * 
     *   Return an instance of {@link org.attoparser.config.ParseConfiguration} containing a valid configuration
     *   set for most XML scenarios.
     * 
     * 
     *     Mode: {@link org.attoparser.config.ParseConfiguration.ParsingMode#XML}
     *     Text splittable: false
     *     Element balancing: {@link org.attoparser.config.ParseConfiguration.ElementBalancing#REQUIRE_BALANCED}
     *     No unmatched close elements required: true
     *     Unique attributes in elements required: true
     *     Xml-well-formed attribute values required: true
     *     Unique root element presence: {@link org.attoparser.config.ParseConfiguration.UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}
     *     Validate Prolog: true
     *     Prolog presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}
     *     XML Declaration presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}
     *     DOCTYPE presence: {@link org.attoparser.config.ParseConfiguration.PrologPresence#ALLOWED}
     *     Require DOCTYPE keyword to be uppercase: true
     * 
     *
     * @return a valid default configuration object for XML parsing.
     */
    public static ParseConfiguration xmlConfiguration() {
        final ParseConfiguration copy;
        try {
            // Hand out a clone so that callers never touch the shared default instance.
            copy = DEFAULT_XML_PARSE_CONFIGURATION.clone();
        } catch (final CloneNotSupportedException e) {
            // Will never be thrown
            throw new IllegalStateException(e);
        }
        return copy;
    }





    /*
     * No need to make this public. Instances of ParseConfiguration should be created from the static
     * factory methods for XML and HTML config.
     */
    private ParseConfiguration() {
        super();
    }




    /**
     * 
     *   Return the parsing mode to be used. Can be XML or HTML.
     * 
     * 
     *   Depending on the selected mode parsers will behave differently, given HTML has some specific
     *   rules which are not XML-compatible (like void elements which might appear unclosed like
     *   <meta>).
     * 
     *
     * @return the parsing mode to be used.
     */
    public ParsingMode getMode() {
        // Plain accessor for the currently configured mode.
        return this.mode;
    }


    /**
     * 
     *   Specify the parsing mode to be used. Can be XML or HTML.
     * 
     * 
     *   Depending on the selected mode parsers will behave differently, given HTML has some specific
     *   rules which are not XML-compatible (like void elements which might appear unclosed like
     *   <meta>).
     * 
     *
     * @param mode the parsing mode to be used.
     */
    public void setMode(final ParsingMode mode) {
        this.mode = mode;
        // We can never use HTML parsing in case-sensitive mode
        if (mode == ParsingMode.HTML) {
            this.caseSensitive = false;
        }
    }




    /**
     * 
     *   Returns whether validations performed on the parsed document should be
     *   case sensitive or not (e.g. attribute names, document root element name, element
     *   open vs close elements, etc.)
     * 
     * 
     *   HTML requires this parameter to be false. Default for XML is true.
     * 
     * 
     * @return whether validations should be case sensitive or not. 
     */
    public boolean isCaseSensitive() {
        // Plain accessor for the case-sensitivity flag.
        return caseSensitive;
    }


    /**
     * 
     *   Specify whether validations performed on the parsed document should be
     *   case sensitive or not (e.g. attribute names, document root element name, element
     *   open vs close elements, etc.)
     * 
     * 
     *   HTML requires this parameter to be false. Default for XML is true.
     * 
     *
     * @param caseSensitive whether validations should be case sensitive or not.
     */
    public void setCaseSensitive(final boolean caseSensitive) {
        // Case-sensitive parsing is only refused when the current mode is HTML.
        final boolean htmlMode = (this.mode == ParsingMode.HTML);
        if (htmlMode && caseSensitive) {
            throw new IllegalArgumentException(
                    "Cannot set parser as case-sensitive for HTML mode. Use XML mode instead.");
        }
        this.caseSensitive = caseSensitive;
    }




    /**
     * 
     *   Returns whether text fragments in markup can be split in more than one text node, if it
     *   occupies more than an entire buffer in size.
     * 
     * 
     *   Default is false.
     * 
     *
     * @return whether text fragments can be split or not.
     */
    public boolean isTextSplittable() {
        // Plain accessor for the text-splitting flag.
        return textSplittable;
    }


    /**
     * 
     *   Specify whether text fragments in markup can be split in more than one text node, if it
     *   occupies more than an entire buffer in size.
     * 
     * 
     *   Default is false.
     * 
     *
     * @param textSplittable whether text fragments can be split or not.
     */
    public void setTextSplittable(final boolean textSplittable) {
        // Stored as given; no validation is applied to this flag.
        this.textSplittable = textSplittable;
    }




    /**
     * 
     *   Returns the level of element balancing required at the document being parsed,
     *   enabling auto-closing of elements if needed.
     * 
     * 
     *   Possible values are:
     * 
     * 
     *   {@link ElementBalancing#NO_BALANCING}: Do not perform element balancing checks at all. Events will be
     *       reported as they appear. There is no guarantee that a DOM tree can be built from the
     *       fired events though.
     *   {@link ElementBalancing#REQUIRE_BALANCED}: Require that elements are already correctly balanced in markup,
     *       throwing an exception if not. Note that when in HTML mode, this does not require the
     *       specification of optional tags such as <tbody>. Also note that this
     *       will automatically consider the
     *       {@link #setNoUnmatchedCloseElementsRequired(boolean)} flag to be set to true.
     *   {@link ElementBalancing#AUTO_OPEN_CLOSE}: Auto open and close elements, which includes both those elements that,
     *       according to the HTML spec (when in HTML mode) have optional start or end tags (see
     *       http://www.w3.org/html/wg/drafts/html/master/syntax.html#optional-tags)
     *       and those that simply are unclosed at the moment a parent element needs to be closed (so their closing
     *       is forced). As an example of optional tags, the HTML5 spec
     *       establishes that <html>, <body> and <tbody> are optional, and
     *       that an <li> will close any currently
     *       open <li> elements. This is not really
     *       ill-formed code, but something allowed by the spec. All of these will be
     *       reported as auto-* events by the parser.
     *   {@link ElementBalancing#AUTO_CLOSE}: Equivalent to {@link ElementBalancing#AUTO_OPEN_CLOSE} but not performing any auto-open
     *       operations, so that processing of HTML fragments is possible (no <html> or
     *       <body> elements are automatically added).
     * 
     * 
     * @return the level of element balancing.
     */
    public ElementBalancing getElementBalancing() {
        // Plain accessor for the configured balancing level.
        return elementBalancing;
    }


    /**
     * 
     *   Specify the level of element balancing required at the document being parsed,
     *   enabling auto-closing of elements if needed.
     * 
     * 
     *   Possible values are:
     * 
     * 
     *   {@link ElementBalancing#NO_BALANCING}: Do not perform element balancing checks at all. Events will be
     *       reported as they appear. There is no guarantee that a DOM tree can be built from the
     *       fired events though.
     *   {@link ElementBalancing#REQUIRE_BALANCED}: Require that elements are already correctly balanced in markup,
     *       throwing an exception if not. Note that when in HTML mode, this does not require the
     *       specification of optional tags such as <tbody>. Also note that this
     *       will automatically consider the
     *       {@link #setNoUnmatchedCloseElementsRequired(boolean)} flag to be set to true.
     *   {@link ElementBalancing#AUTO_OPEN_CLOSE}: Auto open and close elements, which includes both those elements that,
     *       according to the HTML spec (when in HTML mode) have optional start or end tags (see
     *       http://www.w3.org/html/wg/drafts/html/master/syntax.html#optional-tags)
     *       and those that simply are unclosed at the moment a parent element needs to be closed (so their closing
     *       is forced). As an example of optional tags, the HTML5 spec
     *       establishes that <html>, <body> and <tbody> are optional, and
     *       that an <li> will close any currently
     *       open <li> elements. This is not really
     *       ill-formed code, but something allowed by the spec. All of these will be
     *       reported as auto-* events by the parser.
     *   {@link ElementBalancing#AUTO_CLOSE}: Equivalent to {@link ElementBalancing#AUTO_OPEN_CLOSE} but not performing any auto-open
     *       operations, so that processing of HTML fragments is possible (no <html> or
     *       <body> elements are automatically added).
     * 
     *
     * @param elementBalancing the level of element balancing.
     */
    public void setElementBalancing(final ElementBalancing elementBalancing) {
        // Stored as given; this setter performs no null check.
        this.elementBalancing = elementBalancing;
    }




    /**
     * 
     *   Returns the {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration} object determining the
     *   way in which prolog (XML Declaration, DOCTYPE) will be dealt with during parsing.
     * 
     * 
     * @return the configuration object.
     */
    public PrologParseConfiguration getPrologParseConfiguration() {
        // Returns the held object itself (not a copy), so callers can configure it in place.
        return prologParseConfiguration;
    }




    /**
     * 
     *   Returns whether unmatched close elements (those not matching any equivalent open elements) are
     *   allowed or not.
     * 
     *
     * @return whether unmatched close elements will be allowed (false) or not (true).
     */
    public boolean isNoUnmatchedCloseElementsRequired() {
        // Plain accessor for the flag.
        return noUnmatchedCloseElementsRequired;
    }


    /**
     * 
     *   Specify whether unmatched close elements (those not matching any equivalent open elements) are
     *   allowed or not.
     * 
     *
     * @param noUnmatchedCloseElementsRequired whether unmatched close elements will be allowed
     *                                         (false) or not (true).
     */
    public void setNoUnmatchedCloseElementsRequired(final boolean noUnmatchedCloseElementsRequired) {
        // Stored as given; no validation is applied to this flag.
        this.noUnmatchedCloseElementsRequired = noUnmatchedCloseElementsRequired;
    }




    /**
     * 
     *   Returns whether element attributes will be required to be well-formed from the XML
     *   standpoint. This means:
     * 
     * 
     *   Attributes should always have a value.
     *   Attribute values should be surrounded by double-quotes.
     * 
     *
     * @return whether attributes should be XML-well-formed or not.
     */
    public boolean isXmlWellFormedAttributeValuesRequired() {
        // Plain accessor for the flag.
        return xmlWellFormedAttributeValuesRequired;
    }


    /**
     * 
     *   Specify whether element attributes will be required to be well-formed from the XML
     *   standpoint. This means:
     * 
     * 
     *   Attributes should always have a value.
     *   Attribute values should be surrounded by double-quotes.
     * 
     *
     * @param xmlWellFormedAttributeValuesRequired whether attributes should be XML-well-formed or not.
     */
    public void setXmlWellFormedAttributeValuesRequired(final boolean xmlWellFormedAttributeValuesRequired) {
        // Stored as given; no validation is applied to this flag.
        this.xmlWellFormedAttributeValuesRequired = xmlWellFormedAttributeValuesRequired;
    }




    /**
     * 
     *   Returns whether attributes should never appear duplicated in elements.
     * 
     *
     * @return whether attributes should never appear duplicated in elements.
     */
    public boolean isUniqueAttributesInElementRequired() {
        // Plain accessor for the flag.
        return uniqueAttributesInElementRequired;
    }


    /**
     * 
     *   Specify whether attributes should never appear duplicated in elements.
     * 
     *
     * @param uniqueAttributesInElementRequired whether attributes should never appear duplicated in elements.
     */
    public void setUniqueAttributesInElementRequired(
            final boolean uniqueAttributesInElementRequired) {
        // Stored as given; no validation is applied to this flag.
        this.uniqueAttributesInElementRequired = uniqueAttributesInElementRequired;
    }




    /**
     * 
     *   This value determines whether it will be required that the document has a unique
     *   root element.
     * 
     * 
     *   If set to {@link UniqueRootElementPresence#REQUIRED_ALWAYS}, then a document with
     *   more than one element at the root level will never be considered valid. And if
     *   {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()} is true and there is a DOCTYPE
     *   clause, it will be checked that the root name established at the DOCTYPE clause
     *   is the same as the document's element root.
     * 
     * 
     *   If set to {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}, then:
     * 
     * 
     *   If {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()} is false, multiple
     *       document root elements will be allowed.
     *   If {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()} is true:
     *       
     *         If there is a DOCTYPE clause, a unique element root will be required,
     *             and its name will be checked against the name specified at the DOCTYPE
     *             clause.
     *         If there is no DOCTYPE clause (even if it is forbidden), multiple 
     *             document root elements will be allowed.
     *       
     *   
     * 
     * 
     *   If set to {@link UniqueRootElementPresence#NOT_VALIDATED}, then nothing will be checked
     *   regarding the name of the root element/s.
     * 
     * 
     *   Default value is {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}.
     * 
     * 
     * @return the configuration value for validating the presence of a unique root element.
     */
    public UniqueRootElementPresence getUniqueRootElementPresence() {
        // Plain accessor for the configured value.
        return uniqueRootElementPresence;
    }


    /**
     * 
     *   This value determines whether it will be required that the document has a unique
     *   root element.
     * 
     * 
     *   If set to {@link UniqueRootElementPresence#REQUIRED_ALWAYS}, then a document with
     *   more than one element at the root level will never be considered valid. And if
     *   {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()} is true and there is a DOCTYPE
     *   clause, it will be checked that the root name established at the DOCTYPE clause
     *   is the same as the document's element root.
     * 
     * 
     *   If set to {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}, then:
     * 
     * 
     *   If {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()} is false, multiple
     *       document root elements will be allowed.
     *   If {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration#isValidateProlog()} is true:
     *       
     *         If there is a DOCTYPE clause, a unique element root will be required,
     *             and its name will be checked against the name specified at the DOCTYPE
     *             clause.
     *         If there is no DOCTYPE clause (even if it is forbidden), multiple
     *             document root elements will be allowed.
     *       
     *   
     * 
     * 
     *   If set to {@link UniqueRootElementPresence#NOT_VALIDATED}, then nothing will be checked
     *   regarding the name of the root element/s.
     * 
     * 
     *   Default value is {@link UniqueRootElementPresence#DEPENDS_ON_PROLOG_DOCTYPE}.
     * 
     *
     * @param uniqueRootElementPresence the configuration value for validating the presence of a unique root element.
     */
    public void setUniqueRootElementPresence(final UniqueRootElementPresence uniqueRootElementPresence) {
        // The incoming value is null-checked before it replaces the current setting.
        validateNotNull(
                uniqueRootElementPresence,
                "The \"unique root element presence\" configuration value cannot be null");
        this.uniqueRootElementPresence = uniqueRootElementPresence;
    }




    
    @Override
    public ParseConfiguration clone() throws CloneNotSupportedException {
        // Object.clone() copies every field as-is, which is all the boolean and
        // enum-typed fields of this class need.
        final ParseConfiguration copy = (ParseConfiguration) super.clone();
        // The prolog configuration is a mutable object, so the copy gets its own
        // instance instead of sharing the one referenced by this object.
        copy.prologParseConfiguration = this.prologParseConfiguration.clone();
        return copy;
    }


    /**
     * 
     *   Enumeration used for determining the parsing mode, which will affect the parser's behaviour.
     *   Values are XML and HTML.
     * 
     * 
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * 
     */
    public static enum ParsingMode {

        // HTML parsing rules.
        HTML,

        // XML parsing rules.
        XML

    }


    /**
     * 
     *   Enumeration used for determining whether an element in the document prolog (DOCTYPE, XML Declaration) or
     *   the prolog itself should be allowed, required or even forbidden.
     * 
     * 
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * 
     */
    public static enum PrologPresence {

        REQUIRED,
        ALLOWED,
        FORBIDDEN;

        // Each constant answers true to exactly one of the three queries below,
        // so the answer is derived from the constant's identity.

        public boolean isRequired() {
            return this == REQUIRED;
        }

        public boolean isAllowed() {
            return this == ALLOWED;
        }

        public boolean isForbidden() {
            return this == FORBIDDEN;
        }

    }


    /**
     * 
     *   Enumeration used for determining the behaviour the parser should have with respect to the presence and
     *   number of root elements in the parsed document.
     * 
     * 
     *   Root elements are the elements that appear at the root of the document (e.g. <html> in
     *   complete HTML documents). This enumeration allows requiring that the root element is unique always,
     *   requiring it only if a document prolog (XML Declaration or DOCTYPE) is present, or not validating
     *   this at all.
     * 
     * 
     *   This enumeration is used at the {@link org.attoparser.config.ParseConfiguration} class.
     * 
     */
    public static enum UniqueRootElementPresence {

        REQUIRED_ALWAYS,
        DEPENDS_ON_PROLOG_DOCTYPE,
        NOT_VALIDATED;

        // NOT_VALIDATED answers false to both queries; the other two constants
        // answer true only to the query named after them.

        public boolean isRequiredAlways() {
            return this == REQUIRED_ALWAYS;
        }

        public boolean isDependsOnPrologDoctype() {
            return this == DEPENDS_ON_PROLOG_DOCTYPE;
        }

    }
    
    
    
    
    

    /**
     * 
     *   Class encapsulating the configuration parameters used for parsing
     *   and validating the "prolog" section of a markup document. The prolog
     *   is the section of an XML/HTML document containing the XML declaration
     *   and the DOCTYPE clause (if these exist).
     * 
     * 
     *   If validateProlog is set to false, all other parameters
     *   should be ignored.
     * 
     * 
     *   If validateProlog is true, then the rest of the parameters
     *   will be considered.
     * 
     * 
     *   Not all combinations of values of the {@link #getPrologPresence()}, 
     *   {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()} 
     *   are considered valid. See {@link #validateConfiguration()} for details.
     * 
     * 
     * @author Daniel Fernández
     *  
     * @since 2.0.0
     */
    public static class PrologParseConfiguration implements Serializable, Cloneable {
        
        // Explicit serialization version identifier (this class implements Serializable).
        private static final long serialVersionUID = -4291053503740751549L;
        
        
        // Prolog validation is off by default.
        private boolean validateProlog = false;
        // Presence levels for the prolog as a whole and for each of its two parts.
        private PrologPresence prologPresence = PrologPresence.ALLOWED;
        private PrologPresence xmlDeclarationPresence = PrologPresence.ALLOWED;
        private PrologPresence doctypePresence = PrologPresence.ALLOWED;
        private boolean requireDoctypeKeywordsUpperCase = true;
        
        
        /**
         * 
         *   Creates a {@link org.attoparser.config.ParseConfiguration.PrologParseConfiguration} instance with
         *   a default configuration.
         * 
         * 
         *   Default values are:
         * 
         * 
         *   {@link #isValidateProlog()} = false
         *   {@link #getPrologPresence()} = {@link PrologPresence#ALLOWED}
         *   {@link #getXmlDeclarationPresence()} = {@link PrologPresence#ALLOWED}
         *   {@link #getDoctypePresence()} = {@link PrologPresence#ALLOWED}
         *   {@link #isRequireDoctypeKeywordsUpperCase()} = true
         * 
         * 
         */
        protected PrologParseConfiguration() {
            super();
        }

        /**
         * 
         *   This flag indicates whether the document's prolog should be validated
         *   at all or not. 
         * 
         * 
         *   If not validated, prolog-specific structures (XML Declaration
         *   and DOCTYPE) will be allowed to appear anywhere in the document.
         *   All other configuration parameters in this object will be ignored.
         * 
         * 
         *   If validated, prolog-specific structures will only be allowed to
         *   appear (under the conditions established in this object) at the
         *   beginning of the document, before the element root. Or if 
         *   {@link #getPrologPresence()} is set to {@link PrologPresence#FORBIDDEN},
         *   it will be validated that such structures do not appear at all.
         * 
         * 
         *   Also, if validated and a DOCTYPE is present, it will be checked
         *   that there is only one root element in the document and its name
         *   matches the root element name in the DOCTYPE clause.
         * 
         * 
         *   Default value is false.
         * 
         * 
         * @return whether prolog is to be validated or not.
         */
        public boolean isValidateProlog() {
            return this.validateProlog;
        }

        public void setValidateProlog(final boolean validateProlog) {
            this.validateProlog = validateProlog;
        }

        /**
         * 
         *  This flag indicates the level of presence desired for the prolog
         *  in the document, in case {@link #isValidateProlog()} has been set
         *  to true.
         * 
         * 
         * @return the level of presence desired for the prolog.
         */
        public PrologPresence getPrologPresence() {
            return this.prologPresence;
        }

        public void setPrologPresence(final PrologPresence prologPresence) {
            validateNotNull(this.prologPresence, "Prolog presence cannot be null");
            this.prologPresence = prologPresence;
        }

        /**
         * 
         *  This flag indicates the level of presence desired for the XML Declaration
         *  (a part of the prolog) in the document, in case {@link #isValidateProlog()} 
         *  has been set to true.
         * 
         * 
         * @return the level of presence desired for the XML Declaration.
         */
        public PrologPresence getXmlDeclarationPresence() {
            return this.xmlDeclarationPresence;
        }

        public void setXmlDeclarationPresence(final PrologPresence xmlDeclarationPresence) {
            validateNotNull(this.prologPresence, "XML Declaration presence cannot be null");
            this.xmlDeclarationPresence = xmlDeclarationPresence;
        }

        /**
         * 
         *  This flag indicates the level of presence desired for the DOCTYPE clause
         *  (a part of the prolog) in the document, in case {@link #isValidateProlog()} 
         *  has been set to true.
         * 
         * 
         * @return the level of presence desired for the DOCTYPE clause.
         */
        public PrologPresence getDoctypePresence() {
            return this.doctypePresence;
        }

        public void setDoctypePresence(final PrologPresence doctypePresence) {
            validateNotNull(this.prologPresence, "DOCTYPE presence cannot be null");
            this.doctypePresence = doctypePresence;
        }

        /**
         * 
         *   This configuration parameter allows to check that all keywords in
         *   a DOCTYPE clause ('DOCTYPE', 'SYSTEM', 'PUBLIC') are in upper-case as
         *   required by the XML specification (and not by the HTML5 one, for example).
         * 
         * 
         *   Default value is true, but it will apply only if
         *   {@link #isValidateProlog()} is true.
         * 
         * 
         * @return whether keywords in the DOCTYPE clause will be forced to be
         *         in upper-case.
         */
        public boolean isRequireDoctypeKeywordsUpperCase() {
            return this.requireDoctypeKeywordsUpperCase;
        }

        public void setRequireDoctypeKeywordsUpperCase(final boolean requireDoctypeKeywordsUpperCase) {
            this.requireDoctypeKeywordsUpperCase = requireDoctypeKeywordsUpperCase;
        }


        
        /**
         * 
         *   Checks that the combination of values in the {@link #getPrologPresence()}, 
         *   {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()} 
         *   parameters makes sense.
         * 
         * 
         *   If {@link #getPrologPresence()} is {@link PrologPresence#FORBIDDEN}, then
         *       {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()} must
         *       be {@link PrologPresence#FORBIDDEN} too.
         *   Else if at least one of {@link #getXmlDeclarationPresence()} or
         *       {@link #getDoctypePresence()} is {@link PrologPresence#REQUIRED}, the
         *       configuration is considered valid.
         *   Else if {@link #getPrologPresence()} is {@link PrologPresence#ALLOWED}, 
         *       the configuration is considered valid as long as not both
         *       {@link #getXmlDeclarationPresence()} and {@link #getDoctypePresence()}
         *       are {@link PrologPresence#FORBIDDEN}.
         * 
         * 
         * @throws IllegalArgumentException if the combination of values is not correct.
         */
        public void validateConfiguration() {

            /*
             * 
             *   1. PROLOG: REQUIRED, ALLOWED, FORBIDDEN
             *   2. XMLDECL : REQUIRED, ALLOWED, FORBIDDEN
             *   3. DOCTYPE : REQUIRED, ALLOWED, FORBIDDEN
             *   
             *   VALID: (RRR, RRA, RRF, RAR, RFR, ARR, ARA, ARF, AAR, AAA, AAF, AFR, AFA, FFF)
             *   NOT VALID: (RAA, RAF, RFA, RFF, AFF, FRR, FRA, FRF, FAR, FAA, FAF, FFR, FFA)
             *   
             *   FORMULA:
             *   [IF (* = F__) -> RET (* = _FF); IF (* = _R_ OR * = __R) -> RET TRUE; IF (* = A__) -> RET (* != _FF); RET FALSE] 
             * 
             */
            
            if (!this.validateProlog) {
                // There's nothing to check here!
                return;
            }
            
            if (PrologPresence.FORBIDDEN.equals(this.prologPresence)) {
                if (PrologPresence.FORBIDDEN.equals(this.xmlDeclarationPresence) && 
                        PrologPresence.FORBIDDEN.equals(this.doctypePresence)) {
                    return;
                }
            } else {
                if (PrologPresence.REQUIRED.equals(this.xmlDeclarationPresence) ||
                        PrologPresence.REQUIRED.equals(this.doctypePresence)) {
                    return;
                }
                if (PrologPresence.ALLOWED.equals(this.prologPresence)) {
                    if (!(PrologPresence.FORBIDDEN.equals(this.xmlDeclarationPresence) && 
                            PrologPresence.FORBIDDEN.equals(this.doctypePresence))) {
                        return;
                    }
                }
            }
            
            throw new IllegalArgumentException(
                    "Prolog parsing configuration is not valid: " +
                    "Prolog presence: " + this.prologPresence + ", " +
                    "XML Declaration presence: " + this.xmlDeclarationPresence + ", " +
                    "DOCTYPE presence: " + this.doctypePresence);
            
        }

        
        
        @Override
        public PrologParseConfiguration clone() throws CloneNotSupportedException {
            final PrologParseConfiguration conf = (PrologParseConfiguration) super.clone();
            conf.validateProlog = this.validateProlog;
            conf.prologPresence = this.prologPresence;
            conf.doctypePresence = this.doctypePresence;
            conf.xmlDeclarationPresence = this.xmlDeclarationPresence;
            conf.requireDoctypeKeywordsUpperCase = this.requireDoctypeKeywordsUpperCase;
            return conf;
        }
        
        
    }

    
    
    /**
     * <p>
     *   Argument guard: does nothing when the given object is non-null, and
     *   otherwise rejects it with the specified message.
     * </p>
     *
     * @param obj the object to be checked.
     * @param message the message for the exception raised when the object is null.
     * @throws IllegalArgumentException if the object is null.
     */
    private static void validateNotNull(final Object obj, final String message) {
        if (obj != null) {
            return;
        }
        throw new IllegalArgumentException(message);
    }
    
        
}