src.java.net.htmlparser.jericho.Config Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of com.liferay.wiki.service Show documentation
Liferay Wiki Service
There is a newer version: 5.0.84
Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.

package net.htmlparser.jericho;

import java.util.*;

/**
 * Encapsulates global configuration properties which determine the behaviour of various functions.
 * <p>
 * All of the properties in this class are static, affecting all objects and threads.
 * Multiple concurrent configurations are not possible.
 * <p>
 * Properties that relate to user agent
 * compatibility issues are stored in instances of the {@link Config.CompatibilityMode} class.
 * This allows all of the properties in the compatibility mode to be set as a block by setting the static
 * {@link #CurrentCompatibilityMode} property to a different instance.
 *
 * @see Config.CompatibilityMode
 */
public final class Config {
	/**
	 * Not instantiable: every property of this class is static.
	 */
	private Config() {
		// intentionally empty
	}

	/**
	 * Determines the string used to separate a single column's multiple values in the output of the {@link FormFields#getColumnValues(Map)} method.
	 * <p>
	 * The situation where a single column has multiple values only arises if {@link FormField#getUserValueCount()}<code>&gt;1</code>
	 * on the relevant form field, which usually indicates a poorly designed form.
	 * <p>
	 * The default value is <code>","</code> (a comma, not including the quotes).
	 * <p>
	 * Must not be <code>null</code>.
	 */
	public static String ColumnMultipleValueSeparator=",";

	/**
	 * Determines the string that represents the value <code>true</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
	 * <p>
	 * The default value is <code>"true"</code> (without the quotes).
	 * <p>
	 * Must not be <code>null</code>.
	 */
	public static String ColumnValueTrue=Boolean.toString(true);

	/**
	 * Determines the string that represents the value <code>false</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
	 * <p>
	 * The default value is <code>null</code>, which represents no output at all.
	 */
	public static String ColumnValueFalse=null;

	/**
	 * Determines whether the {@link CharacterReference#decode(CharSequence)} and similar methods convert non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character references to normal spaces.
	 * <p>
	 * The default value is <code>true</code>.
	 * <p>
	 * When this property is set to <code>false</code>, non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;})
	 * character references are decoded as non-breaking space characters (U+00A0) instead of being converted to normal spaces (U+0020).
	 * <p>
	 * The default behaviour of the library reflects the fact that non-breaking space character references are almost always used in HTML documents
	 * as a non-collapsing white space character.
	 * Converting them to the correct character code U+00A0, which is represented by a visible character in many older character sets, was confusing to most users
	 * who expected to see only normal spaces.
	 * The most common example of this is its visualisation as the character <code>&aacute;</code> in the MS-DOS CP437 character set.
	 * <p>
	 * The functionality of the following methods is affected:
	 * <ul>
	 *  <li>{@link CharacterReference#appendCharTo(Appendable)}</li>
	 *  <li>{@link CharacterReference#decode(CharSequence)}</li>
	 *  <li>{@link CharacterReference#decode(CharSequence, boolean insideAttributeValue)}</li>
	 *  <li>{@link CharacterReference#decodeCollapseWhiteSpace(CharSequence)}</li>
	 *  <li>{@link CharacterReference#reencode(CharSequence)}</li>
	 *  <li>{@link Attribute#getValue()}</li>
	 *  <li>{@link Attributes#getValue(String name)}</li>
	 *  <li>{@link Attributes#populateMap(Map, boolean convertNamesToLowerCase)}</li>
	 *  <li>{@link StartTag#getAttributeValue(String attributeName)}</li>
	 *  <li>{@link Element#getAttributeValue(String attributeName)}</li>
	 *  <li>{@link FormControl#getPredefinedValues()}</li>
	 *  <li>{@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)}</li>
	 *  <li>{@link Renderer#getConvertNonBreakingSpaces()}</li>
	 *  <li>{@link TextExtractor#getConvertNonBreakingSpaces()}</li>
	 * </ul>
	 */
	public static boolean ConvertNonBreakingSpaces=true;


	/**
	 * Determines the currently active {@linkplain Config.CompatibilityMode compatibility mode}.
	 * <p>
	 * The default setting is {@link Config.CompatibilityMode#IE} (MS Internet Explorer 6.0).
	 * <p>
	 * Must not be <code>null</code>.
	 */
	public static CompatibilityMode CurrentCompatibilityMode=CompatibilityMode.IE;

	/**
	 * Determines whether apostrophes are encoded when calling the {@link CharacterReference#encode(CharSequence)} method.
	 * <p>
	 * A value of <code>false</code> means {@linkplain CharacterEntityReference#_apos apostrophe}
	 * (U+0027) characters are not encoded.
	 * The only time apostrophes need to be encoded is within an attribute value delimited by
	 * single quotes (apostrophes), so in most cases ignoring apostrophes is perfectly safe and
	 * enhances the readability of the source document.
	 * <p>
	 * Note that apostrophes are always encoded as a {@linkplain NumericCharacterReference numeric character reference}, never as the
	 * character entity reference {@link CharacterEntityReference#_apos &amp;apos;}.
	 * <p>
	 * The default value is <code>false</code>.
	 */
	public static boolean IsApostropheEncoded=false;

	/**
	 * Determines the {@link LoggerProvider} that is used to create the default {@link Logger} object for each new {@link Source} object.
	 * <p>
	 * The {@link LoggerProvider} interface contains several predefined <code>LoggerProvider</code> instances which this property can be set to,
	 * mostly representing wrappers to common logging frameworks.
	 * <p>
	 * The default value is <code>null</code>, which results in the auto-detection of the most appropriate logging mechanism according to the following algorithm:
	 * <ol>
	 *  <li>If the class <code>org.slf4j.impl.StaticLoggerBinder</code> is detected:
	 *   <ul>
	 *    <li>If the class <code>org.slf4j.impl.JDK14LoggerFactory</code> is detected, use {@link LoggerProvider#JAVA}.</li>
	 *    <li>If the class <code>org.slf4j.impl.Log4jLoggerFactory</code> is detected, use {@link LoggerProvider#LOG4J}.</li>
	 *    <li>If the class <code>org.slf4j.impl.JCLLoggerFactory</code> is NOT detected, use {@link LoggerProvider#SLF4J}.</li>
	 *   </ul>
	 *  </li>
	 *  <li>If the class <code>org.apache.commons.logging.Log</code> is detected:
	 *   <ul>
	 *    <li>Create an instance of it using the commons-logging <code>LogFactory</code> class.
	 *     <ul>
	 *      <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Jdk14Logger</code>, use {@link LoggerProvider#JAVA}.</li>
	 *      <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Log4JLogger</code>, use {@link LoggerProvider#LOG4J}.</li>
	 *      <li>otherwise, use {@link LoggerProvider#JCL}.</li>
	 *     </ul>
	 *    </li>
	 *   </ul>
	 *  </li>
	 *  <li>If the class <code>org.apache.log4j.Logger</code> is detected, use {@link LoggerProvider#LOG4J}.</li>
	 *  <li>otherwise, use {@link LoggerProvider#JAVA}.</li>
	 * </ol>
	 *
	 * @see Source#setLogger(Logger)
	 */
	public static LoggerProvider LoggerProvider=null;

	/**
	 * Determines the string used to represent a newline in text output throughout the library.
	 * <p>
	 * The default value is the standard new line character sequence of the host platform, determined by <code>System.getProperty("line.separator")</code>.
	 */
	public static String NewLine=System.getProperty("line.separator");

	/**
	 * Used in <code>Element.getChildElements</code>.
	 * <p>
	 * Server elements containing markup should be included in the hierarchy, so consider making this option public in future.
	 * For now it is a package-private constant fixed at <code>false</code>.
	 */
	static final boolean IncludeServerTagsInElementHierarchy=false;

	/**
	 * Represents a set of maximum unicode code points to be recognised for the three types of
	 * unterminated character reference in a given context.
	 * <p>
	 * The three types of character reference are:
	 * <ul>
	 *  <li>{@linkplain CharacterEntityReference Character entity reference}</li>
	 *  <li>Decimal character reference</li>
	 *  <li>Hexadecimal character reference</li>
	 * </ul>
	 * <p>
	 * The two types of contexts used in this library are:
	 * <ul>
	 *  <li>Inside an attribute value</li>
	 *  <li>Outside an attribute value</li>
	 * </ul>
	 */
	static class UnterminatedCharacterReferenceSettings {
		// The fields are volatile so that a value written through one of the CompatibilityMode
		// setters is visible to other threads; volatile provides visibility only, not atomic
		// updates of the three values as a group.
		public volatile int characterEntityReferenceMaxCodePoint;
		public volatile int decimalCharacterReferenceMaxCodePoint;
		public volatile int hexadecimalCharacterReferenceMaxCodePoint;

		/**
		 * Shared instance whose three limits are all {@link CompatibilityMode#CODE_POINTS_ALL}.
		 * The instance is mutable, so changing its fields affects every user of this constant.
		 */
		public static UnterminatedCharacterReferenceSettings ACCEPT_ALL=new UnterminatedCharacterReferenceSettings(CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL);

		/**
		 * Constructs settings in which all three limits are {@link CompatibilityMode#CODE_POINTS_NONE}.
		 */
		public UnterminatedCharacterReferenceSettings() {
			this(CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE);
		}

		/**
		 * Constructs settings with the specified limits.
		 *
		 * @param characterEntityReferenceMaxCodePoint  the maximum code point for unterminated character entity references
		 * @param decimalCharacterReferenceMaxCodePoint  the maximum code point for unterminated decimal character references
		 * @param hexadecimalCharacterReferenceMaxCodePoint  the maximum code point for unterminated hexadecimal character references
		 */
		public UnterminatedCharacterReferenceSettings(final int characterEntityReferenceMaxCodePoint, final int decimalCharacterReferenceMaxCodePoint, final int hexadecimalCharacterReferenceMaxCodePoint) {
			this.characterEntityReferenceMaxCodePoint=characterEntityReferenceMaxCodePoint;
			this.decimalCharacterReferenceMaxCodePoint=decimalCharacterReferenceMaxCodePoint;
			this.hexadecimalCharacterReferenceMaxCodePoint=hexadecimalCharacterReferenceMaxCodePoint;
		}

		/**
		 * Returns a three-line description of the limits, each line preceded by {@link Config#NewLine}
		 * and indented by four spaces, suitable for appending directly after a heading.
		 *
		 * @return a multi-line description of the three limits.
		 */
		@Override
		public String toString() {
			return Config.NewLine+"    Character entity reference: "+getDescription(characterEntityReferenceMaxCodePoint)
						+Config.NewLine+"    Decimal character reference: "+getDescription(decimalCharacterReferenceMaxCodePoint)
						+Config.NewLine+"    Hexadecimal character reference: "+getDescription(hexadecimalCharacterReferenceMaxCodePoint);
		}

		/**
		 * Describes a limit as "None", "All", or its value in hexadecimal with a "0x" prefix.
		 *
		 * @param codePoint  the limit to describe
		 * @return the description of the limit.
		 */
		private static String getDescription(final int codePoint) {
			if (codePoint==CompatibilityMode.CODE_POINTS_NONE) return "None";
			if (codePoint==CompatibilityMode.CODE_POINTS_ALL) return "All";
			return "0x"+Integer.toString(codePoint,16);
		}
	}

	/**
	 * Represents a set of configuration parameters that relate to
	 * user agent compatibility issues.
	 * <p>
	 * The predefined compatibility modes {@link #IE}, {@link #MOZILLA}, {@link #OPERA} and {@link #XHTML} provide an easy means of
	 * ensuring the library interprets the markup in a way consistent with some of the most commonly used browsers,
	 * at least in relation to the behaviour described by the properties in this class.
	 * <p>
	 * The properties of any <code>CompatibilityMode</code> object can be modified individually, including those in
	 * the predefined instances as well as newly constructed instances.
	 * Take note however that modifying the properties of the predefined instances has a global effect.
	 * <p>
	 * The currently active compatibility mode is stored in the static {@link Config#CurrentCompatibilityMode} property.
	 */
	public static final class CompatibilityMode {
		// final: assigned only in the constructor, and final fields are safely visible to any thread
		// that obtains the instance, even via the non-volatile Config.CurrentCompatibilityMode field.
		private final String name;
		private volatile boolean formFieldNameCaseInsensitive;
		volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsInsideAttributeValue;
		volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsOutsideAttributeValue;

		/**
		 * Indicates the recognition of all unicode code points.
		 * <p>
		 * This value is used in properties which specify a maximum unicode code point to be recognised by the parser.
		 *
		 * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
		 * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
		 * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
		 */
		public static final int CODE_POINTS_ALL=Character.MAX_CODE_POINT; // 0x10FFFF (decimal 1114111)

		/**
		 * Indicates the recognition of no unicode code points.
		 * <p>
		 * This value is used in properties which specify a maximum unicode code point to be recognised by the parser.
		 *
		 * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
		 * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
		 * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
		 */
		public static final int CODE_POINTS_NONE=CharacterReference.INVALID_CODE_POINT;

		/**
		 * Microsoft Internet Explorer compatibility mode.
		 * <p>
		 * <code>{@link #getName() Name} = IE</code><br>
		 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true</code><br>
		 * <table>
		 *  <caption>Recognition of unterminated character references</caption>
		 *  <tr><th>Property</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
		 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}</td><td>U+00FF</td><td>U+00FF</td></tr>
		 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
		 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
		 * </table>
		 */
		public static final CompatibilityMode IE=new CompatibilityMode("IE",true,
			new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
			new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_NONE) // outside attributes
		);

		/**
		 * Mozilla /
		 * Firefox /
		 * Netscape compatibility mode.
		 * <p>
		 * <code>{@link #getName() Name} = Mozilla</code><br>
		 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false</code><br>
		 * <table>
		 *  <caption>Recognition of unterminated character references</caption>
		 *  <tr><th>Property</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
		 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}</td><td>U+00FF</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
		 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
		 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
		 * </table>
		 */
		public static final CompatibilityMode MOZILLA=new CompatibilityMode("Mozilla",false,
			new UnterminatedCharacterReferenceSettings(0xFF,            CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
			new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes
		);

		/**
		 * Opera compatibility mode.
		 * <p>
		 * <code>{@link #getName() Name} = Opera</code><br>
		 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true</code><br>
		 * <table>
		 *  <caption>Recognition of unterminated character references</caption>
		 *  <tr><th>Property</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
		 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}</td><td>U+003E</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
		 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
		 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
		 * </table>
		 */
		public static final CompatibilityMode OPERA=new CompatibilityMode("Opera",true,
			new UnterminatedCharacterReferenceSettings(0x3E,            CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
			new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes
		);

		/**
		 * XHTML compatibility mode.
		 * <p>
		 * <code>{@link #getName() Name} = XHTML</code><br>
		 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false</code><br>
		 * <table>
		 *  <caption>Recognition of unterminated character references</caption>
		 *  <tr><th>Property</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
		 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_NONE None}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
		 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_NONE None}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
		 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}</td><td>{@linkplain #CODE_POINTS_NONE None}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
		 * </table>
		 */
		public static final CompatibilityMode XHTML=new CompatibilityMode("XHTML");

		/**
		 * Constructs a new <code>CompatibilityMode</code> with the given {@linkplain #getName() name}.
		 * <p>
		 * All properties in the new instance are initially assigned their default values, which are the same as the strict
		 * rules of the {@link #XHTML} compatibility mode.
		 *
		 * @param name  the {@linkplain #getName() name} of the new compatibility mode
		 */
		public CompatibilityMode(final String name) {
			this(name,false,new UnterminatedCharacterReferenceSettings(),new UnterminatedCharacterReferenceSettings());
		}

		/**
		 * Constructs a fully specified compatibility mode; used for the predefined instances.
		 *
		 * @param name  the {@linkplain #getName() name} of the new compatibility mode
		 * @param formFieldNameCaseInsensitive  the initial value of the {@link #isFormFieldNameCaseInsensitive()} property
		 * @param unterminatedCharacterReferenceSettingsInsideAttributeValue  the limits that apply inside attribute values
		 * @param unterminatedCharacterReferenceSettingsOutsideAttributeValue  the limits that apply outside attribute values
		 */
		private CompatibilityMode(final String name, final boolean formFieldNameCaseInsensitive, final UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsInsideAttributeValue, final UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsOutsideAttributeValue) {
			this.name=name;
			this.formFieldNameCaseInsensitive=formFieldNameCaseInsensitive;
			this.unterminatedCharacterReferenceSettingsInsideAttributeValue=unterminatedCharacterReferenceSettingsInsideAttributeValue;
			this.unterminatedCharacterReferenceSettingsOutsideAttributeValue=unterminatedCharacterReferenceSettingsOutsideAttributeValue;
		}

		/**
		 * Returns the name of this compatibility mode.
		 * @return the name of this compatibility mode.
		 */
		public String getName() {
			return name;
		}

		/**
		 * Indicates whether {@linkplain FormField#getName() form field names} are treated as case insensitive.
		 * <p>
		 * Microsoft Internet Explorer treats field names as case insensitive,
		 * while Mozilla treats them as case sensitive.
		 * <p>
		 * The value of this property in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}
		 * affects all instances of the {@link FormFields} class.
		 * It should be set to the desired configuration before any instances of <code>FormFields</code> are created.
		 *
		 * @return <code>true</code> if {@linkplain FormField#getName() form field names} are treated as case insensitive, otherwise <code>false</code>.
		 * @see #setFormFieldNameCaseInsensitive(boolean)
		 */
		public boolean isFormFieldNameCaseInsensitive() {
			return formFieldNameCaseInsensitive;
		}

		/**
		 * Sets whether {@linkplain FormField#getName() form field names} are treated as case insensitive.
		 * <p>
		 * See {@link #isFormFieldNameCaseInsensitive()} for the documentation of this property.
		 *
		 * @param value  the new value of the property
		 */
		public void setFormFieldNameCaseInsensitive(final boolean value) {
			formFieldNameCaseInsensitive=value;
		}

		/**
		 * Returns the maximum unicode code point of an unterminated
		 * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
		 * <p>
		 * For example, if <code>getUnterminatedCharacterEntityReferenceMaxCodePoint(true)</code> has the value <code>0xFF</code> (U+00FF)
		 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
		 * <ul>
		 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;gt",true)}
		 *   returns <code>"&gt;"</code>.<br>
		 *   The string is recognised as the character entity reference {@link CharacterEntityReference#_gt &amp;gt;}
		 *   despite the fact that it is unterminated,
		 *   because its unicode code point U+003E is below the maximum of U+00FF set by this property.</li>
		 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;euro",true)}
		 *   returns <code>"&amp;euro"</code>.<br>
		 *   The string is not recognised as the character entity reference {@link CharacterEntityReference#_euro &amp;euro;}
		 *   because it is unterminated
		 *   and its unicode code point U+20AC is above the maximum of U+00FF set by this property.</li>
		 * </ul>
		 * <p>
		 * See the documentation of the {@link Attribute#getValue()} method for further discussion.
		 *
		 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
		 * @return the maximum unicode code point of an unterminated {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
		 * @see #setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
		 */
		public int getUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue) {
			return getUnterminatedCharacterReferenceSettings(insideAttributeValue).characterEntityReferenceMaxCodePoint;
		}

		/**
		 * Sets the maximum unicode code point of an unterminated
		 * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
		 * <p>
		 * See {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
		 *
		 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
		 * @param maxCodePoint  the maximum unicode code point.
		 */
		public void setUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
			getUnterminatedCharacterReferenceSettings(insideAttributeValue).characterEntityReferenceMaxCodePoint=maxCodePoint;
		}

		/**
		 * Returns the maximum unicode code point of an unterminated
		 * decimal character reference which is to be recognised in the specified context.
		 * <p>
		 * For example, if <code>getUnterminatedDecimalCharacterReferenceMaxCodePoint(true)</code> had the hypothetical value <code>0xFF</code> (U+00FF)
		 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
		 * <ul>
		 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#62",true)}
		 *   returns <code>"&gt;"</code>.<br>
		 *   The string is recognised as the numeric character reference <code>&amp;#62;</code>
		 *   despite the fact that it is unterminated,
		 *   because its unicode code point U+003E is below the maximum of U+00FF set by this property.</li>
		 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#8364",true)}
		 *   returns <code>"&amp;#8364"</code>.<br>
		 *   The string is not recognised as the numeric character reference <code>&amp;#8364;</code>
		 *   because it is unterminated
		 *   and its unicode code point U+20AC is above the maximum of U+00FF set by this property.</li>
		 * </ul>
		 *
		 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
		 * @return the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.
		 * @see #setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
		 */
		public int getUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) {
			return getUnterminatedCharacterReferenceSettings(insideAttributeValue).decimalCharacterReferenceMaxCodePoint;
		}

		/**
		 * Sets the maximum unicode code point of an unterminated
		 * decimal character reference which is to be recognised in the specified context.
		 * <p>
		 * See {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
		 *
		 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
		 * @param maxCodePoint  the maximum unicode code point.
		 */
		public void setUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
			getUnterminatedCharacterReferenceSettings(insideAttributeValue).decimalCharacterReferenceMaxCodePoint=maxCodePoint;
		}

		/**
		 * Returns the maximum unicode code point of an unterminated
		 * hexadecimal character reference which is to be recognised in the specified context.
		 * <p>
		 * For example, if <code>getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(true)</code> had the hypothetical value <code>0xFF</code> (U+00FF)
		 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
		 * <ul>
		 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#x3e",true)}
		 *   returns <code>"&gt;"</code>.<br>
		 *   The string is recognised as the numeric character reference <code>&amp;#x3e;</code>
		 *   despite the fact that it is unterminated,
		 *   because its unicode code point U+003E is below the maximum of U+00FF set by this property.</li>
		 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#x20ac",true)}
		 *   returns <code>"&amp;#x20ac"</code>.<br>
		 *   The string is not recognised as the numeric character reference <code>&amp;#x20ac;</code>
		 *   because it is unterminated
		 *   and its unicode code point U+20AC is above the maximum of U+00FF set by this property.</li>
		 * </ul>
		 *
		 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
		 * @return the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.
		 * @see #setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
		 */
		public int getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) {
			return getUnterminatedCharacterReferenceSettings(insideAttributeValue).hexadecimalCharacterReferenceMaxCodePoint;
		}

		/**
		 * Sets the maximum unicode code point of an unterminated
		 * hexadecimal character reference which is to be recognised in the specified context.
		 * <p>
		 * See {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
		 *
		 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
		 * @param maxCodePoint  the maximum unicode code point.
		 */
		public void setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
			getUnterminatedCharacterReferenceSettings(insideAttributeValue).hexadecimalCharacterReferenceMaxCodePoint=maxCodePoint;
		}

		/**
		 * Returns a string representation of this object useful for debugging purposes.
		 * @return a string representation of this object useful for debugging purposes.
		 */
		public String getDebugInfo() {
			return "Form field name case insensitive: "+formFieldNameCaseInsensitive
				+Config.NewLine+"Maximum codepoints in unterminated character references:"
				+Config.NewLine+"  Inside attribute values:"
				+unterminatedCharacterReferenceSettingsInsideAttributeValue
				+Config.NewLine+"  Outside attribute values:"
				+unterminatedCharacterReferenceSettingsOutsideAttributeValue;
		}

		/**
		 * Returns the {@linkplain #getName() name} of this compatibility mode.
		 * @return the {@linkplain #getName() name} of this compatibility mode.
		 */
		@Override
		public String toString() {
			return getName();
		}

		/**
		 * Selects the settings object for the specified context.
		 *
		 * @param insideAttributeValue  <code>true</code> to select the inside-attribute-value settings, <code>false</code> to select the outside-attribute-value settings.
		 * @return the settings object for the specified context.
		 */
		UnterminatedCharacterReferenceSettings getUnterminatedCharacterReferenceSettings(final boolean insideAttributeValue) {
			return insideAttributeValue ? unterminatedCharacterReferenceSettingsInsideAttributeValue : unterminatedCharacterReferenceSettingsOutsideAttributeValue;
		}
	}
}
Recognition of unterminated character references:		(inside attribute)	(outside attribute) *
{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}	`=`	U+00FF	U+00FF *
{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}	`=`	{@linkplain #CODE_POINTS_ALL All}	{@linkplain #CODE_POINTS_ALL All} *
{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}	`=`	{@linkplain #CODE_POINTS_ALL All}	{@linkplain #CODE_POINTS_NONE None} *
Recognition of unterminated character references:		(inside attribute)	(outside attribute) *
{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}	`=`	U+003E	{@linkplain #CODE_POINTS_ALL All} *
{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}	`=`	{@linkplain #CODE_POINTS_ALL All}	{@linkplain #CODE_POINTS_ALL All} *
{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}	`=`	{@linkplain #CODE_POINTS_ALL All}	{@linkplain #CODE_POINTS_ALL All} *
Recognition of unterminated character references:		(inside attribute)	(outside attribute) *
{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint}	`=`	{@linkplain #CODE_POINTS_NONE None}	{@linkplain #CODE_POINTS_NONE None} *
{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint}	`=`	{@linkplain #CODE_POINTS_NONE None}	{@linkplain #CODE_POINTS_NONE None} *
{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint}	`=`	{@linkplain #CODE_POINTS_NONE None}	{@linkplain #CODE_POINTS_NONE None} *