// src.java.net.htmlparser.jericho.Config (Maven / Gradle / Ivy)
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 3.1
// Copyright (C) 2004-2009 Martin Jericho
// http://jericho.htmlparser.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of either one of the following licences:
//
// 1. The Eclipse Public License (EPL) version 1.0,
// included in this distribution in the file licence-epl-1.0.html
// or available at http://www.eclipse.org/legal/epl-v10.html
//
// 2. The GNU Lesser General Public License (LGPL) version 2.1 or later,
// included in this distribution in the file licence-lgpl-2.1.txt
// or available at http://www.gnu.org/licenses/lgpl.txt
//
// This library is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the individual licence texts for more details.
package net.htmlparser.jericho;
import java.util.*;
/**
* Encapsulates global configuration properties which determine the behaviour of various functions.
*
* All of the properties in this class are static, affecting all objects and threads.
* Multiple concurrent configurations are not possible.
*
* Properties that relate to user agent
* compatibility issues are stored in instances of the {@link Config.CompatibilityMode} class.
* This allows all of the properties in the compatibility mode to be set as a block by setting the static
* {@link #CurrentCompatibilityMode} property to a different instance.
*
* @see Config.CompatibilityMode
*/
public final class Config {
// Private no-argument constructor: every member of this class is static, so no instance is ever created.
private Config() {}
/**
 * Determines the string used to separate a single column's multiple values in the output of the {@link FormFields#getColumnValues(Map)} method.
 * <p>
 * The situation where a single column has multiple values only arises if {@link FormField#getUserValueCount()}<code>&gt;1</code>
 * on the relevant form field, which usually indicates a poorly designed form.
 * <p>
 * The default value is "<code>,</code>" (a comma, not including the quotes).
 * <p>
 * Must not be <code>null</code>.
 */
public static String ColumnMultipleValueSeparator=",";
/**
 * Determines the string that represents the value <code>true</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
 * <p>
 * The default value is "<code>true</code>" (without the quotes).
 * <p>
 * Must not be <code>null</code>.
 */
public static String ColumnValueTrue=Boolean.toString(true);
/**
 * Determines the string that represents the value <code>false</code> in the output of the {@link FormFields#getColumnValues(Map)} method.
 * <p>
 * The default value is <code>null</code>, which represents no output at all.
 */
public static String ColumnValueFalse=null;
/**
 * Determines whether the {@link CharacterReference#decode(CharSequence)} and similar methods convert non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;}) character references to normal spaces.
 * <p>
 * The default value is <code>true</code>.
 * <p>
 * When this property is set to <code>false</code>, non-breaking space ({@link CharacterEntityReference#_nbsp &amp;nbsp;})
 * character references are decoded as non-breaking space characters (U+00A0) instead of being converted to normal spaces (U+0020).
 * <p>
 * The default behaviour of the library reflects the fact that non-breaking space character references are almost always used in HTML documents
 * as a non-collapsing white space character.
 * Converting them to the correct character code U+00A0, which is represented by a visible character in many older character sets, was confusing to most users
 * who expected to see only normal spaces.
 * The most common example of this is its visualisation as the character &aacute; in the MS-DOS CP437 character set.
 * <p>
 * The functionality of the following methods is affected:
 * <ul>
 *  <li>{@link CharacterReference#appendCharTo(Appendable)}</li>
 *  <li>{@link CharacterReference#decode(CharSequence)}</li>
 *  <li>{@link CharacterReference#decode(CharSequence, boolean insideAttributeValue)}</li>
 *  <li>{@link CharacterReference#decodeCollapseWhiteSpace(CharSequence)}</li>
 *  <li>{@link CharacterReference#reencode(CharSequence)}</li>
 *  <li>{@link Attribute#getValue()}</li>
 *  <li>{@link Attributes#getValue(String name)}</li>
 *  <li>{@link Attributes#populateMap(Map, boolean convertNamesToLowerCase)}</li>
 *  <li>{@link StartTag#getAttributeValue(String attributeName)}</li>
 *  <li>{@link Element#getAttributeValue(String attributeName)}</li>
 *  <li>{@link FormControl#getPredefinedValues()}</li>
 *  <li>{@link OutputDocument#replace(Attributes, boolean convertNamesToLowerCase)}</li>
 *  <li>{@link Renderer#getConvertNonBreakingSpaces()}</li>
 *  <li>{@link TextExtractor#getConvertNonBreakingSpaces()}</li>
 * </ul>
 */
public static boolean ConvertNonBreakingSpaces=true;
/**
 * Determines the currently active {@linkplain Config.CompatibilityMode compatibility mode}.
 * <p>
 * The default setting is {@link Config.CompatibilityMode#IE} (MS Internet Explorer 6.0).
 * <p>
 * Must not be <code>null</code>.
 */
public static CompatibilityMode CurrentCompatibilityMode=CompatibilityMode.IE;
/**
 * Determines whether apostrophes are encoded when calling the {@link CharacterReference#encode(CharSequence)} method.
 * <p>
 * A value of <code>false</code> means {@linkplain CharacterEntityReference#_apos apostrophe}
 * (U+0027) characters are not encoded.
 * The only time apostrophes need to be encoded is within an attribute value delimited by
 * single quotes (apostrophes), so in most cases ignoring apostrophes is perfectly safe and
 * enhances the readability of the source document.
 * <p>
 * Note that apostrophes are always encoded as a {@linkplain NumericCharacterReference numeric character reference}, never as the
 * character entity reference {@link CharacterEntityReference#_apos &amp;apos;}.
 * <p>
 * The default value is <code>false</code>.
 */
public static boolean IsApostropheEncoded=false;
/**
 * Determines the {@link LoggerProvider} that is used to create the default {@link Logger} object for each new {@link Source} object.
 * <p>
 * The {@link LoggerProvider} interface contains several predefined <code>LoggerProvider</code> instances which this property can be set to,
 * mostly representing wrappers to common logging frameworks.
 * <p>
 * The default value is <code>null</code>, which results in the auto-detection of the most appropriate logging mechanism according to the following algorithm:
 * <ol>
 *  <li>If the class <code>org.slf4j.impl.StaticLoggerBinder</code> is detected:
 *   <ul>
 *    <li>If the class <code>org.slf4j.impl.JDK14LoggerFactory</code> is detected, use {@link LoggerProvider#JAVA}.</li>
 *    <li>If the class <code>org.slf4j.impl.Log4jLoggerFactory</code> is detected, use {@link LoggerProvider#LOG4J}.</li>
 *    <li>If the class <code>org.slf4j.impl.JCLLoggerFactory</code> is NOT detected, use {@link LoggerProvider#SLF4J}.</li>
 *   </ul>
 *  </li>
 *  <li>If the class <code>org.apache.commons.logging.Log</code> is detected:
 *   <p>
 *   Create an instance of it using the commons-logging <code>LogFactory</code> class.
 *   <ul>
 *    <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Jdk14Logger</code>, use {@link LoggerProvider#JAVA}.</li>
 *    <li>If the created <code>Log</code> is of type <code>org.apache.commons.logging.impl.Log4JLogger</code>, use {@link LoggerProvider#LOG4J}.</li>
 *    <li>otherwise, use {@link LoggerProvider#JCL}.</li>
 *   </ul>
 *  </li>
 *  <li>If the class <code>org.apache.log4j.Logger</code> is detected, use {@link LoggerProvider#LOG4J}.</li>
 *  <li>otherwise, use {@link LoggerProvider#JAVA}.</li>
 * </ol>
 *
 * @see Source#setLogger(Logger)
 */
public static LoggerProvider LoggerProvider=null;
/**
 * Determines the string used to represent a newline in text output throughout the library.
 * <p>
 * The default value is the standard new line character sequence of the host platform, determined by <code>System.getProperty("line.separator")</code>.
 */
public static String NewLine=System.getProperty("line.separator");
/**
 * Used in Element.getChildElements.
 * <p>
 * This flag is package-private and fixed to <code>false</code>.
 * Server elements containing markup should be included in the hierarchy, so consider making this option public in future.
 */
static final boolean IncludeServerTagsInElementHierarchy=false;
/**
 * Represents a set of maximum unicode code points to be recognised for the three types of
 * unterminated character reference in a given context.
 * <p>
 * The three types of character reference are:
 * <ul>
 *  <li>{@linkplain CharacterEntityReference Character entity reference}</li>
 *  <li>Decimal character reference</li>
 *  <li>Hexadecimal character reference</li>
 * </ul>
 * <p>
 * The two types of contexts used in this library are:
 * <ul>
 *  <li>Inside an attribute value</li>
 *  <li>Outside an attribute value</li>
 * </ul>
 */
static class UnterminatedCharacterReferenceSettings {
  // use volatile fields to make them thread safe
  public volatile int characterEntityReferenceMaxCodePoint;
  public volatile int decimalCharacterReferenceMaxCodePoint;
  public volatile int hexadecimalCharacterReferenceMaxCodePoint;

  /** Shared settings instance in which every reference type is set to {@link CompatibilityMode#CODE_POINTS_ALL}. */
  public static UnterminatedCharacterReferenceSettings ACCEPT_ALL=new UnterminatedCharacterReferenceSettings(CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL,CompatibilityMode.CODE_POINTS_ALL);

  /**
   * Constructs settings in which every reference type is set to {@link CompatibilityMode#CODE_POINTS_NONE}.
   */
  public UnterminatedCharacterReferenceSettings() {
    this(CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE,CompatibilityMode.CODE_POINTS_NONE);
  }

  /**
   * Constructs settings with an explicit maximum code point for each type of character reference.
   *
   * @param characterEntityReferenceMaxCodePoint  maximum code point for unterminated character entity references
   * @param decimalCharacterReferenceMaxCodePoint  maximum code point for unterminated decimal character references
   * @param hexadecimalCharacterReferenceMaxCodePoint  maximum code point for unterminated hexadecimal character references
   */
  public UnterminatedCharacterReferenceSettings(final int characterEntityReferenceMaxCodePoint, final int decimalCharacterReferenceMaxCodePoint, final int hexadecimalCharacterReferenceMaxCodePoint) {
    this.characterEntityReferenceMaxCodePoint=characterEntityReferenceMaxCodePoint;
    this.decimalCharacterReferenceMaxCodePoint=decimalCharacterReferenceMaxCodePoint;
    this.hexadecimalCharacterReferenceMaxCodePoint=hexadecimalCharacterReferenceMaxCodePoint;
  }

  /**
   * Returns a multi-line description of the three maximum code points.
   * Each line is preceded by {@link Config#NewLine}, so the result starts with a line break.
   *
   * @return a multi-line description of the three maximum code points.
   */
  @Override
  public String toString() {
    // The third label previously read "Haxadecimal"; the spelling is corrected here.
    return Config.NewLine+" Character entity reference: "+getDescription(characterEntityReferenceMaxCodePoint)
      +Config.NewLine+" Decimal character reference: "+getDescription(decimalCharacterReferenceMaxCodePoint)
      +Config.NewLine+" Hexadecimal character reference: "+getDescription(hexadecimalCharacterReferenceMaxCodePoint);
  }

  /**
   * Describes a maximum code point setting: "None" or "All" for the two sentinel values,
   * otherwise the value in hexadecimal with a "0x" prefix.
   * Declared static because it reads no instance state.
   */
  private static String getDescription(final int codePoint) {
    if (codePoint==CompatibilityMode.CODE_POINTS_NONE) return "None";
    if (codePoint==CompatibilityMode.CODE_POINTS_ALL) return "All";
    return "0x"+Integer.toString(codePoint,16);
  }
}
/**
 * Represents a set of configuration parameters that relate to
 * user agent compatibility issues.
 * <p>
 * The predefined compatibility modes {@link #IE}, {@link #MOZILLA}, {@link #OPERA} and {@link #XHTML} provide an easy means of
 * ensuring the library interprets the markup in a way consistent with some of the most commonly used browsers,
 * at least in relation to the behaviour described by the properties in this class.
 * <p>
 * The properties of any <code>CompatibilityMode</code> object can be modified individually, including those in
 * the predefined instances as well as newly constructed instances.
 * Take note however that modifying the properties of the predefined instances has a global effect.
 * <p>
 * The currently active compatibility mode is stored in the static {@link Config#CurrentCompatibilityMode} property.
 */
public static final class CompatibilityMode {
// Assigned once in the constructor and never changed; final gives it safe publication to other threads.
private final String name;
// The remaining properties can be changed after construction; they are volatile so updates are visible across threads.
private volatile boolean formFieldNameCaseInsensitive;
volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsInsideAttributeValue;
volatile UnterminatedCharacterReferenceSettings unterminatedCharacterReferenceSettingsOutsideAttributeValue;
/**
 * Indicates the recognition of all unicode code points.
 * <p>
 * This value is used in properties which specify a maximum unicode code point to be recognised by the parser.
 *
 * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
 * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
 * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
 */
public static final int CODE_POINTS_ALL=Character.MAX_CODE_POINT; // 0x10FFFF (decimal 1114111)
/**
 * Indicates the recognition of no unicode code points.
 * <p>
 * This value is used in properties which specify a maximum unicode code point to be recognised by the parser.
 *
 * @see #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)
 * @see #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
 * @see #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)
 */
public static final int CODE_POINTS_NONE=CharacterReference.INVALID_CODE_POINT;
/**
 * Microsoft Internet Explorer compatibility mode.
 * <p>
 * <code>{@link #getName() Name} = IE</code><br />
 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true</code><br />
 * <table cellspacing="0" cellpadding="0">
 *  <tr><th>Recognition of unterminated character references:</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} =</td><td><code>U+00FF</code></td><td><code>U+00FF</code></td></tr>
 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
 * </table>
 */
public static final CompatibilityMode IE=new CompatibilityMode("IE",true,
new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_NONE) // outside attributes
);
/**
 * Mozilla /
 * Firefox /
 * Netscape compatibility mode.
 * <p>
 * <code>{@link #getName() Name} = Mozilla</code><br />
 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false</code><br />
 * <table cellspacing="0" cellpadding="0">
 *  <tr><th>Recognition of unterminated character references:</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} =</td><td><code>U+00FF</code></td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
 * </table>
 */
public static final CompatibilityMode MOZILLA=new CompatibilityMode("Mozilla",false,
new UnterminatedCharacterReferenceSettings(0xFF, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes
);
/**
 * Opera compatibility mode.
 * <p>
 * <code>{@link #getName() Name} = Opera</code><br />
 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = true</code><br />
 * <table cellspacing="0" cellpadding="0">
 *  <tr><th>Recognition of unterminated character references:</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} =</td><td><code>U+003E</code></td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_ALL All}</td><td>{@linkplain #CODE_POINTS_ALL All}</td></tr>
 * </table>
 */
public static final CompatibilityMode OPERA=new CompatibilityMode("Opera",true,
new UnterminatedCharacterReferenceSettings(0x3E, CODE_POINTS_ALL, CODE_POINTS_ALL), // inside attributes
new UnterminatedCharacterReferenceSettings(CODE_POINTS_ALL, CODE_POINTS_ALL, CODE_POINTS_ALL) // outside attributes
);
/**
 * XHTML compatibility mode.
 * <p>
 * <code>{@link #getName() Name} = XHTML</code><br />
 * <code>{@link #isFormFieldNameCaseInsensitive() FormFieldNameCaseInsensitive} = false</code><br />
 * <table cellspacing="0" cellpadding="0">
 *  <tr><th>Recognition of unterminated character references:</th><th>(inside attribute)</th><th>(outside attribute)</th></tr>
 *  <tr><td>{@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean) UnterminatedCharacterEntityReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_NONE None}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
 *  <tr><td>{@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedDecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_NONE None}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
 *  <tr><td>{@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean) UnterminatedHexadecimalCharacterReferenceMaxCodePoint} =</td><td>{@linkplain #CODE_POINTS_NONE None}</td><td>{@linkplain #CODE_POINTS_NONE None}</td></tr>
 * </table>
 */
public static final CompatibilityMode XHTML=new CompatibilityMode("XHTML");
/**
 * Creates a compatibility mode identified by the given {@linkplain #getName() name}.
 * <p>
 * Every property of the new instance starts at its default value, which matches the strict
 * rules of the {@link #XHTML} compatibility mode.
 *
 * @param name  the {@linkplain #getName() name} of the new compatibility mode
 */
public CompatibilityMode(final String name) {
  this(
    name,
    false, // form field names are case sensitive
    new UnterminatedCharacterReferenceSettings(), // inside attribute values
    new UnterminatedCharacterReferenceSettings() // outside attribute values
  );
}
// Full constructor used by the predefined modes and by the public single-argument constructor.
private CompatibilityMode(
  final String modeName,
  final boolean caseInsensitiveFieldNames,
  final UnterminatedCharacterReferenceSettings insideAttributeValueSettings,
  final UnterminatedCharacterReferenceSettings outsideAttributeValueSettings
) {
  name=modeName;
  formFieldNameCaseInsensitive=caseInsensitiveFieldNames;
  unterminatedCharacterReferenceSettingsInsideAttributeValue=insideAttributeValueSettings;
  unterminatedCharacterReferenceSettingsOutsideAttributeValue=outsideAttributeValueSettings;
}
/**
 * Returns the name given to this compatibility mode when it was constructed.
 * @return the name of this compatibility mode.
 */
public String getName() {
  return this.name;
}
/**
 * Indicates whether {@linkplain FormField#getName() form field names} are treated as case insensitive.
 * <p>
 * Microsoft Internet Explorer treats field names as case insensitive,
 * while Mozilla treats them as case sensitive.
 * <p>
 * The value of this property in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}
 * affects all instances of the {@link FormFields} class.
 * It should be set to the desired configuration before any instances of <code>FormFields</code> are created.
 *
 * @return <code>true</code> if {@linkplain FormField#getName() form field names} are treated as case insensitive, otherwise <code>false</code>.
 * @see #setFormFieldNameCaseInsensitive(boolean)
 */
public boolean isFormFieldNameCaseInsensitive() {
  return this.formFieldNameCaseInsensitive;
}
/**
 * Sets whether {@linkplain FormField#getName() form field names} are treated as case insensitive.
 * <p>
 * See {@link #isFormFieldNameCaseInsensitive()} for the documentation of this property.
 *
 * @param value  the new value of the property
 */
public void setFormFieldNameCaseInsensitive(final boolean value) {
  this.formFieldNameCaseInsensitive=value;
}
/**
 * Returns the maximum unicode code point of an unterminated
 * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
 * <p>
 * For example, if <code>getUnterminatedCharacterEntityReferenceMaxCodePoint(true)</code> has the value <code>0xFF</code> (U+00FF)
 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
 * <ul>
 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;gt",true)}
 *   returns "<code>&gt;</code>".<br />
 *   The string is recognised as the character entity reference {@link CharacterEntityReference#_gt &amp;gt;}
 *   despite the fact that it is unterminated,
 *   because its unicode code point U+003E is below the maximum of U+00FF set by this property.</li>
 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;euro",true)}
 *   returns "<code>&amp;euro</code>".<br />
 *   The string is not recognised as the character entity reference {@link CharacterEntityReference#_euro &amp;euro;}
 *   because it is unterminated
 *   and its unicode code point U+20AC is above the maximum of U+00FF set by this property.</li>
 * </ul>
 * <p>
 * See the documentation of the {@link Attribute#getValue()} method for further discussion.
 *
 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
 * @return the maximum unicode code point of an unterminated {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
 * @see #setUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
 */
public int getUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue) {
  final UnterminatedCharacterReferenceSettings contextSettings=getUnterminatedCharacterReferenceSettings(insideAttributeValue);
  return contextSettings.characterEntityReferenceMaxCodePoint;
}
/**
 * Sets the maximum unicode code point of an unterminated
 * {@linkplain CharacterEntityReference character entity reference} which is to be recognised in the specified context.
 * <p>
 * See {@link #getUnterminatedCharacterEntityReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
 *
 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
 * @param maxCodePoint  the maximum unicode code point.
 */
public void setUnterminatedCharacterEntityReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
  final UnterminatedCharacterReferenceSettings contextSettings=getUnterminatedCharacterReferenceSettings(insideAttributeValue);
  contextSettings.characterEntityReferenceMaxCodePoint=maxCodePoint;
}
/**
 * Returns the maximum unicode code point of an unterminated
 * decimal character reference which is to be recognised in the specified context.
 * <p>
 * For example, if <code>getUnterminatedDecimalCharacterReferenceMaxCodePoint(true)</code> had the hypothetical value <code>0xFF</code> (U+00FF)
 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
 * <ul>
 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#62",true)}
 *   returns "<code>&gt;</code>".<br />
 *   The string is recognised as the numeric character reference <code>&amp;#62;</code>
 *   despite the fact that it is unterminated,
 *   because its unicode code point U+003E is below the maximum of U+00FF set by this property.</li>
 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#8364",true)}
 *   returns "<code>&amp;#8364</code>".<br />
 *   The string is not recognised as the numeric character reference <code>&amp;#8364;</code>
 *   because it is unterminated
 *   and its unicode code point U+20AC is above the maximum of U+00FF set by this property.</li>
 * </ul>
 *
 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
 * @return the maximum unicode code point of an unterminated decimal character reference which is to be recognised in the specified context.
 * @see #setUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
 */
public int getUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) {
  final UnterminatedCharacterReferenceSettings contextSettings=getUnterminatedCharacterReferenceSettings(insideAttributeValue);
  return contextSettings.decimalCharacterReferenceMaxCodePoint;
}
/**
 * Sets the maximum unicode code point of an unterminated
 * decimal character reference which is to be recognised in the specified context.
 * <p>
 * See {@link #getUnterminatedDecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
 *
 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
 * @param maxCodePoint  the maximum unicode code point.
 */
public void setUnterminatedDecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
  final UnterminatedCharacterReferenceSettings contextSettings=getUnterminatedCharacterReferenceSettings(insideAttributeValue);
  contextSettings.decimalCharacterReferenceMaxCodePoint=maxCodePoint;
}
/**
 * Returns the maximum unicode code point of an unterminated
 * hexadecimal character reference which is to be recognised in the specified context.
 * <p>
 * For example, if <code>getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(true)</code> had the hypothetical value <code>0xFF</code> (U+00FF)
 * in the {@linkplain Config#CurrentCompatibilityMode current compatibility mode}, then:
 * <ul>
 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#x3e",true)}
 *   returns "<code>&gt;</code>".<br />
 *   The string is recognised as the numeric character reference <code>&amp;#x3e;</code>
 *   despite the fact that it is unterminated,
 *   because its unicode code point U+003E is below the maximum of U+00FF set by this property.</li>
 *  <li>{@link CharacterReference#decode(CharSequence,boolean) CharacterReference.decode("&amp;#x20ac",true)}
 *   returns "<code>&amp;#x20ac</code>".<br />
 *   The string is not recognised as the numeric character reference <code>&amp;#x20ac;</code>
 *   because it is unterminated
 *   and its unicode code point U+20AC is above the maximum of U+00FF set by this property.</li>
 * </ul>
 *
 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
 * @return the maximum unicode code point of an unterminated hexadecimal character reference which is to be recognised in the specified context.
 * @see #setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue, int maxCodePoint)
 */
public int getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue) {
  final UnterminatedCharacterReferenceSettings contextSettings=getUnterminatedCharacterReferenceSettings(insideAttributeValue);
  return contextSettings.hexadecimalCharacterReferenceMaxCodePoint;
}
/**
 * Sets the maximum unicode code point of an unterminated
 * hexadecimal character reference which is to be recognised in the specified context.
 * <p>
 * See {@link #getUnterminatedHexadecimalCharacterReferenceMaxCodePoint(boolean insideAttributeValue)} for the documentation of this property.
 *
 * @param insideAttributeValue  the context within an HTML document - <code>true</code> if inside an attribute value or <code>false</code> if outside an attribute value.
 * @param maxCodePoint  the maximum unicode code point.
 */
public void setUnterminatedHexadecimalCharacterReferenceMaxCodePoint(final boolean insideAttributeValue, final int maxCodePoint) {
  final UnterminatedCharacterReferenceSettings contextSettings=getUnterminatedCharacterReferenceSettings(insideAttributeValue);
  contextSettings.hexadecimalCharacterReferenceMaxCodePoint=maxCodePoint;
}
/**
 * Builds a multi-line summary of this mode's properties for debugging purposes.
 * <p>
 * The summary lists the form field name case sensitivity setting followed by the
 * unterminated character reference settings inside and outside attribute values,
 * with lines separated by {@link Config#NewLine}.
 *
 * @return a string representation of this object useful for debugging purposes.
 */
public String getDebugInfo() {
  final StringBuilder info=new StringBuilder();
  info.append("Form field name case insensitive: ").append(formFieldNameCaseInsensitive);
  info.append(Config.NewLine).append("Maximum codepoints in unterminated character references:");
  info.append(Config.NewLine).append(" Inside attribute values:");
  info.append(unterminatedCharacterReferenceSettingsInsideAttributeValue);
  info.append(Config.NewLine).append(" Outside attribute values:");
  info.append(unterminatedCharacterReferenceSettingsOutsideAttributeValue);
  return info.toString();
}
/**
 * Returns the {@linkplain #getName() name} of this compatibility mode as its string form.
 * @return the {@linkplain #getName() name} of this compatibility mode.
 */
@Override
public String toString() {
  // This class is final, so reading the field directly is the same as calling getName().
  return name;
}
// Selects the settings object for the requested context: inside or outside an attribute value.
UnterminatedCharacterReferenceSettings getUnterminatedCharacterReferenceSettings(final boolean insideAttributeValue) {
  if (insideAttributeValue) {
    return unterminatedCharacterReferenceSettingsInsideAttributeValue;
  }
  return unterminatedCharacterReferenceSettingsOutsideAttributeValue;
}
}
}