org.w3c.tidy.AttrCheckImpl Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jtidy Show documentation
Show all versions of jtidy Show documentation
JTidy is a Java port of HTML Tidy, a HTML syntax checker and pretty printer. Like its non-Java cousin, JTidy can be
used as a tool for cleaning up malformed and faulty HTML. In addition, JTidy provides a DOM interface to the
document that is being processed, which effectively makes you able to use JTidy as a DOM parser for real-world HTML.
/*
* Java HTML Tidy - JTidy
* HTML parser and pretty printer
*
* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
* Institute of Technology, Institut National de Recherche en
* Informatique et en Automatique, Keio University). All Rights
* Reserved.
*
* Contributing Author(s):
*
* Dave Raggett
* Andy Quick (translation to Java)
* Gary L Peskin (Java development)
* Sami Lempinen (release management)
* Fabrizio Giustina
*
* The contributing author(s) would like to thank all those who
* helped with testing, bug fixes, and patience. This wouldn't
* have been possible without all of you.
*
* COPYRIGHT NOTICE:
*
* This software and documentation is provided "as is," and
* the copyright holders and contributing author(s) make no
* representations or warranties, express or implied, including
* but not limited to, warranties of merchantability or fitness
* for any particular purpose or that the use of the software or
* documentation will not infringe any third party patents,
* copyrights, trademarks or other rights.
*
* The copyright holders and contributing author(s) will not be
* liable for any direct, indirect, special or consequential damages
* arising out of any use of the software or documentation, even if
* advised of the possibility of such damage.
*
* Permission is hereby granted to use, copy, modify, and distribute
* this source code, or portions hereof, documentation and executables,
* for any purpose, without fee, subject to the following restrictions:
*
* 1. The origin of this source code must not be misrepresented.
* 2. Altered versions must be plainly marked as such and must
* not be misrepresented as being the original source.
* 3. This Copyright notice may not be removed or altered from any
* source or altered source distribution.
*
* The copyright holders and contributing author(s) specifically
* permit, without fee, and encourage the use of this source code
* as a component for supporting the Hypertext Markup Language in
* commercial products. If you use this source code in a product,
* acknowledgment is not required but would be appreciated.
*
*/
package org.w3c.tidy;
import java.util.HashMap;
import java.util.Map;
/**
* Check attribute values implementations.
* @author Dave Raggett [email protected]
* @author Andy Quick [email protected] (translation to Java)
* @author Fabrizio Giustina
* @version $Revision$ ($Author$)
*/
public final class AttrCheckImpl
{
/**
* Shared checker instance for attributes whose value is a URL.
*/
public static final AttrCheck URL = new CheckUrl();
/**
* Shared checker instance for script attributes. The CheckScript implementation currently performs no validation.
*/
public static final AttrCheck SCRIPT = new CheckScript();
/**
* Shared checker instance for the "name" attribute.
*/
public static final AttrCheck NAME = new CheckName();
/**
* Shared checker instance for ids.
*/
public static final AttrCheck ID = new CheckId();
/**
* Shared checker instance for the "align" attribute.
*/
public static final AttrCheck ALIGN = new CheckAlign();
/**
* Shared checker instance for the "valign" attribute. Also used by the "align" checker for tags flagged with
* Dict.CM_IMG.
*/
public static final AttrCheck VALIGN = new CheckValign();
/**
* Shared checker instance for boolean attributes.
*/
public static final AttrCheck BOOL = new CheckBool();
/**
* Shared checker instance for length attributes (digits, optionally with a percent sign).
*/
public static final AttrCheck LENGTH = new CheckLength();
/**
* Shared checker instance for the "target" attribute.
*/
public static final AttrCheck TARGET = new CheckTarget();
/**
* Shared checker instance for form submission method values; the implementation accepts "get" and "post"
* (presumably the form "method" attribute).
*/
public static final AttrCheck FSUBMIT = new CheckFsubmit();
/**
* Shared checker instance for the "clear" attribute.
*/
public static final AttrCheck CLEAR = new CheckClear();
/**
* Shared checker instance for the "shape" attribute.
*/
public static final AttrCheck SHAPE = new CheckShape();
/**
* Shared checker instance for numeric attributes.
*/
public static final AttrCheck NUMBER = new CheckNumber();
/**
* Shared checker instance for the "scope" attribute.
*/
public static final AttrCheck SCOPE = new CheckScope();
/**
* Shared checker instance for the "color" attribute.
*/
public static final AttrCheck COLOR = new CheckColor();
/**
* Shared checker instance for the "vtype" attribute.
*/
public static final AttrCheck VTYPE = new CheckVType();
/**
* Shared checker instance for the "scroll" attribute.
*/
public static final AttrCheck SCROLL = new CheckScroll();
/**
* Shared checker instance for the "dir" attribute.
*/
public static final AttrCheck TEXTDIR = new CheckTextDir();
/**
* Shared checker instance for the "lang" and "xml:lang" attributes.
*/
public static final AttrCheck LANG = new CheckLang();
/**
* Checker for text attributes. Actually null (no validation).
*/
public static final AttrCheck TEXT = null;
/**
* Checker for the "charset" attribute. Actually null (no validation).
*/
public static final AttrCheck CHARSET = null;
/**
* Checker for the "type" attribute. Actually null (no validation).
*/
public static final AttrCheck TYPE = null;
/**
* Checker for attributes that can contain a single character. Actually null (no validation).
*/
public static final AttrCheck CHARACTER = null;
/**
* Checker for attributes which contain a list of URLs. Actually null (no validation).
*/
public static final AttrCheck URLS = null;
/**
* Checker for the "cols" attribute. Actually null (no validation).
*/
public static final AttrCheck COLS = null;
/**
* Checker for the "coords" attribute. Actually null (no validation).
*/
public static final AttrCheck COORDS = null;
/**
* Checker for attributes containing dates. Actually null (no validation).
*/
public static final AttrCheck DATE = null;
/**
* Checker for attributes referencing an id. Actually null (no validation).
*/
public static final AttrCheck IDREF = null;
/**
* Checker for the table "frame" attribute. Actually null (no validation).
*/
public static final AttrCheck TFRAME = null;
/**
* Checker for the "frameborder" attribute. Actually null (no validation).
*/
public static final AttrCheck FBORDER = null;
/**
* Checker for the "media" attribute. Actually null (no validation).
*/
public static final AttrCheck MEDIA = null;
/**
* Checker for the "rel" and "rev" attributes. Actually null (no validation).
*/
public static final AttrCheck LINKTYPES = null;
/**
* Checker for the table "rules" attribute. Actually null (no validation).
*/
public static final AttrCheck TRULES = null;
/**
* Private constructor: this is a utility class holding static checker instances and is not meant to be
* instantiated.
*/
private AttrCheckImpl()
{
// intentionally empty; only prevents instantiation from outside this class
}
/**
 * AttrCheck implementation for checking URLs.
 * <p>
 * Two kinds of problems are detected in the attribute value:
 * </p>
 * <ul>
 * <li>backslashes, which are replaced by forward slashes when <code>fixBackslash</code> is enabled (never for
 * <code>javascript:</code> URLs, where a backslash is legitimate script syntax);</li>
 * <li>characters that may not appear literally in a URI reference (controls and space, anything above
 * <code>0x7e</code>, <code>&lt;</code> and <code>&gt;</code>), which are percent-encoded when <code>fixUri</code>
 * is enabled.</li>
 * </ul>
 */
public static class CheckUrl implements AttrCheck
{

    /**
     * Upper-case hexadecimal digits used to build percent-encoded octets.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Validates a URL attribute value, optionally repairing it in place, and reports what was found.
     * @param lexer lexer providing the configuration flags and the report sink
     * @param node node owning the attribute (used for reporting only)
     * @param attval attribute being checked; <code>attval.value</code> may be rewritten
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        if (attval.value == null)
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
            return;
        }

        String p = attval.value;
        boolean isJavascript = p.startsWith("javascript:");
        boolean escapeFound = false;
        boolean backslashFound = false;

        // single scan to find out which repairs/reports are needed
        for (int i = 0; i < p.length(); ++i)
        {
            char c = p.charAt(i);
            if (c == '\\')
            {
                backslashFound = true;
            }
            else if (isIllegalUriChar(c))
            {
                escapeFound = true;
            }
        }

        // backslashes are only rewritten outside javascript: URLs
        boolean fixBackslash = lexer.configuration.fixBackslash && !isJavascript;
        if (fixBackslash && backslashFound)
        {
            p = p.replace('\\', '/');
            attval.value = p;
        }

        // illegal chars found, percent-encode them
        if (lexer.configuration.fixUri && escapeFound)
        {
            attval.value = escapeIllegalUriChars(p);
        }

        // Only claim FIXED_BACKSLASH when the value was really rewritten, and stay silent for javascript: URLs,
        // where the backslash is neither a typo nor something we touched.
        if (backslashFound && !isJavascript)
        {
            if (fixBackslash)
            {
                lexer.report.attrError(lexer, node, attval, Report.FIXED_BACKSLASH);
            }
            else
            {
                lexer.report.attrError(lexer, node, attval, Report.BACKSLASH_IN_URI);
            }
        }

        if (escapeFound)
        {
            if (lexer.configuration.fixUri)
            {
                lexer.report.attrError(lexer, node, attval, Report.ESCAPED_ILLEGAL_URI);
            }
            else
            {
                lexer.report.attrError(lexer, node, attval, Report.ILLEGAL_URI_REFERENCE);
            }
            lexer.badChars |= Report.INVALID_URI;
        }
    }

    /**
     * Tells whether a char must not appear unescaped in a URI reference.
     * @param c char to test
     * @return <code>true</code> for controls and space, chars above <code>0x7e</code>, '&lt;' and '&gt;'
     */
    private static boolean isIllegalUriChar(char c)
    {
        return (c > 0x7e) || (c <= 0x20) || (c == '<') || (c == '>');
    }

    /**
     * Percent-encodes every illegal char of the given URI. Each escaped character is converted to its UTF-8 octets
     * and every octet is written as '%' followed by exactly two upper-case hex digits, so that a tab becomes
     * <code>%09</code> (not <code>%9</code>) and chars above <code>0xff</code> do not produce over-long escapes.
     * @param uri value to escape
     * @return escaped value
     */
    private static String escapeIllegalUriChars(String uri)
    {
        StringBuilder dest = new StringBuilder(uri.length() + 16);
        int i = 0;
        while (i < uri.length())
        {
            char c = uri.charAt(i);
            if (!isIllegalUriChar(c))
            {
                dest.append(c);
                ++i;
                continue;
            }

            // take the whole code point so that a surrogate pair is encoded as a single UTF-8 sequence
            int end = i + Character.charCount(uri.codePointAt(i));
            byte[] utf8 = uri.substring(i, end).getBytes(java.nio.charset.StandardCharsets.UTF_8);
            for (int k = 0; k < utf8.length; k++)
            {
                int octet = utf8[k] & 0xff;
                dest.append('%');
                dest.append(HEX_DIGITS[octet >>> 4]);
                dest.append(HEX_DIGITS[octet & 0x0f]);
            }
            i = end;
        }
        return dest.toString();
    }
}
/**
* AttrCheck implementation for script attributes. This is a placeholder: the check method has an empty body, so
* values are accepted as they are and nothing is reported.
*/
public static class CheckScript implements AttrCheck
{
/**
* Does nothing; script attribute values are not validated.
* @param lexer unused
* @param node unused
* @param attval unused
* @see AttrCheck#check(Lexer, Node, AttVal)
*/
public void check(Lexer lexer, Node node, AttVal attval)
{
// not implemented
}
}
/**
 * AttrCheck implementation for the "align" attribute. Tags flagged with <code>Dict.CM_IMG</code> are handed over
 * to the "valign" checker; for every other tag the value must be one of the horizontal alignment keywords.
 */
public static class CheckAlign implements AttrCheck
{

    /**
     * valid values for this attribute.
     */
    private static final String[] VALID_VALUES = new String[]{"left", "center", "right", "justify"};

    /**
     * Checks an "align" value, reporting a missing or unrecognised value.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        // IMG, OBJECT, APPLET and EMBED use align for vertical position
        final boolean verticalAlign = node.tag != null && (node.tag.model & Dict.CM_IMG) != 0;

        if (verticalAlign)
        {
            VALIGN.check(lexer, node, attval);
        }
        else if (attval.value == null)
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
        }
        else
        {
            attval.checkLowerCaseAttrValue(lexer, node);

            final boolean recognised = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
            if (!recognised)
            {
                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
            }
        }
    }
}
/**
 * AttrCheck implementation for the "valign" attribute. Standard values are accepted silently, "left"/"right" are
 * accepted only on tags flagged with <code>Dict.CM_IMG</code>, proprietary values are reported as such and
 * anything else is reported as a bad value.
 */
public static class CheckValign implements AttrCheck
{

    /**
     * valid values for this attribute.
     */
    private static final String[] VALID_VALUES = new String[]{"top", "middle", "bottom", "baseline"};

    /**
     * valid values for this attribute (only for img tag).
     */
    private static final String[] VALID_VALUES_IMG = new String[]{"left", "right"};

    /**
     * proprietary values for this attribute.
     */
    private static final String[] VALID_VALUES_PROPRIETARY = new String[]{
        "texttop",
        "absmiddle",
        "absbottom",
        "textbottom"};

    /**
     * Checks a "valign" value.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        if (attval.value == null)
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
            return;
        }

        attval.checkLowerCaseAttrValue(lexer, node);
        final String candidate = attval.value;

        if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES, candidate))
        {
            // standard value, nothing to report
            return;
        }

        if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES_IMG, candidate))
        {
            // "left"/"right" are fine only on image-like tags
            final boolean imageLike = node.tag != null && (node.tag.model & Dict.CM_IMG) != 0;
            if (imageLike)
            {
                return;
            }
        }
        else if (TidyUtils.isInValuesIgnoreCase(VALID_VALUES_PROPRIETARY, candidate))
        {
            lexer.constrainVersion(Dict.VERS_PROPRIETARY);
            lexer.report.attrError(lexer, node, attval, Report.PROPRIETARY_ATTR_VALUE);
            return;
        }

        // either an image-only value on another tag or a completely unknown value
        lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
    }
}
/**
 * AttrCheck implementation for boolean attributes. A missing value is accepted; a present value is only passed to
 * the lower-case check.
 */
public static class CheckBool implements AttrCheck
{

    /**
     * Runs the lower-case check on the value, if there is one.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        if (attval.value != null)
        {
            attval.checkLowerCaseAttrValue(lexer, node);
        }
    }
}
/**
 * AttrCheck implementation for length attributes. A value must start with a digit or a percent sign and continue
 * with digits or percent signs only; on td and th elements percent signs after the first character are rejected.
 */
public static class CheckLength implements AttrCheck
{

    /**
     * Checks a length value, reporting a missing or malformed value.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        final String text = attval.value;
        if (text == null)
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
            return;
        }

        // don't check for <col width=...> and <colgroup width=...>
        if ("width".equalsIgnoreCase(attval.attribute))
        {
            if (node.tag == lexer.configuration.tt.tagCol || node.tag == lexer.configuration.tt.tagColgroup)
            {
                return;
            }
        }

        final boolean validStart = text.length() > 0
            && (Character.isDigit(text.charAt(0)) || text.charAt(0) == '%');
        if (!validStart)
        {
            lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
            return;
        }

        final TagTable tags = lexer.configuration.tt;
        for (int pos = 1; pos < text.length(); pos++)
        {
            final char ch = text.charAt(pos);
            if (Character.isDigit(ch))
            {
                continue;
            }

            // elements th and td must not use percentages; elsewhere '%' is the only non-digit allowed
            if (node.tag == tags.tagTd || node.tag == tags.tagTh || ch != '%')
            {
                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
                break;
            }
        }
    }
}
/**
 * AttrCheck implementation for the "target" attribute. Using the attribute at all excludes strict HTML 4.0; the
 * value must either start with a letter or be one of the reserved underscore names.
 */
public static class CheckTarget implements AttrCheck
{

    /**
     * valid values for this attribute.
     */
    private static final String[] VALID_VALUES = new String[]{"_blank", "_self", "_parent", "_top"};

    /**
     * Checks a "target" value, reporting a missing/empty or unacceptable value.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        // No target attribute in strict HTML versions
        lexer.constrainVersion(~Dict.VERS_HTML40_STRICT);

        final String target = attval.value;
        if (target == null || target.length() == 0)
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
            return;
        }

        // target names must begin with a letter, or be one of _blank, _self, _parent and _top
        final boolean startsWithLetter = Character.isLetter(target.charAt(0));
        if (!startsWithLetter && !TidyUtils.isInValuesIgnoreCase(VALID_VALUES, target))
        {
            lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
        }
    }
}
/**
 * AttrCheck implementation for form submission method values: only "get" and "post" are accepted.
 */
public static class CheckFsubmit implements AttrCheck
{

    /**
     * valid values for this attribute.
     */
    private static final String[] VALID_VALUES = new String[]{"get", "post"};

    /**
     * Checks the value, reporting a missing or unrecognised value.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        if (attval.value != null)
        {
            attval.checkLowerCaseAttrValue(lexer, node);

            final boolean known = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
            if (!known)
            {
                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
            }
        }
        else
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
        }
    }
}
/**
 * AttrCheck implementation for the "clear" attribute. A missing value is reported and then replaced by the first
 * valid value ("none"); a present value must be one of the known keywords.
 */
public static class CheckClear implements AttrCheck
{

    /**
     * valid values for this attribute.
     */
    private static final String[] VALID_VALUES = new String[]{"none", "left", "right", "all"};

    /**
     * Checks a "clear" value, defaulting a missing one.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        if (attval.value != null)
        {
            attval.checkLowerCaseAttrValue(lexer, node);

            final boolean known = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
            if (!known)
            {
                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
            }
            return;
        }

        // report first (while the value is still missing), then fall back to the default keyword
        lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
        attval.value = VALID_VALUES[0];
    }
}
/**
 * AttrCheck implementation for the "shape" attribute: the value must be "rect", "default", "circle" or "poly".
 */
public static class CheckShape implements AttrCheck
{

    /**
     * valid values for this attribute.
     */
    private static final String[] VALID_VALUES = new String[]{"rect", "default", "circle", "poly"};

    /**
     * Checks a "shape" value, reporting a missing or unrecognised value.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        final boolean missing = attval.value == null;
        if (missing)
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
        }
        else
        {
            attval.checkLowerCaseAttrValue(lexer, node);
            if (!TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value))
            {
                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
            }
        }
    }
}
/**
 * AttrCheck implementation for the "scope" attribute: the value must be "row", "rowgroup", "col" or "colgroup".
 */
public static class CheckScope implements AttrCheck
{

    /**
     * valid values for this attribute.
     */
    private static final String[] VALID_VALUES = new String[]{"row", "rowgroup", "col", "colgroup"};

    /**
     * Checks a "scope" value, reporting a missing or unrecognised value.
     * @see AttrCheck#check(Lexer, Node, AttVal)
     */
    public void check(Lexer lexer, Node node, AttVal attval)
    {
        if (attval.value == null)
        {
            lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
        }
        else
        {
            attval.checkLowerCaseAttrValue(lexer, node);

            final boolean accepted = TidyUtils.isInValuesIgnoreCase(VALID_VALUES, attval.value);
            if (!accepted)
            {
                lexer.report.attrError(lexer, node, attval, Report.BAD_ATTRIBUTE_VALUE);
            }
        }
    }
}
/**
* AttrCheck implementation for checking numbers.
*/
public static class CheckNumber implements AttrCheck
{
/**
* @see AttrCheck#check(Lexer, Node, AttVal)
*/
public void check(Lexer lexer, Node node, AttVal attval)
{
if (attval.value == null)
{
lexer.report.attrError(lexer, node, attval, Report.MISSING_ATTR_VALUE);
return;
}
// don't check