Please wait. This can take a few minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
org.w3c.tidy.ParsePropertyImpl Maven / Gradle / Ivy
/*
* Java HTML Tidy - JTidy
* HTML parser and pretty printer
*
* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
* Institute of Technology, Institut National de Recherche en
* Informatique et en Automatique, Keio University). All Rights
* Reserved.
*
* Contributing Author(s):
*
* Dave Raggett
* Andy Quick (translation to Java)
* Gary L Peskin (Java development)
* Sami Lempinen (release management)
* Fabrizio Giustina
*
* The contributing author(s) would like to thank all those who
* helped with testing, bug fixes, and patience. This wouldn't
* have been possible without all of you.
*
* COPYRIGHT NOTICE:
*
* This software and documentation is provided "as is," and
* the copyright holders and contributing author(s) make no
* representations or warranties, express or implied, including
* but not limited to, warranties of merchantability or fitness
* for any particular purpose or that the use of the software or
* documentation will not infringe any third party patents,
* copyrights, trademarks or other rights.
*
* The copyright holders and contributing author(s) will not be
* liable for any direct, indirect, special or consequential damages
* arising out of any use of the software or documentation, even if
* advised of the possibility of such damage.
*
* Permission is hereby granted to use, copy, modify, and distribute
* this source code, or portions hereof, documentation and executables,
* for any purpose, without fee, subject to the following restrictions:
*
* 1. The origin of this source code must not be misrepresented.
* 2. Altered versions must be plainly marked as such and must
* not be misrepresented as being the original source.
* 3. This Copyright notice may not be removed or altered from any
* source or altered source distribution.
*
* The copyright holders and contributing author(s) specifically
* permit, without fee, and encourage the use of this source code
* as a component for supporting the Hypertext Markup Language in
* commercial products. If you use this source code in a product,
* acknowledgment is not required but would be appreciated.
*
*/
package org.w3c.tidy;
import java.util.List;
import java.util.StringTokenizer;
/**
* Property parser instances.
* @author Fabrizio Giustina
* @version $Revision $ ($Author $)
*/
public final class ParsePropertyImpl
{
/**
 * Shared parser instance for integer options (see {@link ParseInt}).
 */
static final ParseProperty INT = new ParseInt();
/**
 * Shared parser instance for boolean options (see {@link ParseBoolean}).
 */
static final ParseProperty BOOL = new ParseBoolean();
/**
 * Shared parser instance for inverted boolean options (see {@link ParseInvBoolean}); it delegates to {@link #BOOL}
 * and negates the result.
 */
static final ParseProperty INVBOOL = new ParseInvBoolean();
/**
 * Shared parser instance for character encoding options (see {@link ParseCharEncoding}).
 */
static final ParseProperty CHAR_ENCODING = new ParseCharEncoding();
/**
 * Shared parser instance for name options, i.e. a single whitespace-free token (see {@link ParseName}).
 */
static final ParseProperty NAME = new ParseName();
/**
 * Shared parser instance for the new-*-tags options, which take a list of tag names (see {@link ParseTagNames}).
 */
static final ParseProperty TAGNAMES = new ParseTagNames();
/**
 * Shared parser instance for the doctype option (see {@link ParseDocType}).
 */
static final ParseProperty DOCTYPE = new ParseDocType();
/**
 * Shared parser instance for the repeated attributes option, keep-first or keep-last (see
 * {@link ParseRepeatedAttribute}).
 */
static final ParseProperty REPEATED_ATTRIBUTES = new ParseRepeatedAttribute();
/**
 * Shared parser instance for plain String options (see {@link ParseString}).
 */
static final ParseProperty STRING = new ParseString();
/**
 * Shared parser instance for the indent option (see {@link ParseIndent}).
 */
static final ParseProperty INDENT = new ParseIndent();
/**
 * Shared parser instance for CSS1 selector options (see {@link ParseCSS1Selector}).
 */
static final ParseProperty CSS1SELECTOR = new ParseCSS1Selector();
/**
 * Shared parser instance for the newline option, lf, crlf or cr (see {@link ParseNewLine}).
 */
static final ParseProperty NEWLINE = new ParseNewLine();
/**
 * Private constructor: this class only exposes static parser instances and nested parser classes, so it is not meant
 * to be instantiated.
 */
private ParsePropertyImpl()
{
// intentionally empty, never called
}
/**
 * Parser for integer option values.
 */
static class ParseInt implements ParseProperty
{
    /**
     * Converts the value to an Integer. A value that is not a valid decimal integer is reported through
     * <code>configuration.report.badArgument</code> and yields -1.
     * @param value raw option value
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration whose report receives bad arguments
     * @return the parsed Integer, or -1 when the value cannot be parsed
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        try
        {
            return Integer.parseInt(value);
        }
        catch (NumberFormatException e)
        {
            // not a number: tell the user and fall back to the -1 marker
            configuration.report.badArgument(value, option);
            return -1;
        }
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Integer";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "0, 1, 2, ...";
    }

    /**
     * @return the string form of the value, or an empty string when the value is null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        if (value == null)
        {
            return "";
        }
        return value.toString();
    }
}
/**
 * Parser for boolean option values.
 */
static class ParseBoolean implements ParseProperty
{
    /**
     * Decides on the first character of the value only: t, T, y, Y or 1 mean true; f, F, n, N or 0 mean false. A null
     * or empty value yields true without any report. Any other first character is reported as a bad argument and
     * also yields true.
     * @param value raw option value, may be null or empty
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration whose report receives bad arguments
     * @return Boolean.TRUE or Boolean.FALSE
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        if (value == null || value.length() == 0)
        {
            return Boolean.TRUE;
        }

        switch (value.charAt(0))
        {
            case 't' :
            case 'T' :
            case 'y' :
            case 'Y' :
            case '1' :
                return Boolean.TRUE;
            case 'f' :
            case 'F' :
            case 'n' :
            case 'N' :
            case '0' :
                return Boolean.FALSE;
            default :
                // unknown first character: report it, the result stays at the default (true)
                configuration.report.badArgument(value, option);
                return Boolean.TRUE;
        }
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Boolean";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "y/n, yes/no, t/f, true/false, 1/0";
    }

    /**
     * @return "yes" for a true value, "no" for a false value, an empty string for null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        if (value != null)
        {
            boolean flag = ((Boolean) value).booleanValue();
            return flag ? "yes" : "no";
        }
        return "";
    }
}
/**
 * Parser for inverted boolean option values: the stored value is the negation of what the user wrote.
 */
static class ParseInvBoolean implements ParseProperty
{
    /**
     * Parses the value with the regular boolean parser ({@link ParsePropertyImpl#BOOL}) and negates the result.
     * @param value raw option value
     * @param option option name, passed on to the boolean parser
     * @param configuration configuration, passed on to the boolean parser
     * @return Boolean.FALSE when the boolean parser yields true, Boolean.TRUE otherwise
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        Boolean plain = (Boolean) BOOL.parse(value, option, configuration);
        if (plain.booleanValue())
        {
            return Boolean.FALSE;
        }
        return Boolean.TRUE;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Boolean";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "yes, no, true, false";
    }

    /**
     * @return "no" for a true (stored) value, "yes" for a false one, an empty string for null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        if (value != null)
        {
            boolean stored = ((Boolean) value).booleanValue();
            // the stored flag is inverted, so flip it back for display
            return stored ? "no" : "yes";
        }
        return "";
    }
}
/**
 * Parser for the character encoding options. The value "raw" is handled specially; any other value must pass
 * <code>TidyUtils.isCharEncodingSupported</code>.
 */
static class ParseCharEncoding implements ParseProperty
{
    /**
     * Applies the encoding directly to the configuration instead of returning it.
     * <ul>
     * <li>"raw" (any case) switches on <code>configuration.rawOut</code> and nothing else.</li>
     * <li>An unsupported encoding name is reported as a bad argument.</li>
     * <li>Otherwise "input-encoding" sets the input encoding, "output-encoding" the output encoding and
     * "char-encoding" both; any other option name is silently ignored.</li>
     * </ul>
     * @param value encoding name
     * @param option option name selecting which encoding(s) to set
     * @param configuration configuration to update
     * @return always null
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        if ("raw".equalsIgnoreCase(value))
        {
            // special value for compatibility with tidy c
            configuration.rawOut = true;
            return null;
        }

        if (!TidyUtils.isCharEncodingSupported(value))
        {
            configuration.report.badArgument(value, option);
            return null;
        }

        boolean both = "char-encoding".equalsIgnoreCase(option);
        boolean inputSide = both || "input-encoding".equalsIgnoreCase(option);
        boolean outputSide = both || "output-encoding".equalsIgnoreCase(option);

        if (inputSide)
        {
            configuration.setInCharEncodingName(value);
        }
        if (outputSide)
        {
            configuration.setOutCharEncodingName(value);
        }
        return null;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Encoding";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        // ascii, latin1, raw, utf-8, iso2022, mac, utf-16, utf-16be, utf-16le, big5, shiftjis
        return "Any valid java char encoding name";
    }

    /**
     * Reads the current encoding name back from the configuration; the <code>value</code> argument is not used.
     * @return the output encoding name for "output-encoding", the input encoding name for every other option
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        // input-encoding and char-encoding both show the input side
        return "output-encoding".equalsIgnoreCase(option)
            ? configuration.getOutCharEncodingName()
            : configuration.getInCharEncodingName();
    }
}
/**
 * Parser for name values (a string excluding whitespace).
 */
static class ParseName implements ParseProperty
{
    /**
     * Returns the first whitespace-delimited token of the value. A value without any token (null, empty or
     * whitespace only) is reported as a bad argument and yields null.
     * @param value raw option value, may be null
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration whose report receives bad arguments
     * @return the first token, or null when there is none
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        // StringTokenizer rejects a null string with a NullPointerException, so guard it and treat null like any
        // other value that carries no name
        if (value != null)
        {
            StringTokenizer tokens = new StringTokenizer(value);
            if (tokens.hasMoreTokens())
            {
                return tokens.nextToken();
            }
        }

        configuration.report.badArgument(value, option);
        return null;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Name";
    }

    /**
     * @return a human readable hint about the accepted values ("-": no fixed list)
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "-";
    }

    /**
     * @return the string form of the value, or an empty string when the value is null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        return value == null ? "" : value.toString();
    }
}
/**
 * Parser for the new-*-tags options, whose value is a list of tag names.
 */
static class ParseTagNames implements ParseProperty
{
    /**
     * Defines every tag name found in the value. Names are separated by spaces, tabs, line breaks or commas. The tag
     * type is chosen from the option name: "new-blocklevel-tags", "new-empty-tags" and "new-pre-tags" map to the
     * block, empty and pre types; "new-inline-tags" and any other option name map to the inline type. For each name
     * the type is also or-ed into <code>configuration.definedTags</code>.
     * @param value list of tag names; null is reported as a bad argument
     * @param option option name selecting the tag type
     * @param configuration configuration whose tag table is updated
     * @return always null
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        if (value == null)
        {
            // StringTokenizer would throw a NullPointerException; report the missing value instead
            configuration.report.badArgument(value, option);
            return null;
        }

        // inline is both the mapping for "new-inline-tags" and the fallback for unknown option names
        short tagType = Dict.TAGTYPE_INLINE;
        if ("new-blocklevel-tags".equals(option))
        {
            tagType = Dict.TAGTYPE_BLOCK;
        }
        else if ("new-empty-tags".equals(option))
        {
            tagType = Dict.TAGTYPE_EMPTY;
        }
        else if ("new-pre-tags".equals(option))
        {
            tagType = Dict.TAGTYPE_PRE;
        }

        StringTokenizer names = new StringTokenizer(value, " \t\n\r,");
        while (names.hasMoreTokens())
        {
            // the flag is only set when at least one tag is actually defined
            configuration.definedTags |= tagType;
            configuration.tt.defineTag(tagType, names.nextToken());
        }
        return null;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Tag names";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "tagX, tagY, ...";
    }

    /**
     * Lists the tags that the configuration's tag table returns for the tag type of the given option. The
     * <code>value</code> argument is not used. Unlike {@link #parse}, an unknown option name is not treated as
     * inline here but yields an empty string.
     * @return the tags, each followed by a single space, or an empty string when there are none
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        short tagType;
        if ("new-inline-tags".equals(option))
        {
            tagType = Dict.TAGTYPE_INLINE;
        }
        else if ("new-blocklevel-tags".equals(option))
        {
            tagType = Dict.TAGTYPE_BLOCK;
        }
        else if ("new-empty-tags".equals(option))
        {
            tagType = Dict.TAGTYPE_EMPTY;
        }
        else if ("new-pre-tags".equals(option))
        {
            tagType = Dict.TAGTYPE_PRE;
        }
        else
        {
            return "";
        }

        // wildcard instead of a raw type: the elements are only appended, never inspected
        List<?> tagList = configuration.tt.findAllDefinedTag(tagType);
        if (tagList.isEmpty())
        {
            return "";
        }

        StringBuilder buffer = new StringBuilder();
        for (Object tag : tagList)
        {
            buffer.append(tag);
            buffer.append(" ");
        }
        return buffer.toString();
    }
}
/**
 * Parser for the doctype preference. Accepted values are
 * <code>omit | auto | strict | loose | transitional | [fpi]</code>, where the fpi is a quoted string similar to
 * <code>"-//ACME//DTD HTML 3.14159//EN"</code>.
 */
static class ParseDocType implements ParseProperty
{
    /**
     * Sets <code>configuration.docTypeMode</code> from the value.
     * <ul>
     * <li>A value that starts with a double quote (after trimming) selects the user mode and is returned, still
     * quoted.</li>
     * <li>Otherwise the first word (delimited by whitespace or commas, compared ignoring case) selects auto, omit,
     * strict or loose mode; "transitional" is accepted as a synonym for "loose".</li>
     * <li>Anything else, including null, is reported as a bad argument and leaves the mode untouched.</li>
     * </ul>
     * @param value raw option value, may be null
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration to update
     * @return the trimmed, quoted value in user mode, null in every other case
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        if (value == null)
        {
            // trim() on null would throw a NullPointerException; a missing value is simply a bad argument
            configuration.report.badArgument(value, option);
            return null;
        }

        String trimmed = value.trim();

        /* "-//ACME//DTD HTML 3.14159//EN" or similar */
        if (trimmed.startsWith("\""))
        {
            configuration.docTypeMode = Configuration.DOCTYPE_USER;
            return trimmed;
        }

        /* read first word */
        StringTokenizer words = new StringTokenizer(trimmed, " \t\n\r,");
        String word = words.hasMoreTokens() ? words.nextToken() : "";

        // #443663 - fix by Terry Teague 23 Jul 01
        if ("auto".equalsIgnoreCase(word))
        {
            configuration.docTypeMode = Configuration.DOCTYPE_AUTO;
        }
        else if ("omit".equalsIgnoreCase(word))
        {
            configuration.docTypeMode = Configuration.DOCTYPE_OMIT;
        }
        else if ("strict".equalsIgnoreCase(word))
        {
            configuration.docTypeMode = Configuration.DOCTYPE_STRICT;
        }
        else if ("loose".equalsIgnoreCase(word) || "transitional".equalsIgnoreCase(word))
        {
            configuration.docTypeMode = Configuration.DOCTYPE_LOOSE;
        }
        else
        {
            configuration.report.badArgument(trimmed, option);
        }
        return null;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "DocType";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "omit | auto | strict | loose | [fpi]";
    }

    /**
     * Describes the doctype mode currently stored in the configuration; the <code>value</code> argument is not used.
     * @return "auto", "omit", "strict", "transitional" (for loose mode), <code>configuration.docTypeStr</code> in
     * user mode, or "unknown" for any other mode
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        switch (configuration.docTypeMode)
        {
            case Configuration.DOCTYPE_AUTO :
                return "auto";
            case Configuration.DOCTYPE_OMIT :
                return "omit";
            case Configuration.DOCTYPE_STRICT :
                return "strict";
            case Configuration.DOCTYPE_LOOSE :
                return "transitional";
            case Configuration.DOCTYPE_USER :
                return configuration.docTypeStr;
            default :
                return "unknown";
        }
    }
}
/**
 * Parser for the repeated attributes option: keep-first or keep-last.
 */
static class ParseRepeatedAttribute implements ParseProperty
{
    /**
     * Maps "keep-first" and "keep-last" (any case) to the matching <code>Configuration</code> constant. Any other
     * value is reported as a bad argument and yields -1.
     * @param value raw option value
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration whose report receives bad arguments
     * @return the selected constant as an Integer, or -1 for an unrecognised value
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        boolean keepFirst = "keep-first".equalsIgnoreCase(value);
        boolean keepLast = !keepFirst && "keep-last".equalsIgnoreCase(value);

        if (!keepFirst && !keepLast)
        {
            configuration.report.badArgument(value, option);
        }

        int policy = keepFirst ? Configuration.KEEP_FIRST : (keepLast ? Configuration.KEEP_LAST : -1);
        return policy;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Enum";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "keep-first, keep-last";
    }

    /**
     * @return "keep-first" or "keep-last" for the matching constant, "unknown" for any other Integer, an empty
     * string for null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        if (value == null)
        {
            return "";
        }

        int policy = ((Integer) value).intValue();
        if (policy == Configuration.KEEP_FIRST)
        {
            return "keep-first";
        }
        if (policy == Configuration.KEEP_LAST)
        {
            return "keep-last";
        }
        return "unknown";
    }
}
/**
 * Parser for free-form String values.
 */
static class ParseString implements ParseProperty
{
    /**
     * Passes the value through unchanged; nothing is validated or reported.
     * @param value raw option value
     * @param option option name, not used
     * @param configuration configuration, not used
     * @return the value exactly as given
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        return value;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "String";
    }

    /**
     * @return a human readable hint about the accepted values ("-": no fixed list)
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "-";
    }

    /**
     * @return the value cast to String, or an empty string when the value is null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        if (value == null)
        {
            return "";
        }
        return (String) value;
    }
}
/**
 * Parser for the indent option: a boolean that additionally accepts "auto".
 */
static class ParseIndent implements ParseProperty
{
    /**
     * Values that switch indenting on, matching what {@link #getOptionValues()} advertises.
     */
    private static final String[] ENABLED = {"yes", "y", "true", "t", "1"};

    /**
     * Values that switch indenting off, matching what {@link #getOptionValues()} advertises.
     */
    private static final String[] DISABLED = {"no", "n", "false", "f", "0"};

    /**
     * Parses the indent value, comparing ignoring case.
     * <ul>
     * <li>"auto" yields true and turns <code>configuration.smartIndent</code> on.</li>
     * <li>"yes", "y", "true", "t" or "1" yield true and turn smartIndent off.</li>
     * <li>"no", "n", "false", "f" or "0" yield false and turn smartIndent off.</li>
     * <li>Anything else (including null) is reported as a bad argument; smartIndent is left alone and the current
     * <code>configuration.indentContent</code> is returned.</li>
     * </ul>
     * The one-character forms used to be rejected although the option help lists them.
     * @param value raw option value
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration to read and update
     * @return Boolean.TRUE or Boolean.FALSE
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        // fallback for unrecognised values: keep whatever is configured right now
        boolean indent = configuration.indentContent;

        if ("auto".equalsIgnoreCase(value))
        {
            indent = true;
            configuration.smartIndent = true;
        }
        else if (isOneOf(value, ENABLED))
        {
            indent = true;
            configuration.smartIndent = false;
        }
        else if (isOneOf(value, DISABLED))
        {
            indent = false;
            configuration.smartIndent = false;
        }
        else
        {
            configuration.report.badArgument(value, option);
        }
        return indent ? Boolean.TRUE : Boolean.FALSE;
    }

    /**
     * Checks whether the value equals one of the accepted spellings, ignoring case.
     * @param value value to test, may be null
     * @param accepted accepted spellings
     * @return true when a spelling matches; false otherwise, in particular for null
     */
    private static boolean isOneOf(String value, String[] accepted)
    {
        for (String candidate : accepted)
        {
            if (candidate.equalsIgnoreCase(value))
            {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Indent";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "auto, y/n, yes/no, t/f, true/false, 1/0";
    }

    /**
     * @return the string form of the value, or an empty string when the value is null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        return value == null ? "" : value.toString();
    }
}
/**
 * Parser for CSS1 selector values.
 */
static class ParseCSS1Selector implements ParseProperty
{
    /**
     * Returns the first whitespace-delimited token of the value with a "-" appended. A value without any token
     * (null, empty or whitespace only) is reported once as a bad argument and yields null. When a token exists, the
     * complete value (not only the first token) is additionally checked with <code>Lexer.isCSS1Selector</code>; a
     * failed check is reported as a bad argument but the prefix is still returned.
     * @param value raw option value, may be null
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration whose report receives bad arguments
     * @return first token plus "-", or null when the value holds no token
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        // StringTokenizer rejects null with a NullPointerException, so a missing value is handled up front
        StringTokenizer tokens = value == null ? null : new StringTokenizer(value);
        if (tokens == null || !tokens.hasMoreTokens())
        {
            // return right away: falling through to the selector check could report the same value a second time
            configuration.report.badArgument(value, option);
            return null;
        }

        // Make sure any escaped Unicode is terminated so valid class names are
        // generated after Tidy appends last digits.
        String prefix = tokens.nextToken() + "-";

        if (!Lexer.isCSS1Selector(value))
        {
            configuration.report.badArgument(value, option);
        }
        return prefix;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Name";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "CSS1 selector";
    }

    /**
     * @return the value cast to String, or an empty string when the value is null
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        return value == null ? "" : (String) value;
    }
}
/**
 * Parser for the newline option. Allows lf|crlf|cr.
 */
static class ParseNewLine implements ParseProperty
{
    /**
     * Stores the newline characters selected by the value (compared ignoring case) in
     * <code>configuration.newline</code>. Any other value is reported as a bad argument and the configuration is
     * left unchanged.
     * @param value "lf", "cr" or "crlf"
     * @param option option name, only used when reporting a bad argument
     * @param configuration configuration to update
     * @return always null
     * @see org.w3c.tidy.ParseProperty#parse(java.lang.String, java.lang.String, org.w3c.tidy.Configuration)
     */
    public Object parse(String value, String option, Configuration configuration)
    {
        char[] chosen;
        if ("lf".equalsIgnoreCase(value))
        {
            chosen = new char[]{'\n'};
        }
        else if ("cr".equalsIgnoreCase(value))
        {
            chosen = new char[]{'\r'};
        }
        else if ("crlf".equalsIgnoreCase(value))
        {
            chosen = new char[]{'\r', '\n'};
        }
        else
        {
            // not one of lf|crlf|cr: report and keep the configured newline
            configuration.report.badArgument(value, option);
            return null;
        }

        configuration.newline = chosen;
        return null;
    }

    /**
     * @return the display name of this option type
     * @see org.w3c.tidy.ParseProperty#getType()
     */
    public String getType()
    {
        return "Enum";
    }

    /**
     * @return a human readable hint about the accepted values
     * @see org.w3c.tidy.ParseProperty#getOptionValues()
     */
    public String getOptionValues()
    {
        return "lf, crlf, cr";
    }

    /**
     * Describes the newline currently stored in the configuration; the <code>value</code> argument is not used.
     * @return "lf" or "cr" for a one-character newline (anything but '\n' counts as "cr"), "crlf" otherwise
     * @see org.w3c.tidy.ParseProperty#getFriendlyName(java.lang.String, java.lang.Object, Configuration)
     */
    public String getFriendlyName(String option, Object value, Configuration configuration)
    {
        char[] current = configuration.newline;
        if (current.length != 1)
        {
            return "crlf";
        }
        if (current[0] == '\n')
        {
            return "lf";
        }
        return "cr";
    }
}
}