Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Java HTML Tidy - JTidy
* HTML parser and pretty printer
*
* Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
* Institute of Technology, Institut National de Recherche en
* Informatique et en Automatique, Keio University). All Rights
* Reserved.
*
* Contributing Author(s):
*
* Dave Raggett
* Andy Quick (translation to Java)
* Gary L Peskin (Java development)
* Sami Lempinen (release management)
* Fabrizio Giustina
*
* The contributing author(s) would like to thank all those who
* helped with testing, bug fixes, and patience. This wouldn't
* have been possible without all of you.
*
* COPYRIGHT NOTICE:
*
* This software and documentation is provided "as is," and
* the copyright holders and contributing author(s) make no
* representations or warranties, express or implied, including
* but not limited to, warranties of merchantability or fitness
* for any particular purpose or that the use of the software or
* documentation will not infringe any third party patents,
* copyrights, trademarks or other rights.
*
* The copyright holders and contributing author(s) will not be
* liable for any direct, indirect, special or consequential damages
* arising out of any use of the software or documentation, even if
* advised of the possibility of such damage.
*
* Permission is hereby granted to use, copy, modify, and distribute
* this source code, or portions hereof, documentation and executables,
* for any purpose, without fee, subject to the following restrictions:
*
* 1. The origin of this source code must not be misrepresented.
* 2. Altered versions must be plainly marked as such and must
* not be misrepresented as being the original source.
* 3. This Copyright notice may not be removed or altered from any
* source or altered source distribution.
*
* The copyright holders and contributing author(s) specifically
* permit, without fee, and encourage the use of this source code
* as a component for supporting the Hypertext Markup Language in
* commercial products. If you use this source code in a product,
* acknowledgment is not required but would be appreciated.
*
*/
package org.w3c.tidy;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.NumberFormat;
/**
* Pretty print parse tree. Block-level and unknown elements are printed on new lines and their contents indented 2
* spaces. Inline elements are printed inline. Inline content is wrapped on spaces (except in attribute values or
* preformatted text), after start tags and before end tags.
*
* @author Dave Raggett [email protected]
* @author Andy Quick [email protected] (translation to Java)
* @author Fabrizio Giustina
* @version $Revision$ ($Author$)
*/
public class PPrint
{
/**
 * Position flag: normal text. Position flags are bit values that are OR-ed together into a print mode.
 */
private static final short NORMAL = 0;
/**
 * Position flag: preformatted text.
 */
private static final short PREFORMATTED = 1;
/**
 * Position flag: comment.
 */
private static final short COMMENT = 2;
/**
 * Position flag: attribute value.
 */
private static final short ATTRIBVALUE = 4;
/**
 * Position flag: nowrap.
 */
private static final short NOWRAP = 8;
/**
 * Position flag: cdata.
 */
private static final short CDATA = 16;
/**
 * Start cdata token. The literal had been reduced to an empty string, which made every search for it succeed at
 * index 0.
 */
private static final String CDATA_START = "<![CDATA[";
/**
 * End cdata token.
 * NOTE(review): this declaration appears to have been lost together with the CDATA_START literal; restored here,
 * confirm against the code that emits CDATA sections.
 */
private static final String CDATA_END = "]]>";
/**
 * Javascript comment start.
 */
private static final String JS_COMMENT_START = "//";
/**
 * Javascript comment end.
 */
private static final String JS_COMMENT_END = "";
/**
 * VB comment start.
 */
private static final String VB_COMMENT_START = "\'";
/**
 * VB comment end.
 */
private static final String VB_COMMENT_END = "";
/**
 * CSS comment start.
 */
private static final String CSS_COMMENT_START = "/*";
/**
 * CSS comment end.
 */
private static final String CSS_COMMENT_END = "*/";
/**
 * Default comment start.
 */
private static final String DEFAULT_COMMENT_START = "";
/**
 * Default comment end.
 */
private static final String DEFAULT_COMMENT_END = "";
/**
 * Buffer for the line currently being assembled; one int per character so that unicode values can be held
 * un-encoded. Stays null until the first character is stored.
 */
private int[] linebuf;
/**
 * Capacity recorded for {@link #linebuf}; starts at 0, becomes 256 on first use and doubles when more room is needed.
 */
private int lbufsize;
/**
 * Number of characters currently held in {@link #linebuf}.
 */
private int linelen;
/**
 * Index in {@link #linebuf} at which the line may be broken; 0 means no wrap point has been recorded.
 */
private int wraphere;
/**
 * When set (and attribute indenting is off) the flush methods skip the leading indent; cleared on every flush.
 */
private boolean inAttVal;
/**
 * Toggled while an attribute value is inside a quoted string; wrapLine then ends the wrapped line with " \".
 */
private boolean inString;
/**
 * Current slide number.
 */
private int slide;
/**
 * Total slides count.
 */
private int count;
/**
 * The slide content node.
 */
private Node slidecontent;
/**
 * current configuration.
 */
private Configuration configuration;
/**
 * Instantiates a new PPrint that reads all of its formatting options from the given configuration.
 * The reference is stored as-is (not copied), so later changes to the configuration object are seen by this printer.
 *
 * @param configuration configuration
 */
public PPrint(Configuration configuration)
{
this.configuration = configuration;
}
/**
 * Computes the wrap column to use for the configured language.
 * For "zh" the room between the indent and the configured wrap length is halved, for "ja" it is scaled to 70%;
 * any other language gets the configured wrap length unchanged.
 *
 * @param ind the wrap amount
 * @return the total wrap amount
 */
int cWrapLen(int ind)
{
    final int wrapLen = this.configuration.wraplen;
    final String lang = this.configuration.language;

    // #431953 - RJ: wrap length adjusted for wide characters
    if ("zh".equals(lang))
    {
        // Chinese characters take two positions on a fixed-width screen. Tracking a parallel line length
        // (2 per Chinese character, 1 otherwise) would be more accurate, but this is far simpler.
        return ind + (wrapLen - ind) / 2;
    }
    if ("ja".equals(lang))
    {
        // average Japanese text is 30% kanji
        return ind + ((wrapLen - ind) * 7) / 10;
    }
    return wrapLen;
}
/**
 * Decodes the UTF-8 sequence beginning at {@code str[start]} and stores the resulting character in {@code ch[0]}.
 * If the decoder reports an error, the replacement character 0xFFFD is stored instead.
 *
 * @param str points to the UTF-8 byte sequence
 * @param start starting offset in string
 * @param ch single-element array receiving the decoded character
 * @return one less than the byte count reported by the decoder for this character
 */
public static int getUTF8(byte[] str, int start, int[] ch)
{
    final int[] decoded = new int[1];
    final int[] consumed = {0};

    // the lead byte is handed over separately; the decoder reads continuation bytes from start + 1
    final int lead = TidyUtils.toUnsigned(str[start]);
    final boolean failed = EncodingUtils.decodeUTF8BytesToChar(decoded, lead, str, null, consumed, start + 1);

    ch[0] = failed ? 0xFFFD : decoded[0];
    return consumed[0] - 1;
}
/**
 * Encodes char c as UTF-8 into buf and returns start advanced by the number of bytes produced.
 * If the encoder reports an error, the three-byte encoding of the replacement char 0xFFFD is written instead.
 * NOTE(review): the fallback bytes are written at buf[0..2] and {@code start} is not passed to the encoder, so
 * {@code start} only affects the return value - confirm whether writing at offset {@code start} was intended.
 *
 * @param buf the buffer
 * @param start the start offset
 * @param c the char
 * @return the current position
 */
public static int putUTF8(byte[] buf, int start, int c)
{
    final int[] written = {0};

    if (EncodingUtils.encodeCharToUTF8Bytes(c, buf, null, written))
    {
        // replacement char 0xFFFD encoded as UTF-8
        buf[0] = (byte) 0xEF;
        buf[1] = (byte) 0xBF;
        buf[2] = (byte) 0xBD;
        return start + 3;
    }
    return start + written[0];
}
/**
 * Stores char c at the given position of the line buffer, enlarging the buffer first when the position (plus one
 * spare slot) does not fit. The capacity starts at 256 and doubles until it is large enough; the first
 * {@code index} entries of the old buffer are carried over.
 *
 * @param c the char
 * @param index the index
 */
private void addC(int c, int index)
{
    final int needed = index + 1;
    if (needed >= lbufsize)
    {
        do
        {
            lbufsize = (lbufsize == 0) ? 256 : lbufsize * 2;
        }
        while (needed >= lbufsize);

        final int[] grown = new int[lbufsize];
        if (linebuf != null)
        {
            System.arraycopy(linebuf, 0, grown, 0, index);
        }
        linebuf = grown;
    }
    linebuf[index] = c;
}
/**
 * Copies the characters of str into the line buffer starting at index, enlarging the buffer first if needed
 * (256 initially, then doubling). The first {@code index} entries of the old buffer are carried over.
 *
 * @param str String to be added
 * @param index actual line length
 * @return final line length
 */
private int addAsciiString(String str, int index)
{
    final int count = str.length();
    final int endIndex = index + count;

    if (endIndex >= lbufsize)
    {
        do
        {
            lbufsize = (lbufsize == 0) ? 256 : lbufsize * 2;
        }
        while (endIndex >= lbufsize);

        final int[] grown = new int[lbufsize];
        if (linebuf != null)
        {
            System.arraycopy(linebuf, 0, grown, 0, index);
        }
        linebuf = grown;
    }

    int target = index;
    for (int k = 0; k < count; k++)
    {
        linebuf[target++] = str.charAt(k);
    }
    return endIndex;
}
/**
 * Writes the buffered line up to the recorded wrap point, followed by a newline, and moves the remainder of the
 * buffer to the front. Does nothing when no wrap point is recorded. While inside a string the wrapped line is
 * terminated with " \".
 *
 * @param fout where to write output
 * @param indent the indent amount
 */
private void wrapLine(Out fout, int indent)
{
    if (wraphere == 0)
    {
        return;
    }

    for (int pad = indent; pad > 0; --pad)
    {
        fout.outc(' ');
    }
    for (int k = 0; k < wraphere; ++k)
    {
        fout.outc(linebuf[k]);
    }
    if (inString)
    {
        fout.outc(' ');
        fout.outc('\\');
    }
    fout.newline();

    if (linelen <= wraphere)
    {
        // everything was written
        linelen = 0;
        wraphere = 0;
        return;
    }

    // drop the single space the line was broken on
    if (linebuf[wraphere] == ' ')
    {
        ++wraphere;
    }

    // terminate the buffer, then shift the tail (terminator included) to the start
    addC('\0', linelen);
    for (int src = wraphere, dst = 0;; ++src, ++dst)
    {
        final int moved = linebuf[src];
        linebuf[dst] = moved;
        if (moved == 0)
        {
            break;
        }
    }

    linelen -= wraphere;
    wraphere = 0;
}
/**
 * Wraps the buffered line inside an attribute value: writes the text up to the wrap point followed by a space
 * (and a backslash when the break falls inside a string), starts a new line and moves the remainder of the
 * buffer to the front.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param inString true if the wrap point lies inside a quoted string
 */
private void wrapAttrVal(Out fout, int indent, boolean inString)
{
    for (int pad = indent; pad > 0; --pad)
    {
        fout.outc(' ');
    }
    for (int k = 0; k < wraphere; ++k)
    {
        fout.outc(linebuf[k]);
    }
    fout.outc(' ');
    if (inString)
    {
        fout.outc('\\');
    }
    fout.newline();

    if (linelen <= wraphere)
    {
        // everything was written
        linelen = 0;
        wraphere = 0;
        return;
    }

    // drop the single space the line was broken on
    if (linebuf[wraphere] == ' ')
    {
        ++wraphere;
    }

    // terminate the buffer, then shift the tail (terminator included) to the start
    addC('\0', linelen);
    for (int src = wraphere, dst = 0;; ++src, ++dst)
    {
        final int moved = linebuf[src];
        linebuf[dst] = moved;
        if (moved == 0)
        {
            break;
        }
    }

    linelen -= wraphere;
    wraphere = 0;
}
/**
 * Writes out whatever is buffered (wrapping first if the line has reached the wrap length), optionally ends the
 * line, and always resets the line state - even when the buffer was empty.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param shouldAddNewLine true to add new line after flush
 */
public void flushLine(Out fout, int indent, boolean shouldAddNewLine)
{
    if (linelen > 0)
    {
        if (indent + linelen >= this.configuration.wraplen)
        {
            wrapLine(fout, indent);
        }

        // inside an attribute value the indent is only written when attributes are indented
        final boolean writeIndent = this.configuration.indentAttributes || !inAttVal;
        for (int pad = writeIndent ? indent : 0; pad > 0; --pad)
        {
            fout.outc(' ');
        }
        for (int k = 0; k < linelen; ++k)
        {
            fout.outc(linebuf[k]);
        }
    }

    if (shouldAddNewLine)
    {
        fout.newline();
    }

    linelen = 0;
    wraphere = 0;
    inAttVal = false;
}
/**
 * Flush the line and terminate it with a newline. Shorthand for the three-argument
 * {@code flushLine} with {@code shouldAddNewLine} set to true.
 *
 * @param fout where to write output
 * @param indent the indent amount
 */
public void flushLine(Out fout, int indent)
{
flushLine(fout, indent, true);
}
/**
 * Flushes the line only if something is buffered; with an empty buffer nothing is written and the line state is
 * left untouched.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param shouldAddNewLine true to add new line after flush
 */
public void condFlushLine(Out fout, int indent, boolean shouldAddNewLine)
{
    if (linelen <= 0)
    {
        return;
    }

    if (indent + linelen >= this.configuration.wraplen)
    {
        wrapLine(fout, indent);
    }

    // inside an attribute value the indent is only written when attributes are indented
    final boolean writeIndent = this.configuration.indentAttributes || !inAttVal;
    for (int pad = writeIndent ? indent : 0; pad > 0; --pad)
    {
        fout.outc(' ');
    }
    for (int k = 0; k < linelen; ++k)
    {
        fout.outc(linebuf[k]);
    }
    if (shouldAddNewLine)
    {
        fout.newline();
    }

    linelen = 0;
    wraphere = 0;
    inAttVal = false;
}
/**
 * Conditional flush the line, terminating it with a newline. Shorthand for the three-argument
 * {@code condFlushLine} with {@code shouldAddNewLine} set to true.
 *
 * @param fout where to write output
 * @param indent the indent amount
 */
public void condFlushLine(Out fout, int indent)
{
condFlushLine(fout, indent, true);
}
/**
 * Appends one character to the line buffer, escaping it or replacing it by an entity reference as required by
 * the print mode, the configuration flags and the output encoding. May also record a new wrap point.
 * <p>
 * Changes relative to the previous revision of this method:
 * </p>
 * <ul>
 * <li>numeric character references are written with their "&amp;#" prefix again (the prefix had been lost, so
 * e.g. "160;" was emitted);</li>
 * <li>the BIG5, SHIFTJIS/ISO2022 and raw-output branches were nested inside the UTF8 branch and therefore could
 * never match those encodings; they are now tested alongside it. Handling under UTF8 is unchanged.</li>
 * </ul>
 *
 * @param c the char to print
 * @param mode the mode to use (bitwise OR of the position flags)
 */
private void printChar(int c, short mode)
{
    final boolean preformatted = TidyUtils.toBoolean(mode & PREFORMATTED);

    if (c == ' ' && !TidyUtils.toBoolean(mode & (PREFORMATTED | COMMENT | ATTRIBVALUE | CDATA)))
    {
        // coerce a space character to a non-breaking space
        if (TidyUtils.toBoolean(mode & NOWRAP))
        {
            // by default XML doesn't define &nbsp;, so use the numeric form there
            printString(this.configuration.numEntities || this.configuration.xmlTags ? "&#160;" : "&nbsp;");
            return;
        }
        wraphere = linelen;
    }

    // comment and CDATA characters are passed raw
    if (TidyUtils.toBoolean(mode & (COMMENT | CDATA)))
    {
        addC(c, linelen++);
        return;
    }

    // from here on we are never in CDATA: map < to &lt; etc.
    if (c == '<')
    {
        printString("&lt;");
        return;
    }
    if (c == '>')
    {
        printString("&gt;");
        return;
    }
    // naked '&' chars can be left alone or quoted as &amp;
    // The latter is required for XML where naked '&' are illegal.
    if (c == '&' && this.configuration.quoteAmpersand)
    {
        printString("&amp;");
        return;
    }
    if (c == '"' && this.configuration.quoteMarks)
    {
        printString("&quot;");
        return;
    }
    if (c == '\'' && this.configuration.quoteMarks)
    {
        printString("&#39;");
        return;
    }
    if (c == 160 && !this.configuration.rawOut)
    {
        if (this.configuration.makeBare)
        {
            addC(' ', linelen++);
        }
        else if (this.configuration.quoteNbsp)
        {
            printString(this.configuration.numEntities || this.configuration.xmlTags ? "&#160;" : "&nbsp;");
        }
        else
        {
            addC(c, linelen++);
        }
        return;
    }

    // #431953 - start RJ
    // Handle encoding-specific issues
    final String outEncoding = this.configuration.getOutCharEncodingName();
    if ("UTF8".equals(outEncoding) && c >= 0x2000 && !preformatted)
    {
        // Chinese doesn't have spaces, so it needs other kinds of breaks.
        // This will also help documents using nice Unicode punctuation,
        // but we leave the ASCII range punctuation untouched.
        if (isBreakAfterChar(c))
        {
            // break after any punctuation or space character, but before a left punctuation
            wraphere = isBreakBeforeChar(c) ? linelen : linelen + 1;
        }
    }
    else if ("BIG5".equals(outEncoding))
    {
        // Allow linebreak at Chinese punctuation characters
        // There are not many spaces in Chinese
        addC(c, linelen++);
        if (((c & 0xFF00) == 0xA100) && !preformatted)
        {
            wraphere = linelen;
            // opening brackets have odd codes: break before them
            // NOTE(review): c is in 0xA100..0xA1FF here, so this range test cannot succeed as written;
            // it was probably meant to look at the low byte - left unchanged, confirm.
            if ((c > 0x5C) && (c < 0xAD) && ((c & 1) == 1))
            {
                wraphere--;
            }
        }
        return;
    }
    else if ("SHIFTJIS".equals(outEncoding) || "ISO2022".equals(outEncoding))
    {
        // ISO 2022 characters are passed raw
        addC(c, linelen++);
        return;
    }
    else if (this.configuration.rawOut)
    {
        addC(c, linelen++);
        return;
    }
    // #431953 - end RJ

    // if preformatted text, map &nbsp; to space
    if (c == 160 && preformatted)
    {
        addC(' ', linelen++);
        return;
    }

    // Filters from Word and PowerPoint often use smart quotes resulting in character codes between 128 and 159.
    // Unfortunately, the corresponding HTML 4.0 entities for these are not widely supported.
    // The following converts dashes and quotation marks to the nearest ASCII equivalent.
    // My thanks to Andrzej Novosiolov for his help with this code.
    if (this.configuration.makeClean && this.configuration.asciiChars || this.configuration.makeBare)
    {
        switch (c)
        {
            case 0x2013: // en dash
            case 0x2014: // em dash
                c = '-';
                break;
            case 0x2018: // left single quotation mark
            case 0x2019: // right single quotation mark
            case 0x201A: // single low-9 quotation mark
                c = '\'';
                break;
            case 0x201C: // left double quotation mark
            case 0x201D: // right double quotation mark
            case 0x201E: // double low-9 quotation mark
                c = '"';
                break;
            default:
                break;
        }
    }

    // don't map latin-1 chars to entities
    if ("ISO8859_1".equals(outEncoding))
    {
        if (c > 255) // multi byte chars
        {
            printString(entityReference(c));
        }
        else if (c > 126 && c < 160)
        {
            printString("&#" + c + ";");
        }
        else
        {
            addC(c, linelen++);
        }
        return;
    }

    // don't map utf8 or utf16 chars to entities
    if (outEncoding.startsWith("UTF"))
    {
        addC(c, linelen++);
        return;
    }

    // use numeric entities only for XML
    if (this.configuration.xmlTags)
    {
        // if ASCII use numeric entities for chars > 127
        if (c > 127 && "ASCII".equals(outEncoding))
        {
            printString("&#" + c + ";");
        }
        else
        {
            // otherwise output char raw
            addC(c, linelen++);
        }
        return;
    }

    // default treatment for ASCII
    if ("ASCII".equals(outEncoding) && (c > 126 || (c < ' ' && c != '\t')))
    {
        printString(entityReference(c));
        return;
    }

    addC(c, linelen++);
}

/**
 * Builds the entity reference for c: the named entity when named entities are allowed and the default entity
 * table knows one, otherwise the decimal numeric character reference.
 */
private String entityReference(int c)
{
    if (!this.configuration.numEntities)
    {
        final String name = EntityTable.getDefaultEntityTable().entityName((short) c);
        if (name != null)
        {
            return "&" + name + ";";
        }
    }
    return "&#" + c + ";";
}

/**
 * Tells whether a line break is allowed next to the given punctuation or space character (code points from 0x2000 up).
 */
private static boolean isBreakAfterChar(int c)
{
    if (((c >= 0x2000) && (c <= 0x2006))
        || ((c >= 0x2008) && (c <= 0x2010))
        || ((c >= 0x2011) && (c <= 0x2046))
        || ((c >= 0x207D) && (c <= 0x207E))
        || ((c >= 0x208D) && (c <= 0x208E))
        || ((c >= 0x2329) && (c <= 0x232A))
        || ((c >= 0x3001) && (c <= 0x3003))
        || ((c >= 0x3008) && (c <= 0x3011))
        || ((c >= 0x3014) && (c <= 0x301F))
        || ((c >= 0xFD3E) && (c <= 0xFD3F))
        || ((c >= 0xFE30) && (c <= 0xFE44))
        || ((c >= 0xFE49) && (c <= 0xFE52))
        || ((c >= 0xFE54) && (c <= 0xFE61))
        || ((c >= 0xFE6A) && (c <= 0xFE6B))
        || ((c >= 0xFF01) && (c <= 0xFF03))
        || ((c >= 0xFF05) && (c <= 0xFF0A))
        || ((c >= 0xFF0C) && (c <= 0xFF0F))
        || ((c >= 0xFF1A) && (c <= 0xFF1B))
        || ((c >= 0xFF1F) && (c <= 0xFF20))
        || ((c >= 0xFF3B) && (c <= 0xFF3D))
        || ((c >= 0xFF61) && (c <= 0xFF65)))
    {
        return true;
    }
    switch (c)
    {
        case 0xFE63:
        case 0xFE68:
        case 0x3030:
        case 0x30FB:
        case 0xFF3F:
        case 0xFF5B:
        case 0xFF5D:
            return true;
        default:
            return false;
    }
}

/**
 * Tells whether c is a left (opening) punctuation character, before which the line should be broken.
 */
private static boolean isBreakBeforeChar(int c)
{
    if (((c >= 0x201A) && (c <= 0x201C)) || ((c >= 0x201E) && (c <= 0x201F)))
    {
        return true;
    }
    switch (c)
    {
        case 0x2018:
        case 0x2039:
        case 0x2045:
        case 0x207D:
        case 0x208D:
        case 0x2329:
        case 0x3008:
        case 0x300A:
        case 0x300C:
        case 0x300E:
        case 0x3010:
        case 0x3014:
        case 0x3016:
        case 0x3018:
        case 0x301A:
        case 0x301D:
        case 0xFD3E:
        case 0xFE35:
        case 0xFE37:
        case 0xFE39:
        case 0xFE3B:
        case 0xFE3D:
        case 0xFE3F:
        case 0xFE41:
        case 0xFE43:
        case 0xFE59:
        case 0xFE5B:
        case 0xFE5D:
        case 0xFF08:
        case 0xFF3B:
        case 0xFF5B:
        case 0xFF62:
            return true;
        default:
            return false;
    }
}
/**
 * Prints a run of UTF-8 encoded text. Each character is decoded and passed to printChar; a newline flushes the
 * current line instead. The line buffer holds ints rather than chars so that unicode values can be kept
 * un-encoded; the translation to UTF-8 is deferred to the outc routine called to flush the line buffer.
 * A null text array prints nothing.
 *
 * @param fout where to write output
 * @param mode the mode to use
 * @param indent the indent amount
 * @param textarray the text
 * @param start offset to start from
 * @param end offset to read up to (exclusive)
 */
private void printText(Out fout, short mode, int indent, byte[] textarray, int start, int end)
{
    if (textarray == null)
    {
        return;
    }

    final int[] decoded = new int[1];
    int pos = start;
    while (pos < end)
    {
        if (indent + linelen >= this.configuration.wraplen)
        {
            wrapLine(fout, indent);
        }

        int current = textarray[pos] & 0xFF; // convert to unsigned
        if (current > 0x7F)
        {
            // UTF-8 multibyte character: skip its continuation bytes as well
            pos += getUTF8(textarray, pos, decoded);
            current = decoded[0];
        }
        ++pos;

        if (current == '\n')
        {
            flushLine(fout, indent);
        }
        else
        {
            printChar(current, mode);
        }
    }
}
/**
 * Appends every char of the string to the line buffer as-is (no escaping, no wrap handling).
 *
 * @param str the string to print
 */
private void printString(String str)
{
    for (char ch : str.toCharArray())
    {
        addC(ch, linelen++);
    }
}
/**
* Print the attribute value.
*
* @param fout where to write output
* @param indent the indent amount
* @param value the value
* @param delim the delimiter
* @param wrappable wrappable or not
*/
private void printAttrValue(Out fout, int indent, String value, int delim, boolean wrappable)
{
int c;
int[] ci = new int[1];
boolean wasinstring = false;
byte[] valueChars = null;
int i;
short mode = (wrappable ? (short) (NORMAL | ATTRIBVALUE) : (short) (PREFORMATTED | ATTRIBVALUE));
if (value != null)
{
valueChars = TidyUtils.getBytes(value);
}
// look for ASP, Tango or PHP instructions for computed attribute value
if (valueChars != null && valueChars.length >= 5 && valueChars[0] == '<')
{
if (valueChars[1] == '%' || valueChars[1] == '@' || (new String(valueChars, 0, 5)).equals("= this.configuration.wraplen)
{
wrapLine(fout, indent);
}
if (indent + linelen < this.configuration.wraplen)
{
wraphere = linelen;
}
else
{
condFlushLine(fout, indent);
}
}
addC(delim, linelen++);
if (value != null)
{
inString = false;
i = 0;
while (i < valueChars.length)
{
c = (valueChars[i]) & 0xFF; // Convert to unsigned.
if (wrappable && c == ' ' && indent + linelen < this.configuration.wraplen)
{
wraphere = linelen;
wasinstring = inString;
}
if (wrappable && wraphere > 0 && indent + linelen >= this.configuration.wraplen)
{
wrapAttrVal(fout, indent, wasinstring);
}
if (c == delim)
{
String entity;
entity = (c == '"' ? """ : "'");
for (int j = 0; j < entity.length(); j++)
{
addC(entity.charAt(j), linelen++);
}
++i;
continue;
}
else if (c == '"')
{
if (this.configuration.quoteMarks)
{
addC('&', linelen++);
addC('q', linelen++);
addC('u', linelen++);
addC('o', linelen++);
addC('t', linelen++);
addC(';', linelen++);
}
else
{
addC('"', linelen++);
}
if (delim == '\'')
{
inString = !inString;
}
++i;
continue;
}
else if (c == '\'')
{
if (this.configuration.quoteMarks)
{
addC('&', linelen++);
addC('#', linelen++);
addC('3', linelen++);
addC('9', linelen++);
addC(';', linelen++);
}
else
{
addC('\'', linelen++);
}
if (delim == '"')
{
inString = !inString;
}
++i;
continue;
}
// look for UTF-8 multibyte character
if (c > 0x7F)
{
i += getUTF8(valueChars, i, ci);
c = ci[0];
}
++i;
if (c == '\n')
{
flushLine(fout, indent);
continue;
}
printChar(c, mode);
}
}
inString = false;
addC(delim, linelen++);
}
/**
 * Print a single attribute: a separating space, the case-folded name and, where applicable, its value.
 * With attribute indenting enabled every attribute starts on its own, further indented line.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 * @param attr the attribute value
 */
private void printAttribute(Out fout, int indent, Node node, AttVal attr)
{
    if (this.configuration.indentAttributes)
    {
        flushLine(fout, indent);
        indent += this.configuration.spaces;
    }

    final String attrName = attr.attribute;

    if (indent + linelen >= this.configuration.wraplen)
    {
        wrapLine(fout, indent);
    }

    // values are only wrappable for known attributes in non-XML documents
    boolean wrappable = false;
    if (!this.configuration.xmlTags && !this.configuration.xmlOut && attr.dict != null)
    {
        wrappable = AttributeTable.getDefaultAttributeTable().isScript(attrName)
            ? this.configuration.wrapScriptlets
            : (!attr.dict.isNowrap() && this.configuration.wrapAttVals);
    }

    if (indent + linelen < this.configuration.wraplen)
    {
        wraphere = linelen;
    }
    else
    {
        condFlushLine(fout, indent);
    }
    addC(' ', linelen++);

    for (char ch : attrName.toCharArray())
    {
        addC(TidyUtils.foldCase(ch, this.configuration.upperCaseAttrs, this.configuration.xmlTags), linelen++);
    }

    if (indent + linelen >= this.configuration.wraplen)
    {
        wrapLine(fout, indent);
    }

    if (attr.value != null)
    {
        printAttrValue(fout, indent, attr.value, attr.delim, wrappable);
        return;
    }

    // attribute without a value
    if (this.configuration.xmlTags || this.configuration.xmlOut)
    {
        // XML needs a value: repeat the name for boolean attributes, else use an empty value
        printAttrValue(fout, indent, (attr.isBoolAttribute() ? attr.attribute : ""), attr.delim, true);
    }
    else if (!attr.isBoolAttribute() && node != null && !node.isNewNode())
    {
        printAttrValue(fout, indent, "", attr.delim, true);
    }
    else if (indent + linelen < this.configuration.wraplen)
    {
        wraphere = linelen;
    }
}
/**
 * Print the attribute list starting at attr. The rest of the list ({@code attr.next}) is printed first, then
 * attr itself. For XML output with xml-space enabled, an xml:space="preserve" attribute is added to nodes that
 * preserve white space and do not have one yet.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 * @param attr the attribute value
 */
private void printAttrs(Out fout, int indent, Node node, AttVal attr)
{
    // add xml:space attribute to pre and other elements
    final boolean needsXmlSpace = configuration.xmlOut
        && configuration.xmlSpace
        && ParserImpl.XMLPreserveWhiteSpace(node, configuration.tt)
        && node.getAttrByName("xml:space") == null;
    if (needsXmlSpace)
    {
        node.addAttribute("xml:space", "preserve");
        if (attr != null)
        {
            // restart from the node's current attribute list
            attr = node.attributes;
        }
    }

    if (attr == null)
    {
        return;
    }

    if (attr.next != null)
    {
        printAttrs(fout, indent, node, attr.next);
    }

    if (attr.attribute != null)
    {
        final Attribute definition = attr.dict;
        // with drop-proprietary-attributes on, skip unknown attributes and those flagged as proprietary
        final boolean keep = !this.configuration.dropProprietaryAttributes
            || (definition != null && !TidyUtils.containsAny(definition.getVersions(), Dict.VERS_PROPRIETARY));
        if (keep)
        {
            printAttribute(fout, indent, node, attr);
        }
    }
    else if (attr.asp != null)
    {
        addC(' ', linelen++);
        printAsp(fout, indent, attr.asp);
    }
    else if (attr.php != null)
    {
        addC(' ', linelen++);
        printPhp(fout, indent, attr.php);
    }
}
/**
 * Line can be wrapped immediately after an inline start tag provided it follows a text node ending in a space,
 * or its parent is an inline element to which that rule applies. This behaviour was reverse engineered from
 * Netscape 3.0. A null node, a node without a tag and a non-inline node all count as following a space.
 *
 * @param node current Node
 * @return true if the current char follows a space
 */
private static boolean afterSpace(Node node)
{
    // walk up through first-child inline elements until a previous sibling decides the answer
    Node current = node;
    while (current != null && current.tag != null && TidyUtils.toBoolean(current.tag.model & Dict.CM_INLINE))
    {
        final Node before = current.prev;
        if (before != null)
        {
            if (before.type == Node.TEXT_NODE && before.end > before.start && before.textarray != null)
            {
                final int last = before.textarray[before.end - 1] & 0xFF; // convert to unsigned
                return last == 160 || last == ' ' || last == '\n';
            }
            return false;
        }
        current = current.parent;
    }
    return true;
}
/**
 * Print a start tag (or an end tag when the node is of type END_TAG), including its attributes, and then either
 * record a wrap point after it or flush the line.
 *
 * @param lexer the Lexer to use
 * @param fout where to write output
 * @param mode the mode to use
 * @param indent the indent amount
 * @param node the node to use
 */
private void printTag(Lexer lexer, Out fout, short mode, int indent, Node node)
{
    final TagTable tags = this.configuration.tt;

    addC('<', linelen++);
    if (node.type == Node.END_TAG)
    {
        addC('/', linelen++);
    }
    for (char ch : node.element.toCharArray())
    {
        addC(TidyUtils.foldCase(ch, this.configuration.upperCaseTags, this.configuration.xmlTags), linelen++);
    }

    printAttrs(fout, indent, node, node.attributes);

    if ((this.configuration.xmlOut || this.configuration.xHTML)
        && (node.type == Node.START_END_TAG || TidyUtils.toBoolean(node.tag.model & Dict.CM_EMPTY)))
    {
        // the space is a NS compatibility hack, the slash is the required end tag marker
        printString(" /");
    }
    addC('>', linelen++);

    final boolean preformatted = TidyUtils.toBoolean(mode & PREFORMATTED);
    if (preformatted || (node.type == Node.START_END_TAG && !configuration.xHTML))
    {
        condFlushLine(fout, indent, !hasPreAncestor(node));
        return;
    }

    if (indent + linelen >= this.configuration.wraplen)
    {
        wrapLine(fout, indent);
    }

    // wrap after start tag if it is <br/> or if it's not inline
    // fix for [514348]
    if (indent + linelen < this.configuration.wraplen
        && !TidyUtils.toBoolean(mode & NOWRAP)
        && (!TidyUtils.toBoolean(node.tag.model & Dict.CM_INLINE) || (node.tag == tags.tagBr))
        && afterSpace(node))
    {
        wraphere = linelen;
    }
}
/**
 * Check if the given node has an ancestor whose tag is handled by the "pre" parser.
 *
 * @param node The node to check.
 * @return true if it has a pre node as ancestor.
 */
private boolean hasPreAncestor(Node node)
{
    for (Node ancestor = node.parent; ancestor != null; ancestor = ancestor.parent)
    {
        if (ancestor.tag != null && ancestor.tag.getParser() == ParserImpl.PRE)
        {
            return true;
        }
    }
    return false;
}
/**
 * Print the end tag of the node's element, with the name case-folded according to the configuration.
 * No wrap point is recorded in front of the tag: Netscape ignores the SGML standard by not ignoring a line
 * break before an inline end tag and would render it as an underlined space, so wrapping there stays disabled.
 *
 * @param mode the mode to use
 * @param indent the indent amount
 * @param node the node to use
 */
private void printEndTag(short mode, int indent, Node node)
{
    printString("</");
    for (char ch : node.element.toCharArray())
    {
        addC(TidyUtils.foldCase(ch, this.configuration.upperCaseTags, this.configuration.xmlTags), linelen++);
    }
    addC('>', linelen++);
}
/**
 * Print comment, unless comments are hidden by the configuration. The comment text is passed through raw
 * (COMMENT mode); the line is flushed afterwards when the node is flagged with a line break.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printComment(Out fout, int indent, Node node)
{
    if (this.configuration.hideComments)
    {
        return;
    }

    if (indent + linelen < this.configuration.wraplen)
    {
        wraphere = linelen;
    }

    printString("<!--");
    printText(fout, COMMENT, indent, node.textarray, node.start, node.end);
    // See Lexer.java: AQ 8Jul2000
    printString("-->");

    if (node.linebreak)
    {
        flushLine(fout, indent);
    }
}
/**
 * Print document type. Quote escaping is switched off in the shared configuration while the declaration is
 * printed; the previous setting is now restored in a finally block so that it is not left disabled when
 * printing fails with an exception. Text between '[' and ']' is printed in CDATA mode, i.e. raw.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param lexer the Lexer to use
 * @param node the node to use
 */
private void printDocType(Out fout, int indent, Lexer lexer, Node node)
{
    short mode = 0;
    final int[] ci = new int[1];

    final boolean savedQuoteMarks = this.configuration.quoteMarks;
    this.configuration.quoteMarks = false;
    try
    {
        if (indent + linelen < this.configuration.wraplen)
        {
            wraphere = linelen;
        }
        condFlushLine(fout, indent);

        printString("<!DOCTYPE ");

        if (indent + linelen < this.configuration.wraplen)
        {
            wraphere = linelen;
        }

        for (int i = node.start; i < node.end; ++i)
        {
            if (indent + linelen >= this.configuration.wraplen)
            {
                wrapLine(fout, indent);
            }

            int c = node.textarray[i] & 0xFF; // convert to unsigned

            // track whether we are inside the bracketed part of the declaration
            if (TidyUtils.toBoolean(mode & CDATA))
            {
                if (c == ']')
                {
                    mode &= ~CDATA;
                }
            }
            else if (c == '[')
            {
                mode |= CDATA;
            }

            // look for UTF-8 multibyte character
            if (c > 0x7F)
            {
                i += getUTF8(node.textarray, i, ci);
                c = ci[0];
            }

            if (c == '\n')
            {
                flushLine(fout, indent);
                continue;
            }
            printChar(c, mode);
        }

        if (linelen < this.configuration.wraplen)
        {
            wraphere = linelen;
        }
        addC('>', linelen++);
    }
    finally
    {
        this.configuration.quoteMarks = savedQuoteMarks;
    }

    condFlushLine(fout, indent);
}
/**
 * Pretty print processing instruction. The content is printed in CDATA mode so that &lt; and &gt; pass through
 * unescaped; a closing '?' is added unless the content already ends with one.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printPI(Out fout, int indent, Node node)
{
    if (indent + linelen < this.configuration.wraplen)
    {
        wraphere = linelen;
    }

    printString("<?");
    printText(fout, CDATA, indent, node.textarray, node.start, node.end);

    // #542029 - fix by Terry Teague 10 Apr 02
    final boolean endsWithQuestionMark = node.end > 0 && node.textarray[node.end - 1] == '?';
    if (!endsWithQuestionMark)
    {
        addC('?', linelen++);
    }
    addC('>', linelen++);

    condFlushLine(fout, indent);
}
/**
 * Pretty print the xml declaration: "&lt;?xml", the node's attributes and the closing "?&gt;". The '?' is left
 * out when the node's text already ends with one.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printXmlDecl(Out fout, int indent, Node node)
{
    if (indent + linelen < this.configuration.wraplen)
    {
        wraphere = linelen;
    }

    printString("<?xml");
    printAttrs(fout, indent, node, node.attributes);

    // #542029 - fix by Terry Teague 10 Apr 02
    final boolean endsWithQuestionMark = node.end > 0 && node.textarray[node.end - 1] == '?';
    if (!endsWithQuestionMark)
    {
        addC('?', linelen++);
    }
    addC('>', linelen++);

    condFlushLine(fout, indent);
}
/**
 * Print an ASP block. Note ASP and JSTE share &lt;% ... %&gt; syntax.
 * Wrapping is disabled by temporarily raising the wrap length in the shared configuration; the saved value is
 * now restored in a finally block so that it is not left at the huge value when printing fails.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printAsp(Out fout, int indent, Node node)
{
    final int savewraplen = this.configuration.wraplen;

    // disable wrapping if so requested
    if (!this.configuration.wrapAsp || !this.configuration.wrapJste)
    {
        this.configuration.wraplen = 0xFFFFFF; // a very large number
    }
    try
    {
        printString("<%");
        printText(
            fout,
            (this.configuration.wrapAsp ? CDATA : COMMENT),
            indent,
            node.textarray,
            node.start,
            node.end);
        printString("%>");
    }
    finally
    {
        this.configuration.wraplen = savewraplen;
    }
}
/**
 * Print a JSTE block. JSTE also supports &lt;# ... #&gt; syntax.
 * Wrapping is disabled by temporarily raising the wrap length in the shared configuration; the saved value is
 * now restored in a finally block so that it is not left at the huge value when printing fails.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printJste(Out fout, int indent, Node node)
{
    final int savewraplen = this.configuration.wraplen;

    // disable wrapping if so requested
    if (!this.configuration.wrapJste)
    {
        this.configuration.wraplen = 0xFFFFFF; // a very large number
    }
    try
    {
        printString("<#");
        printText(
            fout,
            (this.configuration.wrapJste ? CDATA : COMMENT),
            indent,
            node.textarray,
            node.start,
            node.end);
        printString("#>");
    }
    finally
    {
        this.configuration.wraplen = savewraplen;
    }
}
/**
 * Print a PHP block. PHP is based on XML processing instructions.
 * Wrapping is disabled by temporarily raising the wrap length in the shared configuration; the saved value is
 * now restored in a finally block so that it is not left at the huge value when printing fails.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printPhp(Out fout, int indent, Node node)
{
    final int savewraplen = this.configuration.wraplen;

    // disable wrapping if so requested
    if (!this.configuration.wrapPhp)
    {
        this.configuration.wraplen = 0xFFFFFF; // a very large number
    }
    try
    {
        printString("<?");
        printText(
            fout,
            (this.configuration.wrapPhp ? CDATA : COMMENT),
            indent,
            node.textarray,
            node.start,
            node.end);
        printString("?>");
    }
    finally
    {
        this.configuration.wraplen = savewraplen;
    }
}
/**
 * Print a CDATA section on a line of its own. The content is printed raw and without wrapping; the indent is
 * dropped unless indent-cdata is configured. The wrap length of the shared configuration is raised temporarily
 * and is now restored in a finally block so that it is not left at the huge value when printing fails.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printCDATA(Out fout, int indent, Node node)
{
    final int savewraplen = this.configuration.wraplen;

    if (!this.configuration.indentCdata)
    {
        indent = 0;
    }
    condFlushLine(fout, indent);

    // disable wrapping
    this.configuration.wraplen = 0xFFFFFF; // a very large number
    try
    {
        printString("<![CDATA[");
        printText(fout, COMMENT, indent, node.textarray, node.start, node.end);
        printString("]]>");
        condFlushLine(fout, indent);
    }
    finally
    {
        this.configuration.wraplen = savewraplen;
    }
}
/**
 * Print a "&lt;![ ... ]&gt;" section.
 * Wrapping is disabled, if so configured, by temporarily raising the wrap length in the shared configuration;
 * the saved value is now restored in a finally block so that it is not left at the huge value when printing
 * fails.
 *
 * @param fout where to write output
 * @param indent the indent amount
 * @param node the node to use
 */
private void printSection(Out fout, int indent, Node node)
{
    final int savewraplen = this.configuration.wraplen;

    // disable wrapping if so requested
    if (!this.configuration.wrapSection)
    {
        this.configuration.wraplen = 0xFFFFFF; // a very large number
    }
    try
    {
        printString("<![");
        printText(
            fout,
            (this.configuration.wrapSection ? CDATA : COMMENT),
            indent,
            node.textarray,
            node.start,
            node.end);
        printString("]>");
    }
    finally
    {
        this.configuration.wraplen = savewraplen;
    }
}
/**
 * Is the current node inside HEAD? Checks the node itself and then each of its ancestors.
 *
 * @param node the node to use
 * @return true if node is, or is inside, a HEAD element
 */
private boolean insideHead(Node node)
{
    Node current = node;
    do
    {
        if (current.tag == this.configuration.tt.tagHead)
        {
            return true;
        }
        current = current.parent;
    }
    while (current != null);

    return false;
}
/**
 * Is text node and already ends w/ a newline? Used to pretty print CDATA/PRE text content. If it already ends on
 * a newline, it is not necessary to print another before printing end tag.
 * <p>
 * The scan is now confined to the node's own text: previously, for a node made up solely of spaces, tabs and
 * carriage returns, the byte in front of {@code node.start} was inspected and could yield a false match.
 * </p>
 *
 * @param lexer Lexer
 * @param node text node
 * @return number of characters that follow the last newline of the text, or -1 if the node is not a text node
 * or its text does not end with a newline followed only by spaces, tabs or carriage returns
 */
private int textEndsWithNewline(Lexer lexer, Node node)
{
    if (node.type != Node.TEXT_NODE || node.end <= node.start)
    {
        return -1;
    }

    // skip trailing non-newline whitespace
    int ix = node.end - 1;
    while (ix >= node.start)
    {
        final int ch = node.textarray[ix] & 0xff;
        if (ch != ' ' && ch != '\t' && ch != '\r')
        {
            break;
        }
        --ix;
    }

    if (ix >= node.start && node.textarray[ix] == '\n')
    {
        return node.end - ix - 1; // #543262 tidy eats all memory
    }
    return -1;
}
/**
 * Does the current node contain a CDATA section, i.e. does its text contain the CDATA start token?
 * <p>
 * The text examined is now exactly the node's range [start, end): the former length of
 * {@code end - start + 1} reached one byte past the node's exclusive end. Empty or missing text yields false.
 * NOTE(review): this assumes the third argument of {@code TidyUtils.getString} is a length - confirm.
 * </p>
 *
 * @param lexer Lexer
 * @param node Node
 * @return true if node contains a CDATA section
 */
static boolean hasCDATA(Lexer lexer, Node node)
{
    if (node.type != Node.TEXT_NODE)
    {
        return false;
    }

    final int len = node.end - node.start;
    if (len <= 0 || node.textarray == null)
    {
        return false;
    }

    // the token consists of characters below 0x7f only, so searching the decoded text is sufficient
    final String text = TidyUtils.getString(node.textarray, node.start, len);
    return text.indexOf(CDATA_START) > -1;
}
/**
* Print script and style elements. For XHTML, wrap the content as follows:
*
*