All downloads are free. The search and download features use the official Maven repository.

org.w3c.tidy5.Tidy Maven / Gradle / Ivy

/*
 *  Java HTML Tidy - JTidy
 *  HTML parser and pretty printer
 *
 *  Copyright (c) 1998-2000 World Wide Web Consortium (Massachusetts
 *  Institute of Technology, Institut National de Recherche en
 *  Informatique et en Automatique, Keio University). All Rights
 *  Reserved.
 *
 *  Contributing Author(s):
 *
 *     Dave Raggett 
 *     Andy Quick  (translation to Java)
 *     Gary L Peskin  (Java development)
 *     Sami Lempinen  (release management)
 *     Fabrizio Giustina 
 *
 *  The contributing author(s) would like to thank all those who
 *  helped with testing, bug fixes, and patience.  This wouldn't
 *  have been possible without all of you.
 *
 *  COPYRIGHT NOTICE:
 *
 *  This software and documentation is provided "as is," and
 *  the copyright holders and contributing author(s) make no
 *  representations or warranties, express or implied, including
 *  but not limited to, warranties of merchantability or fitness
 *  for any particular purpose or that the use of the software or
 *  documentation will not infringe any third party patents,
 *  copyrights, trademarks or other rights.
 *
 *  The copyright holders and contributing author(s) will not be
 *  liable for any direct, indirect, special or consequential damages
 *  arising out of any use of the software or documentation, even if
 *  advised of the possibility of such damage.
 *
 *  Permission is hereby granted to use, copy, modify, and distribute
 *  this source code, or portions hereof, documentation and executables,
 *  for any purpose, without fee, subject to the following restrictions:
 *
 *  1. The origin of this source code must not be misrepresented.
 *  2. Altered versions must be plainly marked as such and must
 *     not be misrepresented as being the original source.
 *  3. This Copyright notice may not be removed or altered from any
 *     source or altered source distribution.
 *
 *  The copyright holders and contributing author(s) specifically
 *  permit, without fee, and encourage the use of this source code
 *  as a component for supporting the Hypertext Markup Language in
 *  commercial products. If you use this source code in a product,
 *  acknowledgment is not required but would be appreciated.
 *
 */
package org.w3c.tidy5;

import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.Serializable;
import java.io.Writer;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.Set;

import javax.xml.transform.dom.DOMSource;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;

import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XPathExecutable;
import net.sf.saxon.s9api.XPathSelector;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;

/**
 * HTML parser and pretty printer.
 * 
 * @author Dave Raggett [email protected] 
 * @author Andy Quick
 *         [email protected] 
 *         (translation to Java)
 * @author Fabrizio Giustina
 * @version $Revision: 923 $ ($Author: aditsu $)
 */
public class Tidy implements Serializable {
	/** Class-wide SLF4J logger, named after the class found through a MethodHandles lookup. */
	static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
	/**
	 * Serial Version UID to avoid problems during serialization.
	 */
	static final long serialVersionUID = -2794371560623987718L;

	/**
	 * Alias for configuration options accepted in command line. The static
	 * initializer fills it with String keys (the command-line short names) and
	 * String values (the configuration option names); the map itself is
	 * declared as a raw type, so readers have to cast.
	 */
	private static final Map CMDLINE_ALIAS = new HashMap();

	/**
	 * When true (the initial value), attributes of the document element whose
	 * name starts with "xmlns" are removed at the end of a parse.
	 */
	private boolean omitXmlns = true;

	/**
	 * Tells whether "xmlns" attributes are stripped from the document element
	 * after parsing.
	 * 
	 * @return the current value of the omitXmlns flag
	 */
	public boolean isOmitXmlns() {
		return this.omitXmlns;
	}

	/**
	 * Chooses whether "xmlns" attributes are stripped from the document
	 * element after parsing.
	 * 
	 * @param omitXmlns
	 *            new value of the omitXmlns flag
	 */
	public void setOmitXmlns(boolean omitXmlns) {
		this.omitXmlns = omitXmlns;
	}

	// Registers the command-line short names. A key can only map to one
	// configuration option, so every key below must be unique.
	static {
		CMDLINE_ALIAS.put("xml", "input-xml");
		// This entry used to be registered under "xml" as well, which silently
		// replaced the "input-xml" mapping above. "asxhtml" is presumably the
		// key that was meant (it mirrors the "asxml" entry below).
		CMDLINE_ALIAS.put("asxhtml", "output-xhtml");
		CMDLINE_ALIAS.put("asxml", "output-xhtml");
		CMDLINE_ALIAS.put("ashtml", "output-html");
		CMDLINE_ALIAS.put("omit", "hide-endtags");
		CMDLINE_ALIAS.put("upper", "uppercase-tags");
		CMDLINE_ALIAS.put("raw", "output-raw");
		CMDLINE_ALIAS.put("numeric", "numeric-entities");
		CMDLINE_ALIAS.put("change", "write-back");
		CMDLINE_ALIAS.put("update", "write-back");
		CMDLINE_ALIAS.put("modify", "write-back");
		CMDLINE_ALIAS.put("errors", "only-errors");
		CMDLINE_ALIAS.put("slides", "split");
		CMDLINE_ALIAS.put("lang", "language");
		CMDLINE_ALIAS.put("w", "wrap");
		CMDLINE_ALIAS.put("file", "error-file");
		CMDLINE_ALIAS.put("f", "error-file");
	}

	/**
	 * Error output stream. Handed to the lexer and to the report methods
	 * during a parse; the internal parse routine returns null without parsing
	 * when this is null.
	 */
	private PrintWriter errout;

	/** Auto-flushing writer around System.err; the constructor also installs it as the initial errout. */
	private PrintWriter stderr;

	/** Configuration consulted by the parsing and printing methods of this instance. */
	private Configuration configuration;

	/**
	 * Name of the current input as passed to the report. The file-based parse
	 * replaces it with the file name or with "stdin".
	 */
	private String inputStreamName = "InputStream";

	/** Error count taken from the lexer by the most recent parse. */
	private int parseErrors;

	/** Warning count taken from the lexer by the most recent parse. */
	private int parseWarnings;

	/** Report object shared with the configuration and the lexer; used for all diagnostics. */
	private Report report;
	/** Tag table created by the constructor and attached to the configuration. */
	TagTable tt;

	/**
	 * Instantiates a new Tidy instance. It's reccomended that a new instance is
	 * used at each parsing.
	 */
	public Tidy() {
		this.report = new Report();
		configuration = new Configuration(this.report);

		this.tt = new TagTable();
		tt.setConfiguration(configuration);
		configuration.tt = tt;

		configuration.errfile = null;
		stderr = new PrintWriter(System.err, true);
		errout = stderr;

	}

	/**
	 * Creates a preconfigured Tidy instance: error and warning display turned
	 * off, error output redirected to Report.NO_OUT (presumably a discarding
	 * writer - confirm in Report), XHTML output enabled and UTF-8 used as both
	 * input and output encoding.
	 * 
	 * @return a new, preconfigured Tidy instance
	 */
	public static Tidy me() {
		Tidy instance = new Tidy();

		Configuration config = instance.configuration;
		config.showErrors = 0;
		config.showWarnings = false;

		instance.setErrout(Report.NO_OUT);
		instance.setXHTML(true);
		instance.setShowErrors(0);
		instance.setInputEncoding("utf-8");
		instance.setOutputEncoding("UTF-8");
		return instance;
	}

	/**
	 * Creates a preconfigured instance (see {@link #me()}) and immediately
	 * parses the given reader with it. The DOMSource produced by the parse is
	 * discarded; only the Tidy instance is returned.
	 * 
	 * @param in
	 *            characters to parse
	 * @return the Tidy instance that performed the parse
	 */
	public static Tidy me(Reader in) {
		Tidy parsed = me();
		parsed.parseDOMSource(in);
		return parsed;
	}

	/**
	 * Creates a preconfigured instance (see {@link #me()}) and immediately
	 * parses the given stream with it. The DOMSource produced by the parse is
	 * discarded; only the Tidy instance is returned.
	 * 
	 * @param in
	 *            bytes to parse
	 * @return the Tidy instance that performed the parse
	 */
	public static Tidy me(InputStream in) {
		Tidy parsed = me();
		parsed.parseDOMSource(in);
		return parsed;
	}

	

	/**
	 * Returns the tag table used by this instance.
	 * 
	 * @return the tag table created in the constructor
	 */
	public TagTable getTagTable() {
		return this.tt;
	}

	/**
	 * Forwards a tag definition to this instance's tag table.
	 * 
	 * @param tagType
	 *            tag type, passed through to the tag table unchanged
	 * @param name
	 *            tag name, passed through to the tag table unchanged
	 */
	public void defineTag(short tagType, String name) {
		this.tt.defineTag(tagType, name);
	}

	/**
	 * Installs a dictionary entry in this instance's tag table.
	 * 
	 * @param dict
	 *            entry to install
	 * @return whatever the tag table's install method returns
	 */
	public Dict install(Dict dict) {
		Dict installed = this.tt.install(dict);
		return installed;
	}

	/**
	 * Gives access to the configuration object used by this instance.
	 * 
	 * @return tidy configuration
	 */
	public Configuration getConfiguration() {
		return this.configuration;
	}

	/**
	 * Returns the writer that wraps System.err.
	 * 
	 * @return the System.err writer created in the constructor
	 */
	public PrintWriter getStderr() {
		return this.stderr;
	}

	/**
	 * Returns the error count recorded by the most recent parse operation.
	 * 
	 * @return number of errors that occurred in the most recent parse
	 *         operation.
	 */
	public int getParseErrors() {
		return this.parseErrors;
	}

	/**
	 * Returns the warning count recorded by the most recent parse operation.
	 * 
	 * @return number of warnings that occurred in the most recent parse
	 *         operation.
	 */
	public int getParseWarnings() {
		return this.parseWarnings;
	}

	/**
	 * Sets the name of the input stream (printed in the header information).
	 * A null name is ignored and the current name is kept.
	 * 
	 * @param name
	 *            input stream name
	 */
	public void setInputStreamName(String name) {
		if (name == null) {
			return;
		}
		this.inputStreamName = name;
	}

	/**
	 * Returns the name currently used for the input in reports.
	 * 
	 * @return input stream name
	 */
	public String getInputStreamName() {
		return this.inputStreamName;
	}

	/**
	 * Returns the writer that currently receives diagnostics.
	 * 
	 * @return error output stream.
	 */
	public PrintWriter getErrout() {
		return this.errout;
	}

	/**
	 * Replaces the writer that receives diagnostics. The previous writer is
	 * neither flushed nor closed here.
	 * 
	 * @param out
	 *            new error output stream
	 */
	public void setErrout(PrintWriter out) {
		errout = out;
	}

	/**
	 * Lets the configuration parse the given configuration file.
	 * 
	 * @param filename
	 *            configuration file name/path.
	 */
	public void setConfigurationFromFile(String filename) {
		this.configuration.parseFile(filename);
	}

	/**
	 * Adds the entries of a properties object to the configuration.
	 * 
	 * @param props
	 *            Properties object
	 */
	public void setConfigurationFromProps(Properties props) {
		this.configuration.addProps(props);
	}

	/**
	 * Creates an empty DOM Document: a root node containing a single "html"
	 * start-tag node backed by a fresh tag table.
	 * 
	 * @return a new org.w3c.dom.Document
	 */
	public static org.w3c.dom.Document createEmptyDocument() {
		Node root = new Node(Node.ROOT_NODE, new byte[0], 0, 0);
		Node html = new Node(Node.START_TAG, new byte[0], 0, 0, "html", new TagTable());

		// Both nodes come straight from "new" and can never be null, so the
		// former null check and its "return null" fallback were unreachable.
		root.insertNodeAtStart(html);
		return (org.w3c.dom.Document) root.getAdapter();
	}

	/**
	 * Parses a byte stream and returns the root Node. When out is non-null the
	 * document is also pretty-printed to it. Neither stream is closed here;
	 * the caller must close both after this method returns.
	 * 
	 * @param in
	 *            input
	 * @param out
	 *            optional destination for pretty-printed document
	 * @return parsed org.w3c.tidy5.Node
	 */
	public Node parse(InputStream in, OutputStream out) {
		StreamIn source = StreamInFactory.getStreamIn(configuration, in);

		// no destination means "parse only"
		Out sink = (out == null) ? null : OutFactory.getOut(this.configuration, out);

		return parse(source, sink);
	}

	/**
	 * Parses a character stream and returns the root Node. When out is
	 * non-null the document is also pretty-printed to it. Neither stream is
	 * closed here; the caller must close both after this method returns.
	 * 
	 * @param in
	 *            input
	 * @param out
	 *            optional destination for pretty-printed document
	 * @return parsed org.w3c.tidy5.Node
	 */
	public Node parse(Reader in, OutputStream out) {
		StreamIn source = StreamInFactory.getStreamIn(configuration, in);

		// no destination means "parse only"
		Out sink = (out == null) ? null : OutFactory.getOut(this.configuration, out);

		return parse(source, sink);
	}

	/**
	 * Parses a character stream and returns the root Node. When out is
	 * non-null the document is also pretty-printed to that writer. Neither
	 * stream is closed here; the caller must close both after this method
	 * returns.
	 * 
	 * @param in
	 *            input
	 * @param out
	 *            optional destination for pretty-printed document
	 * @return parsed org.w3c.tidy5.Node
	 */
	public Node parse(Reader in, Writer out) {
		StreamIn source = StreamInFactory.getStreamIn(configuration, in);

		// no destination means "parse only"
		Out sink = (out == null) ? null : OutFactory.getOut(this.configuration, out);

		return parse(source, sink);
	}

	/**
	 * Parses a byte stream and returns the root Node. When out is non-null the
	 * document is also pretty-printed to that writer. Neither stream is closed
	 * here; the caller must close both after this method returns.
	 * 
	 * @param in
	 *            input
	 * @param out
	 *            optional destination for pretty-printed document
	 * @return parsed org.w3c.tidy5.Node
	 */
	public Node parse(InputStream in, Writer out) {
		StreamIn source = StreamInFactory.getStreamIn(configuration, in);

		// no destination means "parse only"
		Out sink = (out == null) ? null : OutFactory.getOut(this.configuration, out);

		return parse(source, sink);
	}

	/**
	 * Parses a byte stream and exposes the result as a DOM Document. When out
	 * is non-null the document is also pretty-printed to it.
	 * 
	 * @param in
	 *            input stream
	 * @param out
	 *            optional output stream
	 * @return parsed org.w3c.dom.Document, or null when the parse returned no
	 *         root node
	 */
	public org.w3c.dom.Document parseDOM(InputStream in, OutputStream out) {
		Node root = parse(in, out);
		return (root == null) ? null : (org.w3c.dom.Document) root.getAdapter();
	}

	/**
	 * Parses a byte stream, without pretty-printing, and wraps the resulting
	 * DOM Document in a DOMSource.
	 * 
	 * @param in
	 *            input stream
	 * @return a DOMSource around the parsed document (the wrapped document is
	 *         null when parsing returned no root node)
	 */
	public DOMSource parseDOMSource(InputStream in) {
		org.w3c.dom.Document parsed = parseDOM(in, null);
		return new DOMSource(parsed);
	}

	/**
	 * Parses a character stream, without pretty-printing, and wraps the
	 * resulting DOM Document in a DOMSource.
	 * 
	 * @param in
	 *            reader to parse
	 * @return a DOMSource around the parsed document (the wrapped document is
	 *         null when parsing returned no root node)
	 */
	public DOMSource parseDOMSource(Reader in) {
		org.w3c.dom.Document parsed = parseDOM(in, null);
		return new DOMSource(parsed);
	}

	/**
	 * Parses a character stream and exposes the result as a DOM Document. When
	 * out is non-null the document is also pretty-printed to that writer.
	 * 
	 * @param in
	 *            reader to parse
	 * @param out
	 *            optional writer for the pretty-printed document
	 * @return parsed org.w3c.dom.Document, or null when the parse returned no
	 *         root node
	 */
	public org.w3c.dom.Document parseDOM(Reader in, Writer out) {
		Node root = parse(in, out);
		return (root == null) ? null : (org.w3c.dom.Document) root.getAdapter();
	}

	/**
	 * Pretty-prints a DOM Document. Only documents that are instances of
	 * DOMDocumentImpl can be printed; any other Document is silently ignored.
	 * Caller is responsible for closing the outputStream after calling this
	 * method.
	 * 
	 * @param doc
	 *            org.w3c.dom.Document
	 * @param out
	 *            output stream
	 */
	public void pprint(org.w3c.dom.Document doc, OutputStream out) {
		// @todo should we inform users that tidy can't print a generic
		// Document or change the method signature?
		if (doc instanceof DOMDocumentImpl) {
			pprint(((DOMDocumentImpl) doc).adaptee, out);
		}
	}

	/**
	 * Pretty-prints a DOM Node. Only nodes that are instances of DOMNodeImpl
	 * can be printed; any other Node is silently ignored. Caller is
	 * responsible for closing the outputStream after calling this method.
	 * 
	 * @param node
	 *            org.w3c.dom.Node. Must be an instance of DOMNodeImpl.
	 * @param out
	 *            output stream
	 */
	public void pprint(org.w3c.dom.Node node, OutputStream out) {
		// @todo should we inform users that tidy can't print a generic Node
		// or change the method signature?
		if (node instanceof DOMNodeImpl) {
			pprint(((DOMNodeImpl) node).adaptee, out);
		}
	}

	/**
	 * Root node of the document produced by the internal parse routine. It is
	 * overwritten by every parse, so it always refers to the latest one.
	 */
	Node document = null;

	/**
	 * Internal routine that actually does the parsing. It parses the input,
	 * runs the configured clean-up passes (HTML input only), reports
	 * diagnostics to errout and, when an output is given and the configuration
	 * allows it, pretty-prints the document. The root node is also stored in
	 * the document field. When omitXmlns is set, "xmlns" attributes are
	 * removed from the document element after any output has been written, so
	 * the removal only affects the returned tree.
	 * 
	 * @param streamIn
	 *            tidy StreamIn
	 * @param o
	 *            tidy Out, or null to skip pretty-printing
	 * @return parsed org.w3c.tidy5.Node, or null when errout is null or the
	 *         parsed tree fails its integrity check
	 */
	private Node parse(StreamIn streamIn, Out o) {
		Lexer lexer;

		Node doctype;
		PPrint pprint;

		if (errout == null) {
			return null;
		}

		// ensure config is self-consistent
		configuration.adjust();

		parseErrors = 0;
		parseWarnings = 0;

		lexer = new Lexer(streamIn, configuration, this.report);
		lexer.errout = errout;

		// store pointer to lexer in input stream to allow character encoding
		// errors to be reported
		streamIn.setLexer(lexer);

		this.report.setFilename(inputStreamName); // #431895 - fix by Dave Bryan
													// 04 Jan 01

		// Tidy doesn't alter the doctype for generic XML docs
		if (configuration.xmlTags) {
			document = ParserImpl.parseXMLDocument(lexer);
			if (!document.checkNodeIntegrity()) {
				if (!configuration.quiet) {
					report.badTree(errout);
				}
				return null;
			}
		} else {
			lexer.warnings = 0;

			document = ParserImpl.parseDocument(lexer);

			if (!document.checkNodeIntegrity()) {
				if (!configuration.quiet) {
					this.report.badTree(errout);
				}
				return null;
			}

			Clean cleaner = new Clean(configuration.tt);

			// simplify nested emphasis markup
			cleaner.nestedEmphasis(document);

			// turn lists used for indentation into blockquotes, then into divs
			cleaner.list2BQ(document);
			cleaner.bQ2Div(document);

			// replaces i by em and b by strong
			if (configuration.logicalEmphasis) {
				cleaner.emFromI(document);
			}

			if (configuration.word2000 && cleaner.isWord2000(document)) {
				// prune Word2000's conditional sections
				cleaner.dropSections(lexer, document);

				// drop style & class attributes and empty p, span elements
				cleaner.cleanWord2000(lexer, document);
			}

			// replaces presentational markup by style rules
			if (configuration.makeClean || configuration.dropFontTags) {
				cleaner.cleanTree(lexer, document);
			}

			if (!document.checkNodeIntegrity()) {
				this.report.badTree(errout);
				return null;
			}

			doctype = document.findDocType();

			// remember given doctype
			if (doctype != null) {
				doctype = doctype.cloneNode(false);
			}

			if (document.content != null) {
				if (configuration.xHTML) {
					lexer.setXHTMLDocType(document);
				} else {
					lexer.fixDocType(document);
				}

				if (configuration.tidyMark) {
					lexer.addGenerator(document);
				}
			}

			// ensure presence of initial XML declaration
			if (configuration.xmlOut && configuration.xmlPi) {
				lexer.fixXmlDecl(document);
			}

			if (!configuration.quiet && document.content != null) {
				this.report.reportVersion(errout, lexer, inputStreamName, doctype);
			}
		}

		// Record the counts even in quiet mode: "quiet" only suppresses
		// reporting, while getParseErrors()/getParseWarnings() and the exit
		// status of mainExec must still reflect this parse.
		parseWarnings = lexer.warnings;
		parseErrors = lexer.errors;

		if (!configuration.quiet) {
			this.report.reportNumWarnings(errout, lexer);
		}

		if (!configuration.quiet && lexer.errors > 0 && !configuration.forceOutput) {
			this.report.needsAuthorIntervention(errout);
		}

		if (!configuration.onlyErrors && (lexer.errors == 0 || configuration.forceOutput)) {
			if (configuration.burstSlides) {
				Node body;

				body = null;
				// remove doctype to avoid potential clash with markup
				// introduced when bursting into slides

				// discard the document type
				doctype = document.findDocType();

				if (doctype != null) {
					Node.discardElement(doctype);
				}

				/* slides use transitional features */
				lexer.versions |= Dict.VERS_HTML40_LOOSE;

				// and patch up doctype to match
				if (configuration.xHTML) {
					lexer.setXHTMLDocType(document);
				} else {
					lexer.fixDocType(document);
				}

				// find the body element which may be implicit
				body = document.findBody(configuration.tt);

				if (body != null) {
					pprint = new PPrint(configuration);
					if (!configuration.quiet) {
						this.report.reportNumberOfSlides(errout, pprint.countSlides(body));
					}
					pprint.createSlides(lexer, document);
				} else if (!configuration.quiet) {
					this.report.missingBody(errout);
				}
			} else if (o != null) {
				pprint = new PPrint(configuration);

				if (document.findDocType() == null) {
					// only use numeric character references if no doctype could
					// be determined (e.g., because
					// the document contains proprietary features) to ensure
					// well-formedness.
					configuration.numEntities = true;
				}
				if (configuration.bodyOnly) {
					// Feature request #434940 - fix by Dave Raggett/Ignacio
					// Vazquez-Abrams 21 Jun 01
					pprint.printBody(o, lexer, document, configuration.xmlOut);
				} else if (configuration.xmlOut && !configuration.xHTML) {
					pprint.printXMLTree(o, (short) 0, 0, lexer, document);
				} else {
					pprint.printTree(o, (short) 0, 0, lexer, document);
				}

				pprint.flushLine(o, 0);
				o.flush();
			}

		}

		if (!configuration.quiet) {
			this.report.errorSummary(lexer);
		}
		if (omitXmlns) {
			removeRootXmlnsAttributes(document);
		}
		return document;
	}

	/**
	 * Removes every attribute whose name starts with "xmlns" (compared in
	 * lower case) from the document element of the given tree. Does nothing
	 * when the tree has no document element.
	 * 
	 * @param root
	 *            root node of a parsed document
	 */
	private void removeRootXmlnsAttributes(Node root) {
		Document doc = (org.w3c.dom.Document) root.getAdapter();
		Element element = doc.getDocumentElement();
		if (element == null) {
			return;
		}

		NamedNodeMap attributes = element.getAttributes();
		if (attributes == null) {
			return;
		}

		// Collect the names first and remove afterwards: removing while
		// indexing into the same map shifts the remaining attributes down and
		// makes the loop skip the attribute that follows each removed one.
		List<String> xmlnsNames = new ArrayList<String>();
		for (int i = 0; i < attributes.getLength(); i++) {
			String attributeName = attributes.item(i).getNodeName();
			if (attributeName.toLowerCase().startsWith("xmlns")) {
				xmlnsNames.add(attributeName);
			}
		}
		for (String attributeName : xmlnsNames) {
			element.removeAttribute(attributeName);
		}
	}

	/**
	 * Internal routine that actually does the parsing. The caller can pass
	 * either an InputStream or file name. If both are passed, the file name is
	 * preferred; if neither is passed, System.in is read. When write-back is
	 * configured and a file name is given, the tidied markup replaces the
	 * content of that file instead of going to out.
	 * 
	 * @param in
	 *            input stream (used only if file is null)
	 * @param file
	 *            file name
	 * @param out
	 *            output stream
	 * @return parsed org.w3c.tidy5.Node
	 * @throws FileNotFoundException
	 *             if file is not null but it can't be found
	 * @throws IOException
	 *             for errors in reading input stream or file, or in writing
	 *             the tidied markup back to the file
	 */
	private Node parse(InputStream in, String file, OutputStream out) throws FileNotFoundException, IOException {

		boolean inputStreamOpen = false;

		if (file != null) {
			in = new FileInputStream(file);
			inputStreamOpen = true;
			inputStreamName = file;
		} else if (in == null) {
			in = System.in;
			inputStreamName = "stdin";
		}

		// Write-back must not open the input file for writing before it has
		// been read: opening a FileOutputStream truncates the file right away.
		// The output is collected in memory and written once parsing is done.
		ByteArrayOutputStream writebackBuffer = null;
		if (configuration.writeback && (file != null)) {
			writebackBuffer = new ByteArrayOutputStream();
			out = writebackBuffer;
		}

		Node node;
		try {
			StreamIn streamIn = StreamInFactory.getStreamIn(configuration, in);

			Out o = null;
			if (out != null) {
				o = OutFactory.getOut(this.configuration, out); // normal output
																// stream
			}

			node = parse(streamIn, o);
		} finally {
			// Close the InputStream, but only if we created it, and also when
			// parsing failed with an exception.
			if (inputStreamOpen) {
				try {
					in.close();
				} catch (IOException ignored) {
					// best effort: the input has already been consumed
				}
			}
		}

		// Only touch the file when the parse actually produced output; a parse
		// that printed nothing leaves the original file untouched.
		if (writebackBuffer != null && writebackBuffer.size() > 0) {
			try (FileOutputStream fileOut = new FileOutputStream(file)) {
				writebackBuffer.writeTo(fileOut);
			}
		}

		return node;

	}

	/**
	 * Pretty-prints a tidy Node to the given stream, as an XML tree when the
	 * configuration treats input as XML and as an HTML tree otherwise. Nothing
	 * happens when out is null.
	 * 
	 * @param node
	 *            org.w3c.tidy5.Node
	 * @param out
	 *            output stream
	 */
	private void pprint(Node node, OutputStream out) {
		if (out == null) {
			return;
		}

		Out target = OutFactory.getOut(this.configuration, out);
		// printing needs a lexer, but there is no input to read
		Lexer printLexer = new Lexer(null, this.configuration, this.report);
		PPrint printer = new PPrint(configuration);

		if (configuration.xmlTags) {
			printer.printXMLTree(target, (short) 0, 0, printLexer, node);
		} else {
			printer.printTree(target, (short) 0, 0, printLexer, node);
		}

		printer.flushLine(target, 0);
		target.flush();
	}

	/**
	 * Command line entry point: runs the parser and pretty printer on the
	 * given arguments and exits the JVM with the resulting status code.
	 * 
	 * @param argv
	 *            command line parameters
	 */
	public static void main(String[] argv) {
		int status = new Tidy().mainExec(argv);
		System.exit(status);
	}

	/**
	 * Main method, but returns the return code as an int instead of calling
	 * System.exit(code). Needed for testing main method without shutting down
	 * tests.
	 * <p>
	 * Arguments starting with "-" are options; every other argument is a file
	 * that is parsed, with the options seen so far, and pretty-printed to
	 * System.out. An option only takes the following argument as its value
	 * when at least one more argument (the file) follows that value.
	 * 
	 * @param argv
	 *            command line parameters
	 * @return 2 if the last parse recorded errors, 1 if it recorded only
	 *         warnings, 0 otherwise (also for the help/version options)
	 */
	protected int mainExec(String[] argv) {
		String file;
		int argCount = argv.length;
		int argIndex = 0;

		// read command line
		Properties properties = new Properties();

		// Name of the error file this method currently has open. It is kept
		// across input files so that the same error file is not reopened (and
		// thereby truncated) for every file on the command line.
		String openErrorFile = "stderr";

		while (argCount > 0) {
			if (argv[argIndex].startsWith("-")) {
				// support -foo and --foo; lower-case independently of the
				// default locale so that e.g. "I" always becomes "i"
				String argName = argv[argIndex].toLowerCase(java.util.Locale.ROOT);
				while (argName.length() > 0 && argName.charAt(0) == '-') {
					argName = argName.substring(1);
				}

				// "exclusive" options
				if (argName.equals("help") || argName.equals("h") || argName.equals("?")) {
					this.report.helpText(new PrintWriter(System.out, true));
					return 0;
				} else if (argName.equals("help-config")) {
					configuration.printConfigOptions(new PrintWriter(System.out, true), false);
					return 0;
				} else if (argName.equals("show-config")) {
					configuration.adjust(); // ensure config is self-consistent
					configuration.printConfigOptions(errout, true);
					return 0;
				} else if (argName.equals("version") || argName.equals("v")) {
					this.report.showVersion(errout);
					return 0;
				}

				// optional value for non boolean options
				String argValue = null;
				if (argCount > 2 && !argv[argIndex + 1].startsWith("-")) {
					argValue = argv[argIndex + 1];
					--argCount;
					++argIndex;
				}

				// handle "special" aliases
				String alias = (String) CMDLINE_ALIAS.get(argName);
				if (alias != null) {
					argName = alias;
				}

				if (Configuration.isKnownOption(argName)) // handle any standard
															// config option
				{
					properties.setProperty(argName, (argValue == null ? "" : argValue));
				} else if (argName.equals("config")) // parse a property file
				{
					if (argValue != null) {
						configuration.parseFile(argValue);
					}
				} else if (TidyUtils.isCharEncodingSupported(argName)) // handle
																		// any
																		// encoding
																		// name
				{
					properties.setProperty("char-encoding", argName);
				} else {

					// anything else is a bundle of single-letter flags
					for (int i = 0; i < argName.length(); i++) {
						switch (argName.charAt(i)) {
						case 'i':
							configuration.indentContent = true;
							configuration.smartIndent = true;
							break;

						case 'o':
							configuration.hideEndTags = true;
							break;

						case 'u':
							configuration.upperCaseTags = true;
							break;

						case 'c':
							configuration.makeClean = true;
							break;

						case 'b':
							configuration.makeBare = true;
							break;

						case 'n':
							configuration.numEntities = true;
							break;

						case 'm':
							configuration.writeback = true;
							break;

						case 'e':
							configuration.onlyErrors = true;
							break;

						case 'q':
							configuration.quiet = true;
							break;

						default:
							this.report.unknownOption(this.errout, argName.charAt(i));
							break;
						}
					}
				}

				--argCount;
				++argIndex;
				continue;
			}

			configuration.addProps(properties);

			// ensure config is self-consistent
			configuration.adjust();

			// user specified error file: open it unless it is stderr or the
			// file that is already open
			if (configuration.errfile != null && !configuration.errfile.equals("stderr")
					&& !configuration.errfile.equals(openErrorFile)) {
				// close previous error file

				if (this.errout != this.stderr) {
					this.errout.close();
				}

				// and try to open the new error file
				try {
					this.setErrout(new PrintWriter(new FileWriter(configuration.errfile), true));
					openErrorFile = configuration.errfile;
				} catch (IOException e) {
					// can't be opened so fall back to stderr
					openErrorFile = "stderr";
					this.setErrout(stderr);
				}
			}

			// The loop condition guarantees that an argument is available
			// here, so the former "stdin" fallback could never be taken.
			file = argv[argIndex];

			try {
				parse(null, file, System.out);
			} catch (IOException ioe) {
				// covers FileNotFoundException too; both were reported alike
				this.report.unknownFile(this.errout, file);
			}

			--argCount;
			++argIndex;
		}

		if (this.parseErrors + this.parseWarnings > 0 && !configuration.quiet) {
			this.report.generalInfo(this.errout);
		}

		if (this.errout != this.stderr) {
			this.errout.close();
		}

		// return status can be used by scripts
		if (this.parseErrors > 0) {
			return 2;
		}

		if (this.parseWarnings > 0) {
			return 1;
		}

		// 0 means all is ok
		return 0;
	}

	/**
	 * Registers a TidyMessageListener with the report of this instance. The
	 * listener is added to the report; listeners registered earlier are not
	 * removed by this call.
	 * 
	 * @param listener
	 *            TidyMessageListener implementation
	 */
	public void setMessageListener(TidyMessageListener listener) {
		report.addMessageListener(listener);
	}

	/**
	 * Sets the indent-spaces option, the default indentation.
	 * 
	 * @param spaces
	 *            number of spaces used for indentation
	 * @see Configuration#spaces
	 */
	public void setSpaces(int spaces) {
		this.configuration.spaces = spaces;
	}

	/**
	 * Reads the indent-spaces option, the default indentation.
	 * 
	 * @return number of spaces used for indentation
	 * @see Configuration#spaces
	 */
	public int getSpaces() {
		return this.configuration.spaces;
	}

	/**
	 * Sets the wrap option, the default wrap margin.
	 * 
	 * @param wraplen
	 *            default wrap margin
	 * @see Configuration#wraplen
	 */
	public void setWraplen(int wraplen) {
		this.configuration.wraplen = wraplen;
	}

	/**
	 * Reads the wrap option, the default wrap margin.
	 * 
	 * @return default wrap margin
	 * @see Configuration#wraplen
	 */
	public int getWraplen() {
		return this.configuration.wraplen;
	}

	/**
	 * Sets the tab-size option, the tab size in chars.
	 * 
	 * @param tabsize
	 *            tab size in chars
	 * @see Configuration#tabsize
	 */
	public void setTabsize(int tabsize) {
		this.configuration.tabsize = tabsize;
	}

	/**
	 * Reads the tab-size option, the tab size in chars.
	 * 
	 * @return tab size in chars
	 * @see Configuration#tabsize
	 */
	public int getTabsize() {
		return this.configuration.tabsize;
	}

	/**
	 * Sets the name of the file errors are written to. Only the name is stored
	 * in the configuration here; no file is opened by this call.
	 * 
	 * @param errfile
	 *            file name to write errors to
	 * @see Configuration#errfile
	 */
	public void setErrfile(String errfile) {
		this.configuration.errfile = errfile;
	}

	/**
	 * Reads the name of the file errors are written to.
	 * 
	 * @return error file name
	 * @see Configuration#errfile
	 */
	public String getErrfile() {
		return this.configuration.errfile;
	}

	/**
	 * Sets the write-back option: if true, the tidied markup is written back
	 * to the input file. NOTE: this property is ignored when parsing from an
	 * InputStream, because it only takes effect when a file name is given.
	 * 
	 * @param writeback
	 *            true= output tidied markup
	 * @see Configuration#writeback
	 */
	public void setWriteback(boolean writeback) {
		this.configuration.writeback = writeback;
	}

	/**
	 * Reads the write-back option. NOTE: this property is ignored when parsing
	 * from an InputStream.
	 * 
	 * @return true if tidy will output tidied markup in input file
	 * @see Configuration#writeback
	 */
	public boolean getWriteback() {
		return this.configuration.writeback;
	}

	/**
	 * Sets the only-errors option, which suppresses the normal (pretty
	 * printed) output.
	 * 
	 * @param onlyErrors
	 *            if true normal output is suppressed.
	 * @see Configuration#onlyErrors
	 */
	public void setOnlyErrors(boolean onlyErrors) {
		this.configuration.onlyErrors = onlyErrors;
	}

	/**
	 * Reads the only-errors option.
	 * 
	 * @return true if normal output is suppressed.
	 * @see Configuration#onlyErrors
	 */
	public boolean getOnlyErrors() {
		return this.configuration.onlyErrors;
	}

	/**
	 * Sets the show-warnings option. Errors are always shown; this only
	 * controls warnings.
	 * 
	 * @param showWarnings
	 *            if false warnings are not shown
	 * @see Configuration#showWarnings
	 */
	public void setShowWarnings(boolean showWarnings) {
		this.configuration.showWarnings = showWarnings;
	}

	/**
	 * Reads the show-warnings option. Errors are always shown; this only
	 * concerns warnings.
	 * 
	 * @return false if warnings are not shown
	 * @see Configuration#showWarnings
	 */
	public boolean getShowWarnings() {
		return this.configuration.showWarnings;
	}

	/**
	 * Sets the quiet option: no 'Parsing X', guessed DTD or summary.
	 * 
	 * @param quiet
	 *            true= don't output summary, warnings or errors
	 * @see Configuration#quiet
	 */
	public void setQuiet(boolean quiet) {
		this.configuration.quiet = quiet;
	}

	/**
	 * Reads the quiet option: no 'Parsing X', guessed DTD or summary.
	 * 
	 * @return true if tidy will not output summary, warnings or
	 *         errors
	 * @see Configuration#quiet
	 */
	public boolean getQuiet() {
		return this.configuration.quiet;
	}

	/**
	 * Sets the indent option, which indents the content of appropriate tags.
	 * 
	 * @param indentContent
	 *            indent content of appropriate tags
	 * @see Configuration#indentContent
	 */
	public void setIndentContent(boolean indentContent) {
		this.configuration.indentContent = indentContent;
	}

	/**
	 * Reads the indent option.
	 * 
	 * @return true if tidy will indent content of appropriate tags
	 * @see Configuration#indentContent
	 */
	public boolean getIndentContent() {
		return this.configuration.indentContent;
	}

	/**
	 * SmartIndent - does text/block level content effect indentation.
	 * 
	 * @param smartIndent
	 *            true if text/block level content should effect
	 *            indentation
	 * @see Configuration#smartIndent
	 */
	public void setSmartIndent(boolean smartIndent) {
		configuration.smartIndent = smartIndent;
	}

	/**
	 * SmartIndent - does text/block level content effect indentation.
	 * 
	 * @return true if text/block level content should effect
	 *         indentation
	 * @see Configuration#smartIndent
	 */
	public boolean getSmartIndent() {
		return configuration.smartIndent;
	}

	/**
	 * Sets the hide-endtags option, which suppresses optional end tags.
	 * 
	 * @param hideEndTags
	 *            true= suppress optional end tags
	 * @see Configuration#hideEndTags
	 */
	public void setHideEndTags(boolean hideEndTags) {
		this.configuration.hideEndTags = hideEndTags;
	}

	/**
	 * Reads the hide-endtags option.
	 * 
	 * @return true if tidy will suppress optional end tags
	 * @see Configuration#hideEndTags
	 */
	public boolean getHideEndTags() {
		return this.configuration.hideEndTags;
	}

	/**
	 * Sets the input-xml option, which makes tidy treat the input as XML.
	 * 
	 * @param xmlTags
	 *            true if tidy should treat input as XML
	 * @see Configuration#xmlTags
	 */
	public void setXmlTags(boolean xmlTags) {
		this.configuration.xmlTags = xmlTags;
	}

	/**
	 * Reads the input-xml option.
	 * 
	 * @return true if tidy will treat input as XML
	 * @see Configuration#xmlTags
	 */
	public boolean getXmlTags() {
		return this.configuration.xmlTags;
	}

	/**
	 * Sets the output-xml option, which makes tidy create its output as XML.
	 * 
	 * @param xmlOut
	 *            true if tidy should create output as xml
	 * @see Configuration#xmlOut
	 */
	public void setXmlOut(boolean xmlOut) {
		this.configuration.xmlOut = xmlOut;
	}

	/**
	 * Reports the {@code output-xml} option: create the output as XML.
	 *
	 * @return {@code true} if tidy will create output as XML
	 * @see Configuration#xmlOut
	 */
	public boolean getXmlOut() {
		return this.configuration.xmlOut;
	}

	/**
	 * Sets the {@code output-xhtml} option: output extensible HTML.
	 *
	 * @param xhtml
	 *            {@code true} if tidy should output XHTML
	 * @see Configuration#xHTML
	 */
	public void setXHTML(boolean xhtml) {
		this.configuration.xHTML = xhtml;
	}

	/**
	 * Reports the {@code output-xhtml} option: output extensible HTML.
	 *
	 * @return {@code true} if tidy will output XHTML
	 * @see Configuration#xHTML
	 */
	public boolean getXHTML() {
		return this.configuration.xHTML;
	}

	/**
	 * Sets the {@code uppercase-tags} option: output tags in upper case.
	 *
	 * @param upperCaseTags
	 *            {@code true} if tidy should output tags in upper case
	 *            (default is lowercase)
	 * @see Configuration#upperCaseTags
	 */
	public void setUpperCaseTags(boolean upperCaseTags) {
		this.configuration.upperCaseTags = upperCaseTags;
	}

	/**
	 * Reports the {@code uppercase-tags} option: output tags in upper case.
	 *
	 * @return {@code true} if tidy will output tags in upper case
	 * @see Configuration#upperCaseTags
	 */
	public boolean getUpperCaseTags() {
		return this.configuration.upperCaseTags;
	}

	/**
	 * Sets the {@code uppercase-attributes} option: output attributes in
	 * upper case.
	 *
	 * @param upperCaseAttrs
	 *            {@code true} if tidy should output attributes in upper case
	 *            (default is lowercase)
	 * @see Configuration#upperCaseAttrs
	 */
	public void setUpperCaseAttrs(boolean upperCaseAttrs) {
		this.configuration.upperCaseAttrs = upperCaseAttrs;
	}

	/**
	 * Reports the {@code uppercase-attributes} option: output attributes in
	 * upper case.
	 *
	 * @return {@code true} if tidy will output attributes in upper case
	 * @see Configuration#upperCaseAttrs
	 */
	public boolean getUpperCaseAttrs() {
		return this.configuration.upperCaseAttrs;
	}

	/**
	 * Sets the {@code make-clean} option: remove presentational clutter.
	 *
	 * @param makeClean
	 *            {@code true} to remove presentational clutter
	 * @see Configuration#makeClean
	 */
	public void setMakeClean(boolean makeClean) {
		this.configuration.makeClean = makeClean;
	}

	/**
	 * Reports the {@code make-clean} option: remove presentational clutter.
	 *
	 * @return {@code true} if tidy will remove presentational clutter
	 * @see Configuration#makeClean
	 */
	public boolean getMakeClean() {
		return this.configuration.makeClean;
	}

	/**
	 * Sets the {@code make-bare} option: remove Microsoft cruft.
	 *
	 * @param makeBare
	 *            {@code true} to remove Microsoft cruft
	 * @see Configuration#makeBare
	 */
	public void setMakeBare(boolean makeBare) {
		this.configuration.makeBare = makeBare;
	}

	/**
	 * Reports the {@code make-bare} option: remove Microsoft cruft.
	 *
	 * @return {@code true} if tidy will remove Microsoft cruft
	 * @see Configuration#makeBare
	 */
	public boolean getMakeBare() {
		return this.configuration.makeBare;
	}

	/**
	 * Sets the {@code break-before-br} option: output a newline before
	 * {@code <br>}.
	 *
	 * @param breakBeforeBR
	 *            {@code true} if tidy should output a newline before
	 *            {@code <br>}
	 * @see Configuration#breakBeforeBR
	 */
	public void setBreakBeforeBR(boolean breakBeforeBR) {
		this.configuration.breakBeforeBR = breakBeforeBR;
	}

	/**
	 * Reports the {@code break-before-br} option: output a newline before
	 * {@code <br>}.
	 *
	 * @return {@code true} if tidy will output a newline before {@code <br>}
	 * @see Configuration#breakBeforeBR
	 */
	public boolean getBreakBeforeBR() {
		return this.configuration.breakBeforeBR;
	}

	/**
	 * Sets the {@code split} option: create slides on each h2 element.
	 *
	 * @param burstSlides
	 *            {@code true} if tidy should create slides on each h2 element
	 * @see Configuration#burstSlides
	 */
	public void setBurstSlides(boolean burstSlides) {
		this.configuration.burstSlides = burstSlides;
	}

	/**
	 * Reports the {@code split} option: create slides on each h2 element.
	 *
	 * @return {@code true} if tidy will create slides on each h2 element
	 * @see Configuration#burstSlides
	 */
	public boolean getBurstSlides() {
		return this.configuration.burstSlides;
	}

	/**
	 * Sets the {@code numeric-entities} option: output entities other than
	 * the built-in HTML entities in the numeric rather than the named entity
	 * form.
	 *
	 * @param numEntities
	 *            {@code true} if tidy should output entities in the numeric
	 *            form
	 * @see Configuration#numEntities
	 */
	public void setNumEntities(boolean numEntities) {
		this.configuration.numEntities = numEntities;
	}

	/**
	 * Reports the {@code numeric-entities} option: output entities other than
	 * the built-in HTML entities in the numeric rather than the named entity
	 * form.
	 *
	 * @return {@code true} if tidy will output entities in the numeric form
	 * @see Configuration#numEntities
	 */
	public boolean getNumEntities() {
		return this.configuration.numEntities;
	}

	/**
	 * Sets the {@code quote-marks} option: output {@code "} marks as
	 * {@code &quot;}.
	 *
	 * @param quoteMarks
	 *            {@code true} if tidy should output {@code "} marks as
	 *            {@code &quot;}
	 * @see Configuration#quoteMarks
	 */
	public void setQuoteMarks(boolean quoteMarks) {
		this.configuration.quoteMarks = quoteMarks;
	}

	/**
	 * Reports the {@code quote-marks} option: output {@code "} marks as
	 * {@code &quot;}.
	 *
	 * @return {@code true} if tidy will output {@code "} marks as
	 *         {@code &quot;}
	 * @see Configuration#quoteMarks
	 */
	public boolean getQuoteMarks() {
		return this.configuration.quoteMarks;
	}

	/**
	 * Sets the {@code quote-nbsp} option: output non-breaking space as an
	 * entity.
	 *
	 * @param quoteNbsp
	 *            {@code true} if tidy should output non-breaking space as an
	 *            entity
	 * @see Configuration#quoteNbsp
	 */
	public void setQuoteNbsp(boolean quoteNbsp) {
		this.configuration.quoteNbsp = quoteNbsp;
	}

	/**
	 * Reports the {@code quote-nbsp} option: output non-breaking space as an
	 * entity.
	 *
	 * @return {@code true} if tidy will output non-breaking space as an entity
	 * @see Configuration#quoteNbsp
	 */
	public boolean getQuoteNbsp() {
		return this.configuration.quoteNbsp;
	}

	/**
	 * Sets the {@code quote-ampersand} option: output a naked ampersand as
	 * {@code &amp;}.
	 *
	 * @param quoteAmpersand
	 *            {@code true} if tidy should output a naked ampersand as
	 *            {@code &amp;}
	 * @see Configuration#quoteAmpersand
	 */
	public void setQuoteAmpersand(boolean quoteAmpersand) {
		this.configuration.quoteAmpersand = quoteAmpersand;
	}

	/**
	 * Reports the {@code quote-ampersand} option: output a naked ampersand as
	 * {@code &amp;}.
	 *
	 * @return {@code true} if tidy will output a naked ampersand as
	 *         {@code &amp;}
	 * @see Configuration#quoteAmpersand
	 */
	public boolean getQuoteAmpersand() {
		return this.configuration.quoteAmpersand;
	}

	/**
	 * Sets the {@code wrap-attributes} option: wrap within attribute values.
	 *
	 * @param wrapAttVals
	 *            {@code true} if tidy should wrap within attribute values
	 * @see Configuration#wrapAttVals
	 */
	public void setWrapAttVals(boolean wrapAttVals) {
		this.configuration.wrapAttVals = wrapAttVals;
	}

	/**
	 * Reports the {@code wrap-attributes} option: wrap within attribute
	 * values.
	 *
	 * @return {@code true} if tidy will wrap within attribute values
	 * @see Configuration#wrapAttVals
	 */
	public boolean getWrapAttVals() {
		return this.configuration.wrapAttVals;
	}

	/**
	 * Sets the {@code wrap-script-literals} option: wrap within JavaScript
	 * string literals.
	 *
	 * @param wrapScriptlets
	 *            {@code true} if tidy should wrap within JavaScript string
	 *            literals
	 * @see Configuration#wrapScriptlets
	 */
	public void setWrapScriptlets(boolean wrapScriptlets) {
		this.configuration.wrapScriptlets = wrapScriptlets;
	}

	/**
	 * Reports the {@code wrap-script-literals} option: wrap within JavaScript
	 * string literals.
	 *
	 * @return {@code true} if tidy will wrap within JavaScript string literals
	 * @see Configuration#wrapScriptlets
	 */
	public boolean getWrapScriptlets() {
		return this.configuration.wrapScriptlets;
	}

	/**
	 * Sets the {@code wrap-sections} option: wrap within
	 * {@code <![ ... ]>} section tags.
	 *
	 * @param wrapSection
	 *            {@code true} if tidy should wrap within {@code <![ ... ]>}
	 *            section tags
	 * @see Configuration#wrapSection
	 */
	public void setWrapSection(boolean wrapSection) {
		this.configuration.wrapSection = wrapSection;
	}

	/**
	 * Reports the {@code wrap-sections} option: wrap within
	 * {@code <![ ... ]>} section tags.
	 *
	 * @return {@code true} if tidy will wrap within {@code <![ ... ]>}
	 *         section tags
	 * @see Configuration#wrapSection
	 */
	public boolean getWrapSection() {
		return this.configuration.wrapSection;
	}

	/**
	 * Sets the {@code alt-text} option: the default text for the alt
	 * attribute.
	 *
	 * @param altText
	 *            default text for the alt attribute
	 * @see Configuration#altText
	 */
	public void setAltText(String altText) {
		this.configuration.altText = altText;
	}

	/**
	 * Reports the {@code alt-text} option: the default text for the alt
	 * attribute.
	 *
	 * @return the default text for the alt attribute
	 * @see Configuration#altText
	 */
	public String getAltText() {
		return this.configuration.altText;
	}

	/**
	 * Sets the {@code add-xml-pi} option: add {@code <?xml?>} for XML docs.
	 *
	 * @param xmlPi
	 *            {@code true} if tidy should add {@code <?xml?>} for XML docs
	 * @see Configuration#xmlPi
	 */
	public void setXmlPi(boolean xmlPi) {
		this.configuration.xmlPi = xmlPi;
	}

	/**
	 * Reports the {@code add-xml-pi} option: add {@code <?xml?>} for XML
	 * docs.
	 *
	 * @return {@code true} if tidy will add {@code <?xml?>} for XML docs
	 * @see Configuration#xmlPi
	 */
	public boolean getXmlPi() {
		return this.configuration.xmlPi;
	}

	/**
	 * Sets the {@code drop-font-tags} option: discard presentation tags.
	 *
	 * @param dropFontTags
	 *            {@code true} if tidy should discard presentation tags
	 * @see Configuration#dropFontTags
	 */
	public void setDropFontTags(boolean dropFontTags) {
		this.configuration.dropFontTags = dropFontTags;
	}

	/**
	 * Reports the {@code drop-font-tags} option: discard presentation tags.
	 *
	 * @return {@code true} if tidy will discard presentation tags
	 * @see Configuration#dropFontTags
	 */
	public boolean getDropFontTags() {
		return this.configuration.dropFontTags;
	}

	/**
	 * Sets the {@code drop-proprietary-attributes} option: discard
	 * proprietary attributes.
	 *
	 * @param dropProprietaryAttributes
	 *            {@code true} if tidy should discard proprietary attributes
	 * @see Configuration#dropProprietaryAttributes
	 */
	public void setDropProprietaryAttributes(boolean dropProprietaryAttributes) {
		this.configuration.dropProprietaryAttributes = dropProprietaryAttributes;
	}

	/**
	 * Reports the {@code drop-proprietary-attributes} option: discard
	 * proprietary attributes.
	 *
	 * @return {@code true} if tidy will discard proprietary attributes
	 * @see Configuration#dropProprietaryAttributes
	 */
	public boolean getDropProprietaryAttributes() {
		return this.configuration.dropProprietaryAttributes;
	}

	/**
	 * Sets the {@code drop-empty-paras} option: discard empty p elements.
	 *
	 * @param dropEmptyParas
	 *            {@code true} if tidy should discard empty p elements
	 * @see Configuration#dropEmptyParas
	 */
	public void setDropEmptyParas(boolean dropEmptyParas) {
		this.configuration.dropEmptyParas = dropEmptyParas;
	}

	/**
	 * Reports the {@code drop-empty-paras} option: discard empty p elements.
	 *
	 * @return {@code true} if tidy will discard empty p elements
	 * @see Configuration#dropEmptyParas
	 */
	public boolean getDropEmptyParas() {
		return this.configuration.dropEmptyParas;
	}

	/**
	 * Sets the {@code fix-bad-comments} option: fix comments with adjacent
	 * hyphens.
	 *
	 * @param fixComments
	 *            {@code true} if tidy should fix comments with adjacent
	 *            hyphens
	 * @see Configuration#fixComments
	 */
	public void setFixComments(boolean fixComments) {
		this.configuration.fixComments = fixComments;
	}

	/**
	 * Reports the {@code fix-bad-comments} option: fix comments with adjacent
	 * hyphens.
	 *
	 * @return {@code true} if tidy will fix comments with adjacent hyphens
	 * @see Configuration#fixComments
	 */
	public boolean getFixComments() {
		return this.configuration.fixComments;
	}

	/**
	 * Sets the {@code wrap-asp} option: wrap within ASP pseudo elements.
	 *
	 * @param wrapAsp
	 *            {@code true} if tidy should wrap within ASP pseudo elements
	 * @see Configuration#wrapAsp
	 */
	public void setWrapAsp(boolean wrapAsp) {
		this.configuration.wrapAsp = wrapAsp;
	}

	/**
	 * Reports the {@code wrap-asp} option: wrap within ASP pseudo elements.
	 *
	 * @return {@code true} if tidy will wrap within ASP pseudo elements
	 * @see Configuration#wrapAsp
	 */
	public boolean getWrapAsp() {
		return this.configuration.wrapAsp;
	}

	/**
	 * Sets the {@code wrap-jste} option: wrap within JSTE pseudo elements.
	 *
	 * @param wrapJste
	 *            {@code true} if tidy should wrap within JSTE pseudo elements
	 * @see Configuration#wrapJste
	 */
	public void setWrapJste(boolean wrapJste) {
		this.configuration.wrapJste = wrapJste;
	}

	/**
	 * Reports the {@code wrap-jste} option: wrap within JSTE pseudo elements.
	 *
	 * @return {@code true} if tidy will wrap within JSTE pseudo elements
	 * @see Configuration#wrapJste
	 */
	public boolean getWrapJste() {
		return this.configuration.wrapJste;
	}

	/**
	 * Sets the {@code wrap-php} option: wrap within PHP pseudo elements.
	 *
	 * @param wrapPhp
	 *            {@code true} if tidy should wrap within PHP pseudo elements
	 * @see Configuration#wrapPhp
	 */
	public void setWrapPhp(boolean wrapPhp) {
		this.configuration.wrapPhp = wrapPhp;
	}

	/**
	 * Reports the {@code wrap-php} option: wrap within PHP pseudo elements.
	 *
	 * @return {@code true} if tidy will wrap within PHP pseudo elements
	 * @see Configuration#wrapPhp
	 */
	public boolean getWrapPhp() {
		return this.configuration.wrapPhp;
	}

	/**
	 * Sets the {@code fix-backslash} option: fix URLs by replacing
	 * {@code \} with {@code /}.
	 *
	 * @param fixBackslash
	 *            {@code true} if tidy should fix URLs by replacing {@code \}
	 *            with {@code /}
	 * @see Configuration#fixBackslash
	 */
	public void setFixBackslash(boolean fixBackslash) {
		this.configuration.fixBackslash = fixBackslash;
	}

	/**
	 * Reports the {@code fix-backslash} option: fix URLs by replacing
	 * {@code \} with {@code /}.
	 *
	 * @return {@code true} if tidy will fix URLs by replacing {@code \} with
	 *         {@code /}
	 * @see Configuration#fixBackslash
	 */
	public boolean getFixBackslash() {
		return this.configuration.fixBackslash;
	}

	/**
	 * Sets the {@code indent-attributes} option: newline plus indent before
	 * each attribute.
	 *
	 * @param indentAttributes
	 *            {@code true} if tidy should output a newline plus indent
	 *            before each attribute
	 * @see Configuration#indentAttributes
	 */
	public void setIndentAttributes(boolean indentAttributes) {
		this.configuration.indentAttributes = indentAttributes;
	}

	/**
	 * Reports the {@code indent-attributes} option: newline plus indent
	 * before each attribute.
	 *
	 * @return {@code true} if tidy will output a newline plus indent before
	 *         each attribute
	 * @see Configuration#indentAttributes
	 */
	public boolean getIndentAttributes() {
		return this.configuration.indentAttributes;
	}

	/**
	 * Sets the {@code doctype} option: the user specified doctype.
	 * <p>
	 * The value is handed to {@code ParsePropertyImpl.DOCTYPE} and the parsed
	 * result is stored in {@link Configuration#docTypeStr}. A {@code null}
	 * argument is ignored and leaves the configuration untouched.
	 *
	 * @param doctype
	 *            {@code omit | auto | strict | loose | fpi}, where the
	 *            <i>fpi</i> is a string similar to
	 *            {@code "-//ACME//DTD HTML 3.14159//EN"}. Note: for
	 *            <i>fpi</i> include the double-quotes in the string.
	 * @see Configuration#docTypeStr
	 * @see Configuration#docTypeMode
	 */
	public void setDocType(String doctype) {
		if (doctype == null) {
			// Nothing to parse; keep the current doctype settings.
			return;
		}
		this.configuration.docTypeStr = (String) ParsePropertyImpl.DOCTYPE.parse(doctype, "doctype",
				this.configuration);
	}

	/**
	 * Reports the {@code doctype} option: the user specified doctype.
	 * <p>
	 * The result is derived from {@link Configuration#docTypeMode}; for the
	 * user mode the stored {@link Configuration#docTypeStr} is returned.
	 *
	 * @return {@code omit | auto | strict | loose | fpi}, where the
	 *         <i>fpi</i> is a string similar to
	 *         {@code "-//ACME//DTD HTML 3.14159//EN"} (including the
	 *         double-quotes), or {@code null} if the current mode is none of
	 *         the modes handled here
	 * @see Configuration#docTypeStr
	 * @see Configuration#docTypeMode
	 */
	public String getDocType() {
		switch (this.configuration.docTypeMode) {
		case Configuration.DOCTYPE_OMIT:
			return "omit";
		case Configuration.DOCTYPE_AUTO:
			return "auto";
		case Configuration.DOCTYPE_STRICT:
			return "strict";
		case Configuration.DOCTYPE_LOOSE:
			return "loose";
		case Configuration.DOCTYPE_USER:
			return this.configuration.docTypeStr;
		default:
			// Unrecognised mode: no textual representation available.
			return null;
		}
	}

	/**
	 * Sets the {@code logical-emphasis} option: replace i by em and b by
	 * strong.
	 *
	 * @param logicalEmphasis
	 *            {@code true} if tidy should replace i by em and b by strong
	 * @see Configuration#logicalEmphasis
	 */
	public void setLogicalEmphasis(boolean logicalEmphasis) {
		this.configuration.logicalEmphasis = logicalEmphasis;
	}

	/**
	 * Reports the {@code logical-emphasis} option: replace i by em and b by
	 * strong.
	 *
	 * @return {@code true} if tidy will replace i by em and b by strong
	 * @see Configuration#logicalEmphasis
	 */
	public boolean getLogicalEmphasis() {
		return this.configuration.logicalEmphasis;
	}

	/**
	 * Sets the {@code assume-xml-procins} option. This option specifies if
	 * Tidy should change the parsing of processing instructions to require
	 * {@code ?>} as the terminator rather than {@code >}. This option is
	 * automatically set if the input is in XML.
	 *
	 * @param xmlPIs
	 *            {@code true} if tidy should expect a {@code ?>} at the end
	 *            of processing instructions
	 * @see Configuration#xmlPIs
	 */
	public void setXmlPIs(boolean xmlPIs) {
		this.configuration.xmlPIs = xmlPIs;
	}

	/**
	 * Reports the {@code assume-xml-procins} option. This option specifies if
	 * Tidy should change the parsing of processing instructions to require
	 * {@code ?>} as the terminator rather than {@code >}. This option is
	 * automatically set if the input is in XML.
	 *
	 * @return {@code true} if tidy will expect a {@code ?>} at the end of
	 *         processing instructions
	 * @see Configuration#xmlPIs
	 */
	public boolean getXmlPIs() {
		return this.configuration.xmlPIs;
	}

	/**
	 * Sets the {@code enclose-text} option: if true, text at body level is
	 * wrapped in {@code <p>} elements.
	 *
	 * @param encloseText
	 *            {@code true} if tidy should wrap text at body level in
	 *            {@code <p>} elements
	 * @see Configuration#encloseBodyText
	 */
	public void setEncloseText(boolean encloseText) {
		this.configuration.encloseBodyText = encloseText;
	}

	/**
	 * Reports the {@code enclose-text} option: if true, text at body level is
	 * wrapped in {@code <p>} elements.
	 *
	 * @return {@code true} if tidy will wrap text at body level in
	 *         {@code <p>} elements
	 * @see Configuration#encloseBodyText
	 */
	public boolean getEncloseText() {
		return this.configuration.encloseBodyText;
	}

	/**
	 * Sets the {@code enclose-block-text} option: if true, text in blocks is
	 * wrapped in {@code <p>} elements.
	 *
	 * @param encloseBlockText
	 *            {@code true} if tidy should wrap text in blocks in
	 *            {@code <p>} elements
	 * @see Configuration#encloseBlockText
	 */
	public void setEncloseBlockText(boolean encloseBlockText) {
		this.configuration.encloseBlockText = encloseBlockText;
	}

	/**
	 * Reports the {@code enclose-block-text} option: if true, text in blocks
	 * is wrapped in {@code <p>} elements.
	 *
	 * @return {@code true} if tidy will wrap text in blocks in {@code <p>}
	 *         elements
	 * @see Configuration#encloseBlockText
	 */
	public boolean getEncloseBlockText() {
		return this.configuration.encloseBlockText;
	}

	/**
	 * Sets the {@code word-2000} option: draconian cleaning for Word2000.
	 *
	 * @param word2000
	 *            {@code true} if tidy should clean word2000 documents
	 * @see Configuration#word2000
	 */
	public void setWord2000(boolean word2000) {
		this.configuration.word2000 = word2000;
	}

	/**
	 * Reports the {@code word-2000} option: draconian cleaning for Word2000.
	 *
	 * @return {@code true} if tidy will clean word2000 documents
	 * @see Configuration#word2000
	 */
	public boolean getWord2000() {
		return this.configuration.word2000;
	}

	/**
	 * Sets the {@code tidy-mark} option: add a meta element indicating a
	 * tidied doc.
	 *
	 * @param tidyMark
	 *            {@code true} if tidy should add a meta element indicating a
	 *            tidied doc
	 * @see Configuration#tidyMark
	 */
	public void setTidyMark(boolean tidyMark) {
		this.configuration.tidyMark = tidyMark;
	}

	/**
	 * Reports the {@code tidy-mark} option: add a meta element indicating a
	 * tidied doc.
	 *
	 * @return {@code true} if tidy will add a meta element indicating a
	 *         tidied doc
	 * @see Configuration#tidyMark
	 */
	public boolean getTidyMark() {
		return this.configuration.tidyMark;
	}

	/**
	 * Sets the {@code add-xml-space} option: if set to yes, adds the
	 * xml:space attribute as needed.
	 *
	 * @param xmlSpace
	 *            {@code true} if tidy should add the xml:space attribute as
	 *            needed
	 * @see Configuration#xmlSpace
	 */
	public void setXmlSpace(boolean xmlSpace) {
		this.configuration.xmlSpace = xmlSpace;
	}

	/**
	 * Reports the {@code add-xml-space} option: if set to yes, adds the
	 * xml:space attribute as needed.
	 *
	 * @return {@code true} if tidy will add the xml:space attribute as needed
	 * @see Configuration#xmlSpace
	 */
	public boolean getXmlSpace() {
		return this.configuration.xmlSpace;
	}

	/**
	 * Sets the {@code gnu-emacs} option: if true, format error output for
	 * GNU Emacs.
	 *
	 * @param emacs
	 *            {@code true} if tidy should format error output for GNU
	 *            Emacs
	 * @see Configuration#emacs
	 */
	public void setEmacs(boolean emacs) {
		this.configuration.emacs = emacs;
	}

	/**
	 * Reports the {@code gnu-emacs} option: if true, format error output for
	 * GNU Emacs.
	 *
	 * @return {@code true} if tidy will format error output for GNU Emacs
	 * @see Configuration#emacs
	 */
	public boolean getEmacs() {
		return this.configuration.emacs;
	}

	/**
	 * Sets the {@code literal-attributes} option: if true, attributes may
	 * use newlines.
	 *
	 * @param literalAttribs
	 *            {@code true} if attributes may use newlines
	 * @see Configuration#literalAttribs
	 */
	public void setLiteralAttribs(boolean literalAttribs) {
		this.configuration.literalAttribs = literalAttribs;
	}

	/**
	 * Reports the {@code literal-attributes} option: if true, attributes may
	 * use newlines.
	 *
	 * @return {@code true} if attributes may use newlines
	 * @see Configuration#literalAttribs
	 */
	public boolean getLiteralAttribs() {
		return this.configuration.literalAttribs;
	}

	/**
	 * Sets the {@code print-body-only} option: output BODY content only.
	 *
	 * @param bodyOnly
	 *            {@code true} to print only the document body
	 * @see Configuration#bodyOnly
	 */
	public void setPrintBodyOnly(boolean bodyOnly) {
		this.configuration.bodyOnly = bodyOnly;
	}

	/**
	 * Reports the {@code print-body-only} option: output BODY content only.
	 *
	 * @return {@code true} if tidy will print only the document body
	 * @see Configuration#bodyOnly
	 */
	public boolean getPrintBodyOnly() {
		return this.configuration.bodyOnly;
	}

	/**
	 * Sets the {@code fix-uri} option: fix uri references, applying URI
	 * encoding if necessary.
	 *
	 * @param fixUri
	 *            {@code true} to fix uri references
	 * @see Configuration#fixUri
	 */
	public void setFixUri(boolean fixUri) {
		this.configuration.fixUri = fixUri;
	}

	/**
	 * Reports the {@code fix-uri} option: fix uri references, applying URI
	 * encoding if necessary.
	 *
	 * @return {@code true} if tidy will fix uri references
	 * @see Configuration#fixUri
	 */
	public boolean getFixUri() {
		return this.configuration.fixUri;
	}

	/**
	 * Sets the {@code lower-literals} option: fold known attribute values to
	 * lower case.
	 *
	 * @param lowerLiterals
	 *            {@code true} to fold known attribute values to lower case
	 * @see Configuration#lowerLiterals
	 */
	public void setLowerLiterals(boolean lowerLiterals) {
		this.configuration.lowerLiterals = lowerLiterals;
	}

	/**
	 * Reports the {@code lower-literals} option: fold known attribute values
	 * to lower case.
	 *
	 * @return {@code true} if tidy will fold known attribute values to lower
	 *         case
	 * @see Configuration#lowerLiterals
	 */
	public boolean getLowerLiterals() {
		return this.configuration.lowerLiterals;
	}

	/**
	 * Sets the {@code hide-comments} option: hide all (real) comments in the
	 * output.
	 *
	 * @param hideComments
	 *            {@code true} to hide all comments in the output
	 * @see Configuration#hideComments
	 */
	public void setHideComments(boolean hideComments) {
		this.configuration.hideComments = hideComments;
	}

	/**
	 * Reports the {@code hide-comments} option: hide all (real) comments in
	 * the output.
	 *
	 * @return {@code true} if tidy will hide all comments in the output
	 * @see Configuration#hideComments
	 */
	public boolean getHideComments() {
		return this.configuration.hideComments;
	}

	/**
	 * Sets the {@code indent-cdata} option: indent CDATA sections.
	 *
	 * @param indentCdata
	 *            {@code true} to indent CDATA sections
	 * @see Configuration#indentCdata
	 */
	public void setIndentCdata(boolean indentCdata) {
		this.configuration.indentCdata = indentCdata;
	}

	/**
	 * Reports the {@code indent-cdata} option: indent CDATA sections.
	 *
	 * @return {@code true} if tidy will indent CDATA sections
	 * @see Configuration#indentCdata
	 */
	public boolean getIndentCdata() {
		return this.configuration.indentCdata;
	}

	/**
	 * Sets the {@code force-output} option: output the document even if
	 * errors were found.
	 *
	 * @param forceOutput
	 *            {@code true} to output the document even if errors were
	 *            found
	 * @see Configuration#forceOutput
	 */
	public void setForceOutput(boolean forceOutput) {
		this.configuration.forceOutput = forceOutput;
	}

	/**
	 * Reports the {@code force-output} option: output the document even if
	 * errors were found.
	 *
	 * @return {@code true} if tidy will output the document even if errors
	 *         were found
	 * @see Configuration#forceOutput
	 */
	public boolean getForceOutput() {
		return this.configuration.forceOutput;
	}

	/**
	 * Sets the {@code show-errors} option: the number of errors to put out.
	 *
	 * @param showErrors
	 *            number of errors to put out
	 * @see Configuration#showErrors
	 */
	public void setShowErrors(int showErrors) {
		this.configuration.showErrors = showErrors;
	}

	/**
	 * Reports the {@code show-errors} option: the number of errors to put
	 * out.
	 *
	 * @return the number of errors tidy will put out
	 * @see Configuration#showErrors
	 */
	public int getShowErrors() {
		return this.configuration.showErrors;
	}

	/**
	 * Sets the {@code ascii-chars} option: convert quotes and dashes to the
	 * nearest ASCII char.
	 *
	 * @param asciiChars
	 *            {@code true} to convert quotes and dashes to the nearest
	 *            ASCII char
	 * @see Configuration#asciiChars
	 */
	public void setAsciiChars(boolean asciiChars) {
		this.configuration.asciiChars = asciiChars;
	}

	/**
	 * Reports the {@code ascii-chars} option: convert quotes and dashes to
	 * the nearest ASCII char.
	 *
	 * @return {@code true} if tidy will convert quotes and dashes to the
	 *         nearest ASCII char
	 * @see Configuration#asciiChars
	 */
	public boolean getAsciiChars() {
		return this.configuration.asciiChars;
	}

	/**
	 * Sets the {@code join-classes} option: join multiple class attributes.
	 *
	 * @param joinClasses
	 *            {@code true} to join multiple class attributes
	 * @see Configuration#joinClasses
	 */
	public void setJoinClasses(boolean joinClasses) {
		this.configuration.joinClasses = joinClasses;
	}

	/**
	 * Reports the {@code join-classes} option: join multiple class
	 * attributes.
	 *
	 * @return {@code true} if tidy will join multiple class attributes
	 * @see Configuration#joinClasses
	 */
	public boolean getJoinClasses() {
		return this.configuration.joinClasses;
	}

	/**
	 * Sets the {@code join-styles} option: join multiple style attributes.
	 *
	 * @param joinStyles
	 *            {@code true} to join multiple style attributes
	 * @see Configuration#joinStyles
	 */
	public void setJoinStyles(boolean joinStyles) {
		this.configuration.joinStyles = joinStyles;
	}

	/**
	 * Reports the {@code join-styles} option: join multiple style attributes.
	 *
	 * @return {@code true} if tidy will join multiple style attributes
	 * @see Configuration#joinStyles
	 */
	public boolean getJoinStyles() {
		return this.configuration.joinStyles;
	}

	/**
	 * Sets the {@code trim-empty-elements} option: trim empty elements.
	 *
	 * @param trimEmpty
	 *            {@code true} to trim empty elements
	 * @see Configuration#trimEmpty
	 */
	public void setTrimEmptyElements(boolean trimEmpty) {
		this.configuration.trimEmpty = trimEmpty;
	}

	/**
	 * Reports the {@code trim-empty-elements} option: trim empty elements.
	 *
	 * @return {@code true} if tidy will trim empty elements
	 * @see Configuration#trimEmpty
	 */
	public boolean getTrimEmptyElements() {
		return this.configuration.trimEmpty;
	}

	/**
	 * Sets the {@code replace-color} option: replace hex color attribute
	 * values with names.
	 *
	 * @param replaceColor
	 *            {@code true} to replace hex color attribute values with
	 *            names
	 * @see Configuration#replaceColor
	 */
	public void setReplaceColor(boolean replaceColor) {
		this.configuration.replaceColor = replaceColor;
	}

	/**
	 * Reports the {@code replace-color} option: replace hex color attribute
	 * values with names.
	 *
	 * @return {@code true} if tidy will replace hex color attribute values
	 *         with names
	 * @see Configuration#replaceColor
	 */
	public boolean getReplaceColor() {
		return this.configuration.replaceColor;
	}

	/**
	 * Sets the {@code escape-cdata} option: replace CDATA sections with
	 * escaped text.
	 *
	 * @param escapeCdata
	 *            {@code true} to replace CDATA sections with escaped text
	 * @see Configuration#escapeCdata
	 */
	public void setEscapeCdata(boolean escapeCdata) {
		this.configuration.escapeCdata = escapeCdata;
	}

	/**
	 * Reports the {@code escape-cdata} option: replace CDATA sections with
	 * escaped text.
	 *
	 * @return {@code true} if tidy will replace CDATA sections with escaped
	 *         text
	 * @see Configuration#escapeCdata
	 */
	public boolean getEscapeCdata() {
		return this.configuration.escapeCdata;
	}

	/**
	 * Sets the {@code repeated-attributes} option: keep the first or the last
	 * duplicate attribute. The value is stored as given, without validation.
	 *
	 * @param repeatedAttributes
	 *            {@code Configuration.KEEP_FIRST} or
	 *            {@code Configuration.KEEP_LAST}
	 * @see Configuration#duplicateAttrs
	 */
	public void setRepeatedAttributes(int repeatedAttributes) {
		this.configuration.duplicateAttrs = repeatedAttributes;
	}

	/**
	 * Reports the {@code repeated-attributes} option: keep the first or the
	 * last duplicate attribute.
	 *
	 * @return {@code Configuration.KEEP_FIRST} or
	 *         {@code Configuration.KEEP_LAST}
	 * @see Configuration#duplicateAttrs
	 */
	public int getRepeatedAttributes() {
		return this.configuration.duplicateAttrs;
	}

	/**
	 * Sets the {@code keep-time} option: if true, the last modified time is
	 * preserved.
	 * <p>
	 * Note: this is NOT supported at this time.
	 *
	 * @param keepFileTimes
	 *            {@code true} if tidy should preserve the last modified time
	 *            of the input file
	 * @see Configuration#keepFileTimes
	 */
	public void setKeepFileTimes(boolean keepFileTimes) {
		this.configuration.keepFileTimes = keepFileTimes;
	}

	/**
	 * Reports the {@code keep-time} option: if true, the last modified time
	 * is preserved.
	 * <p>
	 * Note: this is NOT supported at this time.
	 *
	 * @return {@code true} if tidy will preserve the last modified time of
	 *         the input file
	 * @see Configuration#keepFileTimes
	 */
	public boolean getKeepFileTimes() {
		return this.configuration.keepFileTimes;
	}

	/**
	 * Sets the {@code output-raw} option: avoid mapping values greater than
	 * 127 to entities. This has the same effect as specifying a "raw"
	 * encoding in the original version of tidy.
	 *
	 * @param rawOut
	 *            {@code true} to avoid mapping values greater than 127 to
	 *            entities
	 * @see Configuration#rawOut
	 */
	public void setRawOut(boolean rawOut) {
		this.configuration.rawOut = rawOut;
	}

	/**
	 * Reports the {@code output-raw} option: avoid mapping values greater
	 * than 127 to entities.
	 *
	 * @return {@code true} if tidy will not map values greater than 127 to
	 *         entities
	 * @see Configuration#rawOut
	 */
	public boolean getRawOut() {
		return this.configuration.rawOut;
	}

	/**
	 * Sets the {@code input-encoding} option: the character encoding used
	 * for input. Delegates to the configuration's
	 * {@code setInCharEncodingName}.
	 *
	 * @param encoding
	 *            a valid java encoding name
	 */
	public void setInputEncoding(String encoding) {
		this.configuration.setInCharEncodingName(encoding);
	}

	/**
	 * Reports the {@code input-encoding} option: the character encoding used
	 * for input. Delegates to the configuration's
	 * {@code getInCharEncodingName}.
	 *
	 * @return the java name of the encoding currently used for input
	 */
	public String getInputEncoding() {
		return this.configuration.getInCharEncodingName();
	}

	/**
	 * Sets the {@code output-encoding} option: the character encoding used
	 * for output. Delegates to the configuration's
	 * {@code setOutCharEncodingName}.
	 *
	 * @param encoding
	 *            a valid java encoding name
	 */
	public void setOutputEncoding(String encoding) {
		this.configuration.setOutCharEncodingName(encoding);
	}

	/**
	 * Reports the {@code output-encoding} option: the character encoding
	 * used for output. Delegates to the configuration's
	 * {@code getOutCharEncodingName}.
	 *
	 * @return the java name of the encoding currently used for output
	 */
	public String getOutputEncoding() {
		return this.configuration.getOutCharEncodingName();
	}

	/**
	 * Pretty-prints the given document into an in-memory buffer and returns
	 * the result as a string.
	 * <p>
	 * The buffered bytes are decoded with the encoding reported by
	 * {@link #getOutputEncoding()} rather than the platform default charset,
	 * so the text does not depend on the JVM's locale settings.
	 * NOTE(review): this assumes {@code pprint} writes bytes in the
	 * configured output encoding - confirm against {@code pprint}.
	 * If no encoding name is available, or the JVM does not support it, the
	 * platform default charset is used as before.
	 *
	 * @param document
	 *            the document to pretty-print
	 * @return the pretty-printed document as text
	 */
	public String asString(Document document) {
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		pprint(document, bos);
		String encoding = getOutputEncoding();
		if (encoding != null) {
			try {
				return bos.toString(encoding);
			} catch (java.io.UnsupportedEncodingException ignored) {
				// Unknown encoding name: fall back to the platform default
				// below, which is what this method did originally.
			}
		}
		return bos.toString();
	}

	/**
	 * Pretty-prints the {@code document} held by this instance into an
	 * in-memory buffer and returns the result as a string.
	 * <p>
	 * The buffered bytes are decoded with the encoding reported by
	 * {@link #getOutputEncoding()} rather than the platform default charset,
	 * so the text does not depend on the JVM's locale settings.
	 * NOTE(review): this assumes {@code pprint} writes bytes in the
	 * configured output encoding - confirm against {@code pprint}.
	 * If no encoding name is available, or the JVM does not support it, the
	 * platform default charset is used as before.
	 *
	 * @return the pretty-printed document as text
	 */
	public String asString() {
		ByteArrayOutputStream bos = new ByteArrayOutputStream();
		pprint(document, bos);
		String encoding = getOutputEncoding();
		if (encoding != null) {
			try {
				return bos.toString(encoding);
			} catch (java.io.UnsupportedEncodingException ignored) {
				// Unknown encoding name: fall back to the platform default
				// below, which is what this method did originally.
			}
		}
		return bos.toString();
	}

//	public static class SaxonSelector {
//
//		static final net.sf.saxon.s9api.Processor PROCESSOR = new net.sf.saxon.s9api.Processor(false);
//		static final net.sf.saxon.s9api.XPathCompiler XPATH = PROCESSOR.newXPathCompiler();
//		static final DocumentBuilder BUILDER = PROCESSOR.newDocumentBuilder();
//		DOMSource source;
//		XdmNode xn;
//
//		SaxonSelector(DOMSource source) throws Exception {
//			this.source = source;
//			xn = BUILDER.build(source);
//		}
//
//		public String evaluate(String xpath) throws Exception {
//			XPathExecutable compile = XPATH.compile(xpath);
//			XPathSelector selector = compile.load();
//			selector.setContextItem(xn);
//			StringBuilder sbr = new StringBuilder();
//			for (XdmItem xdmItem : selector)
//				sbr.append(xdmItem.toString());
//
//			return sbr.toString();
//		}
//		public Map evaluate(Map xpaths) throws Exception {
//			Set> entrySet = xpaths.entrySet();
//			HashMap res=new HashMap();
//			for (Entry entry : entrySet) {
//				String xpath = entry.getValue();
//				String key = entry.getKey();
//				XPathExecutable compile = XPATH.compile(xpath);
//				XPathSelector selector = compile.load();
//				selector.setContextItem(xn);
//				StringBuilder sbr = new StringBuilder();
//				for (XdmItem xdmItem : selector)
//					sbr.append(xdmItem.toString());
//				res.put(key, sbr.toString());
//			}
//			
//			return res;
//		}
//		public List evaluate(List xpaths) throws Exception {
//			
//			 List res=new ArrayList();
//			for (String xpath : xpaths) {
//			
//				XPathExecutable compile = XPATH.compile(xpath);
//				XPathSelector selector = compile.load();
//				selector.setContextItem(xn);
//				StringBuilder sbr = new StringBuilder();
//				for (XdmItem xdmItem : selector)
//					sbr.append(xdmItem.toString());
//				res.add(sbr.toString());
//			}
//			
//			return res;
//		}
//
//	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy