All downloads are free. Search and download functionality uses the official Maven repository.

net.pwall.xml.XMLFormatter Maven / Gradle / Ivy

/*
 * @(#) XMLFormatter.java
 *
 * XML Formatter Class
 * Copyright (c) 2012, 2013, 2014 Peter Wall
 *
 * This file is free software: you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package net.pwall.xml;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import net.pwall.util.UserError;

import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.ext.DefaultHandler2;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * XML Formatter.  Takes a stream of SAX events and outputs the corresponding external
 * representation of the XML.
 *
 * @author Peter Wall
 */
public class XMLFormatter extends DefaultHandler2 implements AutoCloseable {

    /**
     * Enumeration to control whitespace handling in the formatted output:
     * 
*
NONE
*
All non-essential whitespace will be dropped
*
ALL
*
All whitespace will be output
*
INDENT
*
The formatter will, where possible, format the output in a conventional indented * form
*
*/ public enum Whitespace { NONE, ALL, INDENT } private static String eol = System.getProperty("line.separator"); private OutputStream out; private Whitespace whitespace; private String encoding; private int indent; private StringBuilder data; private boolean elementCloseAngleBracketPending; private List elements; private List prefixMappings; private Writer writer; private boolean inCDATA; private Locator locator; private boolean documentStarted; private boolean documentEnded; private boolean dtdStarted; private boolean dtdInternalSubset; private boolean dtdEnded; /** * Construct an {@code XMLFormatter} using the given {@link OutputStream}, with the given * whitespace option. * * @param out the {@link OutputStream} * @param whitespace the whitespace option */ public XMLFormatter(OutputStream out, Whitespace whitespace) { this.out = out; this.whitespace = whitespace; encoding = "UTF-8"; indent = 2; data = new StringBuilder(); elementCloseAngleBracketPending = false; elements = new ArrayList<>(); prefixMappings = new ArrayList<>(); writer = null; inCDATA = false; locator = null; documentStarted = false; documentEnded = false; } /** * Construct an {@code XMLFormatter} using the given {@link OutputStream}, with the default * whitespace option. * * @param out the {@link OutputStream} */ public XMLFormatter(OutputStream out) { this(out, Whitespace.ALL); } /** * Close the formatter. * * @throws IOException on any errors closing the output {@link Writer}. */ @Override public void close() throws IOException { if (writer != null) { writer.close(); writer = null; } if (!documentEnded) { documentEnded = true; throw new IOException("Premature XMLFormatter close"); } } /** * Receive notification of the beginning of the DTD. 
* * @param name the document element name * @param publicId the public id * @param systemId the system id * @throws SAXException on any errors * @see org.xml.sax.ext.LexicalHandler#startDTD(String, String, String) */ @Override public void startDTD(String name, String publicId, String systemId) throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (dtdStarted || dtdEnded) throw new SAXException("Misplaced DTD"); dtdStarted = true; String data = checkData(); if (!XML.isAllWhiteSpace(data)) throw new SAXException("Misplaced data before DOCTYPE"); try { if (whitespace == Whitespace.ALL) write(data); write("'); if (whitespace == Whitespace.INDENT) write(eol); } catch (IOException ioe) { throw new SAXException("Error in XMLFormatter", ioe); } } /** * Receive notification of the beginning of the document. * * @exception org.xml.sax.SAXException on any errors * @see org.xml.sax.ContentHandler#startDocument() */ @Override public void startDocument() throws SAXException { if (documentStarted) throw new SAXException("Document already started"); documentStarted = true; } /** * Receive notification of the end of the document. * * @exception org.xml.sax.SAXException on any errors * @see org.xml.sax.ContentHandler#endDocument() */ @Override public void endDocument() throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (documentEnded) throw new SAXException("Document already ended"); documentEnded = true; if (elements.size() > 0) throw new SAXException("Premature document end"); if (!XML.isAllWhiteSpace(data)) throw new SAXException("Data after last element"); try { write(checkData()); } catch (IOException ioe) { throw new SAXException("Error in XMLFormatter", ioe); } } /** * Receive notification of the start of a Namespace mapping. 
* * @param prefix the Namespace prefix being declared * @param uri the Namespace URI mapped to the prefix * @exception org.xml.sax.SAXException on any errors * @see org.xml.sax.ContentHandler#startPrefixMapping(String, String) */ @Override public void startPrefixMapping(String prefix, String uri) throws SAXException { if (prefix == null || uri == null) throw new SAXException("Null argument not allowed"); if ("xml".equals(prefix)) throw new SAXException("Can't use xml as namespace prefix"); prefixMappings.add(new PrefixMapping(prefix, uri)); } /** * Receive notification of the end of a Namespace mapping. * * @param prefix the Namespace prefix * @exception org.xml.sax.SAXException on any errors * @see org.xml.sax.ContentHandler#endPrefixMapping(String) */ @Override public void endPrefixMapping(String prefix) throws SAXException { if (prefix == null) throw new SAXException("Null argument not allowed"); // do nothing } /** * Set the {@link Locator} object for references from this document. * * @param locator the {@link Locator} object */ @Override public void setDocumentLocator(Locator locator) { this.locator = locator; } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (documentEnded) throw new SAXException("Document already ended"); try { write(checkData()); if (whitespace == Whitespace.INDENT) writeSpaces(elements.size() * getIndent()); int prefixMappingIndex = elements.isEmpty() ? 0 : elements.get(elements.size() - 1).getPrefixMappingIndex(); write('<'); write(qName); // if qualified, check that prefix mapping exists? 
for (int i = 0, n = attributes.getLength(); i < n; i++) { write(' '); write(attributes.getQName(i)); write("=\""); write(XML.escape(attributes.getValue(i))); write('"'); } for (; prefixMappingIndex < prefixMappings.size(); prefixMappingIndex++) { PrefixMapping prefixMapping = prefixMappings.get(prefixMappingIndex); write(" xmlns"); if (prefixMapping.getPrefix().length() > 0) { write(':'); write(prefixMapping.getPrefix()); } write("=\""); write(XML.escape(prefixMapping.getUri())); write('"'); } elements.add(new Element(uri, localName, qName, prefixMappingIndex)); elementCloseAngleBracketPending = true; } catch (Exception e) { throw new SAXException("Error in XMLFormatter", e); } } private boolean elementsMatch(String uri, String localName, String qName) { int n = elements.size(); if (n == 0) return false; Element e = elements.remove(n - 1); return Objects.equals(uri, e.getUri()) && Objects.equals(localName, e.getLocalName()) && Objects.equals(qName, e.getQName()); } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (documentEnded) throw new SAXException("Document already ended"); try { if (!elementsMatch(uri, localName, qName)) throw new IllegalStateException("Unmatched element end"); String data = this.data.toString(); this.data.setLength(0); if (whitespace == Whitespace.NONE) { if (elementCloseAngleBracketPending) { if (XML.isAllWhiteSpace(data)) write("/>"); else { write('>'); write(XML.escapeData(XML.trim(data))); write("'); } elementCloseAngleBracketPending = false; } else { if (!XML.isAllWhiteSpace(data)) { if (XML.isWhiteSpace(data.charAt(0))) write(' '); write(XML.escapeData(XML.trim(data))); } write("'); } } else if (whitespace == Whitespace.ALL) { if (elementCloseAngleBracketPending) { if (data.length() > 0) { write('>'); write(XML.escapeData(data)); write("'); } else write("/>"); elementCloseAngleBracketPending = false; } else { 
write(XML.escapeData(data)); write("'); } } else { // whitespace == Whitespace.INDENT if (elementCloseAngleBracketPending) { if (XML.isAllWhiteSpace(data)) { write("/>"); write(eol); } else { write('>'); write(XML.escapeData(XML.trim(data))); write("'); write(eol); } elementCloseAngleBracketPending = false; } else { if (!XML.isAllWhiteSpace(data)) { writeSpaces((elements.size() + 1) * getIndent()); write(XML.escapeData(XML.trim(data))); write(eol); } writeSpaces(elements.size() * getIndent()); write("'); write(eol); } } } catch (Exception e) { throw new SAXException("Error in XMLFormatter", e); } } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (documentEnded) throw new SAXException("Document already ended"); try { if (inCDATA) { for (int i = 0, n = length - 1; i < n; i++) { if (ch[i] == ']' && ch[i + 1] == ']') throw new SAXParseException("Illegal content in CDATA", locator); } write(ch, start, length); } else data.append(ch, start, length); } catch (SAXException saxe) { throw saxe; } catch (Exception e) { throw new SAXException("Error in XMLFormatter", e); } } public void characters(String str) throws SAXException { characters(str.toCharArray(), 0, str.length()); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { characters(ch, start, length); } @Override public void processingInstruction(String target, String data) throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (documentEnded) throw new SAXException("Document already ended"); try { write(checkData()); if (target.indexOf("?>") >= 0 || data.indexOf("?>") >= 0) throw new SAXParseException("Illegal content in processing instruction", locator); if (whitespace == Whitespace.INDENT) writeSpaces(elements.size() * getIndent()); write(""); if (whitespace == Whitespace.INDENT) write(eol); } catch (SAXException 
saxe) { throw saxe; } catch (Exception e) { throw new SAXException("Error in XMLFormatter", e); } } @Override public void warning(SAXParseException e) throws SAXException { throw e; } @Override public void error(SAXParseException e) throws SAXException { throw e; } @Override public void fatalError(SAXParseException e) throws SAXException { throw e; } @Override public void startCDATA() throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (documentEnded) throw new SAXException("Document already ended"); try { write(checkData()); if (whitespace == Whitespace.INDENT) writeSpaces(elements.size() * getIndent()); write(""); if (whitespace == Whitespace.INDENT) write(eol); } catch (Exception e) { throw new SAXException("Error in XMLFormatter", e); } } @Override public void comment(char[] ch, int start, int length) throws SAXException { if (!documentStarted) throw new SAXException("Document not started"); if (documentEnded) throw new SAXException("Document already ended"); try { write(checkData()); if (whitespace == Whitespace.INDENT) writeSpaces(elements.size() * getIndent()); for (int i = 0, n = length - 1; i < n; i++) { if (ch[i] == '-' && ch[i + 1] == '-') throw new SAXParseException("Illegal content in comment", locator); } write(""); if (whitespace == Whitespace.INDENT) write(eol); } catch (SAXException saxe) { throw saxe; } catch (Exception e) { throw new SAXException("Error in XMLFormatter", e); } } public void xslProcessingInstruction(String href) throws SAXException { if (href != null && href.length() > 0) processingInstruction("xml-stylesheet", "href=\"" + href + "\" type=\"text/xsl\""); } public void prefix(String standalone) throws SAXException { try { write(createPrefix(standalone)); if (whitespace != Whitespace.NONE) write(eol); } catch (Exception e) { throw new SAXException("Error in XMLFormatter", e); } } public void prefix() throws SAXException { prefix(null); } public String createPrefix(String standalone) { 
StringBuilder output = new StringBuilder(""); return output.toString(); } public String createPrefix() { return createPrefix(null); } private String checkData() { StringBuilder output = new StringBuilder(); String data = this.data.toString(); this.data.setLength(0); if (whitespace == Whitespace.NONE) { if (elementCloseAngleBracketPending) { output.append('>'); elementCloseAngleBracketPending = false; if (!XML.isAllWhiteSpace(data)) { output.append(XML.escapeData(XML.trim(data))); if (XML.isWhiteSpace(data.charAt(data.length() - 1))) output.append(' '); } } else if (data.length() > 0) { if (XML.isAllWhiteSpace(data)) output.append(' '); else { if (XML.isWhiteSpace(data.charAt(0))) output.append(' '); output.append(XML.escapeData(XML.trim(data))); if (XML.isWhiteSpace(data.charAt(data.length() - 1))) output.append(' '); } } } else if (whitespace == Whitespace.ALL) { if (elementCloseAngleBracketPending) { output.append('>'); elementCloseAngleBracketPending = false; } output.append(XML.escapeData(data)); } else { // whitespace == Whitespace.INDENT if (elementCloseAngleBracketPending) { output.append('>'); output.append(eol); elementCloseAngleBracketPending = false; } if (!XML.isAllWhiteSpace(data)) { addSpaces(output, elements.size() * getIndent()); output.append(XML.escapeData(XML.trim(data))); output.append(eol); } } return output.toString(); } private void write(String str) throws IOException { getWriter().write(str); } private void write(char ch) throws IOException { getWriter().write(ch); } private void write(char[] buf, int start, int length) throws IOException { getWriter().write(buf, start, length); } private void writeSpaces(int length) throws IOException { while (length-- > 0) getWriter().write(' '); } private synchronized Writer getWriter() throws IOException { if (writer == null) { if (out == null) throw new IllegalStateException("Output Stream not set"); String encoding = getEncoding(); writer = new BufferedWriter(encoding == null ? 
new OutputStreamWriter(out) : new OutputStreamWriter(out, encoding)); } return writer; } private static void addSpaces(StringBuilder a, int n) { for (; n > 0; n--) a.append(' '); } public Whitespace getWhitespace() { return whitespace; } public void setWhitespace(Whitespace whitespace) { this.whitespace = whitespace; } public String getEncoding() { return encoding; } public void setEncoding(String encoding) { this.encoding = encoding; } public int getIndent() { return indent; } public void setIndent(int indent) { this.indent = indent; } private static boolean isEmpty(String s) { return s == null || s.isEmpty(); } public static void main(String[] args) { try { File in = null; File out = null; Whitespace ws = null; int indent = -1; Boolean nsAware = null; Boolean extEntities = null; for (int i = 0; i < args.length; ++i) { String arg = args[i]; if (arg.equals("-in")) { if (++i >= args.length || args[i].startsWith("-")) throw new UserError("-in with no pathname"); if (in != null) throw new UserError("Duplicate " + arg); in = new File(args[i]); if (!in.exists() || !in.isFile()) throw new UserError("-in file does not exist - " + args[i]); } else if (arg.equals("-out")) { if (++i >= args.length || args[i].startsWith("-")) throw new UserError("-out with no pathname"); if (out != null) throw new UserError("Duplicate " + arg); out = new File(args[i]); } else if (arg.equals("-whitespace") || arg.equals("-ws")) { if (++i >= args.length || args[i].startsWith("-")) throw new UserError(arg + " with no option"); if (ws != null) throw new UserError("Duplicate " + arg); try { ws = Whitespace.valueOf(args[i].toUpperCase()); } catch (IllegalArgumentException iae) { throw new UserError("Illegal " + arg + " option - " + args[i]); } if ((ws == Whitespace.ALL || ws == Whitespace.NONE) && indent >= 0) throw new UserError("-indent conflicts with whitespace option"); } else if (arg.equals("-indent")) { if (++i >= args.length || args[i].startsWith("-")) throw new UserError("-indent with no 
value"); if (indent >= 0) throw new UserError("Duplicate " + arg); if (ws == Whitespace.ALL || ws == Whitespace.NONE) throw new UserError("-indent conflicts with whitespace option"); try { indent = Integer.parseInt(args[i]); if (indent < 0 || indent > 100) throw new UserError("Illegal -indent"); } catch (NumberFormatException e) { throw new UserError("Illegal -indent"); } } else if (arg.equals("-ns")) { // make parser namespace-aware if (nsAware != null) throw new UserError("Duplicate " + arg); nsAware = Boolean.TRUE; } else if (arg.equals("-nons")) { if (nsAware != null) throw new UserError("Duplicate " + arg); nsAware = Boolean.FALSE; } else if (arg.equals("-ext")) { // allow external entities if (extEntities != null) throw new UserError("Duplicate " + arg); extEntities = Boolean.TRUE; } else if (arg.equals("-noext")) { if (extEntities != null) throw new UserError("Duplicate " + arg); extEntities = Boolean.FALSE; } else throw new UserError("Unrecognised argument - " + arg); } if (in == null) throw new UserError("No -in specified"); if (nsAware == null) nsAware = Boolean.FALSE; if (extEntities == null) extEntities = Boolean.FALSE; if (out != null) { try (OutputStream os = new FileOutputStream(out)) { run(os, in, ws, indent, nsAware, extEntities); } catch (IOException ioe) { throw new RuntimeException("Error writing output file", ioe); } } else run(System.out, in, ws, indent, nsAware, extEntities); } catch (Exception e) { e.printStackTrace(); System.exit(1); } } private static void run(OutputStream os, File in, Whitespace ws, int indent, boolean nsAware, boolean extEntities) { try (XMLFormatter formatter = new XMLFormatter(os); InputStream is = new FileInputStream(in)) { formatter.setWhitespace(ws != null ? ws : Whitespace.INDENT); formatter.setIndent(indent >= 0 ? 
indent : 2); formatter.prefix(); XMLReader reader = XMLReaderFactory.createXMLReader(); reader.setFeature(XML.VALIDATION_FEATURE, false); reader.setFeature(XML.RESOLVE_DTD_URIS_FEATURE, false); reader.setFeature(XML.NAMESPACES_FEATURE, nsAware); reader.setFeature(XML.EXTERNAL_GENERAL_ENTITIES_FEATURE, extEntities); reader.setFeature(XML.EXTERNAL_PARAMETER_ENTITIES_FEATURE, extEntities); reader.setContentHandler(formatter); reader.setErrorHandler(formatter); try { reader.setProperty(XML.LEXICAL_HANDLER_PROPERTY, formatter); } catch (SAXNotRecognizedException e) { // ignore } reader.parse(new InputSource(is)); } catch (Exception e) { throw new RuntimeException("Unexpected error", e); } } /** * A simple class to store the tagname information about the current element. */ public static class Element { private String uri; private String localName; private String qName; private int prefixMappingIndex; public Element(String uri, String localName, String qName, int prefixMappingIndex) { this.uri = uri; this.localName = localName; this.qName = qName; this.prefixMappingIndex = prefixMappingIndex; } public String getUri() { return uri; } public String getLocalName() { return localName; } public String getQName() { return qName; } public int getPrefixMappingIndex() { return prefixMappingIndex; } } public static class PrefixMapping { private String prefix; private String uri; public PrefixMapping(String prefix, String uri) { this.prefix = prefix; this.uri = uri; } public String getPrefix() { return prefix; } public String getUri() { return uri; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy