org.apache.calcite.util.XmlOutput Maven / Gradle / Ivy
Show all versions of calcite-core Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.util;
import com.google.common.collect.Lists;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
/**
* Streaming XML output.
*
* Use this class to write XML to any streaming source.
* While the class itself is unstructured and doesn't enforce any DTD
* specification, use of the class
* does ensure that the output is syntactically valid XML.
*/
public class XmlOutput {
// This Writer is the underlying output stream to which all XML is
// written.
private final PrintWriter out;
// The tagStack is maintained to check that tags are balanced.
private final Deque tagStack = new ArrayDeque<>();
// The class maintains an indentation level to improve output quality.
private int indent;
// The class also maintains the total number of tags written. This
// is used to monitor changes to the output
private int tagsWritten;
// This flag is set to true if the output should be compacted.
// Compacted output is free of extraneous whitespace and is designed
// for easier transport.
private boolean compact;
/** @see #setIndentString */
private String indentString = "\t";
/** @see #setGlob */
private boolean glob;
/**
* Whether we have started but not finished a start tag. This only happens
* if glob
is true. The start tag is automatically closed
* when we start a child node. If there are no child nodes, {@link #endTag}
* creates an empty tag.
*/
private boolean inTag;
/** @see #setAlwaysQuoteCData */
private boolean alwaysQuoteCData;
/** @see #setIgnorePcdata */
private boolean ignorePcdata;
/**
* Private helper function to display a degree of indentation
* @param out the PrintWriter to which to display output.
* @param indent the degree of indentation.
*/
private void displayIndent(PrintWriter out, int indent) {
if (!compact) {
for (int i = 0; i < indent; i++) {
out.print(indentString);
}
}
}
/**
* Constructs a new XmlOutput based on any {@link Writer}.
*
* @param out the writer to which this XmlOutput generates results.
*/
public XmlOutput(Writer out) {
this(new PrintWriter(out, true));
}
/**
* Constructs a new XmlOutput based on a {@link PrintWriter}.
*
* @param out the writer to which this XmlOutput generates results.
*/
public XmlOutput(PrintWriter out) {
this.out = out;
indent = 0;
tagsWritten = 0;
}
/**
* Sets or unsets the compact mode. Compact mode causes the generated
* XML to be free of extraneous whitespace and other unnecessary
* characters.
*
* @param compact true to turn on compact mode, or false to turn it off.
*/
public void setCompact(boolean compact) {
this.compact = compact;
}
public boolean getCompact() {
return compact;
}
/**
* Sets the string to print for each level of indentation. The default is a
* tab. The value must not be null
. Set this to the empty
* string to achieve no indentation (note that
* {@link #setCompact}(true)
removes indentation and
* newlines).
*/
public void setIndentString(String indentString) {
this.indentString = indentString;
}
/**
* Sets whether to detect that tags are empty.
*/
public void setGlob(boolean glob) {
this.glob = glob;
}
/**
* Sets whether to always quote cdata segments (even if they don't contain
* special characters).
*/
public void setAlwaysQuoteCData(boolean alwaysQuoteCData) {
this.alwaysQuoteCData = alwaysQuoteCData;
}
/**
* Sets whether to ignore unquoted text, such as whitespace.
*/
public void setIgnorePcdata(boolean ignorePcdata) {
this.ignorePcdata = ignorePcdata;
}
public boolean getIgnorePcdata() {
return ignorePcdata;
}
/**
* Sends a string directly to the output stream, without escaping any
* characters. Use with caution!
*/
public void print(String s) {
out.print(s);
}
/**
* Starts writing a new tag to the stream. The tag's name must be given and
* its attributes should be specified by a fully constructed AttrVector
* object.
*
* @param tagName the name of the tag to write.
* @param attributes an XMLAttrVector containing the attributes to include
* in the tag.
*/
public void beginTag(String tagName, XMLAttrVector attributes) {
beginBeginTag(tagName);
if (attributes != null) {
attributes.display(out, indent);
}
endBeginTag(tagName);
}
public void beginBeginTag(String tagName) {
if (inTag) {
// complete the parent's start tag
if (compact) {
out.print(">");
} else {
out.println(">");
}
inTag = false;
}
displayIndent(out, indent);
out.print("<");
out.print(tagName);
}
public void endBeginTag(String tagName) {
if (glob) {
inTag = true;
} else if (compact) {
out.print(">");
} else {
out.println(">");
}
out.flush();
tagStack.push(tagName);
indent++;
tagsWritten++;
}
/**
* Writes an attribute.
*/
public void attribute(String name, String value) {
printAtt(out, name, value);
}
/**
* If we are currently inside the start tag, finishes it off.
*/
public void beginNode() {
if (inTag) {
// complete the parent's start tag
if (compact) {
out.print(">");
} else {
out.println(">");
}
inTag = false;
}
}
/**
* Completes a tag. This outputs the end tag corresponding to the
* last exposed beginTag. The tag name must match the name of the
* corresponding beginTag.
* @param tagName the name of the end tag to write.
*/
public void endTag(String tagName) {
// Check that the end tag matches the corresponding start tag
String x = tagStack.pop();
assert x.equals(tagName);
// Lower the indent and display the end tag
indent--;
if (inTag) {
// we're still in the start tag -- this element had no children
if (compact) {
out.print("/>");
} else {
out.println("/>");
}
inTag = false;
} else {
displayIndent(out, indent);
out.print("");
out.print(tagName);
if (compact) {
out.print(">");
} else {
out.println(">");
}
}
out.flush();
}
/**
* Writes an empty tag to the stream. An empty tag is one with no
* tags inside it, although it may still have attributes.
*
* @param tagName the name of the empty tag.
* @param attributes an XMLAttrVector containing the attributes to
* include in the tag.
*/
public void emptyTag(String tagName, XMLAttrVector attributes) {
if (inTag) {
// complete the parent's start tag
if (compact) {
out.print(">");
} else {
out.println(">");
}
inTag = false;
}
displayIndent(out, indent);
out.print("<");
out.print(tagName);
if (attributes != null) {
out.print(" ");
attributes.display(out, indent);
}
if (compact) {
out.print("/>");
} else {
out.println("/>");
}
out.flush();
tagsWritten++;
}
/**
* Writes a CDATA section. Such sections always appear on their own line.
* The nature in which the CDATA section is written depends on the actual
* string content with respect to these special characters/sequences:
*
* &
* "
* '
* <
* >
*
* Additionally, the sequence ]]>
is special.
*
* - Content containing no special characters will be left as-is.
*
- Content containing one or more special characters but not the
* sequence
]]>
will be enclosed in a CDATA section.
* - Content containing special characters AND at least one
*
]]>
sequence will be left as-is but have all of its
* special characters encoded as entities.
*
* These special treatment rules are required to allow cdata sections
* to contain XML strings which may themselves contain cdata sections.
* Traditional CDATA sections do not nest.
*/
public void cdata(String data) {
cdata(data, false);
}
/**
* Writes a CDATA section (as {@link #cdata(String)}).
*
* @param data string to write
* @param quote if true, quote in a <![CDATA[
* ... ]]>
regardless of the content of
* data
; if false, quote only if the content needs it
*/
public void cdata(String data, boolean quote) {
if (inTag) {
// complete the parent's start tag
if (compact) {
out.print(">");
} else {
out.println(">");
}
inTag = false;
}
if (data == null) {
data = "";
}
boolean specials = false;
boolean cdataEnd = false;
// Scan the string for special characters
// If special characters are found, scan the string for ']]>'
if (stringHasXMLSpecials(data)) {
specials = true;
if (data.contains("]]>")) {
cdataEnd = true;
}
}
// Display the result
displayIndent(out, indent);
if (quote || alwaysQuoteCData) {
out.print("");
} else if (!specials) {
out.print(data);
} else {
stringEncodeXML(data, out);
}
out.println();
out.flush();
tagsWritten++;
}
/**
* Writes a String tag; a tag containing nothing but a CDATA section.
*/
public void stringTag(String name, String data) {
beginTag(name, null);
cdata(data);
endTag(name);
}
/**
* Writes content.
*/
public void content(String content) {
// This method previously used a LineNumberReader, but that class is
// susceptible to a form of DoS attack. It uses lots of memory and CPU if a
// malicious client gives it input with very long lines.
if (content != null) {
indent++;
final char[] chars = content.toCharArray();
int prev = 0;
for (int i = 0; i < chars.length; i++) {
if (chars[i] == '\n'
|| chars[i] == '\r'
&& i + 1 < chars.length
&& chars[i + 1] == '\n') {
displayIndent(out, indent);
out.println(content.substring(prev, i));
if (chars[i] == '\r') {
++i;
}
prev = i + 1;
}
}
displayIndent(out, indent);
out.println(content.substring(prev, chars.length));
indent--;
out.flush();
}
tagsWritten++;
}
/**
* Write header. Use default version 1.0.
*/
public void header() {
out.println("");
out.flush();
tagsWritten++;
}
/**
* Write header, take version as input.
*/
public void header(String version) {
out.print("");
out.flush();
tagsWritten++;
}
/**
* Get the total number of tags written
* @return the total number of tags written to the XML stream.
*/
public int numTagsWritten() {
return tagsWritten;
}
/** Print an XML attribute name and value for string val */
private static void printAtt(PrintWriter pw, String name, String val) {
if (val != null /* && !val.equals("") */) {
pw.print(" ");
pw.print(name);
pw.print("=\"");
pw.print(escapeForQuoting(val));
pw.print("\"");
}
}
/**
* Encode a String for XML output, displaying it to a PrintWriter.
* The String to be encoded is displayed, except that
* special characters are converted into entities.
* @param input a String to convert.
* @param out a PrintWriter to which to write the results.
*/
private static void stringEncodeXML(String input, PrintWriter out) {
for (int i = 0; i < input.length(); i++) {
char c = input.charAt(i);
switch (c) {
case '<':
case '>':
case '"':
case '\'':
case '&':
case '\t':
case '\n':
case '\r':
out.print("" + (int) c + ";");
break;
default:
out.print(c);
}
}
}
private static String escapeForQuoting(String val) {
return StringEscaper.XML_NUMERIC_ESCAPER.escapeString(val);
}
/**
* Returns whether a string contains any XML special characters.
*
* If this function returns true, the string will need to be
* encoded either using the stringEncodeXML function above or using a
* CDATA section. Note that MSXML has a nasty bug whereby whitespace
* characters outside of a CDATA section are lost when parsing. To
* avoid hitting this bug, this method treats many whitespace characters
* as "special".
*
* @param input the String to scan for XML special characters.
* @return true if the String contains any such characters.
*/
private static boolean stringHasXMLSpecials(String input) {
for (int i = 0; i < input.length(); i++) {
char c = input.charAt(i);
switch (c) {
case '<':
case '>':
case '"':
case '\'':
case '&':
case '\t':
case '\n':
case '\r':
return true;
}
}
return false;
}
/**
* Utility for replacing special characters
* with escape sequences in strings.
*
* A StringEscaper starts out as an identity transform in the "mutable"
* state. Call {@link #defineEscape} as many times as necessary to set up
* mappings, and then call {@link #makeImmutable} before
* actually applying the defined transform. Or,
* use one of the global mappings pre-defined here.
*/
static class StringEscaper implements Cloneable {
private ArrayList translationVector;
private String [] translationTable;
public static final StringEscaper XML_ESCAPER;
public static final StringEscaper XML_NUMERIC_ESCAPER;
public static final StringEscaper HTML_ESCAPER;
public static final StringEscaper URL_ARG_ESCAPER;
public static final StringEscaper URL_ESCAPER;
/**
* Identity transform
*/
StringEscaper() {
translationVector = new ArrayList<>();
}
/**
* Map character "from" to escape sequence "to"
*/
public void defineEscape(char from, String to) {
int i = (int) from;
if (i >= translationVector.size()) {
// Extend list by adding the requisite number of nulls.
final int count = i + 1 - translationVector.size();
translationVector.addAll(Collections.nCopies(count, null));
}
translationVector.set(i, to);
}
/**
* Call this before attempting to escape strings; after this,
* defineEscape may not be called again.
*/
public void makeImmutable() {
translationTable =
translationVector.toArray(new String[0]);
translationVector = null;
}
/**
* Apply an immutable transformation to the given string.
*/
public String escapeString(String s) {
StringBuilder sb = null;
int n = s.length();
for (int i = 0; i < n; i++) {
char c = s.charAt(i);
String escape;
// codes >= 128 (e.g. Euro sign) are always escaped
if (c > 127) {
escape = "" + Integer.toString(c) + ";";
} else if (c >= translationTable.length) {
escape = null;
} else {
escape = translationTable[c];
}
if (escape == null) {
if (sb != null) {
sb.append(c);
}
} else {
if (sb == null) {
sb = new StringBuilder(n * 2);
sb.append(s, 0, i);
}
sb.append(escape);
}
}
if (sb == null) {
return s;
} else {
return sb.toString();
}
}
protected StringEscaper clone() {
StringEscaper clone = new StringEscaper();
if (translationVector != null) {
clone.translationVector = new ArrayList<>(translationVector);
}
if (translationTable != null) {
clone.translationTable = translationTable.clone();
}
return clone;
}
/**
* Create a mutable escaper from an existing escaper, which may
* already be immutable.
*/
public StringEscaper getMutableClone() {
StringEscaper clone = clone();
if (clone.translationVector == null) {
clone.translationVector = Lists.newArrayList(clone.translationTable);
clone.translationTable = null;
}
return clone;
}
static {
HTML_ESCAPER = new StringEscaper();
HTML_ESCAPER.defineEscape('&', "&");
HTML_ESCAPER.defineEscape('"', """);
// htmlEscaper.defineEscape('\'',"'");
HTML_ESCAPER.defineEscape('\'', "'");
HTML_ESCAPER.defineEscape('<', "<");
HTML_ESCAPER.defineEscape('>', ">");
XML_NUMERIC_ESCAPER = new StringEscaper();
XML_NUMERIC_ESCAPER.defineEscape('&', "&");
XML_NUMERIC_ESCAPER.defineEscape('"', """);
XML_NUMERIC_ESCAPER.defineEscape('\'', "'");
XML_NUMERIC_ESCAPER.defineEscape('<', "<");
XML_NUMERIC_ESCAPER.defineEscape('>', ">");
URL_ARG_ESCAPER = new StringEscaper();
URL_ARG_ESCAPER.defineEscape('?', "%3f");
URL_ARG_ESCAPER.defineEscape('&', "%26");
URL_ESCAPER = URL_ARG_ESCAPER.getMutableClone();
URL_ESCAPER.defineEscape('%', "%%");
URL_ESCAPER.defineEscape('"', "%22");
URL_ESCAPER.defineEscape('\r', "+");
URL_ESCAPER.defineEscape('\n', "+");
URL_ESCAPER.defineEscape(' ', "+");
URL_ESCAPER.defineEscape('#', "%23");
HTML_ESCAPER.makeImmutable();
XML_ESCAPER = HTML_ESCAPER;
XML_NUMERIC_ESCAPER.makeImmutable();
URL_ARG_ESCAPER.makeImmutable();
URL_ESCAPER.makeImmutable();
}
}
/** List of attribute names and values. */
static class XMLAttrVector {
public void display(PrintWriter out, int indent) {
throw new UnsupportedOperationException();
}
}
}
// End XmlOutput.java