src.java.com.ctc.wstx.io.UTFTextWriter Maven / Gradle / Ivy
package com.ctc.wstx.io;
import java.io.*;
/**
* Basic escaping writer used when outputting normal textual content.
* Only needs to escape '<' and '&' characters, plus '>' when
* following "]]" string. Note that to detect the last case, the logic
* is bit simplified, so that any '>' that immediately follows a ']'
* gets escaped. Further, since the Writer does not know of text segment
* boundaries, it is possible that there is no immediate sequence in the
* output. This is not a correctness problem,
* however (since such escaping is perfectly legal, although not
* strictly necessary), just a slightly "unoptimal" behaviour.
*
*/
public class UTFTextWriter
extends WriterBase
{
private final boolean mEscapeCR;
private boolean mJustWroteBracket = false;
/**
* @param enc Name of actual encoding in use; ignored for UTF
* writers
* @param escapeCR If true, will encode \r character; if false, will
* output as is (former is needed for reliable round-tripping, but
* adds verbosity without necessary benefits)
*/
public UTFTextWriter(Writer out, String enc, boolean escapeCR)
{
super(out);
mEscapeCR = escapeCR;
}
public void write(int c) throws IOException
{
if (c <= HIGHEST_ENCODABLE_TEXT_CHAR) {
switch (c) {
case '<':
out.write("<");
break;
case '&':
out.write("&");
break;
case '>':
if (mJustWroteBracket) {
out.write(">");
} else {
out.write(c);
}
break;
case '\r':
if (mEscapeCR) {
out.write(STR_ESCAPED_CR);
} else {
out.write(c);
}
break;
default:
out.write(c);
}
mJustWroteBracket = false;
} else {
out.write(c);
mJustWroteBracket = (c == ']');
}
}
public void write(char cbuf[], int offset, int len) throws IOException
{
// Let's simplify code a bit and offload the trivial case...
if (len < 2) {
if (len == 1) {
write(cbuf[offset]);
}
return;
}
char c = CHAR_NULL;
len += offset; // to get the index past last char to output
// Need special handing for leftover ']' to cause quoting of '>'
if (mJustWroteBracket) {
c = cbuf[offset];
if (c == '>') {
out.write(">");
++offset;
}
}
do {
int start = offset;
String ent = null;
for (; offset < len; ++offset) {
c = cbuf[offset];
if (c > HIGHEST_ENCODABLE_TEXT_CHAR) {
continue;
}
if (c == '<') {
ent = "<";
} else if (c == '&') {
ent = "&";
} else if (c == '\r') {
if (!mEscapeCR) {
continue;
}
ent = STR_ESCAPED_CR;
} else if (c == '>' && (offset > start)
&& cbuf[offset-1] == ']') {
ent = ">";
} else {
continue;
}
break;
}
int outLen = offset - start;
if (outLen > 0) {
out.write(cbuf, start, outLen);
}
if (ent != null) {
out.write(ent);
ent = null;
}
} while (++offset < len);
// Ok, did we end up with a bracket?
mJustWroteBracket = (c == ']');
}
public void write(String str, int offset, int len) throws IOException
{
if (len < 2) { // let's do a simple check here
if (len == 1) {
write(str.charAt(offset));
}
return;
}
char c = CHAR_NULL;
len += offset; // to get the index past last char to output
// Ok, leftover ']' to cause quoting of '>'?
if (mJustWroteBracket) {
c = str.charAt(offset);
if (c == '>') {
out.write(">");
++offset;
}
}
do {
int start = offset;
String ent = null;
for (; offset < len; ++offset) {
c = str.charAt(offset);
if (c > HIGHEST_ENCODABLE_TEXT_CHAR) {
continue;
}
if (c == '<') {
ent = "<";
} else if (c == '&') {
ent = "&";
} else if (c == '\r') {
if (!mEscapeCR) {
continue;
}
ent = STR_ESCAPED_CR;
} else if (c == '>' && (offset > start)
&& str.charAt(offset-1) == ']') {
ent = ">";
} else {
continue;
}
break;
}
int outLen = offset - start;
if (outLen > 0) {
out.write(str, start, outLen);
}
if (ent != null) {
out.write(ent);
ent = null;
}
} while (++offset < len);
// Ok, did we end up with a bracket?
mJustWroteBracket = (c == ']');
}
}