sk.antons.jaul.xml.XmlFormat Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jaul Show documentation
Show all versions of jaul Show documentation
just another utility library
/*
* Copyright 2015 Anton Straka
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package sk.antons.jaul.xml;
import sk.antons.jaul.Is;
import sk.antons.jaul.util.TextFile;
/**
* Helper class for formating xml in string form. It does't parse xml as document so it can porduce some error outputs.
* But it is usefull for makeing some debug log outputs.
*
* String xml = ...;
* String oneline = XmlFormat.instance(xml, 10000).forceoneline().cutStringLiterals(400).format());
* String formatted = XmlFormat.instance(xml, 10000).forceoneline().indent(" ").format());
*
*
* @author antons
*/
public class XmlFormat {
private Source source = null;
private StringBuilder newxml = null;
private boolean oneline = true;
private boolean forceoneline = false;
private String indent = " ";
private int length;
private boolean cut = false;
private int cutLength = 1;
/**
* Create instance of formatter.
* @param xml xml to be formated
* @param treshhold if size of xml > treshhold no temporal copy of xml will be created. Otherwice it creates xml.toCharArray() copy is created.
*/
public XmlFormat(String xml, int treshhold) {
if(xml != null) {
this.length = xml.length();
}
if(treshhold >= length) this.source = CharsSource.instance(xml);
else this.source = StringSource.instance(xml);
}
/**
* Create instance of formatter.
* @param xml xml to be formated
* @param treshhold if size of xml > treshhold no temporal copy of xml will be created. Otherwice it creates xml.toCharArray() copy is created.
* @return new XmlFormat instance
*/
public static XmlFormat instance(String xml, int treshhold) { return new XmlFormat(xml, treshhold); }
/**
* Cause indended formatting (default is no formatted)
* @param indent string to indent (like two spaces " ")
* @return this
*/
public XmlFormat indent(String indent) {
this.indent = indent;
this.oneline = false;
return this;
}
/**
* Cause escaping of \r and \n chars.
* @return this
*/
public XmlFormat forceoneline() {
this.forceoneline = true;
return this;
}
/**
* Cause cutting of text literal to specified length.
* Useful for some debug logs of xml containng long binary data.
* @param length length to cut literals
* @return this
*/
public XmlFormat cutStringLiterals(int length) {
if(length < 4) length = 4;
this.cut = true;
this.cutLength = length;
return this;
}
/**
* Produce formated output.
* @return tormated output.
*/
public String format() {
if(source.isEmpty()) return source.original();
if(oneline) this.newxml = new StringBuilder(this.length + this.length/2);
else this.newxml = new StringBuilder(this.length);
int index = 0;
int depth = 0;
Token prevprevtoken = null;
Token prevtoken = null;
Position position = new Position();
position.reset();
next(index, position);
while(position.token != null) {
//append(index, position.index);
//System.out.println(" token " + position.token + " - " + position.index);
int pos = -1;
switch(position.token) {
case LEFT:
if(position.nonspacecharacters) {
append(index, position.index);
} else {
if(oneline) {
} else {
appendIndent(depth);
}
}
append(Token.LEFT);
depth++;
break;
case RIGHT:
appendTag(index, position.index);
append(Token.RIGHT);
//if(prevtoken == Token.CDATA_LEFT) depth--;
break;
case LEFT_END:
depth--;
if(((prevprevtoken == Token.LEFT) && (prevtoken == Token.RIGHT))
|| (prevtoken == Token.CDATA_RIGHT)) {
append(index, position.index);
} else {
if(position.nonspacecharacters) {
append(index, position.index);
} else {
if(oneline) {
} else {
appendIndent(depth);
}
}
}
append(Token.LEFT_END);
break;
case RIGHT_SINGLE:
depth--;
appendTag(index, position.index);
append(Token.RIGHT_SINGLE);
break;
case CDATA_LEFT:
append(index, position.index);
pos = find(position.index, Token.CDATA_RIGHT);
append(Token.CDATA_LEFT);
if(pos > -1) {
append(position.index+Token.CDATA_LEFT.size(), pos);
append(Token.CDATA_RIGHT);
position.index = pos;
position.token = Token.CDATA_RIGHT;
} else {
append(position.index+Token.CDATA_LEFT.size(), length);
position.index = length;
position.token = Token.CDATA_RIGHT;
append(index);
}
break;
case CDATA_RIGHT:
append(index, position.index);
append(Token.CDATA_RIGHT);
break;
case COMMENT_LEFT:
append(index, position.index);
pos = find(position.index, Token.COMMENT_RIGHT);
append(Token.COMMENT_LEFT);
if(pos > -1) {
append(position.index+Token.COMMENT_LEFT.size(), pos);
append(Token.COMMENT_RIGHT);
position.index = pos;
position.token = Token.COMMENT_RIGHT;
} else {
append(position.index+Token.COMMENT_LEFT.size(), length);
position.index = length;
position.token = Token.COMMENT_RIGHT;
append(index);
}
break;
case COMMENT_RIGHT:
append(index, position.index);
append(Token.COMMENT_RIGHT);
break;
case PROLOG_LEFT:
append(Token.PROLOG_LEFT);
break;
case PROLOG_RIGHT:
appendTag(index, position.index);
append(Token.PROLOG_RIGHT);
break;
default:
}
prevprevtoken = prevtoken;
prevtoken = position.token;
index = position.index + prevtoken.size();
position.reset();
next(index, position);
}
append(index);
return newxml.toString();
}
private int find(int index, Token token) {
return source.indexOf(token, index);
}
private void next(int index, Position position) {
for(int i = index; i < length; i++) {
char c = source.charAt(i);
switch(c) {
case '<':
if(i+1':
position.token = Token.RIGHT;
break;
case '-':
if(source.match(Token.COMMENT_RIGHT, i)) position.token = Token.COMMENT_RIGHT;
break;
case '/':
if(source.match(Token.RIGHT_SINGLE, i)) position.token = Token.RIGHT_SINGLE;
break;
case ']':
if(source.match(Token.CDATA_RIGHT, i)) position.token = Token.CDATA_RIGHT;
break;
case '?':
if(source.match(Token.PROLOG_RIGHT, i)) position.token = Token.PROLOG_RIGHT;
break;
default:
}
if(position.token != null) {
position.index = i;
return;
}
position.nonspacecharacters = position.nonspacecharacters
|| (!((c == ' ') || (c == '\n') || (c == '\t') || (c == '\r')));
}
}
private static class Position {
private int index;
private Token token;
private boolean nonspacecharacters;
public void reset() {
index = -1;
token = null;
nonspacecharacters = false;
}
public boolean isValid() { return index > -1; }
}
private void appendTag(int from, int to) {
char prev = 0;
char escape = 0;
for(int i = from; i < to; i++) {
char c = source.charAt(i);
if(escape > 0) {
newxml.append(c);
if(escape == c) escape = 0;
} else if(c == '\'') {
newxml.append(c);
escape = c;
} else if(c == '"') {
newxml.append(c);
escape = c;
} else {
if((c == '\n') || (c == '\t') || (c == '\r')) c = ' ';
if((c != ' ') || (prev != ' ')) {
newxml.append(c);
}
}
prev = c;
}
}
private void appendIndent(int depth) {
if(newxml.length() > 0) newxml.append('\n');
for(int i = 0; i < depth; i++) {
newxml.append(indent);
}
}
private void append(int from, int to) {
source.append(newxml, from, to, forceoneline, cut, cutLength);
}
private void append(int from) {
source.append(newxml, from, forceoneline, cut, cutLength);
}
private void append(Token token) {
newxml.append(token.pattern);
}
private static enum Token {
LEFT("<")
, RIGHT(">")
, LEFT_END("")
, RIGHT_SINGLE("/>")
, CDATA_LEFT("")
, COMMENT_LEFT("")
, PROLOG_LEFT("")
, PROLOG_RIGHT("?>")
;
private String value;
private char[] pattern;
private Token(String value) {
this.value = value;
this.pattern = value.toCharArray();
}
public int size() { return pattern.length; }
}
private static interface Source {
int length();
char charAt(int index);
boolean isEmpty();
String original();
int indexOf(Token token, int index);
boolean match(Token token, int index);
void append(StringBuilder sb, int from, int to, boolean forceoneline, boolean cut, int cutLength);
void append(StringBuilder sb, int from, boolean forceoneline, boolean cut, int cutLength);
}
private static class StringSource implements Source {
private String xml;
private int length;
public StringSource(String xml) {
this.xml = xml;
if(!Is.empty(xml)) {
this.length = xml.length();
}
}
public static StringSource instance(String xml) { return new StringSource(xml); }
@Override public int length() { return length; }
@Override public char charAt(int index) { return xml.charAt(index); }
@Override public boolean isEmpty() { return length == 0; }
@Override public String original() { return xml; }
@Override public int indexOf(Token token, int index) { return xml.indexOf(token.value, index); }
@Override
public boolean match(Token token, int index) {
if(index+token.pattern.length >= length) return false;
for(int i = 0; i < token.pattern.length; i++) {
char c1 = xml.charAt(i+index);
char c2 = token.pattern[i];
if(c1 != c2) return false;
}
return true;
}
@Override
public void append(StringBuilder sb, int from, int to, boolean forceoneline, boolean cut, int cutLength) {
String value = xml.substring(from, to);
boolean cutting = false;
if(cut && value.length() > cutLength) {
value = value.substring(0, cutLength - 3);
cutting = true;
}
if(forceoneline) {
value = value.replace("\r", "
");
value = value.replace("\n", "
");
}
sb.append(value);
if(cutting) sb.append("...");
}
@Override
public void append(StringBuilder sb, int from, boolean forceoneline, boolean cut, int cutLength) {
String value = xml.substring(from);
boolean cutting = false;
if(cut && value.length() > cutLength) {
value = value.substring(0, cutLength - 3);
cutting = true;
}
if(forceoneline) {
value = value.replace("\r", "
");
value = value.replace("\n", "
");
}
sb.append(value);
if(cutting) sb.append("...");
}
}
private static class CharsSource implements Source {
private String xml;
private char[] chars;
private int length;
public CharsSource(String xml) {
this.xml = xml;
if(!Is.empty(xml)) {
this.chars = xml.toCharArray();
this.length = xml.length();
}
}
public static CharsSource instance(String xml) { return new CharsSource(xml); }
@Override public int length() { return length; }
@Override public char charAt(int index) { return chars[index]; }
@Override public boolean isEmpty() { return length == 0; }
@Override public String original() { return xml; }
@Override
public int indexOf(Token token, int index) {
int len = token.pattern.length;
int len2 = length - len;
for(int i = index; i < len2; i++) {
boolean found = true;
for(int j = 0; j < len; j++) {
if(chars[i+j] != token.pattern[j]) {
found = false;
break;
}
}
if(found) return i;
}
return -1;
}
@Override
public boolean match(Token token, int index) {
int len = token.pattern.length;
if(len + index > length) return false;
for(int j = 0; j < len; j++) {
if(chars[index+j] != token.pattern[j]) return false;
}
return true;
}
@Override
public void append(StringBuilder sb, int from, int to, boolean forceoneline, boolean cut, int cutLength) {
int toto = to;
boolean cutting = false;
if(cut) {
if(from + cutLength < to) {
toto = from + cutLength - 3;
cutting = true;
}
}
if(forceoneline) {
for(int i = from; i < toto; i++) {
char c = chars[i];
switch(c) {
case 13:
sb.append("
");
break;
case 10:
sb.append("
");
break;
default:
sb.append(c);
}
}
} else {
sb.append(chars, from, toto - from);
}
if(cutting) sb.append("...");
}
@Override
public void append(StringBuilder sb, int from, boolean forceoneline, boolean cut, int cutLength) {
int toto = length;
boolean cutting = false;
if(cut) {
if(from + cutLength < length) {
toto = from + cutLength - 3;
cutting = true;
}
}
if(forceoneline) {
for(int i = from; i < toto; i++) {
char c = chars[i];
switch(c) {
case 13:
sb.append("
");
break;
case 10:
sb.append("
");
break;
default:
sb.append(c);
}
}
} else {
sb.append(chars, from, toto - from);
}
if(cutting) sb.append("...");
}
}
public static void main(String[] argv) {
String xml = TextFile.read("/tmp/aaa/a.xml", "utf-8");
//String xml = TextFile.read("/home/pdocs/year-2020/2020-06-03 - nksr - rfo/xsd/319-2-2v00-20140324 RFO WS Poskytnutie referenčných údajov zoznamu IFO online/PoskytnutieUdajovIFOOnlineWS-v1.0 - zásielka dáta.xml", "utf-8");
//String xml = "\njab\nlko \nsomarina\n ";
int treshhold = 1000000;
long timestart = System.currentTimeMillis();
System.out.println(" ------------");
System.out.println(XmlFormat.instance(xml, treshhold).forceoneline().cutStringLiterals(5).format());
System.out.println(" ------------");
System.out.println(XmlFormat.instance(xml, treshhold).indent(" ").cutStringLiterals(5).forceoneline().format());
System.out.println(" ------------");
System.out.println(XmlFormat.instance(XmlFormat.instance(xml, treshhold).forceoneline().format(), treshhold).indent(" ").forceoneline().format());
System.out.println(" ------------");
long timeend = System.currentTimeMillis();
System.out.println(" time: " + (timeend - timestart));
// int num = 0;
// long tstart = System.currentTimeMillis();
// for(int i = 0; i < 100; i++) {
// System.out.println(" ------------");
// num = num + XmlFormat.instance(xml, treshhold).format().length();
// num = num + XmlFormat.instance(xml, treshhold).indent(" ").format().length();
// //num = num + Xml.documentToString(Xml.document(xml), "utf-8", false, false).length();
// //num = num + Xml.documentToString(Xml.document(xml), "utf-8", true, false).length();
// }
// long tend = System.currentTimeMillis();
// System.out.println(" num: " + num);
// System.out.println(" time: " + (tend - tstart));
}
}