// cdc.io.tools.XmlStats Maven / Gradle / Ivy
package cdc.io.tools;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import cdc.io.xml.XmlUtils;
import cdc.util.cli.AbstractMainSupport;
import cdc.util.files.Resources;
import cdc.util.function.IterableUtils;
import cdc.util.strings.StringUtils;
/**
 * Utility that counts elements, attributes and texts in XML files.
 * <p>
 * Every input is parsed with SAX; a single handler accumulates, for each
 * distinct path, its number of occurrences over all inputs. The result is
 * written to the output file as semicolon-separated lines with the columns
 * {@code path;kind;depth;count}.
 *
 * @author Damien Carbonne
 *
 */
public final class XmlStats {
    protected static final Logger LOGGER = LogManager.getLogger(XmlStats.class);
    protected final MainArgs margs;

    /**
     * Arguments of the utility.
     */
    public static class MainArgs {
        /** URLs of the XML inputs to analyze. */
        public final List<URL> inputs = new ArrayList<>();
        /** File that receives the generated statistics. */
        public File output;
    }

    private XmlStats(MainArgs margs) {
        this.margs = margs;
    }

    /**
     * Parses all inputs with one shared handler, then dumps the accumulated counts.
     *
     * @throws Exception When an input can not be opened or parsed, when the parser
     *             can not be created, or when the output can not be created.
     *             Parser configuration and SAX failures are wrapped into an
     *             {@link IOException}.
     */
    private void execute() throws Exception {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        final Handler handler = new Handler();
        try {
            final SAXParser parser = factory.newSAXParser();
            for (final URL input : margs.inputs) {
                LOGGER.info("Analyze({})", input);
                // Close the stream ourselves: relying on the parser to do it leaks
                // the underlying connection / file handle when parsing fails.
                try (final InputStream in = input.openStream()) {
                    parser.parse(in, handler);
                }
            }
            handler.dump();
        } catch (final ParserConfigurationException | SAXException e) {
            // A parser that can not be created is as fatal as a parse error:
            // report it instead of silently producing no output.
            throw new IOException(e);
        }
    }

    /**
     * Runs the analysis described by {@code margs}.
     *
     * @param margs The inputs to analyze and the output to generate.
     * @throws Exception When analysis or generation fails.
     */
    public static void execute(MainArgs margs) throws Exception {
        final XmlStats instance = new XmlStats(margs);
        instance.execute();
    }

    /**
     * Command line entry point. Delegates to {@link MainSupport}.
     *
     * @param args The command line arguments.
     */
    public static void main(String[] args) {
        final MainSupport support = new MainSupport();
        support.main(args);
    }

    /**
     * Command line support: declares the options, converts the parsed command
     * line to {@link MainArgs} and launches the analysis.
     */
    private static class MainSupport extends AbstractMainSupport<MainArgs, Void> {
        public MainSupport() {
            super(XmlStats.class, LOGGER);
        }

        @Override
        protected String getVersion() {
            return Config.VERSION;
        }

        @Override
        protected void addSpecificOptions(Options options) {
            options.addOption(Option.builder()
                                    .longOpt(INPUT)
                                    .desc("Name(s) of the XML input(s) .")
                                    .hasArgs()
                                    .required()
                                    .build());
            options.addOption(Option.builder()
                                    .longOpt(OUTPUT)
                                    .desc("Name of the CSV to generate.")
                                    .hasArg()
                                    .required()
                                    .build());
        }

        /**
         * Builds the arguments from the command line.
         *
         * @param cl The parsed command line.
         * @return The corresponding arguments.
         * @throws ParseException When an input name can not be resolved to a URL.
         */
        @Override
        protected MainArgs analyze(CommandLine cl) throws ParseException {
            final MainArgs margs = new MainArgs();
            for (final String input : cl.getOptionValues(INPUT)) {
                final URL url = Resources.getResource(input);
                if (url == null) {
                    throw new ParseException("Invalid url: " + input);
                }
                margs.inputs.add(url);
            }
            margs.output = getValueAsFile(cl, OUTPUT);
            return margs;
        }

        @Override
        protected Void execute(MainArgs margs) throws Exception {
            XmlStats.execute(margs);
            return null;
        }
    }

    /**
     * SAX handler that counts occurrences of each path.
     * <p>
     * Paths have the following forms:
     * <ul>
     * <li>{@code /a/b} for an element,
     * <li>{@code /a/b@x} for an attribute,
     * <li>{@code /a/b/text()} for a text.
     * </ul>
     * The same instance is used for all inputs, so counts are cumulated.
     */
    private class Handler extends DefaultHandler {
        /** Number of occurrences of each path. */
        private final Map<String, Integer> counts = new HashMap<>();
        /** Paths of the currently open elements, outermost first. */
        private final List<String> pathsStack = new ArrayList<>();
        /**
         * {@code true} when the current run of character data has already been counted.
         * SAX may deliver one run in several {@code characters()} calls.
         */
        private boolean textCounted = false;

        public Handler() {
            super();
        }

        /**
         * @param path A path built by this handler.
         * @return {@code "ATTRIBUTE"} if the path contains {@code '@'},
         *         {@code "TEXT"} if it contains {@code "()"}, {@code "ELEMENT"} otherwise.
         */
        private String getPathKind(String path) {
            if (path.indexOf('@') != -1) {
                return "ATTRIBUTE";
            } else if (path.indexOf("()") == -1) {
                return "ELEMENT";
            } else {
                return "TEXT";
            }
        }

        /**
         * @param path A path built by this handler.
         * @return The number of {@code '/'} and {@code '@'} found in {@code path}.
         */
        private int getPathDepth(String path) {
            final int slashes = StringUtils.countMatches(path, '/');
            final int arobases = StringUtils.countMatches(path, '@');
            return slashes + arobases;
        }

        /**
         * Enters an element: its path is the path of its parent (if any),
         * followed by {@code "/"} and its name.
         *
         * @param name The element qualified name.
         */
        private void push(String name) {
            if (pathsStack.isEmpty()) {
                pathsStack.add("/" + name);
            } else {
                pathsStack.add(pathsStack.get(pathsStack.size() - 1) + "/" + name);
            }
        }

        /** Leaves the current element. */
        private void pop() {
            pathsStack.remove(pathsStack.size() - 1);
        }

        /**
         * @return The path of the current element, or {@code "/"} when no element is open.
         */
        private String getPath() {
            if (pathsStack.isEmpty()) {
                return "/";
            } else {
                return pathsStack.get(pathsStack.size() - 1);
            }
        }

        /**
         * Increments the count of the current path extended with {@code name}.
         *
         * @param name The suffix to append to the current path, or {@code null}
         *            to count the current path itself.
         */
        private void add(String name) {
            final String path = getPath();
            final String qname = name == null ? path : path + name;
            final Integer previous = counts.get(qname);
            counts.put(qname, previous == null ? 1 : previous + 1);
        }

        @Override
        public void startElement(String uri,
                                 String localName,
                                 String qName,
                                 Attributes attributes) throws SAXException {
            // An element boundary ends the current run of character data.
            textCounted = false;
            push(qName);
            add(null);
            for (int index = 0; index < attributes.getLength(); index++) {
                add("@" + attributes.getQName(index));
            }
        }

        @Override
        public void endElement(String uri,
                               String localName,
                               String qName) throws SAXException {
            // An element boundary ends the current run of character data.
            textCounted = false;
            pop();
        }

        @Override
        public void characters(char[] ch,
                               int start,
                               int length) throws SAXException {
            // Count a run of character data once, on its first non-whitespace
            // chunk: the parser is free to split a run into several chunks.
            if (!textCounted && !XmlUtils.isWhiteSpace(ch, start, length)) {
                textCounted = true;
                add("/text()");
            }
        }

        /**
         * Writes the counts to the output file, one line per path, sorted by path,
         * after a {@code path;kind;depth;count} header line.
         *
         * @throws FileNotFoundException When the output file can not be created.
         */
        public void dump() throws FileNotFoundException {
            LOGGER.info("Generate: {}", margs.output);
            try (final PrintStream out = new PrintStream(margs.output)) {
                out.println("path;kind;depth;count");
                for (final String path : IterableUtils.toSortedList(counts.keySet())) {
                    out.println(path + ";" + getPathKind(path) + ";" + getPathDepth(path) + ";" + counts.get(path));
                }
            }
            LOGGER.info("Done");
        }
    }
}