All downloads are free. Search and download functionality uses the official Maven repository.

cdc.io.tools.XmlStats Maven / Gradle / Ivy

package cdc.io.tools;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import cdc.io.xml.XmlUtils;
import cdc.util.cli.AbstractMainSupport;
import cdc.util.files.Resources;
import cdc.util.function.IterableUtils;
import cdc.util.strings.StringUtils;

/**
 * Utility that counts elements, attributes and texts in XML files.
 * <p>
 * All inputs are parsed with the same SAX handler, so the counts are
 * accumulated over all of them. The result is written to the output file as
 * semicolon-separated lines with the columns {@code path;kind;depth;count}.
 *
 * @author Damien Carbonne
 *
 */
public final class XmlStats {
    protected static final Logger LOGGER = LogManager.getLogger(XmlStats.class);
    protected final MainArgs margs;

    /**
     * Arguments of the utility.
     */
    public static class MainArgs {
        /** URLs of the XML inputs to analyze. */
        public final List<URL> inputs = new ArrayList<>();
        /** File that receives the generated statistics. */
        public File output;
    }

    private XmlStats(MainArgs margs) {
        this.margs = margs;
    }

    /**
     * Parses every input with one shared handler, then dumps the accumulated counts.
     *
     * @throws IOException When the parser cannot be created, when an input is not
     *             well-formed XML, or when reading or writing fails.
     * @throws Exception Kept in the signature for compatibility with existing callers.
     */
    private void execute() throws Exception {
        final SAXParserFactory factory = SAXParserFactory.newInstance();
        // NOTE(review): the factory is used with its default configuration (DTDs and
        // external entities are not disabled). Harden it if inputs can be untrusted.
        final Handler handler = new Handler();
        try {
            final SAXParser parser = factory.newSAXParser();
            for (final URL input : margs.inputs) {
                LOGGER.info("Analyze({})", input);
                // The parser does not own the stream: close it ourselves, even on failure.
                try (final InputStream in = input.openStream()) {
                    parser.parse(in, handler);
                }
            }
            handler.dump();
        } catch (final ParserConfigurationException | SAXException e) {
            // A parser that cannot be created is as fatal as a malformed input:
            // report both the same way instead of silently producing no output.
            throw new IOException(e);
        }
    }

    /**
     * Runs the analysis described by {@code margs}.
     *
     * @param margs The inputs to analyze and the output file to generate.
     * @throws Exception When parsing an input or generating the output fails.
     */
    public static void execute(MainArgs margs) throws Exception {
        final XmlStats instance = new XmlStats(margs);
        instance.execute();
    }

    /**
     * Command line entry point. Delegates to {@link MainSupport}.
     *
     * @param args The command line arguments.
     */
    public static void main(String[] args) {
        final MainSupport support = new MainSupport();
        support.main(args);
    }

    /**
     * Command line support: declares the options, converts them to
     * {@link MainArgs} and launches the analysis.
     */
    // NOTE(review): the type arguments <MainArgs, Void> are inferred from the
    // signatures of analyze() and execute() below; confirm against AbstractMainSupport.
    private static class MainSupport extends AbstractMainSupport<MainArgs, Void> {
        public MainSupport() {
            super(XmlStats.class, LOGGER);
        }

        @Override
        protected String getVersion() {
            return Config.VERSION;
        }

        @Override
        protected void addSpecificOptions(Options options) {
            // Several inputs are accepted (hasArgs), exactly one output (hasArg).
            options.addOption(Option.builder()
                                    .longOpt(INPUT)
                                    .desc("Name(s) of the XML input(s).")
                                    .hasArgs()
                                    .required()
                                    .build());
            options.addOption(Option.builder()
                                    .longOpt(OUTPUT)
                                    .desc("Name of the CSV file to generate.")
                                    .hasArg()
                                    .required()
                                    .build());
        }

        @Override
        protected MainArgs analyze(CommandLine cl) throws ParseException {
            final MainArgs margs = new MainArgs();

            for (final String input : cl.getOptionValues(INPUT)) {
                final URL url = Resources.getResource(input);
                if (url == null) {
                    throw new ParseException("Invalid url: " + input);
                }
                margs.inputs.add(url);
            }

            margs.output = getValueAsFile(cl, OUTPUT);
            return margs;
        }

        @Override
        protected Void execute(MainArgs margs) throws Exception {
            XmlStats.execute(margs);
            return null;
        }
    }

    /**
     * SAX handler that counts occurrences of each element, attribute and text path.
     * <p>
     * Paths look like {@code /a/b} for elements, {@code /a/b@c} for attributes
     * and {@code /a/b/text()} for texts.
     */
    private class Handler extends DefaultHandler {
        /** Number of occurrences of each path. */
        private final Map<String, Integer> counts = new HashMap<>();
        /** Names of the currently open elements, outermost first. */
        private final List<String> namesStack = new ArrayList<>();
        /** Paths of the currently open elements, outermost first. */
        private final List<String> pathsStack = new ArrayList<>();
        /**
         * {@code true} when the current run of character data has already been counted.
         * SAX may deliver one run of text through several {@code characters()} calls.
         */
        private boolean textCounted = false;

        public Handler() {
            super();
        }

        /**
         * @param path A path built by this handler.
         * @return {@code "ATTRIBUTE"} if the path contains {@code '@'},
         *         {@code "TEXT"} if it contains {@code "()"}, {@code "ELEMENT"} otherwise.
         */
        private String getPathKind(String path) {
            if (path.indexOf('@') != -1) {
                return "ATTRIBUTE";
            } else if (path.indexOf("()") == -1) {
                return "ELEMENT";
            } else {
                return "TEXT";
            }
        }

        /**
         * @param path A path built by this handler.
         * @return The number of {@code '/'} plus the number of {@code '@'} in the path.
         */
        private int getPathDepth(String path) {
            final int slashes = StringUtils.countMatches(path, '/');
            final int arobases = StringUtils.countMatches(path, '@');
            return slashes + arobases;
        }

        /**
         * Enters an element: records its name and its path.
         *
         * @param name The qualified name of the element.
         */
        private void push(String name) {
            namesStack.add(name);
            if (namesStack.size() == 1) {
                pathsStack.add("/" + name);
            } else {
                pathsStack.add(pathsStack.get(pathsStack.size() - 1) + "/" + name);
            }
        }

        /**
         * Leaves the current element.
         */
        private void pop() {
            namesStack.remove(namesStack.size() - 1);
            pathsStack.remove(pathsStack.size() - 1);
        }

        /**
         * @return The path of the current element, or {@code "/"} when no element is open.
         */
        private String getPath() {
            if (pathsStack.isEmpty()) {
                return "/";
            } else {
                return pathsStack.get(pathsStack.size() - 1);
            }
        }

        /**
         * Increments the count of the current path, optionally extended with a suffix.
         *
         * @param name The suffix to append to the current path
         *            ({@code "@attribute"} or {@code "/text()"}),
         *            or {@code null} to count the current element itself.
         */
        private void add(String name) {
            final String path = getPath();
            final String qname = name == null ? path : path + name;

            // Single lookup: a missing entry means this is the first occurrence.
            final Integer previous = counts.get(qname);
            counts.put(qname, previous == null ? 1 : previous + 1);
        }

        @Override
        public void startElement(String uri,
                                 String localName,
                                 String qName,
                                 Attributes attributes) throws SAXException {
            // An element boundary ends the current run of text.
            textCounted = false;
            push(qName);
            add(null);
            for (int index = 0; index < attributes.getLength(); index++) {
                add("@" + attributes.getQName(index));
            }
        }

        @Override
        public void endElement(String uri,
                               String localName,
                               String qName) throws SAXException {
            // An element boundary ends the current run of text.
            textCounted = false;
            pop();
        }

        @Override
        public void characters(char[] ch,
                               int start,
                               int length) throws SAXException {
            // Count a run of text at most once, whatever the number of chunks the
            // parser uses to deliver it (entity references typically split it).
            // Chunks reported as white space by XmlUtils.isWhiteSpace are ignored.
            if (!textCounted && !XmlUtils.isWhiteSpace(ch, start, length)) {
                add("/text()");
                textCounted = true;
            }
        }

        /**
         * Writes one line per path, sorted by path, to the output file.
         *
         * @throws IOException When the output file cannot be created or written.
         */
        public void dump() throws IOException {
            LOGGER.info("Generate: {}", margs.output);
            try (final PrintStream out = new PrintStream(margs.output)) {
                out.println("path;kind;depth;count");
                for (final String path : IterableUtils.toSortedList(counts.keySet())) {
                    out.println(path + ";" + getPathKind(path) + ";" + getPathDepth(path) + ";" + counts.get(path));
                }
                // PrintStream never throws on write failures: ask it explicitly.
                if (out.checkError()) {
                    throw new IOException("Failed to write " + margs.output);
                }
            }
            LOGGER.info("Done");
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy