// All downloads are FREE. Search and download functionality uses the official Maven repository.

// fr.dralagen.Csv2xml Maven / Gradle / Ivy

// The newest version!
package fr.dralagen;

/*
 * csv2xml
 *
 * Copyright (C) 2014-2015 dralagen, Stephan Kreutzer
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.FactoryConfigurationError;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.*;

/**
 * Converts CSV input into an in-memory XML DOM document and serializes it.
 *
 * Typical use: {@link #createNewDocument()}, optionally {@link #addNode(String)}
 * to position the cursor, one or more {@code convert(...)} calls, then
 * {@link #writeTo(OutputStream)}.
 *
 * The first CSV line supplies the element names; every following row becomes
 * one row element holding one child element per header column.
 *
 * Created on 14/11/14.
 *
 * @author dralagen
 */
public class Csv2xml {

    /** Factory for new documents; stays {@code null} if the constructor could not create it. */
    private DocumentBuilder domBuilder = null;

    /** Document currently being built, or {@code null} before the first creation. */
    private Document document;

    /** Cursor: the node that receives newly added nodes and converted rows. */
    private Node currentElement;

    /** When {@code true} the serialized output is not indented. */
    private boolean compact = false;

    /** Indent width passed to the transformer when indentation is enabled. */
    private int indentSize = 4;

    /**
     * Prepares the DOM builder. Configuration failures are reported on
     * {@code System.err} and leave the builder unset.
     */
    public Csv2xml() {
        try {
            DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
            domBuilder = domFactory.newDocumentBuilder();
        } catch (FactoryConfigurationError exp) {
            System.err.println(exp.toString());
        } catch (ParserConfigurationException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }
    }

    /**
     * Create a new document
     *
     * If the document already exist, it will be erased
     *
     * @param node name of root node
     * @throws IllegalStateException if the DOM builder could not be created by the constructor
     */
    public void createNewDocument(String node) {
        if (domBuilder == null) {
            // Fail with a meaningful message instead of a bare NullPointerException.
            throw new IllegalStateException(
                    "csv2xml: no XML DocumentBuilder available, see the error reported at construction");
        }

        document = domBuilder.newDocument();

        // Root element
        Element element = document.createElement(node);
        document.appendChild(element);

        currentElement = element;
    }

    /**
     * Create a new document with default name "document"
     *
     * If the document already exist, it will be erased
     */
    public void createNewDocument() {
        createNewDocument("document");
    }

    /**
     * Add a new child node under the current element and move the cursor onto it
     *
     * Create default document if not created
     *
     * @param node name of new node
     */
    public void addNode(String node) {
        if (document == null) {
            createNewDocument();
        }
        Element element = document.createElement(node);
        currentElement.appendChild(element);

        currentElement = element;
    }

    /**
     * Move the cursor of the current element to its parent node.
     *
     * The cursor never leaves the element tree: when there is no document yet,
     * or the cursor already sits on the root element, the call does nothing.
     */
    public void parent() {
        if (currentElement == null) {
            return;
        }
        Node parentNode = currentElement.getParentNode();
        // The parent of the root element is the Document itself; appending a
        // further element there would fail, so only step up onto elements.
        if (parentNode instanceof Element) {
            currentElement = parentNode;
        }
    }

    /**
     * Convert the csv input stream into the internal xml document
     *
     * The stream is decoded as UTF-8 and is not closed by this method. A
     * default document is created if none exists yet. I/O errors are printed
     * and end the conversion early; the rows read so far are kept.
     *
     * @param csv InputStream containing your csv file
     * @param delimiter the delimiter of csv fields; it is handed to
     *                  {@link String#split(String, int)} and is therefore
     *                  interpreted as a regular expression
     * @param nodeRow name of the node that receives all values of one row
     * @return number of rows converted
     * @throws org.w3c.dom.DOMException if a header is not a valid XML element name
     */
    public int convert(InputStream csv, String delimiter, String nodeRow) {

        // Same convenience as addNode(): work on a default document when the
        // caller did not create one explicitly.
        if (document == null) {
            createNewDocument();
        }

        int rowsCount = 0;
        try {
            LineNumberReader csvReader = new LineNumberReader(new InputStreamReader(csv, "UTF-8"));

            // Header row
            List<String> headers = new ArrayList<String>();
            String headerLine = csvReader.readLine();
            if (headerLine != null) {
                Collections.addAll(headers, headerLine.split(delimiter));
            }

            // Data rows
            List<String> rowValues;
            while ((rowValues = split(csvReader, delimiter, headers.size())) != null) {

                Element rowElement = document.createElement(nodeRow);
                currentElement.appendChild(rowElement);

                for (int col = 0; col < headers.size(); col++) {

                    String header = headers.get(col);
                    // Rows shorter than the header are padded with empty values.
                    String value = (col < rowValues.size()) ? rowValues.get(col) : "";

                    Element curElement;
                    try {
                        curElement = document.createElement(header);
                    } catch (org.w3c.dom.DOMException e) {
                        if (e.code == org.w3c.dom.DOMException.INVALID_CHARACTER_ERR) {
                            System.out.println("csv2xml: '" + header + "' isn't a valid XML tag name. Please check the first line of the CSV input file.");
                        }
                        throw e;
                    }

                    // A doubled quote inside a value is the CSV escape for one quote.
                    curElement.appendChild(document.createTextNode(value.replace("\"\"", "\"")));
                    rowElement.appendChild(curElement);
                }

                rowsCount++;
            }

        } catch (IOException e) {
            e.printStackTrace();
        }
        return rowsCount;
    }

    /**
     * Convert the csv input stream into the internal xml document and use the default
     * row node name "element"
     *
     * @param csv InputStream containing your csv file
     * @param delimiter the delimiter of csv fields (interpreted as a regular expression)
     * @return number of rows converted
     */
    public int convert(InputStream csv, String delimiter) {
        return convert(csv, delimiter, "element");
    }

    /**
     * Write the xml document in out
     *
     * You can use writeTo(System.out) to write the xml result into your console.
     * The output is UTF-8 encoded; {@code out} is flushed but not closed.
     * Failures are printed and leave {@code out} without partial content from
     * the transformation step.
     *
     * @param out Write the xml document in output
     */
    public void writeTo(OutputStream out) {
        try {
            // Serialize into memory first so a failing transformation does not
            // leave a truncated document in the caller's stream.
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            OutputStreamWriter writer = new OutputStreamWriter(buffer, "UTF-8");

            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            aTransformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            aTransformer.setOutputProperty(OutputKeys.INDENT, isCompact() ? "no" : "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", String.valueOf(indentSize));

            aTransformer.transform(new DOMSource(document), new StreamResult(writer));
            writer.flush();

            // Copy the bytes as they are; no need to decode and re-encode them.
            buffer.writeTo(out);
            out.flush();

        } catch (Exception exp) {
            exp.printStackTrace();
        }
    }

    /**
     * Reads one logical CSV row, starting outside of any quoted field.
     *
     * @param reader source of csv lines
     * @param delimiter field delimiter (regular expression)
     * @param limit number of fields expected in the row
     * @return the field values, or {@code null} at end of input
     * @throws IOException if reading fails
     */
    private List<String> split(LineNumberReader reader, String delimiter, int limit) throws IOException {
        return split(reader, delimiter, limit, false);
    }

    /**
     * Reads one line and splits it into fields, re-joining pieces that belong
     * to one quoted field. If the line ends inside a quoted field, or yields
     * fewer than {@code limit} fields, the next line is read recursively and
     * appended to the last field with a line feed in between.
     *
     * @param reader source of csv lines
     * @param delimiter field delimiter (regular expression)
     * @param limit number of fields still expected
     * @param fieldOpened {@code true} when this line continues a quoted field
     *                    opened on a previous line
     * @return the field values, or {@code null} at end of input
     * @throws IOException if reading fails
     */
    private List<String> split(LineNumberReader reader, String delimiter, int limit, boolean fieldOpened) throws IOException {

        String text = reader.readLine();

        if (text == null) {
            return null;
        }

        // A plain text.split(delimiter) would drop trailing empty fields.
        String[] splited = text.split(delimiter, Integer.MAX_VALUE);

        List<String> result = new ArrayList<String>();

        int i = 0;
        while (i < splited.length) {
            int j = i;

            String field = splited[i];

            // A piece that starts (or continues) a quoted field without closing it:
            // the field contains the delimiter and/or spans several lines.
            if (!field.equals("")
                    && (field.charAt(0) == '"' || fieldOpened)
                    && (field.charAt(field.length() - 1) != '"')) {

                if (!fieldOpened && field.length() > 1 && field.charAt(1) != '"') {
                    // drop the opening quote
                    field = field.substring(1);
                    fieldOpened = true;
                }

                if (fieldOpened) {
                    ++j;
                    // Glue following pieces back together until one ends with a quote.
                    while (j < splited.length
                            && (splited[j].equals("") || splited[j].charAt(splited[j].length() - 1) != '"')) {
                        field += delimiter + splited[j];
                        ++j;
                    }

                    // The closing piece is on this line.
                    if (j < splited.length) {
                        field += delimiter + splited[j];
                        // Remove only the closing quote (one character).
                        field = field.substring(0, field.length() - 1);
                        fieldOpened = false;
                    }
                }
            }

            // A piece that ends a quoted field: strip the surrounding quote(s).
            if (!field.equals("")
                    && (fieldOpened || field.charAt(0) == '"')
                    && field.charAt(field.length() - 1) == '"') {

                // When continuing from a previous line there is no opening quote here.
                int startIndex = (fieldOpened) ? 0 : 1;
                result.add(field.substring(startIndex, Math.max(field.length() - 1, 1)));
                fieldOpened = false;
            } else {
                result.add(field);
            }
            i = j + 1;
        }

        // Continue on the next line when a quoted field is still open (it
        // contains '\n', possibly in the last column) or the row is too short.
        if (fieldOpened || result.size() < limit) {
            List<String> extendsRowValues = split(reader, delimiter, limit - result.size() + 1, fieldOpened);
            if (extendsRowValues != null) {

                int rowValuesLastIndex = result.size() - 1;

                // The first value of the next line completes our last field.
                result.set(rowValuesLastIndex, result.get(rowValuesLastIndex) + "\n" + extendsRowValues.get(0));

                if (extendsRowValues.size() > 1) {
                    result.addAll(extendsRowValues.subList(1, extendsRowValues.size()));
                }
            }
        }

        return result;
    }

    /**
     * @return {@code true} if the output is written without indentation
     */
    public boolean isCompact() {
        return compact;
    }

    /**
     * @param compact {@code true} to write the output without indentation
     */
    public void setCompact(boolean compact) {
        this.compact = compact;
    }

    /**
     * @return the indent width used when the output is not compact
     */
    public int getIndentSize() {
        return indentSize;
    }

    /**
     * @param indentSize the indent width; negative values are stored as 0
     */
    public void setIndentSize(int indentSize) {
        this.indentSize = Math.max(indentSize, 0);
    }

    /**
     * Create an InputStream from an url or a path of the file system
     *
     * The name is first tried as an URL; if it is not a well-formed URL it is
     * opened as a local file.
     *
     * @see java.net.URL
     * @see java.io.FileInputStream
     *
     * @param inputName is an URL or a path of file
     * @return InputStream from inputName
     * @throws java.io.IOException if the input cannot be opened
     */
    public static InputStream getInputStream(String inputName) throws IOException {
        try {
            return new URL(inputName).openStream();
        } catch (MalformedURLException e) {
            // Not an URL: treat the name as a file system path.
            return new FileInputStream(new File(inputName));
        }
    }

    /**
     * Closes a stream if it was opened, reporting (not propagating) a failure.
     */
    private static void closeStream(Closeable stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Command line entry point: {@code csv2xml input.csv output.xml delimiter}.
     *
     * Exits with status 1 on wrong usage or when the input cannot be opened.
     *
     * @param args input name (path or URL), output path, field delimiter
     */
    public static void main (String[] args) {

        System.out.print("csv2xml Copyright (C) 2014-2015 dralagen, Stephan Kreutzer\n" +
                         "This program comes with ABSOLUTELY NO WARRANTY.\n" +
                         "This is free software, and you are welcome to redistribute it\n" +
                         "under certain conditions. See the GNU Affero General Public\n" +
                         "License, either version 3 of the License, or (at your option) any\n" +
                         "later version for details. Also, see the source code repository:\n" +
                         "https://github.com/dralagen/csv2xml/\n\n");

        if (args.length != 3) {
            System.out.println("Usage : csv2xml \"path/of/input/file.csv\" \"path/of/output/file.xml\" \";\"");
            System.exit(1);
        }

        Csv2xml converter = new Csv2xml();

        converter.createNewDocument();
        converter.addNode("data");

        InputStream csvInput;
        try {
            csvInput = Csv2xml.getInputStream(args[0]);
        } catch (IOException e) {
            // Without input there is nothing to convert; stop instead of
            // failing later on a null stream.
            e.printStackTrace();
            System.exit(1);
            return;
        }

        try {
            converter.convert(csvInput, args[2], "element");
        } finally {
            closeStream(csvInput);
        }

        OutputStream xmlOutput = null;
        try {
            xmlOutput = new FileOutputStream(args[1]);

            converter.writeTo(xmlOutput);

        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } finally {
            closeStream(xmlOutput);
        }

    }
}





// © 2015 - 2025 Weber Informatics LLC | Privacy Policy