com.github.lespaul361.commons.commonroutines.utilities.Streams.XML.XMLStream Maven / Gradle / Ivy

Go to download
/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package com.github.lespaul361.commons.commonroutines.utilities.Streams.XML;

import com.github.lespaul361.commons.commonroutines.utilities.Streams.WriterOutputStream;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.Charset;
import java.text.MessageFormat;
import java.util.ArrayList;
import java.util.List;

/**
 * Stream for reading and writing XML documents
 *
 * @author Charles Hamilton
 */
public class XMLStream {

    /** Root node of the document; written out by the save methods and returned by getRootNode(). */
    RootNode root;
    /**
     * MessageFormat pattern for the XML declaration written as the first line
     * of a saved document. Argument 0 is the encoding name and argument 1 the
     * stand-alone flag ("yes" or "no"); both are supplied by getHeaderLine().
     * The pattern contains no single quotes, so MessageFormat needs no escaping.
     */
    private final String XML_HEADER = "<?xml version=\"1.0\" encoding=\"{0}\" standalone=\"{1}\"?>";
    /** Presumably the XML version of the document; not read by any code visible in this class. */
    private float version = 1.0f;
//completedString = completedString.replaceAll("\n", System.getProperty("line.separator"));

    /**
     * Encodings that can be declared for an XML document. UTF-8 is the
     * default.
     * <p>
     * The writing code derives the charset name from the constant name by
     * replacing every underscore with a hyphen (for example {@code UTF_8}
     * becomes {@code "UTF-8"}), so the constant names must not be changed
     * without updating that conversion.
     */
    public enum StringEncodings {

        /**
         * UTF-8 is a character encoding capable of encoding all possible
         * characters, or code points, defined by Unicode. It was originally
         * designed by Ken Thompson and Rob Pike. The encoding is
         * variable-length and uses 8-bit code units.
         */
        UTF_8,
        /**
         * UTF-16 (16-bit Unicode Transformation Format) is a character encoding
         * capable of encoding all 1,112,064 possible characters in Unicode. The
         * encoding is variable-length, as code points are encoded with one or
         * two 16-bit code units.
         */
        UTF_16,
        /**
         * UCS-2 form of the Universal Coded Character Set (UCS). The UCS is a
         * standard set of characters defined by the International Standard
         * ISO/IEC 10646, Information technology — Universal Coded Character Set
         * (UCS) (plus amendments to that standard), which is the basis of many
         * character encodings. The UCS contains over 128,000 abstract
         * characters, each identified by an unambiguous name and an integer
         * number called its code point.
         */
        ISO_10646_UCS_2,
        /**
         * UCS-4 form of the Universal Coded Character Set (UCS). The UCS is a
         * standard set of characters defined by the International Standard
         * ISO/IEC 10646, Information technology — Universal Coded Character Set
         * (UCS) (plus amendments to that standard), which is the basis of many
         * character encodings. The UCS contains over 128,000 abstract
         * characters, each identified by an unambiguous name and an integer
         * number called its code point.
         */
        ISO_10646_UCS_4,
        /**
         * ISO 8859-1 encodes what it refers to as "Latin alphabet no. 1",
         * consisting of 191 characters from the Latin script. This
         * character-encoding scheme is used throughout the Americas, Western
         * Europe, Oceania, and much of Africa. It is also commonly used in most
         * standard romanizations of East-Asian languages.
         */
        ISO_8859_1,
        /**
         * ISO/IEC 8859-2:1999, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 2: Latin alphabet No. 2, is part of the
         * ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1987. It is informally referred to as
         * "Latin-2".
         */
        ISO_8859_2,
        /**
         * ISO/IEC 8859-3:1999, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 3: Latin alphabet No. 3, is part of the
         * ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1988. It is informally referred to as
         * Latin-3 or South European.
         */
        ISO_8859_3,
        /**
         * ISO/IEC 8859-4:1998, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 4: Latin alphabet No. 4, is part of the
         * ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1988. It is informally referred to as
         * Latin-4 or North European.
         */
        ISO_8859_4,
        /**
         * ISO/IEC 8859-5:1999, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 5: Latin/Cyrillic alphabet, is part of
         * the ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1988. It is informally referred to as
         * Latin/Cyrillic. It was designed to cover languages using a Cyrillic
         * alphabet such as Bulgarian, Belarusian, Russian, Serbian and
         * Macedonian but was never widely used.
         */
        ISO_8859_5,
        /**
         * ISO/IEC 8859-6:1999, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 6: Latin/Arabic alphabet, is part of
         * the ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1987. It is informally referred to as
         * Latin/Arabic. It was designed to cover languages using the Arabic
         * alphabet (though it does not include the extra letters needed to
         * write most Arabic-script languages other than Arabic itself, such as
         * Persian, Urdu, etc.).
         */
        ISO_8859_6,
        /**
         * ISO/IEC 8859-7:2003, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 7: Latin/Greek alphabet, is part of the
         * ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1987. It is informally referred to as
         * Latin/Greek. It was designed to cover the modern Greek language.
         */
        ISO_8859_7,
        /**
         * ISO/IEC 8859-8:1999, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 8: Latin/Hebrew alphabet, is part of
         * the ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1987. It is informally referred to as
         * Latin/Hebrew. ISO/IEC 8859-8 covers all the Hebrew letters, but no
         * Hebrew vowel signs.
         */
        ISO_8859_8,
        /**
         * ISO/IEC 8859-9:1999, Information technology — 8-bit single-byte coded
         * graphic character sets — Part 9: Latin alphabet No. 5, is part of the
         * ISO/IEC 8859 series of ASCII-based standard character encodings,
         * first edition published in 1989. It is informally referred to as
         * Latin-5 or Turkish. It was designed to cover the Turkish language,
         * and to be of more use than the ISO/IEC 8859-3 encoding. It is
         * identical to ISO/IEC 8859-1 except for six replacements of
         * Icelandic characters with characters unique to the Turkish alphabet.
         */
        ISO_8859_9,
        /**
         * Text in the ISO-2022-JP family starts in ASCII and switches to other
         * character sets of ISO 2022 through limited combinations of escape
         * sequences. All the characters are encoded with 7 bits only.
         */
        ISO_2022_JP,
        /**
         * Shift JIS (Shift Japanese Industrial Standards, also SJIS, MIME name
         * Shift_JIS) is a character encoding for the Japanese language,
         * originally developed by a Japanese company called ASCII Corporation
         * in conjunction with Microsoft and standardized as JIS X 0208 Appendix
         * 1. 1.1% of all web pages used Shift JIS in January 2016.
         */
        Shift_JIS,
        /**
         * Extended Unix Code (EUC) is a multibyte character encoding system
         * used primarily for Japanese, Korean, and simplified Chinese.
         */
        EUC_JP;

    }
    /** Encoding named in the header line and used when opening a file writer; starts as UTF-8. */
    private StringEncodings encoding = StringEncodings.UTF_8;
    /** Stand-alone flag of the document; getHeaderLine() renders it as "yes" or "no". */
    private boolean isStandAlone = true;
    /** In-memory copy of the input, filled by setUpHeaderFromBufferedReader and re-read by read(). */
    private StringReader bsr;

    /**
     * Reports whether the document is currently flagged as stand-alone.
     *
     * @return the stand-alone flag
     */
    boolean getIsStandAlone() {
        return this.isStandAlone;
    }

    /**
     * Changes the stand-alone flag of the document.
     *
     * @param isStandAlone the new stand-alone flag
     */
    void setIsStandAlone(boolean isStandAlone) {
        final boolean requested = isStandAlone;
        this.isStandAlone = requested;
    }

    /**
     * Returns the encoding currently selected for this document.
     *
     * @return the selected encoding
     */
    StringEncodings getEncoding() {
        return this.encoding;
    }

    /**
     * Selects the encoding for this document.
     *
     * @param encoding the encoding to use from now on
     */
    void setEncoding(StringEncodings encoding) {
        final StringEncodings requested = encoding;
        this.encoding = requested;
    }

    /**
     * Creates an empty document whose root node carries the given name.
     *
     * @param rootNodeName name of the root node; "" is allowed
     */
    public XMLStream(String rootNodeName) {
        this.root = new RootNode();
        this.root.setName(rootNodeName);
    }

    /**
     * Creates an empty document whose root node carries the given name and a
     * single attribute.
     *
     * @param rootNodeName name of the root node; "" is allowed
     * @param attribute the attribute to add to the root node
     */
    public XMLStream(String rootNodeName, com.github.lespaul361.commons.commonroutines.utilities.Streams.XML.Attribute attribute) {
        // The single-argument constructor builds and names the root node.
        this(rootNodeName);
        this.root.addAttribute(attribute);
    }

    /**
     * Creates an empty document whose root node carries the given name and
     * the given attributes, added in array order.
     *
     * @param rootNodeName name of the root node; "" is allowed
     * @param attributes the attributes to add to the root node; {@code null}
     * is treated like an empty array
     */
    public XMLStream(String rootNodeName, com.github.lespaul361.commons.commonroutines.utilities.Streams.XML.Attribute[] attributes) {
        RootNode node = new RootNode();
        node.setName(rootNodeName);
        root = node;
        if (attributes == null) {
            // Nothing to add; previously this failed with a NullPointerException.
            return;
        }
        for (com.github.lespaul361.commons.commonroutines.utilities.Streams.XML.Attribute att : attributes) {
            root.addAttribute(att);
        }
    }

    /**
     * Constructs a new XMLStream by reading an XML file.
     * <p>
     * The file is read with the platform default charset, because that is
     * what {@link FileReader} uses.
     *
     * @param file the File to read
     * @throws Exception if the file cannot be opened or read; the stack trace
     * is also printed to standard output before the exception is rethrown
     */
    public XMLStream(File file) throws Exception {
        // read() detects the header itself, so no separate header pass over the file is needed.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            read(reader);
        } catch (Exception ex) {
            // Print the real trace; getStackTrace().toString() only printed the array identity.
            ex.printStackTrace(System.out);
            throw ex;
        }
    }

    /**
     * Builds the document from XML text supplied by a reader.
     *
     * @param reader the BufferedReader that delivers the XML text
     */
    public XMLStream(BufferedReader reader) {
        this.read(reader);
    }

    private void read(BufferedReader reader) {
        String tmp = "";
        String currentLine = "";
        String lastLine = "";
        int pos1 = -1;
        int pos2 = -1;
        boolean hasHeader = setUpHeaderFromBufferedReader(reader);
        reader = new BufferedReader(bsr);
        Node parent = null;
        Node currentNode = null;
        String nodeName = "";
        Attribute[] attributes = null;

        try {
            if (hasHeader) {
                reader.readLine();
            }
            for (currentLine = reader.readLine(); currentLine != null; currentLine = reader.readLine()) {
                currentLine = currentLine.trim();
                if (!lastLine.equalsIgnoreCase("")) {
                    lastLine = lastLine + currentLine + "/n";
                    currentLine = lastLine;
                }
                if (!currentLine.equalsIgnoreCase("")) {
                    if ((currentLine.length() > 1) && (currentLine.substring(0, 2).equalsIgnoreCase("<\\"))) {
                        //end of node
                    } else if ((currentLine.length() > 3) && (currentLine.substring(0, 4).equalsIgnoreCase(" 1) && (currentLine.substring(0, 2).equalsIgnoreCase("")) {
            if (!line.contains("/>") & !line.contains("") + 1 == line.length()) {
                    return true;
                }
            }
        }

        return false;
    }

    /**
     * Tells whether a line consists solely of the closing tag of the named
     * element, for example {@code </item>} for tag name {@code item}. The
     * comparison ignores case and surrounding whitespace.
     *
     * @param line the line to inspect
     * @param tagName the element name to look for
     * @return true when the line is exactly the closing tag; false when it is
     * not, or when either argument is null or the tag name is empty
     */
    private boolean isEndTag(String line, String tagName) {
        if (line == null || tagName == null || tagName.isEmpty()) {
            return false;
        }
        // Compare against the closing tag itself; comparing with "" ignored the tag name.
        return line.trim().equalsIgnoreCase("</" + tagName + ">");
    }

    /**
     * Tells whether a line holds a whole element: the opening tag of the
     * named element followed, later on the same line, by its closing tag.
     *
     * @param line the line to inspect
     * @param tagName the element name to look for
     * @return true when both the opening and the closing tag are present in
     * that order; false otherwise, or when either argument is null
     */
    private boolean isCompleteTag(String line, String tagName) {
        if (line == null || tagName == null) {
            return false;
        }
        int openPos = line.indexOf("<" + tagName);
        if (openPos < 0) {
            return false;
        }
        // Require the matching closing tag; contains("") was true for every line.
        return line.indexOf("</" + tagName + ">", openPos + 1) > openPos;
    }

    /**
     * Tells whether a line appears to carry attributes, judged only by the
     * presence of an equals sign anywhere in it.
     *
     * @param line the line to inspect
     * @return true when the line contains "="
     */
    private boolean hasAttributes(String line) {
        return line.indexOf("=") >= 0;
    }

    /**
     * Extracts the text between the first ">" of a line and the next "<"
     * after it.
     *
     * @param line the line to inspect
     * @return the text between the two markers, or "" when no "<" follows
     * the first ">"
     */
    private String getTagValue(String line) {
        final int valueStart = line.indexOf(">") + 1;
        final int valueEnd = line.indexOf("<", valueStart);
        return (valueEnd >= valueStart) ? line.substring(valueStart, valueEnd) : "";
    }

    /**
     * Extracts the element name from a line that starts with an opening tag.
     * The name is the text after the first character of the line up to the
     * first space inside the tag, or up to the first ">" when the tag has no
     * space.
     *
     * @param line the line to inspect; its first character is skipped and is
     * expected to be "<"
     * @return the trimmed element name, or "" when the line is null, has no
     * ">" or has nothing between its first character and the end of the name
     */
    private String getTagName(String line) {
        if (line == null) {
            return "";
        }
        int closePos = line.indexOf(">");
        if (closePos < 1) {
            // No '>' at all, or nothing in front of it: there is no name to extract.
            return "";
        }
        int spacePos = line.indexOf(" ");
        // Only a space inside the tag (before '>') ends the name.
        int nameEnd = (spacePos >= 0 && spacePos < closePos) ? spacePos : closePos;
        if (nameEnd < 1) {
            return "";
        }
        return line.substring(1, nameEnd).trim();
    }

    /**
     * Returns the attribute part of an opening tag: the text from the first
     * space of the line (inclusive) up to the first ">" (exclusive).
     * <p>
     * NOTE(review): for a self-closing tag the returned text still ends with
     * "/"; callers appear to be expected to cope with that.
     *
     * @param line the line to inspect
     * @return the attribute text including its leading space, or "" when the
     * line is null or has no space in front of its first ">"
     */
    private String getAttributeLine(String line) {
        if (line == null) {
            return "";
        }
        int spacePos = line.indexOf(" ");
        int closePos = line.indexOf(">");
        if (spacePos < 0 || closePos < 0 || spacePos > closePos) {
            // No attribute section inside the tag; substring() would have thrown here.
            return "";
        }
        return line.substring(spacePos, closePos);
    }

    /**
     * Opens the file and hands it to setUpHeaderFromBufferedReader. Any
     * failure is printed to standard output and reported as "no header".
     *
     * @param file the file to inspect
     * @return the result of setUpHeaderFromBufferedReader, or false when an
     * exception occurred
     */
    private boolean setUpHeaderFromFile(File file) {
        boolean headerFound;
        try (BufferedReader input = new BufferedReader(new FileReader(file))) {
            headerFound = setUpHeaderFromBufferedReader(input);
        } catch (Exception failure) {
            failure.printStackTrace(System.out);
            headerFound = false;
        }
        return headerFound;
    }

    private boolean setUpHeaderFromBufferedReader(BufferedReader reader) {
        String tmp = "";
        String currentLine = "";
        boolean hasHeader = false;
        StringWriter sw = new StringWriter();
        BufferedWriter writer = new BufferedWriter(sw);
        try {
            currentLine = reader.readLine();
            writer.write(currentLine);
            writer.newLine();
            currentLine = currentLine.trim();
            while ((tmp = reader.readLine()) != null) {
                writer.write(tmp);
                writer.newLine();
            }
            reader.close();
            writer.close();
            bsr = new StringReader(sw.toString());
            reader = new BufferedReader(bsr);
            tmp = "";

            if ((currentLine.length() > 1) && (currentLine.substring(0, 2).equalsIgnoreCase(" ret = new ArrayList<>();
        String name = "";
        String value = "";
        String[] splitPair;
        for (String curPair : nameValues) {
            if (curPair.contains("=")) {
                splitPair = curPair.split("=");
                name = splitPair[0];
                value = splitPair[1];
                value = value.replace("\"", "");
                ret.add(new Attribute(value, name));
            }
        }
        return ret.toArray(new Attribute[ret.size()]);
    }

    /**
     * Reads the quoted value of an attribute from a line such as
     * {@code encoding="UTF-8"}. The attribute name is matched without regard
     * to case, whitespace around the equals sign is tolerated, and both
     * double and single quotes are accepted.
     *
     * @param line the line to search
     * @param attributeName the name of the attribute to look for
     * @return the trimmed text between the quotes, or "" when the attribute,
     * its equals sign or a properly quoted value cannot be found
     */
    private String readAttributeValueFromString(String line, String attributeName) {
        if (line == null || attributeName == null) {
            return "";
        }
        // Case-insensitive search that keeps indexes valid for the original line.
        int nameLength = attributeName.length();
        int namePos = -1;
        for (int i = 0; i + nameLength <= line.length(); i++) {
            if (line.regionMatches(true, i, attributeName, 0, nameLength)) {
                namePos = i;
                break;
            }
        }
        if (namePos < 0) {
            return "";
        }
        int equalsPos = line.indexOf('=', namePos);
        if (equalsPos < 0) {
            return "";
        }
        // Skip optional whitespace between '=' and the opening quote.
        int openPos = equalsPos + 1;
        while (openPos < line.length() && Character.isWhitespace(line.charAt(openPos))) {
            openPos++;
        }
        if (openPos >= line.length()) {
            return "";
        }
        char quote = line.charAt(openPos);
        if (quote != '"' && quote != '\'') {
            return "";
        }
        int closePos = line.indexOf(quote, openPos + 1);
        if (closePos < 0) {
            return "";
        }
        return line.substring(openPos + 1, closePos).trim();
    }

    /**
     * Writes the document to a writer using the encoding currently set on
     * this stream.
     *
     * @param writer the Writer that receives the document
     * @param isStandAlone whether the document is declared stand-alone
     */
    public void save(Writer writer, boolean isStandAlone) {
        final StringEncodings currentEncoding = this.encoding;
        this.save(writer, currentEncoding, isStandAlone);
    }

    /**
     * Stores the stand-alone flag on this stream and then writes the
     * document to a writer with the given encoding.
     *
     * @param writer the Writer that receives the document
     * @param encoding the encoding to record for the document
     * @param isStandAlone whether the document is declared stand-alone
     */
    public void save(Writer writer, StringEncodings encoding, boolean isStandAlone) {
        final boolean standAloneFlag = isStandAlone;
        this.isStandAlone = standAloneFlag;
        this.save(writer, encoding);
    }

    /**
     * Stores the encoding on this stream and then writes the document to a
     * writer.
     *
     * @param writer the Writer that receives the document
     * @param encoding the encoding to record for the document
     */
    public void save(Writer writer, StringEncodings encoding) {
        final StringEncodings requested = encoding;
        this.encoding = requested;
        this.save(writer);
    }

    /**
     * Writes the XML document to the Writer: the header line, a blank line,
     * and then either the root node or, when the root node has an empty
     * name, each of its children. Line feeds in the generated text are
     * converted to the platform line separator.
     * <p>
     * The writer is closed when this method returns. A failure does not
     * propagate; it is reported with a stack trace on standard output.
     *
     * @param writer the Writer to write the document to
     */
    public void save(Writer writer) {
        final String newLine = System.getProperty("line.separator");
        // try-with-resources flushes and closes the writer, so no explicit close() is needed.
        try (BufferedWriter bw = getBufferedWriter(writer)) {
            bw.write(getHeaderLine());
            bw.write(newLine);
            bw.write(newLine);
            if (!this.root.getName().equalsIgnoreCase("")) {
                bw.write(getNodeToString(this.root, 0).replace("\n", newLine));
            } else {
                // An unnamed root only groups its children; write them as top-level nodes.
                for (Node curNode : this.root.getChildrenNodes()) {
                    bw.write(getNodeToString(curNode, 0).replace("\n", newLine));
                }
            }
        } catch (Exception e) {
            // getMessage() alone may be null and hides the exception type.
            e.printStackTrace(System.out);
        }
    }

    /**
     * Writes the document to a file using the encoding currently set on
     * this stream.
     *
     * @param filePath the full path of the file to write
     * @param isStandAlone whether the document is declared stand-alone
     */
    public void save(String filePath, boolean isStandAlone) {
        final StringEncodings currentEncoding = this.encoding;
        this.save(filePath, currentEncoding, isStandAlone);
    }

    /**
     * Stores the stand-alone flag on this stream and then writes the
     * document to a file with the given encoding.
     *
     * @param filePath the full path of the file to write
     * @param encoding the encoding to use for the document
     * @param isStandAlone whether the document is declared stand-alone
     */
    public void save(String filePath, StringEncodings encoding, boolean isStandAlone) {
        final boolean standAloneFlag = isStandAlone;
        this.isStandAlone = standAloneFlag;
        this.save(filePath, encoding);
    }

    /**
     * Stores the encoding on this stream and then writes the document to a
     * file.
     *
     * @param filePath the full path of the file to write
     * @param encoding the encoding to use for the document
     */
    public void save(String filePath, StringEncodings encoding) {
        final StringEncodings requested = encoding;
        this.encoding = requested;
        this.save(filePath);
    }

    /**
     * Writes the XML document to the specified file path: the header line, a
     * blank line, and then either the root node or, when the root node has
     * an empty name, each of its children. Line feeds in the generated text
     * are converted to the platform line separator.
     * <p>
     * A failure does not propagate; it is reported with a stack trace on
     * standard output.
     *
     * @param filePath the full path of the file to save the XML document to
     */
    public void save(String filePath) {
        final String newLine = System.getProperty("line.separator");
        // try-with-resources flushes and closes the file, so no explicit close() is needed.
        try (BufferedWriter bw = getBufferedWriter(filePath)) {
            bw.write(getHeaderLine());
            bw.write(newLine);
            bw.write(newLine);
            if (!this.root.getName().equalsIgnoreCase("")) {
                bw.write(getNodeToString(this.root, 0).replace("\n", newLine));
            } else {
                // An unnamed root only groups its children; write them as top-level nodes.
                for (Node curNode : this.root.getChildrenNodes()) {
                    bw.write(getNodeToString(curNode, 0).replace("\n", newLine));
                }
            }
        } catch (Exception e) {
            // getMessage() alone may be null and hides the exception type.
            e.printStackTrace(System.out);
        }
    }

    /**
     * Opens the file for writing with the encoding currently set on this
     * stream. The charset name is the enum constant name with underscores
     * replaced by hyphens; when the runtime does not know that charset,
     * UTF-8 is used instead.
     * <p>
     * NOTE(review): on the UTF-8 fallback the header line still names the
     * requested encoding, so the declaration and the bytes can disagree.
     *
     * @param filePath the full path of the file to write
     * @return a buffered writer on the file; the caller must close it
     * @throws Exception if the file cannot be opened
     */
    private BufferedWriter getBufferedWriter(String filePath) throws Exception {
        String encodingString = this.encoding.name().replace("_", "-");

        // Resolve the charset before touching the file so a failed lookup cannot leak a stream.
        Charset charset;
        try {
            charset = Charset.forName(encodingString);
        } catch (IllegalArgumentException unsupported) {
            // Covers both illegal and unsupported charset names.
            charset = Charset.forName("UTF-8");
        }

        FileOutputStream fo = new FileOutputStream(filePath);
        try {
            return new BufferedWriter(new OutputStreamWriter(fo, charset));
        } catch (RuntimeException e) {
            fo.close();
            throw e;
        }
    }

    /**
     * Wraps the given writer in a BufferedWriter, or returns it unchanged
     * when it already is one. A Writer deals in characters, so the encoding
     * of this stream plays no part here.
     * <p>
     * NOTE(review): the previous version also constructed a
     * WriterOutputStream around the writer and never used or closed it; that
     * object is no longer created. Confirm its constructor has no side
     * effect that callers rely on.
     *
     * @param writer the writer to buffer
     * @return a buffered view of the writer; closing it closes the writer
     * @throws Exception declared for compatibility with existing callers
     */
    private BufferedWriter getBufferedWriter(Writer writer) throws Exception {
        if (writer instanceof BufferedWriter) {
            return (BufferedWriter) writer;
        }
        return new BufferedWriter(writer);
    }

    /**
     * Builds an indentation string made of tab characters.
     *
     * @param tabCount the number of tabs wanted
     * @return a string of that many tabs; "" when the count is zero or
     * negative
     */
    private String getTabString(int tabCount) {
        final StringBuilder tabs = new StringBuilder();
        int remaining = tabCount;
        while (remaining > 0) {
            tabs.append("\t");
            remaining--;
        }
        return tabs.toString();
    }

    /**
     * Renders a node and, recursively, its children as XML text. Each
     * element is indented with one tab per level and lines end with "\n".
     * A node with a non-empty value is written on one line as
     * {@code <name attrs>value</name>}; otherwise its children are written
     * one level deeper, followed by the closing tag.
     * <p>
     * NOTE(review): names, attribute values and text are written verbatim;
     * no XML escaping is applied.
     *
     * @param node the node to render
     * @param tabLevel the indentation level of the node
     * @return the XML text of the node, ending with a line feed
     */
    private String getNodeToString(Node node, Integer tabLevel) {
        final int level = tabLevel;
        final String indent = getTabString(level);
        final String closingTag = "</" + node.getName() + ">";
        StringBuilder ret = new StringBuilder();

        // Opening tag with its attributes, each as name="value".
        ret.append(indent).append("<").append(node.getName());
        if (node.getAttributes() != null) {
            for (Attribute att : node.getAttributes()) {
                ret.append(" ").append(att.getName()).append("=\"").append(att.getValue()).append("\"");
            }
        }
        ret.append(">");

        String value = node.getValue();
        if (value != null && !value.isEmpty()) {
            // Continuation lines of a multi-line value get the same indent as the element.
            ret.append(value.replace("\n", "\n" + indent));
            ret.append(closingTag);
            ret.append("\n");
            return ret.toString();
        }

        boolean wroteChildren = false;
        if (node.getChildrenNodes() != null && node.getChildrenNodes().size() > 0) {
            ret.append("\n");
            for (Node child : node.getChildrenNodes()) {
                ret.append(getNodeToString(child, level + 1));
                if (ret.charAt(ret.length() - 1) != '\n') {
                    ret.append("\n");
                }
            }
            wroteChildren = true;
        }
        if (wroteChildren) {
            // The closing tag sits on its own line, so it needs the element's indent.
            ret.append(indent);
        }
        ret.append(closingTag);
        ret.append("\n");
        return ret.toString();
    }

    /**
     * Renders only the opening tag of a node, with its attributes written
     * as name="value", followed by a line feed.
     *
     * @param node the node whose opening tag is wanted
     * @return the opening tag followed by "\n"
     */
    private String nodeHeadToString(Node node) {
        StringBuilder ret = new StringBuilder();
        ret.append("<");
        ret.append(node.getName());
        if (node.getAttributes() != null) {
            for (Attribute att : node.getAttributes()) {
                // Attribute values must be quoted to be well-formed XML.
                ret.append(" ").append(att.getName()).append("=\"").append(att.getValue()).append("\"");
            }
        }
        ret.append(">");
        ret.append("\n");
        return ret.toString();
    }

    /**
     * Renders the closing tag of a node.
     *
     * @param node the node whose closing tag is wanted
     * @return the closing tag, for example {@code </item>}
     */
    private String nodeEndToString(Node node) {
        return "</" + node.getName() + ">";
    }

    /**
     * Builds the header line by filling the header pattern with the
     * encoding name (underscores replaced by hyphens) and the stand-alone
     * flag rendered as "yes" or "no".
     *
     * @return the formatted header line
     */
    private String getHeaderLine() {
        final String charsetName = this.encoding.toString().replace("_", "-");
        final String standAloneFlag = isStandAlone ? "yes" : "no";
        return MessageFormat.format(this.XML_HEADER, charsetName, standAloneFlag);
    }

    /**
     * Tells whether a node carries a text value.
     *
     * @param node the node to inspect
     * @return true when the value of the node is neither null nor empty
     */
    private boolean nodeHasValue(Node node) {
        String value = node.getValue();
        // The previous check was inverted: it answered true for an empty value.
        return value != null && !value.isEmpty();
    }

    /**
     * Tells whether a node has at least one child node.
     *
     * @param node the node to inspect
     * @return true when the child list of the node is not empty
     */
    private boolean nodeHasChildren(Node node) {
        final int childCount = node.getChildrenNodes().size();
        return childCount > 0;
    }

    /**
     * Gives access to the root node of this document.
     *
     * @return the RootNode held by this stream
     */
    public RootNode getRootNode() {
        return root;
    }
}