All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.flatpack.xml.MapParser Maven / Gradle / Ivy

Go to download

Simple Java delimited and fixed width file parser. Handles CSV, Excel CSV, Tab, Pipe delimiters, just to name a few. Maps column positions in the file to user friendly names via XML. See FlatPack Feature List under News for complete feature list.

The newest version!
/*
 * ObjectLab, http://www.objectlab.co.uk/open is supporting FlatPack.
 *
 * Based in London, we are world leaders in the design and development
 * of bespoke applications for the securities financing markets.
 *
 * Click here to learn more
 *           ___  _     _           _   _          _
 *          / _ \| |__ (_) ___  ___| |_| |    __ _| |__
 *         | | | | '_ \| |/ _ \/ __| __| |   / _` | '_ \
 *         | |_| | |_) | |  __/ (__| |_| |__| (_| | |_) |
 *          \___/|_.__// |\___|\___|\__|_____\__,_|_.__/
 *                   |__/
 *
 *                     www.ObjectLab.co.uk
 *
 * $Id: ColorProvider.java 74 2006-10-24 22:19:05Z benoitx $
 *
 * Copyright 2006 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package net.sf.flatpack.xml;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import net.sf.flatpack.Parser;
import net.sf.flatpack.structure.ColumnMetaData;
import net.sf.flatpack.util.FPConstants;
import net.sf.flatpack.util.ParserUtils;

/**
 * Parses a PZmap definition XML file into column metadata.
 *
 * <p>
 * COLUMN elements that are direct children of the document element describe the
 * "detail" record; COLUMN elements nested in a RECORD element describe an
 * additional record type identified by the RECORD's <code>id</code> attribute.
 *
 * @author zepernick
 */
public final class MapParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(MapParser.class);
    private static final String COLUMN = "COLUMN";
    private static final String RECORD = "RECORD";
    private static final String LENGTH = "length";
    private static final String NAME = "name";
    private static final String ID = "id";
    private static boolean showDebug = false;

    /**
     * Utility class, not meant to be instantiated.
     */
    private MapParser() {
    }

    /**
     * Reads a PZmap XML document from the given reader and returns its content
     * as an insertion-ordered map.
     *
     * <p>
     * The returned map contains, in this order:
     * <ul>
     * <li><code>FPConstants.DETAIL_ID</code> -&gt; List of ColumnMetaData built from
     * the COLUMN elements found directly under the document element</li>
     * <li><code>FPConstants.COL_IDX</code> -&gt; the column index built by
     * <code>ParserUtils.buidColumnIndexMap</code> for those columns</li>
     * <li>for every RECORD element: its <code>id</code> -&gt; XMLRecordElement, followed by
     * <code>FPConstants.COL_IDX + "_" + id</code> -&gt; the column index of that record</li>
     * </ul>
     * A RECORD element without an <code>id</code> attribute is stored under a
     * <code>null</code> key.
     *
     * @param xmlStreamReader
     *          source of the XML map, must not be null
     * @param pzparser
     *          Can be null.  Allows additional opts to be set during the XML map read
     * @return Map of record ids to their corresponding meta data, see above
     * @throws IOException
     * @throws ParserConfigurationException
     * @throws SAXException
     * @throws NullPointerException
     *           if xmlStreamReader is null
     * @throws IllegalArgumentException
     *           if a RECORD uses the reserved id "detail", or a COLUMN has no name,
     *           a duplicate name or a non numeric length
     */
    @SuppressWarnings("rawtypes") // raw return type kept so that existing callers keep compiling
    public static Map parse(final Reader xmlStreamReader, final Parser pzparser)
            throws IOException, ParserConfigurationException, SAXException {
        if (xmlStreamReader == null) {
            throw new NullPointerException("XML Reader Is Not Allowed To Be Null...");
        }
        final Map<String, Object> mdIndex = new LinkedHashMap<>(); // retain the same order

        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
        final DocumentBuilder builder = factory.newDocumentBuilder();
        builder.setEntityResolver(new ResolveLocalDTD());
        final org.w3c.dom.Document document = builder.parse(new InputSource(xmlStreamReader));
        final NodeList nodeList = document.getDocumentElement().getChildNodes();

        final List<ColumnMetaData> columnResults = parseColumnElements(nodeList);

        // always force detail to the top of the map no matter what
        mdIndex.put(FPConstants.DETAIL_ID, columnResults);
        mdIndex.put(FPConstants.COL_IDX, ParserUtils.buidColumnIndexMap(columnResults, pzparser));

        for (int i = 0; i < nodeList.getLength(); i++) {
            final Node node = nodeList.item(i);
            if (!RECORD.equalsIgnoreCase(node.getNodeName())) {
                continue;
            }

            final NamedNodeMap attributes = node.getAttributes();
            final String recordId = getAttributeValue(attributes, ID);
            if (FPConstants.DETAIL_ID.equalsIgnoreCase(recordId)) {
                // "detail" is the hardcoded key used for the columns specified
                // outside of a <RECORD> element, so it cannot be used as a record id
                throw new IllegalArgumentException("The ID 'detail' on the <RECORD> element is reserved, please select another id");
            }

            final List<ColumnMetaData> columns = parseColumnElements(node.getChildNodes());
            final XMLRecordElement xmlre = new XMLRecordElement();
            xmlre.setColumns(columns, pzparser);
            xmlre.setIndicator(getAttributeValue(attributes, "indicator"));
            xmlre.setElementNumber(convertAttributeToInt(getAttributeValue(attributes, "elementNumber")));
            xmlre.setStartPosition(convertAttributeToInt(getAttributeValue(attributes, "startPosition")));
            xmlre.setEndPositition(convertAttributeToInt(getAttributeValue(attributes, "endPosition")));
            xmlre.setElementCount(convertAttributeToInt(getAttributeValue(attributes, "elementCount")));
            mdIndex.put(recordId, xmlre);
            // make a column index for non detail records
            mdIndex.put(FPConstants.COL_IDX + "_" + recordId, ParserUtils.buidColumnIndexMap(columns, pzparser));
        }

        if (showDebug) {
            setShowDebug(mdIndex);
        }
        return mdIndex;
    }

    /**
     * Returns the text of the named attribute, or null when the attribute is absent.
     */
    private static String getAttributeValue(final NamedNodeMap attributes, final String attributeName) {
        final Node namedItem = attributes.getNamedItem(attributeName);
        return namedItem != null ? namedItem.getTextContent() : null;
    }

    /**
     * Builds one ColumnMetaData per COLUMN node found in the given node list
     * (direct entries only, other nodes are ignored), in document order.
     *
     * @throws IllegalArgumentException
     *           if a column has no name attribute, reuses a name already seen in
     *           this list, or has a length attribute that is not an integer
     */
    private static List<ColumnMetaData> parseColumnElements(final NodeList nodeList) {
        final Set<String> columnNames = new HashSet<>();
        final List<ColumnMetaData> columnResults = new ArrayList<>();
        for (int i = 0; i < nodeList.getLength(); i++) {
            final Node node = nodeList.item(i);
            if (!COLUMN.equalsIgnoreCase(node.getNodeName())) {
                continue;
            }

            final NamedNodeMap attributes = node.getAttributes();

            // getNamedItem returns null for a missing attribute, so the lookup goes
            // through getAttributeValue to report the problem instead of failing with a NPE
            final String columnName = getAttributeValue(attributes, NAME);
            if (columnName == null) {
                throw new IllegalArgumentException("Name attribute is required on the column tag!");
            }
            // Set.add returns false when the name was already registered
            if (!columnNames.add(columnName)) {
                throw new IllegalArgumentException("Duplicate name column '" + columnName + "'");
            }

            final ColumnMetaData cmd = new ColumnMetaData();
            cmd.setColName(columnName);

            // the length is optional, it is only set when the attribute is present
            final String length = getAttributeValue(attributes, LENGTH);
            if (length != null) {
                final int colLength;
                try {
                    colLength = Integer.parseInt(length);
                } catch (final NumberFormatException ex) {
                    throw new IllegalArgumentException("LENGTH ATTRIBUTE ON COLUMN ELEMENT MUST BE AN INTEGER.  GOT: " + length, ex);
                }
                cmd.setColLength(colLength);
            }
            columnResults.add(cmd);
        }
        return columnResults;
    }

    /**
     * Converts an attribute value to an int. A missing (null) or non numeric
     * value is deliberately mapped to 0 rather than reported.
     */
    private static int convertAttributeToInt(final String attribute) {
        if (attribute == null) {
            return 0;
        }

        try {
            return Integer.parseInt(attribute);
        } catch (final NumberFormatException ignored) {
            // best effort: an unparsable value is treated like a missing one
            return 0;
        }
    }

    /**
     * If set to true, debug information for the map file is written to the
     * logger (at debug level) after each parse is finished
     *
     * @param b
     */
    public static void setDebug(final boolean b) {
        showDebug = b;
    }

    /**
     * Logs, at debug level, the columns of every record definition held in the
     * parse result. Entries that are neither a column list nor an
     * XMLRecordElement (i.e. the column index entries) are skipped, casting
     * them to XMLRecordElement would fail with a ClassCastException.
     */
    private static void setShowDebug(final Map<String, Object> xmlResults) {
        for (final Entry<String, Object> entry : xmlResults.entrySet()) {
            final Object value = entry.getValue();
            XMLRecordElement xmlrecEle = null;
            final List<ColumnMetaData> columns;
            if (value instanceof XMLRecordElement) {
                xmlrecEle = (XMLRecordElement) value;
                columns = xmlrecEle.getColumns();
            } else if (value instanceof List) {
                // the only List stored by parse() is the detail column list
                @SuppressWarnings("unchecked")
                final List<ColumnMetaData> detailColumns = (List<ColumnMetaData>) value;
                columns = detailColumns;
            } else {
                continue;
            }

            LOGGER.debug(">>>>Column MD Id:{}", entry.getKey());
            if (xmlrecEle != null) {
                LOGGER.debug("Start Position: {} End Postion: {} Element Number: {} Indicator{}", xmlrecEle.getStartPosition(),
                        xmlrecEle.getEndPositition(), xmlrecEle.getElementNumber(), xmlrecEle.getIndicator());
            }
            for (final ColumnMetaData cmd : columns) {
                LOGGER.debug("Column Name: {} LENGTH: {}", cmd.getColName(), cmd.getColLength());
            }
        }
    }

    /**
     * Reads a PZmap XML document from the given reader, see
     * {@link #parse(Reader, Parser)}, and wraps the result in a MetaData: the
     * detail columns, the detail column index and the remaining record
     * definitions. The per record column indexes produced by parse are dropped.
     *
     * @param xmlStreamReader
     *          source of the XML map, must not be null
     * @param pzparser
     *          Can be null.  Allows additional opts to be set during the XML map read
     * @return MetaData built from the detail columns, their index and the other records
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     */
    // raw types are used on purpose: the values are handed over to MetaData, whose
    // constructor signature is declared outside of this file
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static MetaData parseMap(final Reader xmlStreamReader, final Parser pzparser)
            throws IOException, ParserConfigurationException, SAXException {
        final Map map = parse(xmlStreamReader, pzparser);

        // Map.remove hands back the value it removes
        final List col = (List) map.remove(FPConstants.DETAIL_ID);
        final Map m = (Map) map.remove(FPConstants.COL_IDX);

        // loop through the map and remove anything else that is an index of FPConstants.COL_IDX + _
        // these were put in for the writer.
        // TODO maybe these should be thrown into the MetaData instead of just discarded, but they are unused
        // in the Reader the moment. This parseMap is not utilized in the writer so it is safe to remove them here
        final String indexPrefix = FPConstants.COL_IDX + "_";
        final Iterator keyIt = map.keySet().iterator();
        while (keyIt.hasNext()) {
            final Object key = keyIt.next();
            // the key is null for a RECORD without an id, instanceof guards against it
            if (key instanceof String && ((String) key).startsWith(indexPrefix)) {
                keyIt.remove();
            }
        }

        return new MetaData(col, m, map);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy