All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datacleaner.configuration.JaxbPojoDatastoreAdaptor Maven / Gradle / Ivy

/**
 * DataCleaner (community edition)
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.datacleaner.configuration;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import javax.xml.bind.JAXBElement;

import org.apache.metamodel.DataContext;
import org.apache.metamodel.MetaModelHelper;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.Row;
import org.apache.metamodel.pojo.ArrayTableDataProvider;
import org.apache.metamodel.pojo.TableDataProvider;
import org.apache.metamodel.query.Query;
import org.apache.metamodel.schema.Column;
import org.apache.metamodel.schema.ColumnType;
import org.apache.metamodel.schema.ColumnTypeImpl;
import org.apache.metamodel.schema.Schema;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.CollectionUtils;
import org.apache.metamodel.util.SimpleTableDef;
import org.datacleaner.configuration.jaxb.AbstractDatastoreType;
import org.datacleaner.configuration.jaxb.PojoDatastoreType;
import org.datacleaner.configuration.jaxb.PojoTableType;
import org.datacleaner.configuration.jaxb.PojoTableType.Columns;
import org.datacleaner.configuration.jaxb.PojoTableType.Rows;
import org.datacleaner.connection.Datastore;
import org.datacleaner.connection.DatastoreConnection;
import org.datacleaner.connection.PojoDatastore;
import org.datacleaner.util.CollectionUtils2;
import org.datacleaner.util.ReflectionUtils;
import org.datacleaner.util.StringUtils;
import org.datacleaner.util.convert.StringConverter;
import org.datacleaner.util.xml.XmlUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/**
 * Convenient utility class for reading and writing POJO datastores from and to
 * XML (JAXB) elements.
 */
public class JaxbPojoDatastoreAdaptor {

    private static final Logger logger = LoggerFactory.getLogger(JaxbPojoDatastoreAdaptor.class);

    private final StringConverter _converter;

    /**
     * @deprecated use the
     *             {@link #JaxbPojoDatastoreAdaptor(DataCleanerConfiguration)}
     *             constructor instead
     */
    @Deprecated
    public JaxbPojoDatastoreAdaptor() {
        _converter = StringConverter.simpleInstance();
    }

    public JaxbPojoDatastoreAdaptor(final DataCleanerConfiguration configuration) {
        _converter = new StringConverter(configuration);
    }

    public PojoDatastore read(final PojoDatastoreType pojoDatastore) {
        final String name = pojoDatastore.getName();
        final String schemaName = (pojoDatastore.getSchemaName() == null ? name : pojoDatastore.getSchemaName());

        final List> tableDataProviders = new ArrayList<>();
        final List tables = pojoDatastore.getTable();
        for (final PojoTableType table : tables) {
            final String tableName = table.getName();

            final List columns = table.getColumns().getColumn();
            final int columnCount = columns.size();
            final String[] columnNames = new String[columnCount];
            final ColumnType[] columnTypes = new ColumnType[columnCount];

            for (int i = 0; i < columnCount; i++) {
                final Columns.Column column = columns.get(i);
                columnNames[i] = column.getName();
                columnTypes[i] = ColumnTypeImpl.valueOf(column.getType());
            }

            final SimpleTableDef tableDef = new SimpleTableDef(tableName, columnNames, columnTypes);

            final Collection arrays = new ArrayList<>();
            final Rows rowsType = table.getRows();
            if (rowsType != null) {
                final List rows = rowsType.getRow();
                for (final Rows.Row row : rows) {
                    final List values = row.getV();
                    if (values.size() != columnCount) {
                        throw new IllegalStateException(
                                "Row value count is not equal to column count in datastore '" + name + "'. Expected "
                                        + columnCount + " values, found " + values.size() + " (table " + tableName
                                        + ", row no. " + arrays.size() + ")");
                    }
                    final Object[] array = new Object[columnCount];
                    for (int i = 0; i < array.length; i++) {

                        final Class expectedClass = columnTypes[i].getJavaEquivalentClass();

                        final Object rawValue = values.get(i);
                        final Object value = deserializeValue(rawValue, expectedClass);
                        array[i] = value;
                    }
                    arrays.add(array);
                }
            }

            final TableDataProvider tableDataProvider = new ArrayTableDataProvider(tableDef, arrays);
            tableDataProviders.add(tableDataProvider);
        }

        return new PojoDatastore(name, schemaName, tableDataProviders);
    }

    private Object deserializeValue(final Object value, final Class expectedClass) {
        if (value == null) {
            return null;
        }

        if (value instanceof Node) {
            final Node node = (Node) value;
            logger.debug("Value is a DOM node: {}", node);
            return getNodeValue(node, expectedClass);
        }

        if (value instanceof JAXBElement) {
            final JAXBElement element = (JAXBElement) value;
            logger.debug("Value is a JAXBElement: {}", element);
            final Object jaxbValue = element.getValue();
            return deserializeValue(jaxbValue, expectedClass);
        }

        if (value instanceof String) {
            final String str = (String) value;
            return _converter.deserialize(str, expectedClass);
        } else {
            throw new UnsupportedOperationException("Unknown value type: " + value);
        }
    }

    @SuppressWarnings("unchecked")
    private  T getNodeValue(final Node node, Class expectedClass) {
        if (node.getNodeType() == Node.TEXT_NODE) {
            final String str = node.getNodeValue();
            if (expectedClass == null) {
                // we will fall back to string class
                expectedClass = (Class) String.class;
            }
            final Object result = _converter.deserialize(str, determineExpectedClass(node, expectedClass));
            return (T) result;
        }

        // a top-level value
        final List childNodes = getChildNodes(node);
        if (childNodes.isEmpty()) {
            return null;
        } else if (childNodes.size() == 1 && childNodes.get(0).getNodeType() == Node.TEXT_NODE) {
            expectedClass = (Class) determineExpectedClass(node, expectedClass);
            final Node child = childNodes.get(0);
            return getNodeValue(child, expectedClass);
        }

        if (expectedClass == null) {
            final Node firstChild = childNodes.get(0);
            if ("i".equals(firstChild.getNodeName())) {
                final List list = getNodeList(childNodes);
                return (T) list;
            } else if ("e".equals(firstChild.getNodeName())) {
                final Map map = getNodeMap(childNodes);
                return (T) map;
            } else {
                throw new UnsupportedOperationException(
                        "Unexpected child nodes. First child: " + printNode(firstChild));
            }
        } else if (ReflectionUtils.is(expectedClass, List.class)) {
            final List list = getNodeList(childNodes);
            return (T) list;
        } else if (ReflectionUtils.is(expectedClass, Map.class)) {
            final Map map = getNodeMap(childNodes);
            return (T) map;
        } else if (expectedClass.isArray()) {
            final List list = getNodeList(childNodes);
            final Class componentType = expectedClass.getComponentType();
            return (T) CollectionUtils2.toArray(list, componentType);
        }

        throw new UnsupportedOperationException("Not a value (v) node type: " + printNode(node));
    }

    private Class determineExpectedClass(final Node node, final Class fallbackType) {
        final NamedNodeMap attributes = node.getAttributes();
        if (attributes != null) {
            final Node attribute = attributes.getNamedItem("class");
            if (attribute != null) {
                final String className = attribute.getTextContent();
                if (!StringUtils.isNullOrEmpty(className)) {
                    try {
                        return Class.forName(className);
                    } catch (final ClassNotFoundException e) {
                        logger.error("Could not load class: " + className + ". Falling back to String type.", e);
                    }
                }
            }
        }

        return fallbackType;
    }

    private List getNodeList(final List childNodes) {
        final List list = new ArrayList<>();
        for (final Node childNode : childNodes) {
            final Object value = getNodeValue(childNode, null);
            list.add(value);
        }
        return list;
    }

    private List getChildNodes(final Node node) {
        final List list = new ArrayList<>();
        final NodeList childNodes = node.getChildNodes();
        for (int i = 0; i < childNodes.getLength(); i++) {
            final Node child = childNodes.item(i);
            switch (child.getNodeType()) {
            case Node.ELEMENT_NODE:
                list.add(child);
                // fallthru
            case Node.TEXT_NODE:
                final String text = child.getNodeValue();
                if (!StringUtils.isNullOrEmpty(text)) {
                    list.add(child);
                }
                break;
            default: // ignore
            }
        }
        return list;
    }

    private Map getNodeMap(final List entryNodes) {
        final Map map = new LinkedHashMap<>();
        for (final Node entryNode : entryNodes) {
            final String entryNodeName = entryNode.getNodeName();
            if (!"e".equals(entryNodeName)) {
                throw new UnsupportedOperationException(
                        "Node passed as Map entry does not appear to be the right type: " + printNode(entryNode));
            }

            String key = null;
            Object value = null;

            final List keyOrValueNodes = getChildNodes(entryNode);

            assert keyOrValueNodes.size() == 2;

            for (final Node keyOrValueNode : keyOrValueNodes) {
                final String keyOrValueNodeName = keyOrValueNode.getNodeName();
                if ("k".equals(keyOrValueNodeName)) {
                    key = getNodeValue(keyOrValueNode, String.class);
                } else if ("v".equals(keyOrValueNodeName)) {
                    value = getNodeValue(keyOrValueNode, null);
                }
            }

            if (key == null) {
                throw new UnsupportedOperationException("Map key (k) node not set in entry: " + printNode(entryNode));
            }

            map.put(key, value);
        }
        return map;
    }

    private String printNode(final Node node) {
        return XmlUtils.writeDocumentToString(node, false);
    }

    private org.datacleaner.configuration.jaxb.PojoTableType.Rows.Row createPojoRow(final Row row,
            final Document document) {
        final org.datacleaner.configuration.jaxb.PojoTableType.Rows.Row rowType =
                new org.datacleaner.configuration.jaxb.PojoTableType.Rows.Row();
        final Object[] values = row.getValues();
        for (final Object value : values) {
            final Element elem = document.createElement("v");
            createPojoValue(value, elem, document, false);
            rowType.getV().add(elem);
        }
        return rowType;
    }

    private void createPojoValue(Object value, final Element elem, final Document document,
            final boolean explicitType) {
        if (value == null) {
            // return an empty element
            return;
        }

        if (value.getClass().isArray()) {
            final Class componentType = value.getClass().getComponentType();
            if (componentType.isPrimitive() || componentType == String.class) {
                // leave the array to be serialized using the string converter -
                // it
                // will take up much less space.
            } else {
                value = CollectionUtils.toList(value);
            }
        }

        if (value instanceof List) {
            final List list = (List) value;
            for (final Object item : list) {
                final Element itemElement = document.createElement("i");
                createPojoValue(item, itemElement, document, true);
                elem.appendChild(itemElement);
            }
            return;
        }

        if (value instanceof Map) {
            final Map map = (Map) value;
            for (final Entry entry : map.entrySet()) {
                final Element keyElement = document.createElement("k");
                createPojoValue(entry.getKey(), keyElement, document, true);

                final Element valueElement = document.createElement("v");
                createPojoValue(entry.getValue(), valueElement, document, true);

                final Element entryElement = document.createElement("e");
                entryElement.appendChild(keyElement);
                entryElement.appendChild(valueElement);

                elem.appendChild(entryElement);
            }
            return;
        }

        try {
            final String stringValue = _converter.serialize(value);
            elem.setTextContent(stringValue);
            if (explicitType) {
                elem.setAttribute("class", value.getClass().getName());
            }
        } catch (final RuntimeException e) {
            logger.warn("Failed to serialize value: " + value + ". Returning null.", e);
        }
        return;
    }

    private org.datacleaner.configuration.jaxb.PojoTableType.Columns.Column createPojoColumn(final String name,
            final ColumnType type) {
        final org.datacleaner.configuration.jaxb.PojoTableType.Columns.Column columnType =
                new org.datacleaner.configuration.jaxb.PojoTableType.Columns.Column();
        columnType.setName(name);
        columnType.setType(type.toString());
        return columnType;
    }

    public PojoTableType createPojoTable(final DataContext dataContext, final Table table, final List usedColumns,
            final int maxRows) {
        final PojoTableType tableType = new PojoTableType();
        tableType.setName(table.getName());

        // read columns
        final Columns columnsType = new Columns();
        for (final Column column : usedColumns) {
            columnsType.getColumn().add(createPojoColumn(column.getName(), column.getType()));
        }
        tableType.setColumns(columnsType);

        if (maxRows > 0) {
            // read values
            final Query q = dataContext.query().from(table).select(usedColumns).toQuery();
            q.setMaxRows(maxRows);

            final Document document = XmlUtils.createDocument();
            final Rows rowsType = new Rows();
            try (DataSet ds = dataContext.executeQuery(q)) {
                while (ds.next()) {
                    final Row row = ds.getRow();
                    rowsType.getRow().add(createPojoRow(row, document));
                }
            }

            tableType.setRows(rowsType);
        }

        return tableType;
    }

    public AbstractDatastoreType createPojoDatastore(final String datastoreName, final String schemaName,
            final Collection tables) {
        final PojoDatastoreType datastoreType = new PojoDatastoreType();
        datastoreType.setName(datastoreName);
        datastoreType.setSchemaName(schemaName);
        datastoreType.getTable().addAll(tables);

        return datastoreType;
    }

    /**
     * Creates a serialized POJO copy of a datastore.
     *
     * @param datastore
     *            the datastore to copy
     * @param columns
     *            the columns to include, or null if all tables/columns should
     *            be included.
     * @param maxRowsToQuery
     *            the maximum number of records to query and include in the
     *            datastore copy. Keep this number reasonably low, or else the
     *            copy might cause out-of-memory issues (Both while reading and
     *            writing).
     * @return
     */
    public AbstractDatastoreType createPojoDatastore(final Datastore datastore, final Set columns,
            final int maxRowsToQuery) {
        final PojoDatastoreType datastoreType = new PojoDatastoreType();
        datastoreType.setName(datastore.getName());
        datastoreType.setDescription(datastore.getDescription());

        try (DatastoreConnection con = datastore.openConnection()) {
            final DataContext dataContext = con.getDataContext();

            final Schema schema;
            final List tables;
            if (columns == null || columns.isEmpty()) {
                schema = dataContext.getDefaultSchema();
                tables = schema.getTables();
            } else {
                tables = Arrays.asList(MetaModelHelper.getTables(columns));
                // TODO: There's a possibility that tables span multiple
                // schemas, but we cannot currently support that in a
                // PojoDatastore, so we just pick the first and cross our
                // fingers.
                schema = tables.get(0).getSchema();
            }

            datastoreType.setSchemaName(schema.getName());

            for (final Table table : tables) {
                final List usedColumns;
                if (columns == null || columns.isEmpty()) {
                    usedColumns = table.getColumns();
                } else {
                    usedColumns = Arrays.asList(MetaModelHelper.getTableColumns(table, columns));
                }

                final PojoTableType tableType = createPojoTable(dataContext, table, usedColumns, maxRowsToQuery);
                datastoreType.getTable().add(tableType);
            }
        }

        return datastoreType;
    }
}