All Downloads are FREE. Search and download functionalities are using the official Maven repository.

dk.eobjects.metamodel.XmlDataContextStrategy Maven / Gradle / Ivy

/**
 *  This file is part of MetaModel.
 *
 *  MetaModel is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  MetaModel is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with MetaModel.  If not, see .
 */
package dk.eobjects.metamodel;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.InputSource;

import dk.eobjects.metamodel.data.DataSet;
import dk.eobjects.metamodel.data.InMemoryDataSet;
import dk.eobjects.metamodel.query.FromItem;
import dk.eobjects.metamodel.query.JoinType;
import dk.eobjects.metamodel.query.Query;
import dk.eobjects.metamodel.query.SelectItem;
import dk.eobjects.metamodel.schema.Column;
import dk.eobjects.metamodel.schema.ColumnType;
import dk.eobjects.metamodel.schema.MutableColumn;
import dk.eobjects.metamodel.schema.MutableRelationship;
import dk.eobjects.metamodel.schema.MutableSchema;
import dk.eobjects.metamodel.schema.MutableTable;
import dk.eobjects.metamodel.schema.Relationship;
import dk.eobjects.metamodel.schema.Schema;
import dk.eobjects.metamodel.schema.Table;
import dk.eobjects.metamodel.schema.TableType;
import dk.eobjects.metamodel.util.FileHelper;
import dk.eobjects.metamodel.util.NumberComparator;

/**
 * A DataContext strategy that reads XML content and maps it to a table-based
 * model similar to the rest of MetaModel. Tables are created by examining the
 * data in the XML file, NOT by reading XML Schemas (xsd/dtd's). This enables
 * compliancy with ALL xml formats but also raises a risk that two XML files
 * with the same format wont nescesarily yield the same table model if some
 * optional attributes or tags are omitted in one of the files.
 */
public class XmlDataContextStrategy extends QueryPostprocessDataContextStrategy {

	public static final String NATIVE_TYPE_PRIMARY_KEY = "Auto-generated primary key";
	public static final String NATIVE_TYPE_FOREIGN_KEY = "Auto-generated foreign key";
	public static final String NATIVE_TYPE_ATTRIBUTE = "XML Attribute";
	public static final String NATIVE_TYPE_TEXT = "XML Text";
	private static final String TEXT_CONTENT_TEMP_SUFFIX = "_metamodel_text_content";
	private MutableSchema _schema;
	private Map> _tableData = new HashMap>();;
	private boolean _autoFlattenTables;
	private String _schemaName;
	private InputSource _inputSource;

	/**
	 * Creates an XML DataContext strategy based on an already parsed Document.
	 * 
	 * @param schemaName
	 * @param document
	 * @param autoFlattenTables
	 */
	public XmlDataContextStrategy(String schemaName, Document document,
			boolean autoFlattenTables) {
		_autoFlattenTables = autoFlattenTables;
		_schemaName = schemaName;
		_schema = new MutableSchema(_schemaName);
		loadSchema(document);
	}

	/**
	 * Creates an XML DataContext strategy based on a file.
	 * 
	 * @param file
	 *            the file to parse
	 * @param autoFlattenTables
	 *            a parameter indicating whether or not tags with only text
	 *            content or a single attribute should be flattened with it's
	 *            parent table
	 * 
	 * @throws IllegalArgumentException
	 *             if the file does not exist
	 */
	public XmlDataContextStrategy(File file, boolean autoFlattenTables)
			throws IllegalArgumentException {
		this(new InputSource(FileHelper.getReader(file)), file.getName(),
				autoFlattenTables);
	}

	public XmlDataContextStrategy(InputSource inputSource, String schemaName,
			boolean autoFlattenTables) {
		_inputSource = inputSource;
		_schemaName = schemaName;
		_autoFlattenTables = autoFlattenTables;
	}

	public XmlDataContextStrategy(URL url, boolean autoFlattenTables)
			throws IllegalArgumentException {
		try {
			_inputSource = new InputSource(url.openStream());
			_autoFlattenTables = autoFlattenTables;
			_schemaName = url.toExternalForm();
			int lastIndex = _schemaName.lastIndexOf('/');
			if (lastIndex != -1) {
				_schemaName = _schemaName.substring(lastIndex);
			}
		} catch (IOException e) {
			throw new IllegalArgumentException(e);
		}
	}

	/**
	 * Creates an XML DataContext strategy based on a file.
	 * 
	 * @param file
	 *            the file to parse
	 */
	public XmlDataContextStrategy(File file) {
		this(file, true);
	}

	public boolean isAutoFlattenTables() {
		return _autoFlattenTables;
	}

	public void setAutoFlattenTables(boolean autoFlattenTables) {
		_autoFlattenTables = autoFlattenTables;
	}

	@Override
	public DataSet materializeMainSchemaTable(Table table, Column[] columns,
			int maxRows) {
		loadSchema();
		List tableData = _tableData.get(table.getName());
		if (tableData == null) {
			throw new IllegalStateException("No such table name: '"
					+ table.getName() + "'. Valid table names are: "
					+ _tableData.keySet());
		}
		List resultData = new ArrayList();
		for (Object[] tableDataRow : tableData) {
			if (maxRows == 0) {
				break;
			}
			maxRows--;
			Object[] dataValues = new Object[columns.length];
			for (int i = 0; i < columns.length; i++) {
				Column column = columns[i];
				int columnNumber = column.getColumnNumber();
				// Some rows may not contain values for all columns
				// (attributes)
				if (columnNumber < tableDataRow.length) {
					dataValues[i] = tableDataRow[columnNumber];
				} else {
					dataValues[i] = null;
				}
			}
			resultData.add(dataValues);
		}

		SelectItem[] selectItems = new SelectItem[columns.length];
		for (int i = 0; i < columns.length; i++) {
			Column column = columns[i];
			selectItems[i] = new SelectItem(column);
		}

		return new InMemoryDataSet(selectItems, resultData);
	}

	@Override
	protected String getMainSchemaName() throws MetaModelException {
		return _schemaName;
	}

	@Override
	protected Schema getMainSchema() throws MetaModelException {
		loadSchema();
		return _schema;
	}

	/**
	 * Forces a fresh load of the schema, even though it has already been loaded
	 */
	public XmlDataContextStrategy reloadSchema() {
		_schema = null;
		loadSchema();
		return this;
	}

	/**
	 * Loads the schema if it hasn't been loaded before
	 */
	public XmlDataContextStrategy loadSchema() {
		if (_schema == null) {
			_schema = new MutableSchema(_schemaName);
			try {
				DocumentBuilderFactory dbf = DocumentBuilderFactory
						.newInstance();
				dbf.setIgnoringComments(true);
				DocumentBuilder db = dbf.newDocumentBuilder();
				Document document = db.parse(_inputSource);
				loadSchema(document);
			} catch (Exception e) {
				throw new MetaModelException("Error parsing XML file: "
						+ e.getMessage(), e);
			}
		}
		return this;
	}

	private void loadSchema(Document document) {
		Element rootElement = document.getDocumentElement();
		loadTables(rootElement, "", null, 0);

		// Remove tables from schema that has no data (typically root
		// node or pure XML structure)
		Table[] tables = _schema.getTables();
		for (Table table : tables) {
			String tableName = table.getName();
			List tableRows = _tableData.get(tableName);
			if (tableRows == null) {
				logger.info("Remove table (no data in it): {}", tableName);
				_schema.removeTable(table);
			} else {
				// Rename all ID columns to reasonable names (preferably
				// "id")
				MutableColumn idColumn = getIdColumn((MutableTable) table);
				MutableColumn column = (MutableColumn) table
						.getColumnByName("id");
				if (column == null) {
					idColumn.setName("id");
				}

				// Remove text content column, if it is never populated
				MutableColumn textContentColumn = (MutableColumn) getTextContentColumn(
						(MutableTable) table, null);
				int textContentColumnIndex = textContentColumn
						.getColumnNumber();
				boolean found = false;
				for (Object[] objects : tableRows) {
					if (objects[textContentColumnIndex] != null) {
						found = true;
						break;
					}
				}
				if (!found) {
					((MutableTable) table).removeColumn(textContentColumn);
				} else {
					// Rename all text content columns to reasonable
					// names (preferably element node name)
					String currentName = textContentColumn.getName();
					String preferredName = currentName.substring(
							0,
							currentName.length()
									- TEXT_CONTENT_TEMP_SUFFIX.length());
					column = (MutableColumn) table
							.getColumnByName(preferredName);
					if (column == null) {
						textContentColumn.setName(preferredName);
					}
				}
			}
		}
		if (_autoFlattenTables) {
			autoFlattenTables();
		}
	}

	private void loadTables(Element element, String tablePrefix,
			Column parentKeyColumn, int parentKey) {
		Attr[] attributes = getAttributes(element);
		String textContent = getTextContent(element);
		String tableName = tablePrefix + element.getNodeName();
		if (attributes.length > 0 || textContent != null
				|| hasSiblings(element)) {
			// We need to represent this type of node with a table
			MutableTable table = (MutableTable) _schema
					.getTableByName(tableName);
			Column idColumn;
			MutableColumn foreignKeyColumn;
			List tableRows;
			if (table == null) {
				logger.info("Creating table: {}", tableName);
				table = new MutableTable(tableName, TableType.TABLE, _schema);
				_schema.addTable(table);
				idColumn = getIdColumn(table);
				tableRows = new ArrayList();
				_tableData.put(tableName, tableRows);

				if (parentKeyColumn != null) {
					Table parentTable = parentKeyColumn.getTable();
					foreignKeyColumn = new MutableColumn(parentTable.getName()
							+ "_id", parentKeyColumn.getType(), table,
							table.getColumnCount(), false);
					foreignKeyColumn.setNativeType(NATIVE_TYPE_FOREIGN_KEY);
					table.addColumn(foreignKeyColumn);

					MutableRelationship.createRelationship(
							new Column[] { parentKeyColumn },
							new Column[] { foreignKeyColumn });

				} else {
					foreignKeyColumn = null;
				}
			} else {
				idColumn = getIdColumn(table);
				tableRows = _tableData.get(tableName);
				Column[] foreignKeys = table.getForeignKeys();
				if (foreignKeys.length == 1) {
					foreignKeyColumn = (MutableColumn) foreignKeys[0];
				} else {
					foreignKeyColumn = null;
				}
			}

			Column textContentColumn = getTextContentColumn(table,
					element.getNodeName());
			Map columnValues = new HashMap();
			for (Attr attr : attributes) {
				String name = attr.getName();
				MutableColumn column = (MutableColumn) table
						.getColumnByName(name);
				if (column == null) {
					logger.info("Creating column: {}.{}", tableName, name);
					column = new MutableColumn(name, ColumnType.VARCHAR, table,
							table.getColumnCount(), true);
					column.setNativeType(NATIVE_TYPE_ATTRIBUTE);
					table.addColumn(column);
				}
				columnValues.put(column, attr.getValue());
			}

			// Create a row
			Object[] rowData = new Object[table.getColumnCount()];
			// Iterate id column
			int id = tableRows.size() + 1;
			rowData[idColumn.getColumnNumber()] = id;
			if (foreignKeyColumn != null) {
				rowData[foreignKeyColumn.getColumnNumber()] = parentKey;
			}
			// Add value for text content (if available)
			if (textContent != null) {
				rowData[textContentColumn.getColumnNumber()] = textContent;
			}
			// Add values for attributes
			for (Entry entry : columnValues.entrySet()) {
				rowData[entry.getKey().getColumnNumber()] = entry.getValue();
			}

			if (logger.isDebugEnabled()) {
				logger.debug("Adding data [{}] to table: {}",
						Arrays.toString(rowData), tableName);
			}

			if (!isRootElement(element)) {
				// Set the parent key column to this tables id column so
				// child tables can create relationship to it
				parentKey = id;
				parentKeyColumn = idColumn;
			}
			tableRows.add(rowData);
		}

		if (!isRootElement(element)) {
			tablePrefix = tableName + "_";
		}
		Element[] childElements = getChildElements(element);
		for (int i = 0; i < childElements.length; i++) {
			loadTables(childElements[i], tablePrefix, parentKeyColumn,
					parentKey);
		}
	}

	private Column getTextContentColumn(MutableTable table,
			String preferredColumnName) {
		Column[] columns = table.getColumns();
		MutableColumn column = null;
		for (Column col : columns) {
			if (NATIVE_TYPE_TEXT.equals(col.getNativeType())) {
				column = (MutableColumn) col;
				break;
			}
		}
		if (column == null && preferredColumnName != null) {
			logger.info("Creating text content column for table: "
					+ table.getName());
			column = new MutableColumn(preferredColumnName
					+ TEXT_CONTENT_TEMP_SUFFIX, ColumnType.VARCHAR, table,
					table.getColumnCount(), true);
			column.setNativeType(NATIVE_TYPE_TEXT);
			table.addColumn(column);
		}
		return column;
	}

	private MutableColumn getIdColumn(MutableTable table) {
		Column[] columns = table.getColumns();
		MutableColumn column = null;
		for (Column col : columns) {
			if (NATIVE_TYPE_PRIMARY_KEY.equals(col.getNativeType())) {
				column = (MutableColumn) col;
				break;
			}
		}
		if (column == null) {
			String tableName = table.getName();
			logger.info("Creating id column for table: " + tableName);
			column = new MutableColumn(tableName + "_metamodel_surrogate_id",
					ColumnType.INTEGER, table, table.getColumnCount(), false);
			column.setNativeType(NATIVE_TYPE_PRIMARY_KEY);
			column.setIndexed(true);
			table.addColumn(column);
		}
		return column;
	}

	public static String getTextContent(Element element) {
		String textContent = null;
		NodeList childNodes = element.getChildNodes();
		for (int i = 0; i < childNodes.getLength(); i++) {
			Node node = childNodes.item(i);
			if (node instanceof Text) {
				textContent = ((Text) node).getWholeText();
				break;
			}
		}
		if (textContent != null) {
			textContent = textContent.trim();
			if (!"".equals(textContent)) {
				return textContent;
			}
		}
		return null;
	}

	public static Attr[] getAttributes(Element element) {
		List result = new ArrayList();
		NamedNodeMap attributes = element.getAttributes();
		for (int i = 0; i < attributes.getLength(); i++) {
			Attr attribute = (Attr) attributes.item(i);
			result.add(attribute);
		}
		return result.toArray(new Attr[result.size()]);
	}

	public static boolean hasSiblings(Element element) {
		// Don't look for siblings when we are at the root element
		if (!isRootElement(element)) {
			String name = element.getNodeName();
			Element[] siblingNodes = getChildElements((Element) element
					.getParentNode());
			for (int i = 0; i < siblingNodes.length; i++) {
				Element siblingNode = siblingNodes[i];
				if (siblingNode != element
						&& name.equals(siblingNode.getNodeName())) {
					return true;
				}
			}
		}
		return false;
	}

	public static Element[] getChildElements(Element element) {
		List result = new ArrayList();
		NodeList childNodes = element.getChildNodes();
		for (int i = 0; i < childNodes.getLength(); i++) {
			Node child = childNodes.item(i);
			if (child instanceof Element) {
				result.add((Element) child);
			}
		}
		return result.toArray(new Element[result.size()]);
	}

	public static boolean isRootElement(Element element) {
		return !(element.getParentNode() instanceof Element);
	}

	public XmlDataContextStrategy flattenTables(Relationship relationship) {
		MutableTable primaryTable = (MutableTable) relationship
				.getPrimaryTable();
		MutableTable foreignTable = (MutableTable) relationship
				.getForeignTable();

		// Check that foreignTable is not primary table in other relationships
		// (if so we can't flatten as that would require id-rewriting of those
		// foreign tables as well)
		if (foreignTable.getPrimaryKeyRelationships().length != 0) {
			Relationship[] foreignPrimaryRelationships = foreignTable
					.getPrimaryKeyRelationships();
			String[] foreignPrimaryNames = new String[foreignPrimaryRelationships.length];
			for (int i = 0; i < foreignPrimaryRelationships.length; i++) {
				foreignPrimaryNames[i] = foreignPrimaryRelationships[i]
						.getForeignTable().getName();
			}
			throw new UnsupportedOperationException(
					"Cannot flatten foreign table '" + foreignTable.getName()
							+ "' as it acts as primary table for tables: "
							+ Arrays.toString(foreignPrimaryNames));
		}

		List primaryColumns = new ArrayList(
				Arrays.asList(primaryTable.getColumns()));
		List foreignColumns = new ArrayList(
				Arrays.asList(foreignTable.getColumns()));

		// Remove the surrogate id
		String primaryTableName = primaryTable.getName();
		String foreignTableName = foreignTable.getName();
		MutableColumn idColumn = getIdColumn(foreignTable);
		foreignColumns.remove(idColumn);

		// Remove the foreign keys
		Column[] foreignKeys = foreignTable.getForeignKeys();
		for (Column foreignKey : foreignKeys) {
			foreignColumns.remove(foreignKey);
		}

		Query q = new Query();
		q.select(primaryColumns.toArray(new Column[primaryColumns.size()]));
		q.select(foreignColumns.toArray(new Column[foreignColumns.size()]));
		q.from(new FromItem(JoinType.LEFT, relationship));
		if (logger.isDebugEnabled()) {
			logger.debug("Setting table data for '{}' to query result: {}",
					primaryTableName, q.toString());
		}
		List tableRows = executeQuery(q).toObjectArrays();

		for (Column foreignColumn : foreignColumns) {
			MutableColumn newPrimaryColumn = new MutableColumn(
					foreignColumn.getName(), foreignColumn.getType(),
					primaryTable, primaryTable.getColumnCount(),
					foreignColumn.isNullable());
			newPrimaryColumn.setIndexed(foreignColumn.isIndexed());
			newPrimaryColumn.setNativeType(foreignColumn.getNativeType());
			primaryTable.addColumn(newPrimaryColumn);
		}
		_tableData.put(primaryTableName, tableRows);
		((MutableSchema) foreignTable.getSchema()).removeTable(foreignTable);
		_tableData.remove(foreignTableName);
		((MutableRelationship) relationship).remove();

		if (logger.isInfoEnabled()) {
			logger.info("Tables '" + primaryTableName + "' and '"
					+ foreignTableName + "' flattened to: " + primaryTableName);
			if (logger.isDebugEnabled()) {
				logger.debug(primaryTableName + " columns: "
						+ Arrays.toString(primaryTable.getColumns()));
			}
		}

		return this;
	}

	/**
	 * Automatically flattens tables that only contain a single data carrying
	 * column. Data carrying column are all columns that are not artificial
	 * columns (created to enable referential integrity between tag-to-table
	 * mapped tables).
	 */
	public XmlDataContextStrategy autoFlattenTables() {
		Table[] tables = _schema.getTables();
		for (Table table : tables) {
			// First check to see that this table still exist (ie. has not been
			// flattened in a previous loop)
			if (_tableData.containsKey(table.getName())) {
				// Find all tables that represent inner tags
				Relationship[] foreignKeyRelationships = table
						.getForeignKeyRelationships();
				if (foreignKeyRelationships.length == 1
						&& table.getPrimaryKeyRelationships().length == 0) {
					Relationship foreignKeyRelationship = foreignKeyRelationships[0];

					// If there is exactly one inner tag then we can probably
					// flatten the tables, but it's only relevant if the inner
					// tag only carry a single data column
					int nonDataColumns = 0;
					Column[] columns = table.getColumns();
					for (Column column : columns) {
						String nativeType = column.getNativeType();
						// Use the native column type constants to determine if
						// the column is an artificial column
						if (NATIVE_TYPE_FOREIGN_KEY.equals(nativeType)
								|| NATIVE_TYPE_PRIMARY_KEY.equals(nativeType)) {
							nonDataColumns++;
						}
					}

					if (columns.length == nonDataColumns + 1) {
						// If the foreign key is unique for all rows, we will
						// flatten it (otherwise it means that multiple inner
						// tags occur, which requires two tables to deal with
						// multiplicity)
						boolean uniqueForeignKeys = true;

						Column[] foreignColumns = foreignKeyRelationship
								.getForeignColumns();

						SelectItem countAllItem = SelectItem.getCountAllItem();
						Query q = new Query().select(foreignColumns)
								.select(countAllItem).from(table)
								.groupBy(foreignColumns);
						DataSet data = executeQuery(q);
						Comparable comparable = NumberComparator
								.getComparable(1);
						while (data.next()) {
							Object value = data.getRow().getValue(countAllItem);
							if (comparable.compareTo(value) < 0) {
								// If the value is compared larger than 1, we
								// have several inner tags
								uniqueForeignKeys = false;
								break;
							}
						}
						data.close();

						if (uniqueForeignKeys) {
							flattenTables(foreignKeyRelationship);
						}
					}
				}
			}
		}
		return this;
	}
}