All downloads are free. The search and download functionality uses the official Maven repository.

dk.eobjects.metamodel.XmlDataContextStrategy Maven / Gradle / Ivy

Go to download

The eobjects.dk MetaModel is a common domain model, query-engine and optimizer for different kinds of datastores.

The newest version!
/*
 * Copyright 2008 eobjects.dk
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package dk.eobjects.metamodel;

import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import dk.eobjects.metamodel.data.DataSet;
import dk.eobjects.metamodel.query.FromItem;
import dk.eobjects.metamodel.query.JoinType;
import dk.eobjects.metamodel.query.Query;
import dk.eobjects.metamodel.query.SelectItem;
import dk.eobjects.metamodel.schema.Column;
import dk.eobjects.metamodel.schema.ColumnType;
import dk.eobjects.metamodel.schema.Relationship;
import dk.eobjects.metamodel.schema.Schema;
import dk.eobjects.metamodel.schema.Table;
import dk.eobjects.metamodel.schema.TableType;
import dk.eobjects.metamodel.util.NumberComparator;

/**
 * A DataContext strategy that reads XML content and maps it to a table-based
 * model similar to the rest of MetaModel. Tables are created by examining the
 * data in the XML file, NOT by reading XML schemas (XSDs/DTDs). This makes the
 * strategy compatible with ALL XML formats, but it also carries a risk: two
 * XML files with the same format won't necessarily yield the same table model
 * if some optional attributes or tags are omitted in one of the files.
 */
public class XmlDataContextStrategy extends QueryPostprocessDataContextStrategy {

	public static final String NATIVE_TYPE_PRIMARY_KEY = "Auto-generated primary key";
	public static final String NATIVE_TYPE_FOREIGN_KEY = "Auto-generated foreign key";
	public static final String NATIVE_TYPE_ATTRIBUTE = "XML Attribute";
	public static final String NATIVE_TYPE_TEXT = "XML Text";
	private static final String TEXT_CONTENT_TEMP_SUFFIX = "_metamodel_text_content";
	private static final Log _log = LogFactory
			.getLog(XmlDataContextStrategy.class);
	private File _file;
	private Schema _schema;
	private Map> _tableData = new HashMap>();;
	private boolean _autoFlattenTables;
	private String _schemaName;

	/**
	 * Creates an XML DataContext strategy from a Document that has already
	 * been parsed. The table model is built immediately.
	 * 
	 * @param schemaName
	 *            the name to give the resulting schema
	 * @param document
	 *            the parsed XML document to build tables from
	 * @param autoFlattenTables
	 *            whether single-value child tables should be flattened into
	 *            their parent table after loading
	 */
	public XmlDataContextStrategy(String schemaName, Document document,
			boolean autoFlattenTables) {
		_schemaName = schemaName;
		_autoFlattenTables = autoFlattenTables;
		// The schema has to exist before the document is traversed, because
		// the traversal registers its tables on it.
		_schema = new Schema(schemaName);
		loadSchema(document);
	}

	/**
	 * Creates an XML DataContext strategy based on a file. The file is not
	 * parsed here; parsing happens the first time the schema is loaded.
	 * 
	 * @param file
	 *            the file to parse, must not be null
	 * @param autoFlattenTables
	 *            a parameter indicating whether or not tags with only text
	 *            content or a single attribute should be flattened with its
	 *            parent table
	 * @throws IllegalArgumentException
	 *             if file is null
	 */
	public XmlDataContextStrategy(File file, boolean autoFlattenTables) {
		if (file == null) {
			// Fail with a clear message rather than a bare NullPointerException
			// from file.getName() below.
			throw new IllegalArgumentException("file cannot be null");
		}
		_file = file;
		// The schema is named after the file
		_schemaName = file.getName();
		_autoFlattenTables = autoFlattenTables;
	}

	/**
	 * Creates an XML DataContext strategy based on a file, with automatic
	 * flattening of tables enabled.
	 * 
	 * @param file
	 *            the file to parse
	 */
	public XmlDataContextStrategy(File file) {
		this(file, true);
	}

	/**
	 * @return whether tables are flattened automatically when the schema is
	 *         loaded
	 */
	public boolean isAutoFlattenTables() {
		return this._autoFlattenTables;
	}

	/**
	 * Sets whether tables are flattened automatically when the schema is
	 * loaded. The flag is only read while a schema is being loaded.
	 * 
	 * @param autoFlattenTables
	 *            the new value of the flag
	 */
	public void setAutoFlattenTables(boolean autoFlattenTables) {
		this._autoFlattenTables = autoFlattenTables;
	}

	/**
	 * Produces a DataSet containing the requested columns of a table. The
	 * schema is loaded first if necessary.
	 * 
	 * @param table
	 *            the table to read rows from
	 * @param columns
	 *            the columns to include, in the order they should appear
	 * @param maxRows
	 *            the maximum number of rows to return; 0 yields no rows and a
	 *            negative value is effectively unlimited
	 * @return the materialized rows, or null if no row data is held for the
	 *         table
	 */
	@Override
	public DataSet materializeTable(Table table, Column[] columns, int maxRows) {
		loadSchema();
		List<Object[]> tableData = _tableData.get(table.getName());
		if (tableData == null) {
			return null;
		}

		List<Object[]> resultData = new ArrayList<Object[]>();
		// Count down on a local copy instead of modifying the parameter
		int remainingRows = maxRows;
		for (Object[] tableDataRow : tableData) {
			if (remainingRows == 0) {
				break;
			}
			remainingRows--;
			Object[] dataValues = new Object[columns.length];
			for (int i = 0; i < columns.length; i++) {
				int columnNumber = columns[i].getColumnNumber();
				// Some rows may not contain values for all columns
				// (attributes); those positions stay null
				if (columnNumber < tableDataRow.length) {
					dataValues[i] = tableDataRow[columnNumber];
				}
			}
			resultData.add(dataValues);
		}

		SelectItem[] selectItems = new SelectItem[columns.length];
		for (int i = 0; i < columns.length; i++) {
			selectItems[i] = new SelectItem(columns[i]);
		}

		return new DataSet(selectItems, resultData);
	}

	/**
	 * @return a single-element array holding the name of the one schema this
	 *         strategy exposes
	 */
	public String[] getSchemaNames() throws MetaModelException {
		String[] schemaNames = new String[1];
		schemaNames[0] = _schemaName;
		return schemaNames;
	}

	/**
	 * @return the name of the schema of this strategy
	 */
	public String getDefaultSchemaName() throws MetaModelException {
		return this._schemaName;
	}

	/**
	 * Looks up a schema by name, loading it first if it has not been loaded.
	 * 
	 * @param name
	 *            the schema name to look for
	 * @return the schema if the name equals this strategy's schema name,
	 *         otherwise null
	 */
	public Schema getSchemaByName(String name) throws MetaModelException {
		if (name == null || !name.equals(_schemaName)) {
			return null;
		}
		loadSchema();
		return _schema;
	}

	/**
	 * Discards the current schema and loads it again, regardless of whether
	 * it was loaded before.
	 * 
	 * @return this strategy
	 */
	public XmlDataContextStrategy reloadSchema() {
		// Dropping the reference is what makes loadSchema() do its work again
		_schema = null;
		return loadSchema();
	}

	/**
	 * Loads the schema from the file if it hasn't been loaded before.
	 * 
	 * @return this strategy
	 * @throws MetaModelException
	 *             if the file cannot be parsed or the table model cannot be
	 *             built; the schema is then left unloaded so a later call
	 *             tries again
	 */
	public XmlDataContextStrategy loadSchema() {
		if (_schema != null) {
			return this;
		}

		_schema = new Schema(_schemaName);
		try {
			// NOTE(review): the parser is used with its default settings, so
			// DTDs and external entities are presumably still processed.
			// Consider hardening this if untrusted files can be loaded.
			DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
			dbf.setIgnoringComments(true);
			DocumentBuilder db = dbf.newDocumentBuilder();
			Document document = db.parse(_file);
			loadSchema(document);
		} catch (Exception e) {
			// Drop the partially built schema. Otherwise the next call would
			// consider it loaded and silently expose incomplete content.
			_schema = null;
			throw new MetaModelException("Error parsing XML file: "
					+ e.getMessage(), e);
		}
		return this;
	}

	/**
	 * Builds the table model and row data from a parsed document and then
	 * tidies up the result: tables without row data are removed, the
	 * generated id column is renamed to "id" where that name is free, and the
	 * text content column is either removed (never populated) or renamed to
	 * its name without the temporary suffix where that name is free. Finally
	 * tables are flattened if auto-flattening is enabled.
	 * 
	 * @param document
	 *            the parsed XML document
	 */
	private void loadSchema(Document document) {
		Element rootElement = document.getDocumentElement();
		loadTables(rootElement, "", null, 0);

		Table[] tables = _schema.getTables();
		for (Table table : tables) {
			String tableName = table.getName();
			List<Object[]> tableRows = _tableData.get(tableName);
			if (tableRows == null) {
				// Remove tables from schema that has no data
				_log.info("Remove table (no data in it):" + tableName);
				_schema.removeTable(table);
				continue;
			}

			// Rename all ID columns to reasonable names (preferably "id")
			Column idColumn = getIdColumn(table);
			if (table.getColumnByName("id") == null) {
				idColumn.setName("id");
			}

			Column textContentColumn = getTextContentColumn(table, null);
			if (textContentColumn == null) {
				// Nothing to remove or rename for this table
				continue;
			}

			// Find out whether any row carries text content
			int textContentColumnIndex = textContentColumn.getColumnNumber();
			boolean populated = false;
			for (Object[] row : tableRows) {
				if (textContentColumnIndex < row.length
						&& row[textContentColumnIndex] != null) {
					populated = true;
					break;
				}
			}

			if (!populated) {
				// Remove text content column, if it is never populated
				table.removeColumn(textContentColumn);
				continue;
			}

			// Rename text content columns to reasonable names (preferably
			// the element node name). Only strip the suffix when the name
			// actually ends with it.
			String currentName = textContentColumn.getName();
			if (currentName.endsWith(TEXT_CONTENT_TEMP_SUFFIX)) {
				String preferredName = currentName.substring(0, currentName
						.length()
						- TEXT_CONTENT_TEMP_SUFFIX.length());
				if (table.getColumnByName(preferredName) == null) {
					textContentColumn.setName(preferredName);
				}
			}
		}
		if (_autoFlattenTables) {
			autoFlattenTables();
		}
	}

	/**
	 * Recursively maps an element and its descendants to tables and rows.
	 * An element gets a row in a table when it has attributes, has non-blank
	 * text content, or shares its node name with a sibling element. The table
	 * is created on first use, together with an id column, a text content
	 * column and, if a parent key column is given, a foreign key column and a
	 * relationship to the parent table.
	 * 
	 * @param element
	 *            the element to map
	 * @param tablePrefix
	 *            prefix prepended to the element's node name to form the
	 *            table name
	 * @param parentKeyColumn
	 *            id column of the closest mapped ancestor, or null if there is
	 *            none
	 * @param parentKey
	 *            id value of the closest mapped ancestor row
	 */
	private void loadTables(Element element, String tablePrefix,
			Column parentKeyColumn, int parentKey) {
		Attr[] attributes = getAttributes(element);
		String textContent = getTextContent(element);
		String tableName = tablePrefix + element.getNodeName();
		if (attributes.length > 0 || textContent != null
				|| hasSiblings(element)) {
			// We need to represent this type of node with a table
			Table table = _schema.getTableByName(tableName);
			Column idColumn;
			Column foreignKeyColumn;
			List<Object[]> tableRows;
			if (table == null) {
				_log.info("Creating table: " + tableName);
				table = new Table(tableName, TableType.TABLE, _schema);
				_schema.addTable(table);
				idColumn = getIdColumn(table);
				tableRows = new ArrayList<Object[]>();
				_tableData.put(tableName, tableRows);

				if (parentKeyColumn != null) {
					Table parentTable = parentKeyColumn.getTable();
					foreignKeyColumn = new Column(
							parentTable.getName() + "_id", parentKeyColumn
									.getType(), table, table.getColumnCount(),
							false);
					foreignKeyColumn.setNativeType(NATIVE_TYPE_FOREIGN_KEY);
					table.addColumn(foreignKeyColumn);

					Relationship.createRelationship(
							new Column[] { parentKeyColumn },
							new Column[] { foreignKeyColumn });

				} else {
					foreignKeyColumn = null;
				}
			} else {
				idColumn = getIdColumn(table);
				tableRows = _tableData.get(tableName);
				Column[] foreignKeys = table.getForeignKeys();
				if (foreignKeys.length == 1) {
					foreignKeyColumn = foreignKeys[0];
				} else {
					foreignKeyColumn = null;
				}
			}

			Column textContentColumn = getTextContentColumn(table, element
					.getNodeName());

			// Collect the attribute values first: new columns may be added
			// here, and the row array is sized from the final column count.
			Map<Column, String> columnValues = new HashMap<Column, String>();
			for (Attr attr : attributes) {
				String name = attr.getName();
				Column column = table.getColumnByName(name);
				if (column == null) {
					_log.info("Creating column: " + tableName + "." + name);
					column = new Column(name, ColumnType.VARCHAR, table, table
							.getColumnCount(), true);
					column.setNativeType(NATIVE_TYPE_ATTRIBUTE);
					table.addColumn(column);
				}
				columnValues.put(column, attr.getValue());
			}

			// Create a row
			Object[] rowData = new Object[table.getColumnCount()];
			// Ids are assigned sequentially per table, starting at 1
			int id = tableRows.size() + 1;
			rowData[idColumn.getColumnNumber()] = id;
			if (foreignKeyColumn != null) {
				rowData[foreignKeyColumn.getColumnNumber()] = parentKey;
			}
			// Add value for text content (if available)
			if (textContent != null) {
				rowData[textContentColumn.getColumnNumber()] = textContent;
			}
			// Add values for attributes
			for (Entry<Column, String> entry : columnValues.entrySet()) {
				rowData[entry.getKey().getColumnNumber()] = entry.getValue();
			}

			if (_log.isDebugEnabled()) {
				_log.debug("Adding data [" + ArrayUtils.toString(rowData)
						+ "] to table: " + tableName);
			}

			if (!isRootElement(element)) {
				// Set the parent key column to this tables id column so
				// child tables can create relationship to it
				parentKey = id;
				parentKeyColumn = idColumn;
			}
			tableRows.add(rowData);
		}

		// The root element's name is not used as a prefix for table names
		if (!isRootElement(element)) {
			tablePrefix = tableName + "_";
		}
		Element[] childElements = getChildElements(element);
		for (int i = 0; i < childElements.length; i++) {
			loadTables(childElements[i], tablePrefix, parentKeyColumn,
					parentKey);
		}
	}

	/**
	 * Finds the text content column of a table, identified by its native
	 * type, and optionally creates it when it is missing.
	 * 
	 * @param table
	 *            the table to search
	 * @param preferredColumnName
	 *            base name for a newly created column (the temporary suffix
	 *            is appended to it), or null to not create a column
	 * @return the text content column, or null if there is none and no
	 *         preferred name was given
	 */
	private Column getTextContentColumn(Table table, String preferredColumnName) {
		for (Column candidate : table.getColumns()) {
			if (NATIVE_TYPE_TEXT.equals(candidate.getNativeType())) {
				return candidate;
			}
		}
		if (preferredColumnName == null) {
			return null;
		}

		_log.info("Creating text content column for table: "
				+ table.getName());
		String columnName = preferredColumnName + TEXT_CONTENT_TEMP_SUFFIX;
		Column textColumn = new Column(columnName, ColumnType.VARCHAR, table,
				table.getColumnCount(), true);
		textColumn.setNativeType(NATIVE_TYPE_TEXT);
		table.addColumn(textColumn);
		return textColumn;
	}

	/**
	 * Finds the generated id column of a table, identified by its native
	 * type, creating and adding it when the table has none yet.
	 * 
	 * @param table
	 *            the table to search
	 * @return the existing or newly created id column
	 */
	private Column getIdColumn(Table table) {
		for (Column candidate : table.getColumns()) {
			if (NATIVE_TYPE_PRIMARY_KEY.equals(candidate.getNativeType())) {
				return candidate;
			}
		}

		String tableName = table.getName();
		_log.info("Creating id column for table: " + tableName);
		String columnName = tableName + "_metamodel_surrogate_id";
		Column surrogateId = new Column(columnName, ColumnType.INTEGER, table,
				table.getColumnCount(), false);
		surrogateId.setNativeType(NATIVE_TYPE_PRIMARY_KEY);
		surrogateId.setIndexed(true);
		table.addColumn(surrogateId);
		return surrogateId;
	}

	/**
	 * Gets the trimmed text content found directly inside an element. Text
	 * of descendant elements is not included.
	 * 
	 * @param element
	 *            the element to read text from
	 * @return the trimmed text of the first direct text node that is not
	 *         blank, or null if the element has no non-blank direct text
	 */
	public static String getTextContent(Element element) {
		NodeList childNodes = element.getChildNodes();
		for (int i = 0; i < childNodes.getLength(); i++) {
			Node node = childNodes.item(i);
			if (node instanceof Text) {
				String textContent = ((Text) node).getWholeText();
				if (textContent != null) {
					textContent = textContent.trim();
					// Keep looking when this node is only whitespace, e.g.
					// indentation in front of a child element; real text may
					// follow further on.
					if (textContent.length() > 0) {
						return textContent;
					}
				}
			}
		}
		return null;
	}

	/**
	 * Gets the attributes of an element as an array.
	 * 
	 * @param element
	 *            the element to read attributes from
	 * @return the attributes in the order the element's attribute map yields
	 *         them; empty if the element has no attributes
	 */
	public static Attr[] getAttributes(Element element) {
		NamedNodeMap attributes = element.getAttributes();
		Attr[] result = new Attr[attributes.getLength()];
		for (int i = 0; i < result.length; i++) {
			result[i] = (Attr) attributes.item(i);
		}
		return result;
	}

	/**
	 * Determines whether an element has at least one sibling element with
	 * the same node name.
	 * 
	 * @param element
	 *            the element to check
	 * @return true if another child element of the same parent carries the
	 *         same node name; always false for the root element
	 */
	public static boolean hasSiblings(Element element) {
		// Don't look for siblings when we are at the root element
		if (isRootElement(element)) {
			return false;
		}

		String nodeName = element.getNodeName();
		Element parent = (Element) element.getParentNode();
		for (Element candidate : getChildElements(parent)) {
			boolean isOtherElement = candidate != element;
			if (isOtherElement && nodeName.equals(candidate.getNodeName())) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Gets the direct child elements of an element, skipping all other kinds
	 * of child nodes such as text.
	 * 
	 * @param element
	 *            the parent element
	 * @return the child elements in document order; empty if there are none
	 */
	public static Element[] getChildElements(Element element) {
		List<Element> result = new ArrayList<Element>();
		NodeList childNodes = element.getChildNodes();
		for (int i = 0; i < childNodes.getLength(); i++) {
			Node child = childNodes.item(i);
			if (child instanceof Element) {
				result.add((Element) child);
			}
		}
		return result.toArray(new Element[result.size()]);
	}

	/**
	 * Determines whether an element is a root element, meaning that its
	 * parent node is not itself an element.
	 * 
	 * @param element
	 *            the element to check
	 * @return true if the element has no parent element
	 */
	public static boolean isRootElement(Element element) {
		Node parent = element.getParentNode();
		boolean hasParentElement = parent instanceof Element;
		return !hasParentElement;
	}

	/**
	 * Merges the foreign table of a relationship into its primary table. The
	 * primary table's rows are replaced by the result of a left join over the
	 * relationship, the foreign table's data columns (everything except its
	 * id and foreign key columns) are added to the primary table, and the
	 * foreign table, its row data and the relationship are removed.
	 * 
	 * @param relationship
	 *            the relationship whose foreign table should be folded into
	 *            its primary table
	 * @return this strategy
	 */
	public XmlDataContextStrategy flattenTables(Relationship relationship) {
		Table primaryTable = relationship.getPrimaryTable();
		Table foreignTable = relationship.getForeignTable();
		List<Column> primaryColumns = new ArrayList<Column>(Arrays
				.asList(primaryTable.getColumns()));
		List<Column> foreignColumns = new ArrayList<Column>(Arrays
				.asList(foreignTable.getColumns()));

		// Remove the surrogate id
		String foreignTableName = foreignTable.getName();
		Column idColumn = getIdColumn(foreignTable);
		foreignColumns.remove(idColumn);

		// Remove the foreign keys
		Column[] foreignKeys = foreignTable.getForeignKeys();
		for (Column foreignKey : foreignKeys) {
			foreignColumns.remove(foreignKey);
		}

		// Select the primary columns first and the remaining foreign columns
		// after them, which is also the order the new columns are added in
		// further down.
		Query q = new Query();
		q.select(primaryColumns.toArray(new Column[primaryColumns.size()]));
		q.select(foreignColumns.toArray(new Column[foreignColumns.size()]));
		q.from(new FromItem(JoinType.LEFT, relationship));
		if (_log.isDebugEnabled()) {
			_log.debug("Setting table data for '" + primaryTable.getName()
					+ "' to query result: " + q.toString());
		}
		List<Object[]> tableRows = executeQuery(q).toObjectArrays();

		// Recreate the foreign data columns at the end of the primary table
		for (Column foreignColumn : foreignColumns) {
			Column newPrimaryColumn = new Column(foreignColumn.getName(),
					foreignColumn.getType(), primaryTable, primaryTable
							.getColumnCount(), foreignColumn.isNullable());
			newPrimaryColumn.setIndexed(foreignColumn.isIndexed());
			newPrimaryColumn.setNativeType(foreignColumn.getNativeType());
			primaryTable.addColumn(newPrimaryColumn);
		}
		_tableData.put(primaryTable.getName(), tableRows);

		foreignTable.getSchema().removeTable(foreignTable);
		_tableData.remove(foreignTableName);
		relationship.remove();
		return this;
	}

	/**
	 * Flattens every table that qualifies into its parent table. A table
	 * qualifies when it has exactly one foreign key relationship, exactly one
	 * column that is neither a generated id nor a generated foreign key, and
	 * no foreign key value that occurs in more than one row.
	 * 
	 * @return this strategy
	 */
	public XmlDataContextStrategy autoFlattenTables() {
		Table[] tables = _schema.getTables();
		for (Table table : tables) {
			Relationship[] foreignKeyRelationships = table
					.getForeignKeyRelationships();
			if (foreignKeyRelationships.length != 1) {
				continue;
			}

			int nonDataColumns = 0;
			Column[] columns = table.getColumns();
			for (Column column : columns) {
				String nativeType = column.getNativeType();
				if (NATIVE_TYPE_FOREIGN_KEY.equals(nativeType)
						|| NATIVE_TYPE_PRIMARY_KEY.equals(nativeType)) {
					nonDataColumns++;
				}
			}

			// Only tables with a single data carrying column are candidates
			if (columns.length != nonDataColumns + 1) {
				continue;
			}

			// Count the rows per foreign key value; a count above 1 means
			// the foreign key is not unique
			Relationship relationship = foreignKeyRelationships[0];
			Column[] foreignColumns = relationship.getForeignColumns();
			SelectItem countAllItem = SelectItem.getCountAllItem();
			Query q = new Query().select(foreignColumns).select(
					countAllItem).from(table).groupBy(foreignColumns);
			Comparable<Object> comparable = NumberComparator
					.getComparable(1);

			boolean uniqueForeignKeys = true;
			DataSet data = executeQuery(q);
			try {
				while (data.next()) {
					Object value = data.getRow().getValue(countAllItem);
					if (comparable.compareTo(value) < 0) {
						uniqueForeignKeys = false;
						break;
					}
				}
			} finally {
				// Close the data set even if reading it fails
				data.close();
			}

			if (uniqueForeignKeys) {
				flattenTables(relationship);
			}
		}
		return this;
	}
}