All downloads are free. Search and download functionality uses the official Maven repository.

org.eobjects.metamodel.xml.XmlSaxDataContext Maven / Gradle / Ivy

/**
 * eobjects.org MetaModel
 * Copyright (C) 2010 eobjects.org
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA  02110-1301  USA
 */
package org.eobjects.metamodel.xml;

import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.eobjects.metamodel.MetaModelException;
import org.eobjects.metamodel.QueryPostprocessDataContext;
import org.eobjects.metamodel.data.DataSet;
import org.eobjects.metamodel.data.RowPublisher;
import org.eobjects.metamodel.data.RowPublisherDataSet;
import org.eobjects.metamodel.query.SelectItem;
import org.eobjects.metamodel.schema.Column;
import org.eobjects.metamodel.schema.ColumnType;
import org.eobjects.metamodel.schema.ImmutableSchema;
import org.eobjects.metamodel.schema.MutableColumn;
import org.eobjects.metamodel.schema.MutableSchema;
import org.eobjects.metamodel.schema.MutableTable;
import org.eobjects.metamodel.schema.Schema;
import org.eobjects.metamodel.schema.Table;
import org.eobjects.metamodel.util.Action;
import org.eobjects.metamodel.util.FileHelper;
import org.eobjects.metamodel.util.Ref;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * XML datacontext which uses SAX parsing for fast and memory effecient reading
 * of XML files.
 * 
 * The DataContext requires the user to specify a set of (simplified) XPaths to
 * define which elements are row delimitors and which elements or attributes are
 * value/column definitions.
 * 
 * @author Kasper Sørensen
 */
public class XmlSaxDataContext extends QueryPostprocessDataContext {

	private static final Logger logger = LoggerFactory
			.getLogger(XmlSaxDataContext.class);

	public static final String COLUMN_NAME_ROW_ID = "row_id";

	private final Ref _inputSourceRef;
	private final Map> _valueXpaths;
	private String _schemaName;
	private XmlSaxTableDef[] _tableDefs;

	/**
	 * Constructs an XML DataContext based on SAX parsing.
	 * 
	 * @param inputSourceRef
	 *            a factory reference for the input source to read the XML from.
	 *            The ref will be repeatedly called for each access to the file!
	 * @param tableDefs
	 *            an array of table definitions, which provide instructions as
	 *            to the xpaths to apply to the document.
	 * 
	 * @see XmlSaxTableDef
	 */
	public XmlSaxDataContext(Ref inputSourceRef,
			XmlSaxTableDef... tableDefs) {
		_inputSourceRef = inputSourceRef;
		_tableDefs = tableDefs;
		_valueXpaths = new HashMap>();
		_schemaName = null;

		for (XmlSaxTableDef tableDef : tableDefs) {
			LinkedHashMap xpathMap = new LinkedHashMap();
			_valueXpaths.put(tableDef, xpathMap);
			String[] valueXpaths = tableDef.getValueXpaths();
			for (String valueXpath : valueXpaths) {
				xpathMap.put(getName(tableDef, valueXpath), valueXpath);
			}
		}
	}

	public XmlSaxDataContext(final File file, XmlSaxTableDef... tableDefs) {
		this(createInputSourceRef(file), tableDefs);
	}

	private static Ref createInputSourceRef(final File file) {
		return new Ref() {
			@Override
			public InputSource get() {
				return new InputSource(FileHelper.getReader(file));
			}
		};
	}

	@Override
	protected Schema getMainSchema() throws MetaModelException {
		final MutableSchema schema = new MutableSchema(getMainSchemaName());

		for (XmlSaxTableDef tableDef : _tableDefs) {
			final String rowXpath = tableDef.getRowXpath();
			final MutableTable table = new MutableTable(getTableName(tableDef))
					.setSchema(schema).setRemarks("XPath: " + rowXpath);

			final MutableColumn rowIndexColumn = new MutableColumn(
					COLUMN_NAME_ROW_ID, ColumnType.INTEGER).setColumnNumber(0)
					.setNullable(false).setTable(table)
					.setRemarks("Row/tag index (0-based)");
			table.addColumn(rowIndexColumn);

			for (String valueXpath : tableDef.getValueXpaths()) {
				final MutableColumn column = new MutableColumn(getName(
						tableDef, valueXpath)).setRemarks("XPath: "
						+ valueXpath);
				if (valueXpath.startsWith("index(") && valueXpath.endsWith(")")) {
					column.setType(ColumnType.INTEGER);
				} else {
					column.setType(ColumnType.VARCHAR);
				}
				column.setTable(table);
				table.addColumn(column);
			}
			schema.addTable(table);
		}

		return new ImmutableSchema(schema);
	}

	private XmlSaxTableDef getTableDef(Table table) {
		for (XmlSaxTableDef tableDef : _tableDefs) {
			if (getTableName(tableDef).equals(table.getName())) {
				return tableDef;
			}
		}
		throw new IllegalArgumentException("No table def found for table "
				+ table);
	}

	private String getTableName(XmlSaxTableDef tableDef) {
		String xpath = tableDef.getRowXpath();
		int lastIndexOf = xpath.lastIndexOf('/');
		if (lastIndexOf != -1) {
			xpath = xpath.substring(lastIndexOf);
		}
		return xpath;
	}

	private String getName(XmlSaxTableDef tableDef, String xpath) {
		String rowXpath = tableDef.getRowXpath();
		if (xpath.startsWith(rowXpath)) {
			xpath = xpath.substring(rowXpath.length());
		}
		return xpath;
	}

	@Override
	protected String getMainSchemaName() throws MetaModelException {
		if (_schemaName == null) {
			// when querying the schema name for the first time, pick the first
			// element of the document.
			try {
				SAXParserFactory saxFactory = SAXParserFactory.newInstance();
				SAXParser saxParser = saxFactory.newSAXParser();
				XMLReader xmlReader = saxParser.getXMLReader();
				xmlReader.setContentHandler(new DefaultHandler() {
					@Override
					public void startElement(String uri, String localName,
							String qName, Attributes attributes)
							throws SAXException {
						if (qName != null && qName.length() > 0) {
							_schemaName = '/' + qName;
							throw new XmlStopParsingException();
						}
					}
				});
				xmlReader.parse(_inputSourceRef.get());
			} catch (XmlStopParsingException e) {
				logger.debug("Parsing stop signal thrown");
			} catch (Exception e) {
				logger.error(
						"Unexpected error occurred while retrieving schema name",
						e);
				if (e instanceof RuntimeException) {
					throw (RuntimeException) e;
				}
				throw new IllegalStateException(e);
			}
		}
		return _schemaName;
	}

	@Override
	protected DataSet materializeMainSchemaTable(Table table, Column[] columns,
			int maxRows) {
		final XmlSaxTableDef tableDef = getTableDef(table);

		final String[] valueXpaths = new String[columns.length];
		final SelectItem[] selectItems = new SelectItem[columns.length];
		for (int i = 0; i < columns.length; i++) {
			final Column column = columns[i];
			selectItems[i] = new SelectItem(column);
			valueXpaths[i] = getXpath(tableDef, column);
		}

		final Action rowPublisherAction = new Action() {
			@Override
			public void run(RowPublisher rowPublisher) throws Exception {
				SAXParserFactory saxFactory = SAXParserFactory.newInstance();
				SAXParser saxParser = saxFactory.newSAXParser();
				XMLReader xmlReader = saxParser.getXMLReader();
				xmlReader.setContentHandler(new XmlSaxContentHandler(tableDef
						.getRowXpath(), rowPublisher, valueXpaths));
				try {
					xmlReader.parse(_inputSourceRef.get());
				} catch (XmlStopParsingException e) {
					logger.debug("Parsing stop signal thrown");
				} catch (Exception e) {
					logger.warn("Unexpected error occurred while parsing", e);
					throw e;
				} finally {
					rowPublisher.finished();
				}
			}
		};
		return new RowPublisherDataSet(selectItems, maxRows, rowPublisherAction);
	}

	private String getXpath(XmlSaxTableDef tableDef, Column column) {
		String columnName = column.getName();
		if (COLUMN_NAME_ROW_ID.equals(columnName)) {
			return "index(" + tableDef.getRowXpath() + ")";
		}
		String result = _valueXpaths.get(tableDef).get(columnName);
		if (result == null) {
			return columnName;
		}
		return result;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy