All downloads are free. The search and download features use the official Maven repository.

de.julielab.xmlData.config.FieldConfig Maven / Gradle / Ivy

Go to download

A utility for managing documents stored in a PostgreSQL database. The documents are imported into a PostgreSQL DB as full texts so that they can be retrieved efficiently by their PubMed ID. For more sophisticated tasks, a user configuration file can be supplied to control the table schema to use, the PostgreSQL schema to use, the database server to connect to, and the concrete database.

The newest version!
/**
 * FieldDefinition.java
 *
 * Copyright (c) 2011, JULIE Lab.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 *
 * Author: faessler
 *
 * Current version: 1.0
 * Since version:   1.0
 *
 * Creation date: 11.03.2011
 **/

package de.julielab.xmlData.config;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ximpleware.AutoPilot;
import com.ximpleware.EOFException;
import com.ximpleware.EncodingException;
import com.ximpleware.EntityException;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.PilotException;
import com.ximpleware.VTDException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;

import de.julielab.xml.JulieXMLConstants;
import de.julielab.xml.JulieXMLTools;
import de.julielab.xmlData.Constants;

/**
 * This class holds the definition of fields for the database table to work
 * with. The definition was read from the configuration XML file.
 * 
 * @author faessler
 */
public class FieldConfig extends ConfigBase {
	
	// Class-wide SLF4J logger.
	private static final Logger log = LoggerFactory.getLogger(FieldConfig.class);

	// XPath expressions into the configuration XML; each one extends the previous one.
	private static final String XPATH_CONF_SCHEMA_INFO = "//DBSchemaInformation";
	private static final String XPATH_CONF_SCHEMES = XPATH_CONF_SCHEMA_INFO + "/tableSchemas";
	private static final String XPATH_CONF_SCHEME = XPATH_CONF_SCHEMES + "/tableSchema";

	// Format templates: the single %s placeholder is filled with a table schema name.
	// The first one selects the field elements of that schema ...
	private static final String XPATH_CONF_FIELD_TEMPLATE = XPATH_CONF_SCHEME + "[@name='%s']/field";
	// ... the second one selects the schema element itself.
	private static final String XPATH_CONF_ACTIVE_SCHEME_TEMPLATE = XPATH_CONF_SCHEME + "[@name='%s']";

	private List> fields;
	private Map> fieldNameMap;
	private String forEachXPath;
	private String[] primaryKey;
	private String[] columns;
	private String[] columnsToRetrieve;
	private String timestampFieldName = null;
	private List primaryKeyFieldNumbers = null;
	private byte[] configData;

	/**
	 * Returns the name of the table schema described by this configuration.
	 *
	 * @return the schema name that was passed to the constructor
	 */
	public String getName() {
		return this.name;
	}

	/**
	 * Creates the field configuration of a single table schema by reading the
	 * schema's field definitions from the given configuration data.
	 *
	 * @param configData
	 *            the configuration XML as bytes; the array is stored as given,
	 *            no copy is made
	 * @param schemaName
	 *            the name of the table schema whose fields are read
	 * @throws VTDException
	 *             if parsing the configuration or evaluating the XPath
	 *             expressions fails
	 */
	public FieldConfig(byte[] configData, String schemaName) throws VTDException {
		this.name = schemaName;
		this.configData = configData;
		buildFields(this.configData, this.name);
	}

	private void buildFields(byte[] mergedConfData, String activeSchemeName)
			throws EncodingException, EOFException, EntityException, ParseException,
			XPathParseException, XPathEvalException, NavException, PilotException {
		VTDGen vg = new VTDGen();
		vg.setDoc(mergedConfData);
		vg.parse(true);
		VTDNav vn = vg.getNav();
		AutoPilot ap = new AutoPilot(vn);

		ap.selectXPath(String.format(XPATH_CONF_ACTIVE_SCHEME_TEMPLATE, activeSchemeName));
		if (ap.evalXPath() != -1) {
			int attrIndex = vn.getAttrVal(JulieXMLConstants.FOR_EACH);
			if (attrIndex != -1)
				forEachXPath = vn.toString(attrIndex);
		}

		ap.selectXPath(String.format(XPATH_CONF_FIELD_TEMPLATE, activeSchemeName));
		AutoPilot ap2 = new AutoPilot(vn);
		fields = new ArrayList>();
		fieldNameMap = new HashMap>();

		boolean xPathFound = false;
		while (ap.evalXPath() != -1) {
			xPathFound = true;
			Map field = new LinkedHashMap();
			String fieldName = null;
			int i = -1;
			ap2.selectAttr("*");
			while ((i = ap2.iterateAttr()) != -1) {
				String attrName = vn.toString(i);
				String attrValue = vn.toRawString(i + 1);
				// I actually don't know if there still are unsupported types since in the DataBaseConnector we just set objects
				if (attrName.equals(JulieXMLConstants.TYPE))
					if (!isKnownType(attrValue))
						throw new IllegalArgumentException("Type \"" + attrValue
								+ "\" is not supported by this tool.");
				field.put(attrName, attrValue);
				if (attrName.equals(JulieXMLConstants.NAME))
					fieldName = attrValue;
				if (attrName.equals(JulieXMLConstants.TIMESTAMP) && Boolean.parseBoolean(attrValue))
					timestampFieldName = fieldName;
			}
			// These fields potentially still contain field
			// definitions which do not rely on the XML document
			// itself but e.g. on the file name. These fields
			// are treated in the "prepare" method.
			fields.add(field);
			fieldNameMap.put(fieldName, field);
		}
		if (!xPathFound)
			throw new TableSchemaDoesNotExistException("No field schema with name \""
					+ activeSchemeName + "\" was found.");
	}

	public List> getFields() {
		return fields;
	}

	/**
	 * Looks up a single field definition by its name.
	 *
	 * @param fieldName
	 *            the value of the field's name attribute
	 * @return the field as a map from attribute names to attribute values, or
	 *         <code>null</code> if no field with this name exists
	 */
	public Map<String, String> getField(String fieldName) {
		return fieldNameMap.get(fieldName);
	}

	/**
	 * Returns the for-each XPath of this schema.
	 *
	 * @return the value of the schema element's for-each attribute, or the
	 *         value last set via {@link #setForEachXPath(String)}; may be
	 *         <code>null</code>
	 */
	public String getForEachXPath() {
		return this.forEachXPath;
	}

	/**
	 * Replaces the for-each XPath of this schema.
	 *
	 * @param forEachXPath
	 *            the new for-each XPath; stored as given, without validation
	 */
	public void setForEachXPath(String forEachXPath) {
		this.forEachXPath = forEachXPath;
	}

	/**
	 * Tells whether the given type name is one of the types recognized by the
	 * type predicates of this class (string, timestamp without time zone,
	 * string array, integer, boolean or binary data).
	 *
	 * @param type
	 *            the type name to test
	 * @return true if any of the type predicates accepts the name
	 */
	public static boolean isKnownType(String type) {
		if (isStringType(type) || isTimestampWithoutTZType(type) || isStringTypeArray(type)) {
			return true;
		}
		return isIntegerType(type) || isBooleanType(type) || isBinaryDataType(type);
	}

	/**
	 * Tells whether the given type name equals
	 * {@link Constants#TYPE_BINARY_DATA}.
	 *
	 * @param type
	 *            the type name to test; <code>null</code> yields false
	 * @return true if the type name denotes the binary data type
	 */
	public static boolean isBinaryDataType(String type) {
		// Constant first so that a null type yields false instead of an NPE.
		return Constants.TYPE_BINARY_DATA.equals(type);
	}

	/**
	 * Tells whether the given type name equals
	 * {@link Constants#TYPE_TEXT_ARRAY} or
	 * {@link Constants#TYPE_VARCHAR_ARRAY}.
	 *
	 * @param type
	 *            the type name to test; <code>null</code> yields false
	 * @return true if the type name denotes one of the two string array types
	 */
	public static boolean isStringTypeArray(String type) {
		// Constants first so that a null type yields false instead of an NPE.
		return Constants.TYPE_TEXT_ARRAY.equals(type) || Constants.TYPE_VARCHAR_ARRAY.equals(type);
	}
	
	/**
	 * Tells whether the given type name equals {@link Constants#TYPE_BOOLEAN}.
	 *
	 * @param type
	 *            the type name to test; <code>null</code> yields false
	 * @return true if the type name denotes the boolean type
	 */
	public static boolean isBooleanType(String type) {
		// Constant first so that a null type yields false instead of an NPE.
		return Constants.TYPE_BOOLEAN.equals(type);
	}

	/**
	 * Returns true if the string type equals the SQL
	 * "timestamp without time zone" type, i.e.
	 * {@link Constants#TYPE_TIMESTAMP_WITHOUT_TIMEZONE}.
	 *
	 * @param type
	 *            String to test; <code>null</code> yields false.
	 * @return True if type denotes a timestamp type without
	 *         timezone information.
	 */
	public static boolean isTimestampWithoutTZType(String type) {
		// Constant first so that a null type yields false instead of an NPE.
		return Constants.TYPE_TIMESTAMP_WITHOUT_TIMEZONE.equals(type);
	}

	/**
	 * Tells whether the field with the given name is declared with the
	 * "timestamp without time zone" type.
	 *
	 * @param fieldName
	 *            the name of a field of this schema
	 * @return true if the field's type attribute denotes a timestamp without
	 *         time zone
	 * @throws NullPointerException
	 *             if this schema has no field with the given name
	 */
	public boolean isOfTimestampWithoutTZType(String fieldName) {
		Map<String, String> field = fieldNameMap.get(fieldName);
		return isTimestampWithoutTZType(field.get(JulieXMLConstants.TYPE));
	}

	/**
	 * Tells whether the given type name equals {@link Constants#TYPE_TEXT}.
	 *
	 * @param type
	 *            the type name to test; <code>null</code> yields false
	 * @return true if the type name denotes the text type
	 */
	public static boolean isStringType(String type) {
		// Constant first so that a null type yields false instead of an NPE.
		return Constants.TYPE_TEXT.equals(type);
	}

	/**
	 * Tells whether the given type name equals {@link Constants#TYPE_INTEGER}.
	 *
	 * @param type
	 *            the type name to test; <code>null</code> yields false
	 * @return true if the type name denotes the integer type
	 */
	public static boolean isIntegerType(String type) {
		// Constant first so that a null type yields false instead of an NPE.
		return Constants.TYPE_INTEGER.equals(type);
	}

	/**
	 * Tells whether the field with the given name is declared with the text
	 * type.
	 *
	 * @param fieldName
	 *            the name of a field of this schema
	 * @return true if the field's type attribute denotes the text type
	 * @throws NullPointerException
	 *             if this schema has no field with the given name
	 */
	public boolean isOfStringType(String fieldName) {
		Map<String, String> field = fieldNameMap.get(fieldName);
		return isStringType(field.get(JulieXMLConstants.TYPE));
	}

	/**
	 * Tells whether the given field definition is declared with the text type.
	 *
	 * @param field
	 *            a field definition mapping attribute names to attribute values
	 * @return true if the field's type attribute denotes the text type
	 */
	public boolean isOfStringType(Map<String, String> field) {
		return isStringType(field.get(JulieXMLConstants.TYPE));
	}

	/**
	 * Tells whether the given field definition is declared with the integer
	 * type.
	 *
	 * @param field
	 *            a field definition mapping attribute names to attribute values
	 * @return true if the field's type attribute denotes the integer type
	 */
	public boolean isOfIntegerType(Map<String, String> field) {
		return isIntegerType(field.get(JulieXMLConstants.TYPE));
	}

	/**
	 * Tells whether the given field definition is declared with the binary
	 * data type.
	 *
	 * @param field
	 *            a field definition mapping attribute names to attribute values
	 * @return true if the field's type attribute denotes the binary data type
	 */
	public boolean isOfBinaryDataType(Map<String, String> field) {
		return isBinaryDataType(field.get(JulieXMLConstants.TYPE));
	}

	/**
	 * Returns the names of all fields whose primary key attribute parses to
	 * <code>true</code>, in field order. The array is computed on the first
	 * call and cached; the cached array itself is returned, not a copy.
	 *
	 * @return - An Array with the names of all primary keys
	 */
	public String[] getPrimaryKey() {
		if (primaryKey == null) {
			List<String> pkColumnNames = new ArrayList<String>();
			for (Map<String, String> field : fields) {
				if (Boolean.parseBoolean(field.get(JulieXMLConstants.PRIMARY_KEY))) {
					pkColumnNames.add(field.get(JulieXMLConstants.NAME));
				}
			}
			primaryKey = pkColumnNames.toArray(new String[pkColumnNames.size()]);
		}
		return primaryKey;
	}

	/**
	 * Returns the positions of the primary key fields within the field list.
	 * The list is computed on the first call and cached; the cached list
	 * itself is returned, not a copy.
	 *
	 * @return - The indices of those fields which are primary keys, beginning
	 *         with 0
	 */
	public List<Integer> getPrimaryKeyFieldNumbers() {
		if (primaryKeyFieldNumbers == null) {
			List<Integer> fieldNumbers = new ArrayList<Integer>();
			int i = 0;
			for (Map<String, String> field : fields) {
				if (Boolean.parseBoolean(field.get(JulieXMLConstants.PRIMARY_KEY))) {
					fieldNumbers.add(i);
				}
				++i;
			}
			primaryKeyFieldNumbers = fieldNumbers;
		}
		return primaryKeyFieldNumbers;
	}

	/**
	 * Returns the names of all fields whose retrieve attribute parses to
	 * <code>true</code>, in field order. The array is computed on the first
	 * call and cached; the cached array itself is returned, not a copy.
	 *
	 * @return the names of the columns marked for retrieval
	 */
	public String[] getColumnsToRetrieve() {
		if (columnsToRetrieve == null) {
			List<String> retrieveColumnNames = new ArrayList<String>();
			for (Map<String, String> field : fields) {
				if (Boolean.parseBoolean(field.get(JulieXMLConstants.RETRIEVE))) {
					retrieveColumnNames.add(field.get(JulieXMLConstants.NAME));
				}
			}
			columnsToRetrieve = retrieveColumnNames.toArray(new String[retrieveColumnNames.size()]);
		}
		return columnsToRetrieve;
	}

	public List> getFieldsToRetrieve() {
		List> fieldsToRetrieve = new ArrayList>();
		for (Map field : fields) {
			if (Boolean.parseBoolean(field.get(JulieXMLConstants.RETRIEVE)))
				fieldsToRetrieve.add(field);
		}
		return fieldsToRetrieve;
	}

	/**
	 * Returns the names of the columns forming the primary key in a CSV format.
	 * <p>
	 * This method calls {@link #getPrimaryKey()} to obtain the list of primary
	 * key column names. It then builds a string consisting of these names
	 * separated by commas and returns this string.
	 * </p>
	 * <p>
	 * Example: If the primary key columns are "pmid" and "systemID", the string
	 * returned would be "pmid,systemID".
	 * </p>
	 *
	 * @return A comma separated list of the column names which form the primary
	 *         key in this table scheme.
	 */
	public String getPrimaryKeyString() {
		return StringUtils.join(getPrimaryKey(), ",");
	}

	/**
	 * Passes the primary key column names and the given format strings to
	 * {@link JulieXMLTools#expandArrayEntries}.
	 *
	 * @param fmtStrs
	 *            format strings handed to the helper unchanged; presumably one
	 *            per primary key column - verify against JulieXMLTools
	 * @return the array returned by the helper
	 */
	public String[] expandPKNames(String[] fmtStrs) {
		return JulieXMLTools.expandArrayEntries(getPrimaryKey(), fmtStrs);
	}

	/**
	 * Passes the primary key column names and the given format string to
	 * {@link JulieXMLTools#expandArrayEntries}.
	 *
	 * @param fmtStr
	 *            format string handed to the helper unchanged; presumably
	 *            applied to every primary key column - verify against
	 *            JulieXMLTools
	 * @return the array returned by the helper
	 */
	public String[] expandPKNames(String fmtStr) {
		return JulieXMLTools.expandArrayEntries(getPrimaryKey(), fmtStr);
	}

	/**
	 * Returns the name of the field flagged as timestamp field.
	 *
	 * @return the timestamp field name, or <code>null</code> if no field of
	 *         this schema is flagged as timestamp field
	 */
	public String getTimestampFieldName() {
		return timestampFieldName;
	}

	/**
	 * Builds a multi-line, human-readable listing of the schema name and of
	 * every field with all of its attributes.
	 */
	@Override
	public String toString() {
		List<String> strList = new ArrayList<String>();
		strList.add("Schema configuration for \"" + name + "\":");
		for (Map<String, String> field : fields) {
			// The extra newline entry separates the fields by blank lines
			// once everything is joined with "\n" below.
			strList.add("\n");
			strList.add("Field \"" + field.get(JulieXMLConstants.NAME) + "\":");
			for (Map.Entry<String, String> attr : field.entrySet()) {
				strList.add(attr.getKey() + "=\"" + attr.getValue() + "\"");
			}
		}
		return StringUtils.join(strList, "\n");
	}

	/**
	 * Returns the configuration data this object was built from as text.
	 *
	 * @return the configuration bytes decoded to a string
	 */
	public String getConfigText() {
		// NOTE(review): decodes with the platform default charset; confirm
		// whether the configuration should be decoded with a fixed encoding.
		return new String(configData);
	}

	/** The name of the table schema described by this configuration. */
	private final String name;

	/**
	 * Returns the names of all fields of this schema in field order. The array
	 * is computed on the first call and cached; the cached array itself is
	 * returned, not a copy.
	 *
	 * @return the names of all columns
	 */
	public String[] getColumns() {
		if (columns == null) {
			List<String> columnNames = new ArrayList<String>();
			for (Map<String, String> field : fields) {
				columnNames.add(field.get(JulieXMLConstants.NAME));
			}
			columns = columnNames.toArray(new String[columnNames.size()]);
		}
		return columns;
	}
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy