All downloads are free. The search and download functionality uses the official Maven repository.

de.julielab.xmlData.config.FieldConfig Maven / Gradle / Ivy

Go to download

A utility for managing documents stored in a PostgreSQL database. Documents are imported into the database as full texts so that they can be retrieved efficiently by their PubMed ID. For more sophisticated tasks, a user configuration file can be supplied that controls the table schema, the PostgreSQL schema, the database server to connect to, and the specific database to use.

There is a newer version: 1.6.2
Show newest version
/**
 * FieldConfig.java
 *
 * Copyright (c) 2011, JULIE Lab.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Common Public License v1.0
 *
 * Author: faessler
 *
 * Current version: 1.0
 * Since version:   1.0
 *
 * Creation date: 11.03.2011
 **/

package de.julielab.xmlData.config;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.ximpleware.AutoPilot;
import com.ximpleware.EOFException;
import com.ximpleware.EncodingException;
import com.ximpleware.EntityException;
import com.ximpleware.NavException;
import com.ximpleware.ParseException;
import com.ximpleware.PilotException;
import com.ximpleware.VTDException;
import com.ximpleware.VTDGen;
import com.ximpleware.VTDNav;
import com.ximpleware.XPathEvalException;
import com.ximpleware.XPathParseException;

import de.julielab.xml.JulieXMLConstants;
import de.julielab.xml.JulieXMLTools;
import de.julielab.xmlData.Constants;

/**
 * This class holds the definition of fields for the database table to work
 * with. The definition was read from the configuration XML file.
 * 
 * @author faessler
 */
public class FieldConfig extends ConfigBase {
	
	private final static Logger log = LoggerFactory.getLogger(FieldConfig.class);
	
	private static final String XPATH_CONF_SCHEMA_INFO = "//DBSchemaInformation";
	private static final String XPATH_CONF_SCHEMES = XPATH_CONF_SCHEMA_INFO + "/tableSchemas";
	private static final String XPATH_CONF_SCHEME = XPATH_CONF_SCHEMES + "/tableSchema";

	private static final String XPATH_CONF_FIELD_TEMPLATE = XPATH_CONF_SCHEME
			+ "[@name='%s']/field";
	private static final String XPATH_CONF_ACTIVE_SCHEME_TEMPLATE = XPATH_CONF_SCHEME
			+ "[@name='%s']";

	private List> fields;
	private Map> fieldNameMap;
	private String forEachXPath;
	private String[] primaryKey;
	private String[] columns;
	private String[] columnsToRetrieve;
	private String timestampFieldName = null;
	private List primaryKeyFieldNumbers = null;
	private byte[] configData;

	/**
	 * @return the name
	 */
	public String getName() {
		return name;
	}

	public FieldConfig(byte[] configData, String schemaName) throws VTDException {
		this.configData = configData;
		this.name = schemaName;
		buildFields(configData, schemaName);
	}

	public FieldConfig(List> fields, String forEachXPath, String schemaName) {
        this.forEachXPath = forEachXPath;
        this.name = schemaName;
		this.fields = fields;
		fieldNameMap = new HashMap<>();
		for (Map field : fields) {
			String name = field.get(JulieXMLConstants.NAME);
			if (name == null)
				throw new IllegalArgumentException("The passed field configuration contains the field \"" + field + "\" " +
						"that does specify the required \"" + JulieXMLConstants.NAME + "\" property");
            if (field.get(JulieXMLConstants.TYPE) == null)
                throw new IllegalArgumentException("The passed field configuration contains the field \"" + field + "\" " +
                        "that does specify the required \"" + JulieXMLConstants.TYPE + "\" property");
            fieldNameMap.put(name, field);
            if (field.get(JulieXMLConstants.TIMESTAMP) != null && Boolean.parseBoolean(field.get(JulieXMLConstants.TIMESTAMP)))
                timestampFieldName = name;
		}
        primaryKey = fields.stream().
                filter(field -> Boolean.parseBoolean((field.get(JulieXMLConstants.PRIMARY_KEY)))).
                map(field -> field.get(JulieXMLConstants.NAME)).
                toArray(String[]::new);
		columns = fields.stream().
                map(field -> field.get(JulieXMLConstants.NAME)).
                toArray(String[]::new);
		columnsToRetrieve = fields.stream().
                filter(field -> Boolean.parseBoolean((field.get(JulieXMLConstants.RETRIEVE)))).
                map(field -> field.get(JulieXMLConstants.NAME)).
                toArray(String[]::new);
        primaryKeyFieldNumbers = new ArrayList<>();
        for (int i = 0; i < fields.size(); i++) {
            Map field =  fields.get(i);
            if (Boolean.parseBoolean((field.get(JulieXMLConstants.PRIMARY_KEY))))
                primaryKeyFieldNumbers.add(i);
        }
	}

	private void buildFields(byte[] mergedConfData, String activeSchemeName)
			throws EncodingException, EOFException, EntityException, ParseException,
			XPathParseException, XPathEvalException, NavException, PilotException {
		VTDGen vg = new VTDGen();
		vg.setDoc(mergedConfData);
		vg.parse(true);
		VTDNav vn = vg.getNav();
		AutoPilot ap = new AutoPilot(vn);

		ap.selectXPath(String.format(XPATH_CONF_ACTIVE_SCHEME_TEMPLATE, activeSchemeName));
		if (ap.evalXPath() != -1) {
			int attrIndex = vn.getAttrVal(JulieXMLConstants.FOR_EACH);
			if (attrIndex != -1)
				forEachXPath = vn.toString(attrIndex);
		}

		ap.selectXPath(String.format(XPATH_CONF_FIELD_TEMPLATE, activeSchemeName));
		AutoPilot ap2 = new AutoPilot(vn);
		fields = new ArrayList>();
		fieldNameMap = new HashMap>();

		boolean xPathFound = false;
		while (ap.evalXPath() != -1) {
			xPathFound = true;
			Map field = new LinkedHashMap();
			String fieldName = null;
			int i = -1;
			ap2.selectAttr("*");
			while ((i = ap2.iterateAttr()) != -1) {
				String attrName = vn.toString(i);
				String attrValue = vn.toRawString(i + 1);
				// I actually don't know if there still are unsupported types since in the DataBaseConnector we just set objects
				if (attrName.equals(JulieXMLConstants.TYPE))
					if (!isKnownType(attrValue))
						throw new IllegalArgumentException("Type \"" + attrValue
								+ "\" is not supported by this tool.");
				field.put(attrName, attrValue);
				if (attrName.equals(JulieXMLConstants.NAME))
					fieldName = attrValue;
				if (attrName.equals(JulieXMLConstants.TIMESTAMP) && Boolean.parseBoolean(attrValue))
					timestampFieldName = fieldName;
			}
			// These fields potentially still contain field
			// definitions which do not rely on the XML document
			// itself but e.g. on the file name. These fields
			// are treated in the "prepare" method.
			fields.add(field);
			fieldNameMap.put(fieldName, field);
		}
		if (!xPathFound)
			throw new TableSchemaDoesNotExistException("No field schema with name \""
					+ activeSchemeName + "\" was found.");
	}

	public List> getFields() {
		return fields;
	}

	public Map getField(String fieldName) {
		return fieldNameMap.get(fieldName);
	}

	public String getForEachXPath() {
		return forEachXPath;
	}

	public void setForEachXPath(String forEachXPath) {
		this.forEachXPath = forEachXPath;
	}

	public static boolean isKnownType(String type) {
		return isStringType(type) || isTimestampWithoutTZType(type) || isStringTypeArray(type)
				|| isIntegerType(type) || isBooleanType(type) || isBinaryDataType(type);
	}

	public static boolean isBinaryDataType(String type) {
		return type.equals(Constants.TYPE_BINARY_DATA);
	}

	/**
	 * @param type
	 * @return
	 */
	public static boolean isStringTypeArray(String type) {
		return type.equals(Constants.TYPE_TEXT_ARRAY) || type.equals(Constants.TYPE_VARCHAR_ARRAY);
	}
	
	/**
	 * @param type
	 * @return
	 */
	public static boolean isBooleanType(String type) {
		return type.equals(Constants.TYPE_BOOLEAN);
	}

	/**
	 * Returns true if the string type equals the SQL
	 * "timestamp without time zone" type.
	 * 
	 * @param type
	 *            String to test.
	 * @return True if type denotes a timestamp type without
	 *         timezone information.
	 */
	public static boolean isTimestampWithoutTZType(String type) {
		return type.equals(Constants.TYPE_TIMESTAMP_WITHOUT_TIMEZONE);
	}

	public boolean isOfTimestampWithoutTZType(String fieldName) {
		return isTimestampWithoutTZType(fieldNameMap.get(fieldName).get(JulieXMLConstants.TYPE));
	}

	public static boolean isStringType(String type) {
		return type.equals(Constants.TYPE_TEXT);
	}

	/**
	 * @param type
	 * @return
	 */
	public static boolean isIntegerType(String type) {
		return type.equals(Constants.TYPE_INTEGER);
	}

	public boolean isOfStringType(String fieldName) {
		return isStringType(fieldNameMap.get(fieldName).get(JulieXMLConstants.TYPE));
	}

	public boolean isOfStringType(Map field) {
		return isStringType(field.get(JulieXMLConstants.TYPE));
	}

	public boolean isOfIntegerType(Map field) {
		return isIntegerType(field.get(JulieXMLConstants.TYPE));
	}

	public boolean isOfBinaryDataType(Map field) {
		return isBinaryDataType(field.get(JulieXMLConstants.TYPE));
	}

	/**
	 * 
	 * @return - An Array with the names off all primary keys
	 */
	public String[] getPrimaryKey() {
		if (primaryKey == null) {
			primaryKey = getPrimaryKeyFields().
					map(field -> field.get(JulieXMLConstants.NAME)).
					toArray(String[]::new);
		}
		return primaryKey;
	}

	public Stream> getPrimaryKeyFields() {
	    return fields.stream().filter(field -> Boolean.parseBoolean(field.get(JulieXMLConstants.PRIMARY_KEY)));
    }

	/**
	 * 
	 * @return - The indices of those fields which are primary keys, beginning
	 *         with 0
	 */
	public List getPrimaryKeyFieldNumbers() {
		if (primaryKeyFieldNumbers == null) {
			List fieldNumbers = new ArrayList();
			int i = 0;
			for (Map field : fields) {
				if (Boolean.parseBoolean(field.get(JulieXMLConstants.PRIMARY_KEY)))
					fieldNumbers.add(i);
				++i;
			}
			primaryKeyFieldNumbers = fieldNumbers;
		}
		return primaryKeyFieldNumbers;
	}

	public String[] getColumnsToRetrieve() {
		if (columnsToRetrieve == null) {
			List retrieveColumnNames = new ArrayList();
			for (Map field : fields) {
				if (Boolean.parseBoolean(field.get(JulieXMLConstants.RETRIEVE)))
					retrieveColumnNames.add(field.get(JulieXMLConstants.NAME));
			}
			columnsToRetrieve = new String[retrieveColumnNames.size()];
			retrieveColumnNames.toArray(columnsToRetrieve);
		}
		return columnsToRetrieve;
	}

	public List> getFieldsToRetrieve() {
		List> fieldsToRetrieve = new ArrayList>();
		for (Map field : fields) {
			if (Boolean.parseBoolean(field.get(JulieXMLConstants.RETRIEVE)))
				fieldsToRetrieve.add(field);
		}
		return fieldsToRetrieve;
	}

	/**
	 * Returns the names of the columns forming the primary key in a CSV format.
	 * 

* This method calls {@link #getPrimaryKey()} to obtain the list of primary * key column names. It then builds a string consisting of these names * separated by commas and returns this string. *

* Example: If the primary key columns are "pmid" and "systemID", the string * returned would be "pmid,systemID". * * @return A comma separated list of the column names which form the primary * key in this table scheme. */ public String getPrimaryKeyString() { return StringUtils.join(getPrimaryKey(), ","); } /** * Takes an array of format strings according to {@link String#format(String, Object...)} with a single %s symbol. * The number of format * string must be equal to the number of primary key elements of this table schema definition. Then, for the * ith format string, the ith primary key element is applied. * @param fmtStrs The format string to fill with primary key elements, one format string per primary key element. * @return An array of strings corresponding to the format strings filled with the primary key elements. * @see {@link String#format(String, Object...)} */ public String[] expandPKNames(String[] fmtStrs) { return JulieXMLTools.expandArrayEntries(getPrimaryKey(), fmtStrs); } /** * Applies each primary key element to the format string and returns the results as array. Each result element * corresponds to one primary key element applied to the format string. * @param fmtStr The format string to be filled with the primary key elements. * @return All results corresponding to applying each primary key element once to the given format string. 
*/ public String[] expandPKNames(String fmtStr) { return JulieXMLTools.expandArrayEntries(getPrimaryKey(), fmtStr); } public String getTimestampFieldName() { return timestampFieldName; } @Override public String toString() { List strList = new ArrayList(); strList.add("Schema configuration for \"" + name + "\":"); for (Map field : fields) { strList.add("\n"); strList.add("Field \"" + field.get(JulieXMLConstants.NAME) + "\":"); for (String attr : field.keySet()) { strList.add(attr + "=\"" + field.get(attr) + "\""); } } return StringUtils.join(strList, "\n"); } public String getConfigText() { if (configData != null) return new String(configData); return ""; } private final String name; public String[] getColumns() { if (columns == null) { List columnNames = new ArrayList(); for (Map field : fields) { columnNames.add(field.get(JulieXMLConstants.NAME)); } columns = new String[columnNames.size()]; columnNames.toArray(columns); } return columns; } public static Map createField(String... configuration) { if (configuration.length % 2 == 1) throw new IllegalArgumentException("An even number of arguments is required. The even indexes " + "are field property keys, the odd indexes are the values to the previous key."); Map field = new HashMap<>(); for (int i = 0; i < configuration.length; i = i + 2) { String s = configuration[i]; field.put(s, configuration[i + 1]); } return field; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy