de.julielab.xml.JulieXMLConstants Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of julie-xml-tools Show documentation
Reads XML using the VTD parser.
There is a newer version: 0.6.7
package de.julielab.xml;

/**
 * Collection of constants naming the attributes that may appear in field definitions (plus the
 * {@link #FOR_EACH} definition attribute) used to describe how values are extracted from XML documents.
 * <p>
 * The class is abstract and declares only static constants.
 */
public abstract class JulieXMLConstants {

    /**
     * Constant for the name of a definition attribute in the configuration file for the {@link DataBaseConnector}. The
     * value of the <code>FOR_EACH</code> attribute is the XPath expression which determines the XML elements for each
     * of which particular fields with values within these elements should be constructed.
     * <p>
     * <b>Example:</b>
     * <p>
     * Assume we have a file with the following structure:
     *
     * <pre>{@code
     * <MedlineCitationSet>
     *     <MedlineCitation>
     *         <PMID>123456</PMID>
     *         ...more content...
     *     </MedlineCitation>
     *     ...
     *     <MedlineCitation>
     *         ...some content...
     *     </MedlineCitation>
     *     ...
     * </MedlineCitationSet>
     * }</pre>
     *
     * We want to traverse each of the {@code <MedlineCitation>} elements and extract information from their inner
     * elements, e.g. their PMID. For this purpose we set the <code>FOR_EACH</code> attribute (or the corresponding
     * method parameters in the Java code) to the XPath {@code "/MedlineCitationSet/MedlineCitation"}.
     */
    public static final String FOR_EACH = "forEach";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * If set to true, the database table column corresponding to the field definition with the
     * <code>PRIMARY_KEY</code> attribute will be part of the primary key.
     */
    public static final String PRIMARY_KEY = "primaryKey";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * If set to true, the value of the database table column corresponding to the field definition with the
     * <code>RETRIEVE</code> attribute will be retrieved by the {@link DataBaseConnector}'s query methods.
     */
    public static final String RETRIEVE = "retrieve";

    /**
     * Constant for the name of a field attribute. The <code>XPATH</code> attribute holds the XPath expression that
     * determines which XML element holds the desired information for the field.
     */
    public static final String XPATH = "xpath";

    /**
     * Constant for the name of a field. Will correspond to the name of the database field which holds the information
     * retrieved by the field Map.
     */
    public static final String NAME = "name";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * The <code>RETURN_XML_FRAGMENT</code> attribute determines whether the complete XML code pointed to by the
     * <code>XPATH</code> attribute should be returned (<code>RETURN_XML_FRAGMENT</code> set to true).
     * <p>
     * <b>Example:</b>
     * <p>
     * A field with the following attribute-value pairs
     *
     * <pre>{@code
     * <field name="xml" xpath="/MedlineCitationSet/MedlineCitation" returnXMLFragment="true">
     * }</pre>
     *
     * returns the complete XML fragment for a MedlineCitation node, including opening and closing tags.
     */
    public static final String RETURN_XML_FRAGMENT = "returnXMLFragment";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * If a field's XPath expression has several hits (e.g. an XPath pointing to an author in a Medline document will
     * most probably find multiple matches), <code>RETURN_ARRAY</code> determines whether the extracted values should
     * be returned as a String array. If set to false, multiple values will be concatenated using the String given by
     * {@link #CONCAT_STRING} or, if missing, the default (',').
     */
    public static final String RETURN_ARRAY = "returnValuesAsArray";

    /**
     * The literal {@code "gzip"}.
     * <p>
     * NOTE(review): this constant was undocumented; presumably it names a gzip compression option for a field or
     * input - confirm against the callers. It is also the only constant declared as {@code Object} instead of
     * {@code String}; verify whether that is intentional before relying on it as a String.
     */
    public static final Object GZIP = "gzip";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * Determines the String to be used when concatenating multiple hits of an XPath. Multiple hits can also be
     * returned as an array. See the {@link #RETURN_ARRAY} constant for more information.
     */
    public static final String CONCAT_STRING = "concatString";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * If set to true, the file name - if the XML document is read from file - is used to extract values. This is done
     * in a 'match with regular expression and replace with' fashion. Therefore, using this attribute requires
     * delivering values for the attributes {@link #REGEX} and {@link #REPLACE_WITH} as well.
     */
    public static final String EXTRACT_FROM_FILENAME = "extractFromFileName";


    /**
     * Constant for the name of a field attribute.
     * <p>
     * If set to true, extracted XML text passages will be XML entity resolved. That is, escaped special characters
     * will be substituted by their human readable counterpart.
     * <p>
     * <b>Example:</b>
     * <p>
     * The text
     *
     * <pre>{@code
     * The population of butterflies &amp; bees represents &lt; 30% of all insects
     * }</pre>
     *
     * contains the XML entities for {@code '&'} and {@code '<'} in an escaped fashion so as not to collide with the
     * XML parsing process. Switching <code>RESOLVE_ENTITIES</code> to true will result in the string
     *
     * <pre>{@code
     * The population of butterflies & bees represents < 30% of all insects
     * }</pre>
     */
    public static final String RESOLVE_ENTITIES = "resolveEntities";

    /**
     * Constant for the name of a field attribute. Fields which have set the <code>TIMESTAMP</code> attribute to
     * "true" will be given the time stamp of the last update for the corresponding record. This is required for
     * delta-updates.
     */
    public static final String TIMESTAMP = "timestamp";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * Used together with the {@link #EXTRACT_FROM_FILENAME} and {@link #REPLACE_WITH} attributes. Determines the
     * regular expression whose matches on the XML document file name are substituted by the value given by
     * <code>REPLACE_WITH</code>.
     */
    public static final String REGEX = "regex";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * Used together with the {@link #EXTRACT_FROM_FILENAME} and {@link #REGEX} attributes. Determines the
     * substitute expression to replace characters of the XML document file name which match the regular expression
     * given by the <code>REGEX</code> attribute.
     */
    public static final String REPLACE_WITH = "replaceWith";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * Used to set the type of a parsed value, e.g. String.
     */
    public static final String TYPE = "type";

    /**
     * Constant for the name of a field attribute.
     * <p>
     * The value of this attribute will always be the value of the field that specifies the attribute.
     */
    public static final String CONSTANT_VALUE = "constantValue";
}