All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.ow2.weblab.service.gate.GateHelper Maven / Gradle / Ivy

Go to download

GATE-based component that can process Text units to extract information using GATE's tools (such as grammars, gazetteers, tokenizers or POS taggers). This project contains two versions: a simple component and a web service one.

There is a newer version: 2.0
Show newest version
/**
 * WEBLAB: Service oriented integration platform for media mining and intelligence applications
 * 
 * Copyright (C) 2004 - 2009 EADS DEFENCE AND SECURITY SYSTEMS
 * 
 * This library is free software; you can redistribute it and/or modify it under the terms of
 * the GNU Lesser General Public License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with this
 * library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
 * Floor, Boston, MA 02110-1301 USA
 */

package org.ow2.weblab.service.gate;

import gate.FeatureMap;
import gate.creole.ontology.OConstants.RDF;
import gate.creole.ontology.OConstants.RDFS;

import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.LogFactory;
import org.weblab_project.core.comparator.SegmentComparator;
import org.weblab_project.core.exception.WebLabCheckedException;
import org.weblab_project.core.factory.AnnotationFactory;
import org.weblab_project.core.factory.SegmentFactory;
import org.weblab_project.core.helper.PoKHelper;
import org.weblab_project.core.helper.PoKHelperExtended;
import org.weblab_project.core.helper.RDFHelperFactory;
import org.weblab_project.core.model.Annotation;
import org.weblab_project.core.model.text.LinearSegment;
import org.weblab_project.core.model.text.Text;
import org.weblab_project.core.ontologies.WebLab;
import org.weblab_project.core.properties.PropertiesLoader;
import org.weblab_project.core.util.TextUtil;


/**
 * This class contains useful static methods for the gate-extraction project.
 * 
 * @author ymombrun
 * @date 2008-05-06
 */
public class GateHelper {

	/**
	 * The namespace to be used by annotation created using gate annotations in case of a verbose mode.
	 */
	public final static String GATE_TEMP_NS = WebLab.PROCESSING_PROPERTY_NAMESPACE + "temp/gate/";

	/**
	 * To prevent from the creation of j.0 and prefixes. Not used in simple mode.
	 */
	public final static String GATE_TEMP_NS_PREFIX = "tempGate";

	/**
	 * The base URI of the created instances instances. Not used in old mode.
	 */
	public final static String GATE_TYPE_BASE_URI = "http://gate.ac.uk/gatemodel#";


	/**
	 * Just an empty map that is used a lot of times. To prevent useless instantiation.
	 */
	private final static Map EMPTY_MAP = Collections. emptyMap();

	/**
	 * Name of the property file to be loaded. I may contains: EXCLUSION_PROPERTY, FEATURE_EXCLUSION_PROPERTY, SERVICE_URI_PROPERTY, PROCESSING_MODE_PROPERTY, VERBOSE_PROPERTY
	 */
	public final static String PROPERTIES_FILE_NAME = "gate.properties";

	/**
	 * Name of the property that contains the list of properties to be excluded from the conversion. Typically it contains SpaceToken, Token... and every other Gate Annotation that will not be used
	 * further in WebLab document.
	 */
	public final static String EXCLUSION_PROPERTY = "gateAnnotationTypesToExclude";

	/**
	 * Name of the property that contains the list of features to be excluded from the conversion. Typically it contains rules, matches... and every other Gate Annotation that will not be used further
	 * in WebLab document.
	 */
	public final static String FEATURE_EXCLUSION_PROPERTY = "gateFeaturesToExlude";

	/**
	 * Name of the property that contains the URI of the service URI to be used if the created annotation shall contains a isProducedBy statement.
	 */
	public final static String SERVICE_URI_PROPERTY = "addProducedByThisServiceURI";

	/**
	 * Choose the processing mode to be used.
	 * Values may be: OLD_MODE, SIMPLE_MODE or COMPLETE_MODE
	 */
	public final static String PROCESSING_MODE_PROPERTY = "annotationConversionMode";

	@Deprecated
	public final static String OLD_PROCESSING_MODE = "old";
	public final static String SIMPLE_PROCESSING_MODE = "simple";
	public final static String COMPLETE_PROCESSING_MODE = "complete";

	/**
	 * Name of the property that contains the boolean value to set whether or not to be verbose in annotation conversion. Note that this is not used in case of simple mode. But in case of old or
	 * complete mode, it defines whether or not to include id's properties in output annotations.
	 */
	public final static String VERBOSE_PROPERTY = "verbose";

	/**
	 * Excluded annotations. Read in PROPERTIES_FILE_NAME using EXCLUSION_PROPERTY.
	 */
	private static Set EXCLUDED_ANNOTATIONS = new HashSet();

	/**
	 * Excluded annotations. Read in PROPERTIES_FILE_NAME using FEATURE_EXCLUSION_PROPERTY.
	 */
	private static Set EXCLUDED_FEATURES = new HashSet();

	@Deprecated
	private static boolean OLD_MODE;


	private static boolean ADD_FEATURES;


	private static boolean ADD_IDS;


	private static String SERVICE_URI;

	/**
	 * The URI of the predicate for types.
	 * 
	 * Annotation are created on a temporary namespace. The reason is that Gate let you to write you own plugins, and rules, enabling the creation a your own Types.
	 * Since this service shall be working in various applications, having various business ontologies, the annotation format is a temporary one.
	 * Best practice is to write a service, that will be called after this to do the mapping between those temporary annotations and good annotation using the specified ontology.
	 */
	@Deprecated
	public final static String GATE_TEMP_ANNOTATION_TYPE = GATE_TEMP_NS + "type";

	/**
	 * The URI of the predicate for meta data that refines types.
	 */
	@Deprecated
	public final static String GATE_TEMP_ANNOTATION_META = GATE_TEMP_NS + "meta";

	/**
	 * Annotate text with each annotation in annotation set.
	 * At the end, sorts the segments list to ease further process.
	 * 
	 * @param text
	 *            The WebLab Text to be annotated
	 * @param annots
	 *            The Gate annotation set to be used to annotate text
	 */
	public static void linkGateAnnotsToText(Text text, final gate.AnnotationSet annots) {
		if (OLD_MODE) {
			Map annotated = new HashMap();
			for (final gate.Annotation annot : annots) {
				if (!EXCLUDED_ANNOTATIONS.contains(annot.getType())) {
					GateHelper.linkGateAnnotToText(text, annot, annotated);
				}
			}
		} else {
			final Annotation wlAnnot = AnnotationFactory.createAndLinkAnnotation(text);
			final PoKHelperExtended pokhe = RDFHelperFactory.getPoKHelperExtended(wlAnnot);
			pokhe.setAutoCommitMode(false);
			pokhe.setNSPrefix("wlp", WebLab.PROCESSING_PROPERTY_NAMESPACE);
			if (ADD_IDS || ADD_FEATURES) {
				pokhe.setNSPrefix(GATE_TEMP_NS_PREFIX, GATE_TEMP_NS);
			}
			if (SERVICE_URI != null) {
				pokhe.createResStat(wlAnnot.getUri(), WebLab.IS_PRODUCED_BY, SERVICE_URI);
			}
			for (final gate.Annotation gateAnnot : annots) {
				if (!EXCLUDED_ANNOTATIONS.contains(gateAnnot.getType())) {
					GateHelper.linkGateAnnotToText(text, gateAnnot, pokhe);
				}
			}
			pokhe.commit();
		}
		Collections.sort(text.getSegment(), new SegmentComparator());
	}



	/**
	 * Creates a LinearSegment at the position of the gate.Annotation.
	 * Creates an instance of this entity using the PoKHelper.
	 * 
	 * @param text
	 *            The text section to process
	 * @param annotGate
	 *            An annotation in gate format
	 * @param pokh
	 *            The pokHelper yo be used to create instances.
	 */
	private static void linkGateAnnotToText(Text text, final gate.Annotation annotGate, final PoKHelper pokh) {
		LinearSegment segment = SegmentFactory.createAndLinkLinearSegment(text, annotGate.getStartNode().getOffset().intValue(), annotGate.getEndNode().getOffset().intValue());

		final String instanceURI = "weblab:gateInstance/" + annotGate.getType() + "/" + System.nanoTime() + "/" + annotGate.hashCode();
		final String typeURI = GATE_TYPE_BASE_URI + annotGate.getType();

		pokh.createResStat(instanceURI, RDF.TYPE, typeURI);
		pokh.createResStat(segment.getUri(), WebLab.REFERS_TO, instanceURI);
		String label;
		try {
			label = TextUtil.getSegmentText(text, segment);
		} catch (final WebLabCheckedException wlce) {
			LogFactory.getLog(GateHelper.class).warn("Unable to retrieve text at segment: " + segment.getUri(), wlce);
			label = "";
		}
		if (!label.trim().isEmpty()) {
			pokh.createLitStat(instanceURI, RDFS.LABEL, label.trim());
		}

		if (ADD_FEATURES) {
			final FeatureMap featureMap = annotGate.getFeatures();
			if (featureMap != null && !featureMap.isEmpty()) {
				for (final Object key : featureMap.keySet()) {
					if (key instanceof String) {
						String featKey = ((String) key).trim();
						if (!EXCLUDED_FEATURES.contains(featKey)) {
							final Object featureValue = featureMap.get(featKey);
							if (featureValue != null) {
								pokh.createLitStat(instanceURI, GATE_TEMP_NS + featKey, featureValue.toString());
							}
						}
					} else {
						LogFactory.getLog(GateHelper.class).warn("Unable to create feature from key '" + key + "' on gate annotation type '" + annotGate.getType() + "'.");
					}
				}
			}
		}

		if (ADD_IDS) {
			pokh.createLitStat(instanceURI, GATE_TEMP_NS + "nodeId", annotGate.getId().toString());
			pokh.createLitStat(instanceURI, GATE_TEMP_NS + "startNodeId", annotGate.getStartNode().getId().toString());
			pokh.createLitStat(instanceURI, GATE_TEMP_NS + "endNodeId", annotGate.getEndNode().getId().toString());
		}
	}



	/**
	 * Creates a LinearSegment at the same position that the gate.Annotation.
	 * Create an Annotation on this Segment.
	 * If the same segment is already annotated by this service (contained by the Map) the existing Annotation is used.
	 * 
	 * Add to this Annotation the statements from gate.Annotation.
	 * 
	 * @param text
	 *            The text section to process
	 * @param annotGate
	 *            An annotation in gate format
	 * @param annotated
	 *            The Map of previously annotated by this service Segment, enabling to not create various Annotations for the same position in text.
	 */
	private static void linkGateAnnotToText(Text text, final gate.Annotation annotGate, Map annotated) {
		LinearSegment segment = SegmentFactory.createAndLinkLinearSegment(text, annotGate.getStartNode().getOffset().intValue(), annotGate.getEndNode().getOffset().intValue());

		final PoKHelperExtended pokHelper;
		if (annotated.containsKey(segment.getUri())) {
			final Annotation annot = annotated.get(segment.getUri());
			pokHelper = RDFHelperFactory.getPoKHelperExtended(annot);
			pokHelper.setAutoCommitMode(false);
		} else {
			final Annotation annot = AnnotationFactory.createAndLinkAnnotation(segment);
			annotated.put(segment.getUri(), annot);
			pokHelper = RDFHelperFactory.getPoKHelperExtended(annot);
			pokHelper.setAutoCommitMode(false);
			pokHelper.setNSPrefix(GATE_TEMP_NS_PREFIX, GATE_TEMP_NS);
			if (SERVICE_URI != null) {
				pokHelper.setNSPrefix("wlp", WebLab.PROCESSING_PROPERTY_NAMESPACE);
				pokHelper.createResStat(annot.getUri(), WebLab.IS_PRODUCED_BY, SERVICE_URI);
			}
		}
		GateHelper.reifyMetas(annotGate, segment.getUri(), pokHelper);

		pokHelper.commit();
	}


	/**
	 * Reads the content annotGate featureMap and annotate as reified statement these properties. It also annotate startNode, endNode and id of the Gate Annotation.
	 * 
	 * @param annotGate
	 *            The annotation to extract features
	 * @param subject
	 *            The subject of the statement
	 * @param pokHE
	 *            the extended pok helper to be used to annotate
	 */
	private static void reifyMetas(final gate.Annotation annotGate, final String subject, final PoKHelperExtended pokHE) {
		final String reifURI = "weblab:gateAnnot/" + annotGate.getType() + "/" + System.nanoTime() + "/" + annotGate.hashCode();
		boolean reified = false;
		final FeatureMap featureMap = annotGate.getFeatures();
		if (featureMap != null && !featureMap.isEmpty()) {
			for (final Object key : featureMap.keySet()) {
				if (key instanceof String) {
					String featKey = ((String) key).trim();
					if (!EXCLUDED_FEATURES.contains(featKey)) {
						final Object featureValue = featureMap.get(featKey);
						if (featureValue != null) {
							GateHelper.reifyLiteralMeta(subject, annotGate.getType(), featKey + "=" + featureValue.toString(), pokHE, reifURI);
							reified = true;
						}
					}
				} else {
					LogFactory.getLog(GateHelper.class).warn("Unable to create feature from key '" + key + "' on gate annotation type '" + annotGate.getType() + "'.");
				}
			}
		}

		if (ADD_IDS) {
			GateHelper.reifyLiteralMeta(subject, annotGate.getType(), "gateStartNode=" + annotGate.getStartNode().getId().toString(), pokHE, reifURI);
			GateHelper.reifyLiteralMeta(subject, annotGate.getType(), "gateEndNode=" + annotGate.getEndNode().getId().toString(), pokHE, reifURI);
			GateHelper.reifyLiteralMeta(subject, annotGate.getType(), "gateAnnotId=" + annotGate.getId().toString(), pokHE, reifURI);
			reified = true;
		}

		if (!reified) {
			pokHE.createLitStat(subject, GATE_TEMP_ANNOTATION_TYPE, annotGate.getType());
		}
	}

	/**
	 * @param subject
	 *            The subject of the original statement
	 * @param object
	 *            The object of the original statement
	 * @param reifiedLiteral
	 *            The reified literal value
	 * @param pokHE
	 *            The pokHE to be used to annotate
	 * @param reifURI
	 *            The URI of the reified statement to be created.
	 */
	private static void reifyLiteralMeta(final String subject, final String object, final String reifiedLiteral, PoKHelperExtended pokHE, final String reifURI) {
		Map properFeatures = new HashMap(1);
		properFeatures.put(GATE_TEMP_ANNOTATION_META, reifiedLiteral);
		pokHE.createLitStatReif(subject, GATE_TEMP_ANNOTATION_TYPE, object, properFeatures, EMPTY_MAP, EMPTY_MAP, reifURI);
	}



	/**
	 * Use the file PROPERTIES_FILE_NAME to set:
	 * 
    *
  • Which annotation types to exclude.
  • *
  • Which features types to exclude.
  • *
  • Which conversion mode to use.
  • *
  • Whether or not to add ids info.
  • *
  • Which service URI to use with isProducedBy statements (if not defined, not statement will be added).
  • *
*/ public static void init() { Map props = PropertiesLoader.loadProperties(PROPERTIES_FILE_NAME); final String excludedString = props.get(EXCLUSION_PROPERTY); final String excludedFeaturesString = props.get(FEATURE_EXCLUSION_PROPERTY); final String processingMode = props.get(PROCESSING_MODE_PROPERTY); final String verboseMode = props.get(VERBOSE_PROPERTY); SERVICE_URI = props.get(SERVICE_URI_PROPERTY); if (excludedString != null && !excludedString.isEmpty()) { EXCLUDED_ANNOTATIONS.addAll(Arrays.asList(excludedString.split(";"))); } else { LogFactory.getLog(GateHelper.class).warn("No " + EXCLUSION_PROPERTY + " property found in " + PROPERTIES_FILE_NAME + " file; nothing will be skipped."); } if (excludedFeaturesString != null && !excludedFeaturesString.isEmpty()) { EXCLUDED_FEATURES.addAll(Arrays.asList(excludedFeaturesString.split(";"))); } else { LogFactory.getLog(GateHelper.class).warn("No " + FEATURE_EXCLUSION_PROPERTY + " property found in " + PROPERTIES_FILE_NAME + " file; nothing will be skipped."); } if (processingMode.equalsIgnoreCase(OLD_PROCESSING_MODE)) { OLD_MODE = true; ADD_FEATURES = true; } else if (processingMode.equalsIgnoreCase(SIMPLE_PROCESSING_MODE)) { OLD_MODE = false; ADD_FEATURES = false; } else if (processingMode.equalsIgnoreCase(COMPLETE_PROCESSING_MODE)) { OLD_MODE = false; ADD_FEATURES = true; } else { // Default mode LogFactory.getLog(GateHelper.class).warn("No " + PROCESSING_MODE_PROPERTY + " property found in " + PROPERTIES_FILE_NAME + " file; SimpleMode will be used."); OLD_MODE = false; ADD_FEATURES = false; } if (verboseMode != null) { ADD_IDS = Boolean.parseBoolean(verboseMode); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy