All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gate.Utils Maven / Gradle / Ivy

Go to download

GATE - general architecture for text engineering - is open source software capable of solving almost any text processing problem. This artifact enables you to embed the core GATE Embedded with its essential dependencies. You will be able to use the GATE Embedded API and load and store GATE XML documents. This artifact is the perfect dependency for CREOLE plugins or for applications that need to customize the GATE dependencies due to conflicts with their own dependencies or for a lower footprint.

The newest version!
/*
 *  Utils.java
 *
 *  Copyright (c) 1995-2012, The University of Sheffield. See the file
 *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
 *
 *  This file is part of GATE (see http://gate.ac.uk/), and is free
 *  software, licenced under the GNU Library General Public License,
 *  Version 2, June 1991 (in the distribution as file licence.html,
 *  and also available at http://gate.ac.uk/gate/licence.html).
 *
 *  Johann Petrak, 2010-02-05
 *
 */

package gate;

import java.io.File;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.event.Level;

import gate.annotation.AnnotationSetImpl;
import gate.annotation.ImmutableAnnotationSetImpl;
import gate.creole.ConditionalSerialController;
import gate.creole.Plugin;
import gate.creole.RunningStrategy;
import gate.util.FeatureBearer;
import gate.util.GateRuntimeException;
import gate.util.InvalidOffsetException;
import gate.util.OffsetComparator;

/**
 * Various utility methods that make often-needed tasks easier and
 * require less code.  In Java code (or JAPE grammars) you may wish to
 * import static gate.Utils.* to access these methods without
 * having to qualify them with a class name.  In Groovy code, this class can be
 * used as a category to inject each utility method into the class of its first
 * argument, e.g.
 * 
 * Document doc = // ...
 * Annotation ann = // ...
 * use(gate.Utils) {
 *   println "Annotation has ${ann.length()} characters"
 *   println "and covers the string \"${doc.stringFor(ann)}\""
 * }
 * 
* * @author Johann Petrak, Ian Roberts */
public class Utils {

  /**
   * Return the length of the document content covered by an Annotation as an
   * int -- if the content is too long for an int, the method will throw
   * a GateRuntimeException. Use {@link #lengthLong(SimpleAnnotation)} if
   * this situation could occur.
   *
   * @param ann the annotation for which to determine the length
   * @return the length of the document content covered by this annotation.
   * @throws GateRuntimeException if the length exceeds Integer.MAX_VALUE
   */
  public static int length(SimpleAnnotation ann) {
    long len = lengthLong(ann);
    if (len > java.lang.Integer.MAX_VALUE) {
      throw new GateRuntimeException(
              "Length of annotation too big to be returned as an int: "+len);
    }
    return (int)len;
  }

  /**
   * Return the length of the document content covered by an Annotation as a
   * long.
   *
   * @param ann the annotation for which to determine the length
   * @return the length of the document content covered by this annotation,
   *         i.e. the end offset minus the start offset.
   */
  public static long lengthLong(SimpleAnnotation ann) {
    return ann.getEndNode().getOffset() - ann.getStartNode().getOffset();
  }

  /**
   * Return the length of the document as an
   * int -- if the content is too long for an int, the method will throw a
   * GateRuntimeException. Use {@link #lengthLong(Document)} if
   * this situation could occur.
   *
   * @param doc the document for which to determine the length
   * @return the length of the document content.
   * @throws GateRuntimeException if the length exceeds Integer.MAX_VALUE
   */
  public static int length(Document doc) {
    // delegate to the long variant, mirroring length(SimpleAnnotation)
    long len = lengthLong(doc);
    if (len > java.lang.Integer.MAX_VALUE) {
      throw new GateRuntimeException(
              "Length of document too big to be returned as an int: "+len);
    }
    return (int)len;
  }

  /**
   * Return the length of the document as a long.
   *
   * @param doc the document for which to determine the length
   * @return the length of the document content.
   */
  public static long lengthLong(Document doc) {
    return doc.getContent().size();
  }

  /**
   * Return the DocumentContent corresponding to the annotation.
   *

* Note: the DocumentContent object returned will also contain the * original content which can be accessed using the getOriginalContent() * method. * @param doc the document from which to extract the content * @param ann the annotation for which to return the content. * @return a DocumentContent representing the content spanned by the annotation. */ public static DocumentContent contentFor( SimpleDocument doc, SimpleAnnotation ann) { try { return doc.getContent().getContent( ann.getStartNode().getOffset(), ann.getEndNode().getOffset()); } catch(gate.util.InvalidOffsetException ex) { throw new GateRuntimeException(ex.getMessage()); } } /** * Return the document text as a String corresponding to the annotation. * @param doc the document from which to extract the document text * @param ann the annotation for which to return the text. * @return a String representing the text content spanned by the annotation. */ public static String stringFor( Document doc, SimpleAnnotation ann) { try { return doc.getContent().getContent( ann.getStartNode().getOffset(), ann.getEndNode().getOffset()).toString(); } catch(gate.util.InvalidOffsetException ex) { throw new GateRuntimeException(ex.getMessage(),ex); } } /** * Return the cleaned document text as a String corresponding to the annotation. * (Delete leading and trailing whitespace; normalize * internal whitespace to single spaces.) * @param doc the document from which to extract the document text * @param ann the annotation for which to return the text. * @return a String representing the text content spanned by the annotation. */ public static String cleanStringFor(Document doc, SimpleAnnotation ann) { return cleanString(stringFor(doc, ann)); } /** * Returns the document text between the provided offsets. 
* @param doc the document from which to extract the document text * @param start the start offset * @param end the end offset * @return document text between the provided offsets */ public static String stringFor( Document doc, Long start, Long end) { try { return doc.getContent().getContent( start, end).toString(); } catch(gate.util.InvalidOffsetException ex) { throw new GateRuntimeException(ex.getMessage()); } } /** * Return the cleaned document text between the provided offsets. * (Delete leading and trailing whitespace; normalize * internal whitespace to single spaces.) * @param doc the document from which to extract the document text * @param start the start offset * @param end the end offset * @return document text between the provided offsets */ public static String cleanStringFor(Document doc, Long start, Long end) { return cleanString(stringFor(doc, start, end)); } /** * Return the DocumentContent covered by the given annotation set. *

* Note: the DocumentContent object returned will also contain the * original content which can be accessed using the getOriginalContent() * method. * @param doc the document from which to extract the content * @param anns the annotation set for which to return the content. * @return a DocumentContent representing the content spanned by the * annotation set. */ public static DocumentContent contentFor( SimpleDocument doc, AnnotationSet anns) { try { return doc.getContent().getContent( anns.firstNode().getOffset(), anns.lastNode().getOffset()); } catch(gate.util.InvalidOffsetException ex) { throw new GateRuntimeException(ex.getMessage()); } } /** * Return the document text as a String covered by the given annotation set. * @param doc the document from which to extract the document text * @param anns the annotation set for which to return the text. * @return a String representing the text content spanned by the annotation * set. */ public static String stringFor( Document doc, AnnotationSet anns) { try { return doc.getContent().getContent( anns.firstNode().getOffset(), anns.lastNode().getOffset()).toString(); } catch(gate.util.InvalidOffsetException ex) { throw new GateRuntimeException(ex.getMessage()); } } /** * Return the cleaned document text as a String covered by the given annotation set. * (Delete leading and trailing whitespace; normalize * internal whitespace to single spaces.) * @param doc the document from which to extract the document text * @param anns the annotation set for which to return the text. * @return a String representing the text content spanned by the annotation * set. */ public static String cleanStringFor(Document doc, AnnotationSet anns) { return cleanString(stringFor(doc, anns)); } /** * Return a cleaned version of the input String. (Delete leading and trailing * whitespace; normalize internal whitespace to single spaces; return an * empty String if the input contains nothing but whitespace, but null * if the input is null.) 
* @return a cleaned version of the input String. */ public static String cleanString(String input) { if (input == null) { return null; } // implied else return input.replaceAll("\\s+", " ").trim(); } /** * Get the start offset of an annotation. */ public static Long start(SimpleAnnotation a) { return (a.getStartNode() == null) ? null : a.getStartNode().getOffset(); } /** * Get the start offset of an annotation set. */ public static Long start(AnnotationSet as) { return (as.firstNode() == null) ? null : as.firstNode().getOffset(); } /** * Get the start offset of a document (i.e. 0L). */ public static Long start(SimpleDocument d) { return Long.valueOf(0L); } /** * Get the end offset of an annotation. */ public static Long end(SimpleAnnotation a) { return (a.getEndNode() == null) ? null : a.getEndNode().getOffset(); } /** * Get the end offset of an annotation set. */ public static Long end(AnnotationSet as) { return (as.lastNode() == null) ? null : as.lastNode().getOffset(); } /** * Get the end offset of a document. */ public static Long end(SimpleDocument d) { return d.getContent().size(); } /** * Return a the subset of annotations from the given annotation set * that start exactly at the given offset. * * @param annotationSet the set of annotations from which to select * @param atOffset the offset where the annoation to be returned should start * @return an annotation set containing all the annotations from the original * set that start at the given offset */ public static AnnotationSet getAnnotationsAtOffset( AnnotationSet annotationSet, Long atOffset) { // this returns all annotations that start at this atOffset OR AFTER! AnnotationSet tmp = annotationSet.get(atOffset); // so lets filter ... 
List ret = new ArrayList(); Iterator it = tmp.iterator(); while(it.hasNext()) { Annotation ann = it.next(); if(ann.getStartNode().getOffset().equals(atOffset)) { ret.add(ann); } } return Factory.createImmutableAnnotationSet(annotationSet.getDocument(), ret); } public static AnnotationSet getAnnotationsEndingAtOffset(AnnotationSet annotationSet, Long endOffset) { List endsAt = new ArrayList(); // start can't be negative Long start = endOffset > 0 ? endOffset - 1 : 0; // it seems we can ask for beyond the document without error Long end = endOffset + 1; // get annotations that overlap this bit AnnotationSet annotations = annotationSet.get(start,end); // filter to get just those that end at the offset for (Annotation a : annotations) { if (a.getEndNode().getOffset().equals(endOffset)) { endsAt.add(a); } } // return the annotations we've found, if any return Factory.createImmutableAnnotationSet(annotationSet.getDocument(), endsAt); } /** * Get all the annotations from the source annotation set that lie within * the range of the containing annotation. * * @param sourceAnnotationSet the annotation set from which to select * @param containingAnnotation the annotation whose range must contain the * selected annotations * @return the AnnotationSet containing all annotations fully contained in * the offset range of the containingAnnotation */ public static AnnotationSet getContainedAnnotations( AnnotationSet sourceAnnotationSet, Annotation containingAnnotation) { return getContainedAnnotations(sourceAnnotationSet,containingAnnotation,""); } /** * Get all the annotations of type targetType * from the source annotation set that lie within * the range of the containing annotation. * * @param sourceAnnotationSet the annotation set from which to select * @param containingAnnotation the annotation whose range must contain the * @param targetType the type the selected annotations must have. If the * empty string, no filtering on type is done. 
* @return the AnnotationSet containing all annotations fully contained in * the offset range of the containingAnnotation */ public static AnnotationSet getContainedAnnotations( AnnotationSet sourceAnnotationSet, Annotation containingAnnotation, String targetType) { if(targetType.equals("")) { return sourceAnnotationSet.getContained( containingAnnotation.getStartNode().getOffset(), containingAnnotation.getEndNode().getOffset()); } else { return sourceAnnotationSet.getContained( containingAnnotation.getStartNode().getOffset(), containingAnnotation.getEndNode().getOffset()).get(targetType); } } /** * Get all the annotations from the source annotation set that lie within * the range of the containing annotation set, i.e. within the offset range * between the start of the first annotation in the containing set and the * end of the last annotation in the annotation set. If the containing * annotation set is empty, an empty set is returned. * * @param sourceAnnotationSet the annotation set from which to select * @param containingAnnotationSet the annotation set whose range must contain * the selected annotations * @return the AnnotationSet containing all annotations fully contained in * the offset range of the containingAnnotationSet */ public static AnnotationSet getContainedAnnotations( AnnotationSet sourceAnnotationSet, AnnotationSet containingAnnotationSet) { return getContainedAnnotations(sourceAnnotationSet,containingAnnotationSet,""); } /** * Get all the annotations from the source annotation set with a type equal to * targetType that lie within * the range of the containing annotation set, i.e. within the offset range * between the start of the first annotation in the containing set and the * end of the last annotation in the annotation set. If the containing * annotation set is empty, an empty set is returned. 
* * @param sourceAnnotationSet the annotation set from which to select * @param containingAnnotationSet the annotation set whose range must contain * the selected annotations * @param targetType the type the selected annotations must have * @return the AnnotationSet containing all annotations fully contained in * the offset range of the containingAnnotationSet */ public static AnnotationSet getContainedAnnotations( AnnotationSet sourceAnnotationSet, AnnotationSet containingAnnotationSet, String targetType) { if(containingAnnotationSet.isEmpty() || sourceAnnotationSet.isEmpty()) { return Factory.createImmutableAnnotationSet(sourceAnnotationSet.getDocument(), null); } if(targetType.equals("")) { return sourceAnnotationSet.getContained( containingAnnotationSet.firstNode().getOffset(), containingAnnotationSet.lastNode().getOffset()); } else { return sourceAnnotationSet.getContained( containingAnnotationSet.firstNode().getOffset(), containingAnnotationSet.lastNode().getOffset()).get(targetType); } } /** * Get all the annotations from the source annotation set that cover * the range of the specified annotation. * * @param sourceAnnotationSet the annotation set from which to select * @param coveredAnnotation the annotation whose range must equal or lie within * the selected annotations * @return the AnnotationSet containing all annotations that fully cover * the offset range of the coveredAnnotation */ public static AnnotationSet getCoveringAnnotations( AnnotationSet sourceAnnotationSet, Annotation coveredAnnotation) { return getCoveringAnnotations(sourceAnnotationSet,coveredAnnotation,""); } /** * Get all the annotations of type targetType * from the source annotation set that cover * the range of the specified annotation. * * @param sourceAnnotationSet the annotation set from which to select * @param coveredAnnotation the annotation whose range must be covered * @param targetType the type the selected annotations must have. 
If the
   * empty string, no filtering on type is done.
   * @return the AnnotationSet containing all annotations that fully cover
   * the offset range of the coveredAnnotation
   */
  public static AnnotationSet getCoveringAnnotations(
          AnnotationSet sourceAnnotationSet,
          Annotation coveredAnnotation,
          String targetType) {
    Long coveredFrom = coveredAnnotation.getStartNode().getOffset();
    Long coveredTo = coveredAnnotation.getEndNode().getOffset();
    return sourceAnnotationSet.getCovering(targetType, coveredFrom, coveredTo);
  }

  /**
   * Select from the source annotation set every annotation that covers the
   * whole range spanned by the given annotation set. This is the untyped
   * variant: it delegates to the three-argument overload with an empty
   * target type.
   *
   * @param sourceAnnotationSet the annotation set from which to select
   * @param coveredAnnotationSet the annotation set whose range must be covered by
   * the selected annotations
   * @return the AnnotationSet containing all annotations that fully cover
   * the offset range of the coveredAnnotationSet
   */
  public static AnnotationSet getCoveringAnnotations(
          AnnotationSet sourceAnnotationSet,
          AnnotationSet coveredAnnotationSet) {
    return getCoveringAnnotations(sourceAnnotationSet, coveredAnnotationSet, "");
  }

  /**
   * Get all the annotations from the source annotation set with a type equal to
   * targetType that cover
   * the range of the specified annotation set. If the specified
   * annotation set is empty, an empty set is returned.
* * @param sourceAnnotationSet the annotation set from which to select * @param coveredAnnotationSet the annotation set whose range must * be covered by the selected annotations * @param targetType the type the selected annotations must have * @return the AnnotationSet containing all annotations that fully cover * the offset range of the containingAnnotationSet */ public static AnnotationSet getCoveringAnnotations( AnnotationSet sourceAnnotationSet, AnnotationSet coveredAnnotationSet, String targetType) { if(coveredAnnotationSet.isEmpty() || sourceAnnotationSet.isEmpty()) { return Factory.createImmutableAnnotationSet(sourceAnnotationSet.getDocument(), null); } return sourceAnnotationSet.getCovering(targetType, coveredAnnotationSet.firstNode().getOffset(), coveredAnnotationSet.lastNode().getOffset()); } /** * Get all the annotations from the source annotation set that * partly or totally overlap * the range of the specified annotation. * * @param sourceAnnotationSet the annotation set from which to select * @param overlappedAnnotation the annotation whose range the selected * annotations must overlap * @return the AnnotationSet containing all annotations that fully cover * the offset range of the coveredAnnotation */ public static AnnotationSet getOverlappingAnnotations( AnnotationSet sourceAnnotationSet, Annotation overlappedAnnotation) { return getOverlappingAnnotations(sourceAnnotationSet,overlappedAnnotation,""); } /** * Get all the annotations of type targetType * from the source annotation set that partly or totally overlap * the range of the specified annotation. * * @param sourceAnnotationSet the annotation set from which to select * @param overlappedAnnotation the annotation whose range the selected * annotations must overlap * @param targetType the type the selected annotations must have. If the * empty string, no filtering on type is done. 
* @return the AnnotationSet containing all annotations that fully cover * the offset range of the coveredAnnotation */ public static AnnotationSet getOverlappingAnnotations( AnnotationSet sourceAnnotationSet, Annotation overlappedAnnotation, String targetType) { if ( (targetType == null) || targetType.isEmpty()) { return sourceAnnotationSet.get(overlappedAnnotation.getStartNode().getOffset(), overlappedAnnotation.getEndNode().getOffset()); } return sourceAnnotationSet.get(targetType, overlappedAnnotation.getStartNode().getOffset(), overlappedAnnotation.getEndNode().getOffset()); } /** * Get all the annotations from the source annotation set that overlap * the range of the specified annotation set. If the overlapped * annotation set is empty, an empty set is returned. * * @param sourceAnnotationSet the annotation set from which to select * @param overlappedAnnotationSet the annotation set whose range must * be overlapped by the selected annotations * @return the AnnotationSet containing all annotations that fully cover * the offset range of the containingAnnotationSet */ public static AnnotationSet getOverlappingAnnotations( AnnotationSet sourceAnnotationSet, AnnotationSet overlappedAnnotationSet) { return getOverlappingAnnotations(sourceAnnotationSet,overlappedAnnotationSet,""); } /** * Get all the annotations from the source annotation set with a type equal to * targetType that partly or completely overlap the range of the specified * annotation set. If the specified annotation set is empty, an empty * set is returned. 
* * @param sourceAnnotationSet the annotation set from which to select * @param overlappedAnnotationSet the annotation set whose range must * be overlapped by the selected annotations * @param targetType the type the selected annotations must have * @return the AnnotationSet containing all annotations that partly or fully * overlap the offset range of the containingAnnotationSet */ public static AnnotationSet getOverlappingAnnotations( AnnotationSet sourceAnnotationSet, AnnotationSet overlappedAnnotationSet, String targetType) { if(overlappedAnnotationSet.isEmpty() || sourceAnnotationSet.isEmpty()) { return Factory.createImmutableAnnotationSet(sourceAnnotationSet.getDocument(), null); } if ( (targetType == null) || targetType.isEmpty()) { return sourceAnnotationSet.get(overlappedAnnotationSet.firstNode().getOffset(), overlappedAnnotationSet.lastNode().getOffset()); } return sourceAnnotationSet.get(targetType, overlappedAnnotationSet.firstNode().getOffset(), overlappedAnnotationSet.lastNode().getOffset()); } /** * Return a List containing the annotations in the given annotation set, in * document order (i.e. increasing order of start offset). * * @param as the annotation set * @return a list containing the annotations from as in document * order. */ public static List inDocumentOrder(AnnotationSet as) { List ret = new ArrayList(); if(as != null) { ret.addAll(as); Collections.sort(ret, OFFSET_COMPARATOR); } return ret; } /** * A single instance of {@link OffsetComparator} that can be used by any code * that requires one. */ public static final OffsetComparator OFFSET_COMPARATOR = new OffsetComparator(); /** * Create a feature map from an array of values. The array must have an even * number of items, alternating keys and values i.e. [key1, value1, key2, * value2, ...]. * * @param values an even number of items, alternating keys and values. * @return a feature map containing the given items. */ public static FeatureMap featureMap(Object... 
values) { FeatureMap fm = Factory.newFeatureMap(); if(values != null) { for(int i = 0; i < values.length; i++) { fm.put(values[i], values[++i]); } } return fm; } /** * Create a feature map from an existing map (typically one that does not * itself implement FeatureMap). * * @param map the map to convert. * @return a new FeatureMap containing the same mappings as the source map. */ public static FeatureMap toFeatureMap(Map map) { FeatureMap fm = Factory.newFeatureMap(); fm.putAll(map); return fm; } /** * This method can be used to check if a ProcessingResource has * a chance to be run in the given controller with the current settings. *

* That means that for a non-conditional controller, the method will return * true if the PR is part of the controller. For a conditional controller, * the method will return true if it is part of the controller and at least * once (if the same PR is contained multiple times) it is not disabled. * * @param controller * @param pr * @return true or false depending on the conditions explained above. */ public static boolean isEnabled(Controller controller, ProcessingResource pr) { Collection prs = controller.getPRs(); if(!prs.contains(pr)) { return false; } if(controller instanceof ConditionalSerialController) { Collection rss = ((ConditionalSerialController)controller).getRunningStrategies(); for(RunningStrategy rs : rss) { // if we find at least one occurrence of the PR that is not disabled // return true if(rs.getPR().equals(pr) && rs.getRunMode() != RunningStrategy.RUN_NEVER) { return true; } } // if we get here, no occurrence of the PR has found or none that // is not disabled, so return false return false; } return true; } /** * Return the running strategy of the PR in the controller, if the controller * is a conditional controller. If the controller is not a conditional * controller, null is returned. If the controller is a conditional controller * and the PR is contained multiple times, the running strategy for the * first occurrence the is found is returned. * * @param controller * @param pr * @return A RunningStrategy object or null */ public static RunningStrategy getRunningStrategy(Controller controller, ProcessingResource pr) { if(controller instanceof ConditionalSerialController) { Collection rss = ((ConditionalSerialController)controller).getRunningStrategies(); for(RunningStrategy rs : rss) { if(rs.getPR() == pr) { return rs; } } } return null; } /** * Issue a message to the log but only if the same message has not * been logged already in the same GATE session. 
* This is intended for explanations or warnings that should not be * repeated every time the same situation occurs. * * @param logger - the logger instance to use * @param level - a Log4J severity level for the message * @param message - the message itself * @deprecated Log4J support will be removed in future, please use SLF4J */ @Deprecated public static void logOnce (org.apache.log4j.Logger logger, org.apache.log4j.Level level, String message) { if(!alreadyLoggedMessages.contains(message)) { try { logger.log(level, message); } catch (Exception e) { System.err.println( "Failed to access logger through deprecated gate.Utils.logOnce method.\n"+ "Log message was: " + message); } alreadyLoggedMessages.add(message); } } /** * Issue a message to the log but only if the same message has not * been logged already in the same GATE session. * This is intended for explanations or warnings that should not be * repeated every time the same situation occurs. * * @param logger - the logger instance to use * @param level - an SLF4J severity level for the message * @param message - the message itself */ public static void logOnce(Logger logger, Level level, String message) { if (!alreadyLoggedMessages.contains(message)) { switch (level) { case TRACE: logger.trace(message); break; case DEBUG: logger.debug(message); break; case INFO: logger.info(message); break; case WARN: logger.warn(message); break; case ERROR: logger.error(message); break; default: // unknown log level, should be impossible } alreadyLoggedMessages.add(message); } } /** * Check if a message has already been logged or shown. This does not log * or show anything but only stores the message as one that has been shown * already if necessary and returns if the message has been shown or not. * * @param message - the message that should only be logged or shown once * @return - true if the message has already been logged or checked with * this method. 
*
   */
  public static boolean isLoggedOnce(String message) {
    // add() returns false when the message was already present; using its
    // result makes the check-and-record step a single atomic operation
    return !alreadyLoggedMessages.add(message);
  }

  /** Messages already seen by logOnce or isLoggedOnce. */
  private static final Set<String> alreadyLoggedMessages =
          Collections.synchronizedSet(new HashSet<String>());

  /**
   * Returns the only annotation that annset is expected to contain, throws an
   * exception if there is not exactly one annotation. This is useful when a
   * binding set is expected to contain exactly one interesting annotation.
   *
   * @param annset the annotation set that is expected to contain exactly one annotation
   * @return the one annotation
   * @throws GateRuntimeException if there are 0 or more than one annotations
   */
  public static Annotation getOnlyAnn(AnnotationSet annset) {
    int count = annset.size();
    if (count != 1) {
      throw new GateRuntimeException(
          "Annotation set does not contain exactly 1 annotation but " + count);
    }
    return annset.iterator().next();
  }

  /**
   * Add a new annotation to the output annotation set outSet, spanning the same
   * region as spanSet, and having the given type and feature map. The start and
   * end nodes of the new annotation will be new nodes. This method will convert
   * the checked InvalidOffsetException that can be raised by
   * AnnotationSet.add to a GateRuntimeException.
* * @param outSet the annotation set where the new annotation will be added * @param spanSet an annotation set representing the span of the new annotation * @param type the annotation type of the new annotation * @param fm the feature map to use for the new annotation * @return Returns the Id of the added annotation */ public static Integer addAnn(AnnotationSet outSet, AnnotationSet spanSet, String type, FeatureMap fm) { try { return outSet.add(start(spanSet), end(spanSet), type, fm); } catch (InvalidOffsetException ex) { throw new GateRuntimeException("Offset error when trying to add new annotation: ", ex); } } /** * Add a new annotation to the output annotation set outSet, spanning the * given offset range, and having the given type and feature map. The start and * end nodes of the new annotation will be new nodes. This method will convert * the checked InvalidOffsetException that can be raised by * AnnotationSet.add to a GateRuntimeException. * * @param outSet outSet the annotation set where the new annotation will be added * @param startOffset the start offset of the new annotation * @param endOffset the end offset of the new annotation * @param type the annotation type of the new annotation * @param fm the feature map to use for the new annotation * @return Returns the Id of the added annotation */ public static Integer addAnn(AnnotationSet outSet, long startOffset, long endOffset, String type, FeatureMap fm) { try { return outSet.add(startOffset, endOffset, type, fm); } catch (InvalidOffsetException ex) { throw new GateRuntimeException("Offset error when trying to add new annotation: ", ex); } } /** * Add a new annotation to the output annotation set outSet, covering the same * region as the annotation spanAnn, and having the given type and feature map. The start and * end nodes of the new annotation will be new nodes. This method will convert * the checked InvalidOffsetException that can be raised by * AnnotationSet.add to a GateRuntimeException. 
* * @param outSet the annotation set where the new annotation will be added * @param spanAnn an annotation representing the span of the new annotation * @param type the annotation type of the new annotation * @param fm the feature map to use for the new annotation * @return Returns the Id of the added annotation */ public static Integer addAnn(AnnotationSet outSet, Annotation spanAnn, String type, FeatureMap fm) { try { return outSet.add(start(spanAnn), end(spanAnn), type, fm); } catch (InvalidOffsetException ex) { throw new GateRuntimeException("Offset error adding new annotation: ", ex); } } static private Pattern nsQNamePattern = Pattern.compile("^(.*:)(.+)$"); /** * Expand both namespace prefixes and base-uris, if possible. * This will expand the String toExpand according to the following rules: *

    *
  • if toExpand is a qName and does start with a name prefix in the form * "somens:" or ":", then the name prefix is looked up in the prefixes * map and replaced with the URI prefix found there. If the prefix could not * be found a GateRuntimeException is thrown. *
  • if toExpand does not start with a name prefix, the entry with * an empty string as the key is retrieved from the prefixes map and * used as a baseURI: the result is the baseURI and the toExpand String * concatenated. If no entry with an empty string is found in the map, a * GateRuntimeException is thrown. * *
* * This method can therefore be used to expand both base uris and namespaces. *

* If the map only contains a basename uri (if the only entry is for the * empty string key) then name space prefixes are not checked: in this * case, the toExpand string may contain an unescaped colon. * If the map does not contain a basename URI (if there is no entry for the * empty string key) then all toExpand strings are expected to be qNames. *

* NOTE: the name prefixes in the prefixes map must include the trailing colon! * * @param toExpand the URI portion to expand as a String * @param prefixes a map from name prefixes to URI prefixes * @return a String with name prefixes or base URI expanded */ public static String expandUriString(String toExpand, Map prefixes ) { // lets see if we have a basename entry in the map String baseUri = prefixes.get(""); // if there is a baseURI and it is the only entry, just prefix toExpand with // it, no matter what if(baseUri != null && prefixes.size() == 1) { return baseUri+toExpand; } // if the toExpand string starts with .*:, interpret this as the name space Matcher m = nsQNamePattern.matcher(toExpand); if (m.matches()) { String prefix = m.group(1); String lname = m.group(2); String uriPrefix = prefixes.get(prefix); if(uriPrefix == null) { throw new GateRuntimeException("name prefix not found in prefix map for "+toExpand); } else { return uriPrefix+lname; } } else { // this is not a qName, try to expand with the baseURI if(baseUri == null) { throw new GateRuntimeException("No base Uri in prefix map for "+toExpand); } else { return baseUri + toExpand; } } } /** * Compact an URI String using base URI and namespace prefixes. * The prefixes map, which maps name prefixes of the form "ns:" or the empty * string to URI prefixes is searched for the first URI prefix in the value * set that matches the beginning of the uriString. The corresponding name prefix * is then used to replace that URI prefix. * In order to control which URI prefix is matched first if the map contains * several prefixes which can all match some URIs, a LinkedHashMap can be * used so that the first matching URI prefix will be deterministic. 
* * @param uriString a full URI String that should get shortened using prefix names or a base URI * @param prefixes a map containing name prefixes mapped to URI prefixes (same as for expandUriString) * @return a shortened URI where the URI prefix is replaced with a prefix name or the empty string */ public static String shortenUriString(String uriString, Map prefixes) { // get the URI prefixes String uriPrefix = ""; String namePrefix = ""; for(Map.Entry entry : prefixes.entrySet()) { String np = entry.getKey(); String uri = entry.getValue(); if(uriString.startsWith(uri)) { uriPrefix = uri; namePrefix = np; break; } } if(uriPrefix.equals("")) { throw new GateRuntimeException("No prefix found in prefixes map for "+uriString); } return namePrefix + uriString.substring(uriPrefix.length()); } /** * Get all the annotations from the source annotation set that start and end * at exactly the same offsets as the given annotation set. * * @param source the annotation set from which to select * @param coextSet the annotation set from which to take the start and end offsets * @return the AnnotationSet containing all annotations exactly coextensive with coextSet */ public static AnnotationSet getCoextensiveAnnotations(AnnotationSet source, AnnotationSet coextSet) { return getCoextensiveAnnotationsWorker(source, null, start(coextSet), end(coextSet)); } /** * Get all the annotations from the source annotation set that start and end * at exactly the same offsets as the given annotation set and are of the * specified type. 
* * @param source the annotation set from which to select * @param coextSet the annotation set from which to take the start and end offsets * @param type the desired annotation type of the annotations to return * @return the AnnotationSet containing all annotations exactly coextensive with coextSet and of the * specified type */ public static AnnotationSet getCoextensiveAnnotations(AnnotationSet source, AnnotationSet coextSet, String type) { return getCoextensiveAnnotationsWorker(source, type, start(coextSet), end(coextSet)); } /** * Get all the annotations from the source annotation set that start and end * at exactly the same offsets as the given annotation. * * @param source the annotation set from which to select * @param coextAnn the annotation from which to take the start and end offsets * @return the AnnotationSet containing all annotations exactly coextensive with coextAnn */ public static AnnotationSet getCoextensiveAnnotations(AnnotationSet source, Annotation coextAnn) { return getCoextensiveAnnotationsWorker(source, null, start(coextAnn), end(coextAnn)); } /** * Get all the annotations from the source annotation set that start and end * at exactly the same offsets as the given annotation and have the specified type. * * @param source the annotation set from which to select * @param coextAnn the annotation from which to take the start and end offsets * @return the AnnotationSet containing all annotations exactly coextensive with coextAnn and * having the specified type. 
*/
  public static AnnotationSet getCoextensiveAnnotations(AnnotationSet source, Annotation coextAnn, String type) {
    return getCoextensiveAnnotationsWorker(source, type, start(coextAnn), end(coextAnn));
  }

  /**
   * Shared implementation of the getCoextensiveAnnotations overloads: select
   * the annotations from source that start exactly at start and end exactly at
   * end, optionally restricted to one annotation type.
   *
   * @param source the annotation set from which to select
   * @param type the annotation type to restrict the result to, or null for any type
   * @param start the start offset the selected annotations must have
   * @param end the end offset the selected annotations must have
   * @return the matching annotations
   */
  private static AnnotationSet getCoextensiveAnnotationsWorker(AnnotationSet source,
          String type, long start, long end) {
    if(source instanceof AnnotationSetImpl) {
      // the standard implementation can do the exact-span lookup itself
      AnnotationSet strict = ((AnnotationSetImpl)source).getStrict(start, end);
      return (type == null) ? strict : strict.get(type);
    }
    // any other implementation: take the contained annotations and keep only
    // those whose span (and type, if requested) matches exactly
    List<Annotation> coextensive = new ArrayList<>();
    for(Annotation ann : source.getContained(start, end)) {
      boolean sameSpan = start(ann) == start && end(ann) == end;
      if(sameSpan && (type == null || ann.getType().equals(type))) {
        coextensive.add(ann);
      }
    }
    return Factory.createImmutableAnnotationSet(source.getDocument(), coextensive);
  }

  /**
   * This will replace all occurrences of variables of the form $env{name},
   * $prop{name}, $doc{featname}, $pr_parm{inputAS} or $$env{name} etc in a String.
   *
   * The source for replacing the variable can be environment variables,
   * system properties, or arbitrary maps or resources specified when calling
   * the method.
   *

* Examples: *

    *
  • replaceVariablesInString("text $env{GATE_HOME} more text"): * returns "text /path/to/gate more text" if the environment variable * "GATE_HOME" was set to "/path/to/gate" *
  • replaceVariablesInString("text $pr{myfeature1} more text",pr1): * returns "text myvalue1 more text" if the feature map of the processing resource pr1 * contains an entry with key "myfeature1" and value "myvalue1" *
  • replaceVariablesInString("text ${somekey} more text",map1,map2,resource1,map3): * this will * find the value of an entry with key "somekey" in the first Map object specified * in the parameter list of the method. *
*

* The possible sources for finding values for a variable are: *

    *
  • System.getenv(): for variables of the form $env{name} *
  • System.getProperties(): for variables of the form $prop{name} *
  • Resource: the feature map of any resource which is specified in the * list of objects is used for variables of the form $resource{name} or * for variables of the form $corpus{name} if the resource is a corpus, for * $pr{name} if the resource is a processing resource and so on. If the * resource is a processing resource its parameter values can additionally be * looked up with variables of the form $pr_parm{name}. *
  • FeatureMap or Map: any feature map or * Map which can be used to look up String keys can be specified * as a source and will be used for variables of the form ${name}. *
*

* The value substituted is converted to a string using the toString() * method of whatever object is stored in the map. If the value returned * by Map.get(key) is null, no substitution is carried out and the * variable is left unchanged in the string. *

* The following variable constructs are supported: *

    *
  • $env{name} will be replaced with the value from the environment variables map * from System.getenv() and nothing else. *
  • $prop{name} will be replaced with the value of the properties map * from System.getProperties() and nothing else. *
  • $controller{name} will be replaced with the value of a feature from the FeatureMap * of the first resource of type Controller found in the argument list. *
  • $corpus{name} will be replaced with the value of a feature from the FeatureMap * of the first resource of type Corpus found in the argument list. *
  • $pr{name} will be replaced with the value of a feature from the FeatureMap * of the first resource of type ProcessingResource found in the argument list. *
  • $pr_parm{name} will be replaced with the value of the parameter 'name' * of the first resource of type ProcessingResource found in the argument list. * This can be especially useful to replace a variable in one parameter with * the value of another, potentially hidden, parameter of the same PR. *
  • $doc{name} will be replaced with the value of a feature from the FeatureMap * of the first resource of type Document found in the argument list. *
  • $resource{name} will be replaced with the value of a feature from the FeatureMap * of the first resource of type Resource found in the argument list. *
*

* If two dollar characters are used instead of one, the replacement string * will in turn be subject to replacement, e.g. $$env{abc} could get replaced * with the replacement string '$corpus{f1}' which would in turn get replaced * with the value of the feature 'f1' from the feature set of the first * corpus in the parameter list that has a value for that feature. * */ @SuppressWarnings("unchecked") public static String replaceVariablesInString( String string, Object... sources) { // shortcut for strings where no replacement is possible (minimum content // would have to be $pr{x} if(string == null || string.isEmpty() || string.length() < 6) { return string; } Matcher matcher = varnamePattern.matcher(string); int findFrom = 0; int lastEnd = 0; StringBuilder sb = new StringBuilder(string.length()*2); while(findFrom < string.length() && matcher.find(findFrom)) { String dollars = matcher.group(1); String type = matcher.group(2); String varname = matcher.group(3); int matchStart = matcher.start(); // whenever we have found something, we can immediately move the part // from the last end of match to the new start of match to the // return string, that is just unmodified string ... // But only if the length is > 0 if((matchStart - lastEnd) > 0) { sb.append(string.substring(lastEnd,matchStart)); } lastEnd = matcher.end(); Object value = null; // for each match we find, go through all the sources in the order // listed and if the type of the source matches the requested type // then try to look the variable up. If we find something use it and // finish looking, otherwise continue until all sources have been // exhausted. // A variable where no value has been found anywhere is not replaced. // If a variable got replaced and it was a variable that started with // two dollar signs, then the replacement value is first getting // recursively replaced too. 
if(type.equals("env")) { value = System.getenv().get(varname); } else if(type.equals("prop")) { value = System.getProperties().get(varname); } else { for(Object source : sources) { if(type.isEmpty()) { // an empty variable type matches only maps from the sources if(source instanceof Map) { value = ((Map)source).get(varname); } } else if(type.equals("pr") && (source instanceof ProcessingResource)) { value = ((FeatureBearer)source).getFeatures().get(varname); } else if(type.equals("pr_parm") && (source instanceof ProcessingResource)) { try { value = ((ProcessingResource)source).getParameterValue(varname); } catch(Exception ex) { // do nothing, leave the value null } } else if(type.equals("doc") && (source instanceof Document)) { value = ((FeatureBearer)source).getFeatures().get(varname); } else if(type.equals("controller") && (source instanceof Controller)) { value = ((FeatureBearer)source).getFeatures().get(varname); } else if(type.equals("corpus") && (source instanceof Corpus)) { value = ((FeatureBearer)source).getFeatures().get(varname); } else if(type.equals("resource") && (source instanceof Resource)) { value = ((FeatureBearer)source).getFeatures().get(varname); } if(value != null) { break; } } // for source : sources } // only do anything at all if we found a value for this parameter if(value != null) { String replacement = value.toString(); // if we had double-dollars, first do the recursive replacement ... if(dollars.equals("$$")) { replacement = replaceVariablesInString(replacement, sources); } sb.append(replacement); } else { sb.append(matcher.group()); // the first character after the match } findFrom = matcher.end(); } // while matcher.find ... 
// if we have some unmatched string left over, append it too if(lastEnd < string.length()) { sb.append(string.substring(lastEnd)); } return sb.toString(); } private static final Pattern varnamePattern = Pattern.compile("(\\$\\$?)([a-zA-Z]*)\\{([^}]+)\\}"); /** * Load a plugin from the default GATE plugins directory. * * This will load the plugin with the specified directory name from the * default GATE plugins path, if GATE knows its own location. * * @param dirName The directory name of the plugin within the standard GATE plugins directory. */ @Deprecated public static void loadPlugin(String dirName) { File gatehome = Gate.getGateHome(); if(gatehome == null) { throw new GateRuntimeException("Cannot load Plugin, Gate home location not known"); } File pluginDir = new File(new File(gatehome,"plugins"),dirName); loadPlugin(pluginDir); } /** * Load a plugin from the specified directory. * * This will load the plugin from the directory path specified as a File object. * */ public static void loadPlugin(File pluginDir) { try { Gate.getCreoleRegister().registerPlugin(new Plugin.Directory(pluginDir.toURI().toURL())); } catch (Exception ex) { throw new GateRuntimeException("Could not register plugin directory "+pluginDir,ex); } } /** * Return the given set with the given annotations removed. * * This returns a new immutable annotation set, which contains all the annotations from origSet * except the given annotations. The removal is not based on equality but on the id of the * annotation: an annotation in origSet which has the same id as the annotation except is removed * in the returned set. *

* NOTE: Annotation ids are only unique within a document, so you should never mix annotations
   * from different documents when using this method!
   *
   * @param origSet The annotation set from which to remove the given annotations
   * @param except The annotations to remove from the given set
   * @return A new immutable annotation set with the given annotations removed from the original set
   */
  public static AnnotationSet minus(AnnotationSet origSet, Annotation... except) {
    // view the varargs array as a list and let the collection variant do the work
    List<Annotation> excluded = Arrays.asList(except);
    return minus(origSet, excluded);
  }

  /**
   * Return the given set with the given annotations removed.
   *
   * This returns a new immutable annotation set, which contains all the annotations from origSet
   * except the annotations given in the collection of exceptions.
   * The removal is not based on equality but on the id of the
   * annotations: an annotation in origSet which has the same id as an annotation from the exceptions
   * is removed in the returned set.
   *

* NOTE: Annotation ids are only unique within a document, so you should never mix annotations * from different documents when using this method! * * @param origSet The annotation set from which to remove the given exceptions * @param exceptions The annotations to remove from the given set * @return A new immutable annotation set with the exceptions removed from the original set */ public static AnnotationSet minus(AnnotationSet origSet, Collection exceptions) { Set ids = new HashSet(); for(Annotation exception : exceptions) { ids.add(exception.getId()); } List tmp = new ArrayList(); for(Annotation ann : origSet) { if(!ids.contains(ann.getId())) { tmp.add(ann); } } return new ImmutableAnnotationSetImpl(origSet.getDocument(),tmp); } /** * Return the given set with the given annotations added. * * This returns a new immutable annotation set, which contains all the annotations from origSet * plus the given annotations to add. The addition is not based on equality but on the id of the * annotations: any new annotation is added if its annotation id differs from all the ids * already in the set. *

* NOTE: Annotation ids are only unique within a document, so you should never mix annotations
   * from different documents when using this method!
   *
   * @param origSet The annotation set to which the given annotations are added
   * @param toAdd The annotations to add to the given set
   * @return A new immutable annotation set with the given annotations added
   */
  public static AnnotationSet plus(AnnotationSet origSet, Annotation... toAdd) {
    // view the varargs array as a list and let the collection variant do the work
    List<Annotation> additions = Arrays.asList(toAdd);
    return plus(origSet, additions);
  }

  /**
   * Return the given set with the given annotations added.
   *
   * This returns a new immutable annotation set, which contains all the annotations from origSet
   * plus the given annotations added. The addition is not based on equality but on the id of the
   * annotations: any new annotation is added if its annotation id differs from all the ids
   * already in the set.
   *

* NOTE: Annotation ids are only unique within a document, so you should never mix annotations * from different documents when using this method! * * @param origSet The annotation set from which to remove the given exceptions * @param toAdd A collection of annotations to add to the original set * @return A new immutable annotation set with the annotations from the collection added. */ public static AnnotationSet plus(AnnotationSet origSet, Collection toAdd) { Set ids = new HashSet(); for(Annotation orig : origSet) { ids.add(orig.getId()); } List tmp = new ArrayList(); tmp.addAll(origSet); for(Annotation ann : toAdd) { if(!ids.contains(ann.getId())) { tmp.add(ann); } } return new ImmutableAnnotationSetImpl(origSet.getDocument(),tmp); } /** * Return the subset from the original set that matches one of the given annotations. * * This returns a new immutable annotation set, which contains all the annotations from origSet * which are also among the annotations given. The check for matching annotations is not based * on equality but on the id of the * annotations: an annotation from the original set is included in the returned set if its * annotation id matches the annotation id of any of the annotations given. *

* NOTE: Annotation ids are only unique within a document, so you should never mix annotations
   * from different documents when using this method!
   *
   * @param origSet The annotation set from which to select only the given annotations.
   * @param others the given annotations
   * @return A new immutable annotation set with the intersection between original set and given annotations
   */
  public static AnnotationSet intersect(AnnotationSet origSet, Annotation... others) {
    return intersect(origSet,Arrays.asList(others));
  }

  /**
   * Return the subset from the original set that matches one of the given annotations.
   *
   * This returns a new immutable annotation set containing those annotations
   * from origSet whose annotation id equals the id of any annotation in others.
   *
   * @param origSet The annotation set from which to select only the given annotations.
   * @param others a collection of the annotations to select
   * @return A new immutable annotation set with the intersection between original set and given annotations
   */
  public static AnnotationSet intersect(AnnotationSet origSet, Collection<Annotation> others) {
    if(others.isEmpty()) {
      // nothing can match: build the result without any annotations
      return new ImmutableAnnotationSetImpl(origSet.getDocument(),null);
    }
    Set<Integer> wantedIds = new HashSet<>();
    for(Annotation other : others) {
      wantedIds.add(other.getId());
    }
    List<Annotation> selected = new ArrayList<>();
    for(Annotation ann : origSet) {
      if(wantedIds.contains(ann.getId())) {
        selected.add(ann);
      }
    }
    return new ImmutableAnnotationSetImpl(origSet.getDocument(), selected);
  }

  /**
   * Follow HTTP redirects starting from the given URL string.
   *
   * @param url the URL to resolve, as a String
   * @return the URL found at the end of the redirect chain, see {@link #resolveURL(URL)}
   * @throws IOException if the String is not a valid URL or resolving fails
   */
  public static URL resolveURL(String url) throws IOException {
    return resolveURL(new URL(url));
  }

  /**
   * Follow HTTP redirects starting from the given URL.
   *
   * URLs with a protocol other than http or https are returned unchanged.
   * Otherwise a HEAD request is sent and, as long as the response is a
   * redirect (301, 302, 303, 307 or 308) to another http or https URL, the
   * redirect is followed, up to a limit of 20 requests. A redirect without a
   * Location header or to a different protocol ends the chain at the URL
   * that produced it.
   *
   * @param url the URL to resolve
   * @return the last URL reached that did not redirect any further
   * @throws IOException if a connection fails, a redirection loop is detected
   *         or the redirect limit is exceeded
   */
  public static URL resolveURL(URL url) throws IOException {
    // if it's not http or https then there's no notion of redirection, so
    // stick to the original URL object
    if(!isHttpOrHttps(url)) {
      return url;
    }

    URL resourceUrl = url;
    Set<String> seenUrls = new HashSet<>();
    int followedRedirects = 0;

    // limit to 20 redirects, that's the most any of the major browsers will follow
    while(followedRedirects++ < 20) {
      // check for redirection loop
      if(!seenUrls.add(resourceUrl.toExternalForm())) {
        throw new IOException("Redirection loop detected for URL " + url);
      }

      HttpURLConnection conn = (HttpURLConnection) resourceUrl.openConnection();
      try {
        conn.setRequestMethod("HEAD");
        conn.setConnectTimeout(30000);
        conn.setReadTimeout(30000);
        // redirects are followed manually below so that each hop can be inspected
        conn.setInstanceFollowRedirects(false);

        switch(conn.getResponseCode()) {
          case 301: // moved permanently
          case 302: // moved temporarily
          case 303: // "see other"
          case 307: // "temporary redirect"
          case 308: // "permanent redirect"
            String location = conn.getHeaderField("Location");
            if(location == null) {
              // a redirect without a target cannot be followed
              return resourceUrl;
            }
            // NOTE(review): decoding the header may alter percent-escapes in the
            // target URL; kept as in the original implementation - confirm intent
            location = URLDecoder.decode(location, "UTF-8");
            URL newUrl = new URL(resourceUrl, location); // Deal with relative URLs
            // follow the redirect if (and only if) it goes to another http or https URL
            if(isHttpOrHttps(newUrl)) {
              resourceUrl = newUrl;
              continue;
            }
            return resourceUrl;
          default:
            // we've found a URL without a redirect so at this point we can stop
            return resourceUrl;
        }
      } finally {
        // release the connection on every path, including exceptions
        conn.disconnect();
      }
    }

    throw new IOException("Too many redirects for " + url);
  }

  /** Tell whether the protocol of the given URL is http or https, ignoring case. */
  private static boolean isHttpOrHttps(URL url) {
    String protocol = url.getProtocol();
    return protocol.equalsIgnoreCase("http") || protocol.equalsIgnoreCase("https");
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy