/*
 * gate.creole.orthomatcher.OrthoMatcherHelper (Maven / Gradle / Ivy), from the "annie" artifact.
 *
 * ANNIE is a general purpose information extraction system that provides the building blocks of many other GATE applications.
 * There is a newer version of this artifact: 9.1.
 *
 * Page navigation text captured together with this listing: "Go to download",
 * "Show more of this group", "Show more artifacts with this name",
 * "Show all versions of annie", "Show documentation", "Show newest version".
 */
package gate.creole.orthomatcher;

import gate.Annotation;
import gate.AnnotationSet;
import gate.creole.ExecutionException;
import gate.util.InvalidOffsetException;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

public class OrthoMatcherHelper {
	
    protected static final Logger log = Logger.getLogger(OrthoMatcherHelper.class);
  
	  /**
	   * Tests two strings for equality, with or without regard to letter case.
	   *
	   * @param s1 the first string; it is dereferenced, so null raises a NullPointerException
	   * @param s2 the second string; a null value simply yields false
	   * @param matchCase true for a case-sensitive comparison, false to ignore case
	   * @return true when the two strings are equal under the selected comparison
	   */
	  public static boolean straightCompare(String s1,
	          String s2,
	          boolean matchCase) {

	    final boolean matched = matchCase ? s1.equals(s2) : s1.equalsIgnoreCase(s2);
	//  kalina: do not remove, nice for debug
	//  if (matched && (s2.startsWith("Kenneth") || s1.startsWith("Kenneth")))
	//  Out.prln("Rule1: Matched " + s1 + "and " + s2);
	    return matched;
	  }
	  
	  /**
	   * Returns true if exactly one of s1 and s2 is a single character and the two
	   * strings begin with that same character, i.e. one string is the initial of
	   * the other.
	   * <p>
	   * An empty string has no first character, so any call in which either
	   * argument is empty returns false instead of failing on {@code charAt(0)}.
	   *
	   * @param s1 the first string; must not be null
	   * @param s2 the second string; must not be null
	   * @return true when one string is a one-character initial of the other
	   */
	  public static boolean initialMatch(String s1, String s2) {
	    // Without this guard, ("J", "") passes the XOR test below and then
	    // throws StringIndexOutOfBoundsException on the empty string.
	    if (s1.length() == 0 || s2.length() == 0) {
	      return false;
	    }
	    // XOR: two single characters are not an "initial vs. full name" pair.
	    boolean exactlyOneIsInitial = (s1.length() == 1) ^ (s2.length() == 1);
	    return exactlyOneIsInitial && s1.charAt(0) == s2.charAt(0);
	  }

	  /**
	   * Returns the text of a document between two offsets.
	   *
	   * @param start offset at which the span begins
	   * @param end offset at which the span ends
	   * @param d the document whose content is read
	   * @return the string form of the document content between start and end
	   * @throws ExecutionException wrapping the InvalidOffsetException raised when
	   *         the document content rejects the requested offsets
	   */
		public static String getStringForSpan(Long start, Long end,gate.Document d) throws ExecutionException {
		    final String spanText;
		    try {
		      spanText = d.getContent().getContent(start, end).toString();
		    }
		    catch (InvalidOffsetException badOffsets) {
		      // The cause is preserved in the wrapper; nothing is logged here.
		      throw new ExecutionException(badOffsets);
		    }
		    return spanText;
	  }
		 
	  /**
	   * Evaluates every listed rule against a pair of names and reports whether
	   * at least one of them returned true.
	   * <p>
	   * All listed rules are always evaluated (there is no early exit), and each
	   * individual outcome is stored in {@code mr} at the index equal to the rule
	   * number.
	   * <p>
	   * NOTE(review): {@code allrules} is declared as a raw {@code Map}, yet
	   * {@code value(...)} is invoked on its entries; the type arguments appear
	   * to be missing from this copy of the file - confirm against the upstream
	   * source.
	   *
	   * @param allrules rule objects keyed by rule number
	   * @param executeRules the numbers of the rules to run, in order
	   * @param longName first argument handed to each rule
	   * @param shortName second argument handed to each rule
	   * @param mr output array receiving each executed rule's result; it must be
	   *        large enough to be indexed by every number in executeRules
	   * @return true if any executed rule returned true
	   */
	  public static boolean executeDisjunction(Map allrules, int[] executeRules,String longName,String shortName, boolean mr[]) {

		  boolean anyFired = false;

		  for (int ruleId : executeRules) {
		    boolean fired = allrules.get(ruleId).value(longName, shortName);
		    mr[ruleId] = fired;
		    anyFired |= fired;
		  }

		  return anyFired;
	  }
	  
	  /**
	   * Rounds a value to two decimal places.
	   * <p>
	   * Values that cannot be meaningfully rounded this way are returned
	   * unchanged: NaN, the infinities, and finite values so large that
	   * {@code input * 100} no longer fits in a {@code long}. Such large values
	   * have no fractional part, so returning them as-is is the exact result.
	   *
	   * @param input the value to round; must not be null (it is unboxed)
	   * @return the input rounded to two decimal places, or the input itself for
	   *         NaN, infinite and out-of-range values
	   */
	  public static Double round2Places(Double input) {
	    double scaled = input * 100.0;
	    // Math.round turns NaN into 0 and clamps to Long.MIN_VALUE/MAX_VALUE,
	    // which would silently replace these inputs with unrelated numbers.
	    if (Double.isNaN(scaled) || Math.abs(scaled) >= Long.MAX_VALUE) {
	      return input;
	    }
	    return Math.round(scaled) / 100.0;
	  }
	  
	  /**
	   * It is used for test purposes.
	   * This table shows which rules have fired over a corpus.
	   */
	  private static final boolean[] rulesUsedTable;
	  
	  static {
	    rulesUsedTable = new boolean[18];
      for(int i=0;i> iter = rulesUsedTable.entrySet().iterator();
	    if (rulesUsedTable!=null) {   
	    log.debug("Saving table of used orthomatcher rules:");
	    
	    StringBuilder table=new StringBuilder();
	    
	    for(int i=0;i1) {
      
      int i=0;
      //encode in temp
      for(String pair: pairs){

        String[] s = pair.split(":");
        int x=Integer.parseInt(s[0].trim())* ceil + Integer.parseInt(s[1].trim());
        temp[i]=x;
        i++;
      }

      Arrays.sort(temp);
      
      //decode from temp
      StringBuilder result=new StringBuilder();
      for(int n: temp) {
        int a = n / ceil;
        int b = n % ceil;
        result.append(a).append(":").append(b).append(", ");
      }
      
      return result.toString();
      }
      else return input;//we do not need to sort a single pair
    }
	  
	  /**
	   * Adds a "matches_positions" feature to every annotation in the given set that carries the
	   * coreference feature (ANNOTATION_COREF_FEATURE_NAME, referred to below as "matches").
	   * <p>
	   * The feature "matches" contains annotation IDs.
	   * This method adds a new feature called "matches_positions" that gives the start and end offset of each
	   * annotation listed in "matches", written as "[start:end, start:end, ...]".
	   * Annotation IDs generally differ between runs, so "matches" cannot be used for comparison in tools
	   * like the Corpus Quality Assurance tool; "matches_positions" can be compared there instead.
	   * <p>
	   * The method returns without doing anything when no annotation has the coreference feature.
	   *
	   * @param nameAllAnnots the annotations to inspect; annotations in it are updated in place
	   */
	  protected static void setMatchesPositions(AnnotationSet nameAllAnnots) {
	    
	    //get all annotations that have a matches feature
	    Set fNames = new HashSet();
	    fNames.add(gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME);
	    AnnotationSet allMatchesAnnots =
	      nameAllAnnots.get(null, fNames);

	    //nothing to do when no annotation carries the feature
	    if (allMatchesAnnots == null || allMatchesAnnots.isEmpty())
	      return;

	    for (Annotation currentMatchAnnot : allMatchesAnnots) {
	        
	        String matchValue=currentMatchAnnot.getFeatures().get(gate.creole.ANNIEConstants.ANNOTATION_COREF_FEATURE_NAME).toString();
	        
	        //drop the first and the last character of the feature's string form
	        //presumably the brackets of a list rendering such as "[1, 2, 3]" - TODO confirm
	        matchValue = matchValue.substring(1);
	        matchValue = matchValue.substring(0,matchValue.length()-1);
	        
	        String[] annotationsIDs = matchValue.split(",");

	        String matchPositionsValue="";//accumulates one "start:end" entry per matched annotation
	        String sentinel = ", ";//separator appended after every entry; the trailing one is removed below
	        
	        int[] integerIDs = OrthoMatcherHelper.convertArrayToInteger(annotationsIDs);
	        //NOTE(review): the next line is damaged in this copy of the file. The loop header and the
	        //declarations of "id", "ann" and "iter" (all used below) are missing - restore them from
	        //the upstream source before compiling.
	        for (int i=0; i iter = nameAllAnnots.iterator();
	        
	        //find the current annotation with ID from the match list - in order to get its start and end point
	        //NOTE(review): "id" must be a primitive int for the == comparisons below to compare values;
	        //its declaration is not visible here - confirm.
	        if (currentMatchAnnot.getId()==id)
	           ann=currentMatchAnnot; else {
	              //otherwise scan the whole input set for the annotation with this ID
	              while (iter.hasNext()) {
	                 Annotation a=iter.next();
	                 if (a.getId()==id)
	                 {
	                   ann = a;
	                   break;
	                 }
	              }
	           }
	        
	        //do the actual job of retrieving the start and end points
	        //IDs for which no annotation was found are skipped silently
	        if (ann!=null) {
	          matchPositionsValue = matchPositionsValue + ann.getStartNode().getOffset()+":"+ann.getEndNode().getOffset()+sentinel;
	        }
	        
	        }//end going through the match ids
	        
	        //sort so that every time we have the "match_positions" generated the same way so that we can compare it
	        matchPositionsValue = OrthoMatcherHelper.SortByStartPosition(matchPositionsValue);
	        
	        //formatting: remove the trailing separator, then wrap the list in brackets
	        if (matchPositionsValue.endsWith(sentinel)) {
	        matchPositionsValue = matchPositionsValue.substring(0,matchPositionsValue.length()-sentinel.length());
	        }
	        matchPositionsValue = "["+matchPositionsValue+"]";
	        //finally store the positions as a feature on the current annotation
	        currentMatchAnnot.getFeatures().put("matches_positions", matchPositionsValue);
	        
	      //}
	    } //for - going through all the annotations that have the "matches" feature and adding the new feature
	  }//matchesPositions


}