All downloads are free. The search and download features use the official Maven repository.

resources.jape.possibles.jape Maven / Gradle / Ivy

Go to download

Processing resources for annotating and normalising both Arabic and Roman numbers

The newest version!
/*
 * Copyright (c) 2009-2011, The University of Sheffield.
 * 
 * This file is part of GATE (see http://gate.ac.uk/), and is free software,
 * Licensed under the GNU Library General Public License, Version 3, June 2007
 * (in the distribution as file licence.html, and also available at
 * http://gate.ac.uk/gate/licence.html).
 */
 
// Name of this JAPE phase; the rule(s) below belong to it.
Phase: Possibles

// Annotation types that the rule patterns in this phase refer to.
Input: Number Token Split
// Matching style for the phase.
// NOTE(review): per the JAPE documentation, "all" is expected to fire every
// match rather than only the longest one -- confirm against the GATE user guide.
Options: control = all

//TODO deal with - in sup
//TODO deal with the fact that per is usually category=IN
//The "." in the punctuation character class below was added to fix the
//"Lasix 20 mg p.o." parsing problem, although it would be nice to figure out
//why the parser isn't working properly in that case.
// Matches a Number followed by one or more tokens that may form a unit, and
// records the result as a "PossibleMeasurement" annotation over the number.
//
// A following token is accepted when it is:
//   - a Token whose category is not "IN" and whose kind is not "punctuation", or
//   - the Token "per", or
//   - a punctuation Token matching [.|~^+/*] with the !Split constraint.
//
// Bindings: "number" = the Number, "unit" = the trailing tokens, "all" = both.
Rule:possible1
(
	({Number}):number
	(({Token.category != "IN", Token.kind != "punctuation"}|
	  {Token.string == "per"}|
	  {Token.kind == "punctuation", Token.string =~ "[.|~^+/*]", !Split})+):unit 
):all
-->
{
	try {
		// Everything the pattern matched: the number plus its unit tokens.
		AnnotationSet matched = bindings.get("all");

		// PossibleMeasurement annotations looked up by the start offset of this match.
		AnnotationSet recorded = inputAS.get("PossibleMeasurement").get(matched.firstNode().getOffset());

		if (recorded != null) {
			if (!recorded.isEmpty()) {
				long recordedEnd = recorded.lastNode().getOffset().longValue();
				long matchedEnd = matched.lastNode().getOffset().longValue();

				// Keep what is already there if it ends after this match does.
				// NOTE(review): the annotation added below covers only the number,
				// so for annotations created by this rule this test may never be
				// true -- confirm the intended comparison.
				if (recordedEnd > matchedEnd) return;
			}

			// Otherwise this match replaces the previously recorded annotations.
			inputAS.removeAll(recorded);
		}

		Annotation numberAnn = bindings.get("number").iterator().next();
		AnnotationSet unitAnns = bindings.get("unit");

		Long numberStart = numberAnn.getStartNode().getOffset();
		Long numberEnd = numberAnn.getEndNode().getOffset();

		// Document text running from the end of the number to the end of the unit match.
		String unitText = doc.getContent().getContent(numberEnd, unitAnns.lastNode().getOffset()).toString();

		FeatureMap measurementFeatures = Factory.newFeatureMap();
		measurementFeatures.put("number", numberAnn);
		measurementFeatures.put("unit", unitText);
		measurementFeatures.put("unitTokens", gate.Utils.inDocumentOrder(unitAnns));

		// The new annotation spans the number only; the unit is carried in the features.
		inputAS.add(numberStart, numberEnd, "PossibleMeasurement", measurementFeatures);
	}
	catch (Exception problem) {
		// Best effort: report the failure and let processing continue.
		problem.printStackTrace();
	}
}






© 2015 - 2025 Weber Informatics LLC | Privacy Policy