All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gate.plugins.ANNIE.resources.NE.number.jape Maven / Gradle / Ivy

Go to download

Gate based component, that can process the Text units to extract informations using Gate's tools (such as grammars, gazetteers, tokenizer or POS Taggers). This project contains two versions, a simple component and webservice one.

There is a newer version: 2.0
Show newest version
/*
*  number.jape
*
* Copyright (c) 1998-2004, The University of Sheffield.
*
*  This file is part of GATE (see http://gate.ac.uk/), and is free
*  software, licenced under the GNU Library General Public License,
*  Version 2, June 1991 (in the distribution as file licence.html,
*  and also available at http://gate.ac.uk/gate/licence.html).
*
*  Diana Maynard, 02 Aug 2001
* 
*  $Id: number.jape 5921 2004-07-21 17:00:37Z akshay $
*/


Phase:	Number
Input: Token Lookup
Options: control = appelt

///////////////////////////////////////////////////////////////
//Money Rules


Macro: MILLION_BILLION
({Token.string == "m"}|
{Token.string == "million"}|
{Token.string == "b"}|
{Token.string == "billion"}|
{Token.string == "bn"}|
{Token.string == "k"}|
{Token.string == "K"}
)

Macro: NUMBER_WORDS
// two hundred and thirty five
// twenty five

(
 (({Lookup.majorType == number} 
   ({Token.string == "-"})?
  )*
   {Lookup.majorType == number}
   {Token.string == "and"}
 )*
 ({Lookup.majorType == number} 
  ({Token.string == "-"})?
 )*
   {Lookup.majorType == number}
)


Macro: AMOUNT_NUMBER
// enables commas, decimal points and million/billion
// to be included in a number
 
(({Token.kind == number}
  (({Token.string == ","}|
    {Token.string == "."}
   )
   {Token.kind == number}
  )*
  |
  (NUMBER_WORDS)
 )
 (MILLION_BILLION)?
)


Rule:	MoneyCurrencyUnit
// 30 pounds
  (        
      (AMOUNT_NUMBER)
      ({Lookup.majorType == currency_unit})
  )
:number -->
  :number.Money = {kind = "number", rule = "MoneyCurrencyUnit"}


Rule:	MoneySymbolUnit

// $30 
// $30 US
// not $1$21
// $20US


(   
 ({Token.symbolkind == currency}|
  {Lookup.majorType == currency_unit})
 (AMOUNT_NUMBER)
 (
  {Lookup.majorType == currency_unit}
 )?
) 
:number 

 -->
  :number.Money = {kind = "number", rule = "MoneySymbolUnit"}

//////////////////////////////////////////////////////////////

// Percentage Rules

Macro: PERCENT
({Token.string == "%"} | 
 {Token.string == "percent"}|
 ({Token.string == "per"}
 {Token.string == "cent"})
)


Rule: PercentBasic
// +20%
// minus 10 percent
// two point four percent

(
 ({Token.string == "+"}|
  {Token.string == "-"}|
  {Token.string == "minus"}  
 )?
  ((AMOUNT_NUMBER|NUMBER_WORDS)
  {Token.string == "point"}
 )? 
 (AMOUNT_NUMBER|NUMBER_WORDS)
 (PERCENT)
)
:number -->
  :number.Percent = {rule = "PercentBasic"}

Rule: PercentSpan
// 20-30%
// two to four percent

(
 (AMOUNT_NUMBER|NUMBER_WORDS)
 ({Token.string == "-"} |
  {Token.string == "to"})
 (AMOUNT_NUMBER|NUMBER_WORDS)
 (PERCENT)
)
:number -->
  :number.Percent = {rule = "PercentSpan"}














© 2015 - 2024 Weber Informatics LLC | Privacy Policy