All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nt.opensextant-xponents-core.3.3.6.source-code.poli_patterns.cfg Maven / Gradle / Ivy

There is a newer version: 3.7.3
Show newest version
#  Patterns of Life (Poli)
#  
#  NOTE: These are all examples.  Reference implementations are in poli.data pkg.
#
#  document title grabber
#  telephony patterns (cell, land line)
#  networking and communications (email, IP)
#  Zip codes, common postal patterns
#  
# ...................................
#  Telephony Patterns
# ...................................
#DEFINE tel_ctry \d{3}
#DEFINE tel_area \(?\d{3}\)?
#DEFINE tel_sep [-.\s]
#DEFINE tel_num \d{3}[-\.\s]\d{4}
#DEFINE tel_exchange x\d{2,5}

#CLASS PHONE  org.opensextant.extractors.poli.data.TelephoneNumber
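# NOTE(review): the <name> references in this rule were lost in HTML extraction; reconstructed from the DEFINEs above -- confirm against the upstream file.
#RULE PHONE  01  [\s,]?<tel_area><tel_sep>?<tel_num>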
#TEST PHONE  01  (978-793-0080)
#TEST PHONE  01  (978)-793-0080
#TEST PHONE  01  c.978.793.0080
#TEST PHONE  01  tel. 978.793.0800 x44

# Email Address
# Specification Notes: https://en.wikipedia.org/wiki/Email_address
# Will consider quoted email addresses and bracketed ones separately.
# Will consider Unicode variant on this pattern -- replace A-Z0-9 with \p{Alpha} or other supported character group.
#DEFINE email_local_name [A-Z0-9][-A-Z0-9.!#$%&'*+/=?^_`{|}~]{1,63}
#DEFINE email_domain [A-Z0-9][-.A-Z0-9]{3,253}[A-Z0-9]
#CLASS EMAIL  org.opensextant.extractors.poli.data.EmailAddress
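# NOTE(review): the <name> references in this rule were lost in HTML extraction; reconstructed from the two email DEFINEs -- confirm against the upstream file.
#RULE EMAIL 01 <email_local_name>@<email_domain>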
#TEST EMAIL 01 [email protected]
#TEST EMAIL 01 [email protected]  			# Pass - leading "." is ignored, unless it is quoted ".test"@....
#TEST EMAIL 01 [email protected] 		#FAIL double ".." is not allowed.
#TEST EMAIL 01 [email protected]  	#FAIL double ".." is not allowed.
#TEST EMAIL 01 [email protected]
#TEST EMAIL 01 [email protected]
#TEST EMAIL 01 [email protected].
#TEST EMAIL 01 [email protected]"  # unbalanced punctuation
#TEST EMAIL 01 1233øø[email protected] #FAIL Unicode.  TEST Case -- see EAI aka Email Addr Internationalization 


# ...................................
#  Document Patterns
# ...................................

# Title is some text denoted by a SUBJ or a TITLE heading, bound by end of line.
# Ends of multi-line titles are not easy to detect -- add your own if that is the case.
#DEFINE title_marker    SUBJECT|SUBJ|TITLE
#DEFINE title           .+
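# NOTE(review): the <title_marker> and <title> references were lost in HTML extraction; placement reconstructed -- confirm against the upstream file.
#RULE TITLE 01 <title_marker>[:/\n]+\s*<title>[\n\r]+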

#TEST TITLE 01 SUBJ: Lorem ip sip sum cuta tookus blookie.$NL
#TEST TITLE 01 SUBJECT: Lorem ip sip sum cuta tookus blookie.$NL
#TEST TITLE 01 TITLE    Happiness is having nothing$NL 
#TEST TITLE 01 TITLE    :   Happiness is having nothing$NL 

# ...................................
#  Cyber Patterns
# ...................................
#DEFINE byte [\dA-F]{2}
#CLASS CYBER.MAC  org.opensextant.extractors.poli.data.MACAddress
#RULE CYBER.MAC 01 <byte>[-:]<byte>[-:]<byte>[-:]<byte>[-:]<byte>[-:]<byte>
#TEST CYBER.MAC 01 00:45:AF:c0:44:EE
#TEST CYBER.MAC 01 00-45-AF-c0-44-EE



# ...................................
#  Monetary and Financial Patterns
# ...................................
#DEFINE currency_sym        [€£$¥]
#DEFINE currency_nom        usd|euro|eur|yuan|dollars|\$US|pounds|yen
#DEFINE money_magnitude     million|thousand|mil\.?|m|k|b
#DEFINE money_amount        [\d,.]+

// Money Amounts:
//  (a)  number mag currency     80K pounds;   80,700 pounds;  80.700 pounds;
//  (b)  sym number mag         £80K 
// The user must apply a LOCALE to parse the amount: whether "." or "," is the decimal separator depends on document context (e.g., European number formatting).
//
#CLASS MONEY.AMT  org.opensextant.extractors.poli.data.Money
#RULE  MONEY.AMT  01sym     <money_amount>\s*<money_magnitude>?\s*<currency_sym>
#TEST  MONEY.AMT  01sym     787K €
#TEST  MONEY.AMT  01sym     78.700K €
#TEST  MONEY.AMT  01sym     .,.,K €

#RULE  MONEY.AMT  01nom     <money_amount>\s*<money_magnitude>?\s*<currency_nom>
#TEST  MONEY.AMT  01nom     78.700K USD
#TEST  MONEY.AMT  01nom     78.700 USD
#TEST  MONEY.AMT  01nom     78,700 USD
#TEST  MONEY.AMT  01nom     78,700USD
#TEST  MONEY.AMT  01nom     8 million USD
#TEST  MONEY.AMT  01nom     ,,,, USD

#RULE  MONEY.AMT  02sym     <currency_sym>\s*<money_amount>\s*<money_magnitude>?
#TEST  MONEY.AMT  02sym     €787K
#TEST  MONEY.AMT  02sym     €78.700K
#TEST  MONEY.AMT  02sym     € 78.700K
#TEST  MONEY.AMT  02sym     € 78.700
#TEST  MONEY.AMT  02sym     € .....
</code></pre>    <br/>
    <br/>
<div class='clear'></div>
</main>
</div>
<br/><br/>
    <div class="align-center">© 2015 - 2024 <a href="/legal-notice.php">Weber Informatics LLC</a> | <a href="/data-protection.php">Privacy Policy</a></div>
<br/><br/><br/><br/><br/><br/>
</body>
</html>

<script data-cfasync="false" src="/cdn-cgi/scripts/5c5dd728/cloudflare-static/email-decode.min.js"></script>