All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nt.opensextant-xponents-core.3.3.6.source-code.datetime_patterns.cfg Maven / Gradle / Ivy

There is a newer version: 3.7.3
Show newest version
/**
 *                         NOTICE
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * **************************************************************************
 *                          NOTICE
 * This software was produced for the U. S. Government under Contract No.
 * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer
 * Software and Noncommercial Computer Software Documentation Clause
 * 252.227-7014 (JUN 1995)
 *
 * (c) 2009-2013 The MITRE Corporation. All Rights Reserved.
 * **************************************************************************
 */

// ALL Patterns below  - defines, rules, etc. -- are for MATCHING.
// Parsing of actual fields named in defines is done after matches are found.
// Validation of parsed fields is last.

// ........................................
// Field definitions.  Each #DEFINE names a regular-expression fragment.
// These fragments are for MATCHING only; matched values are parsed and validated afterwards,
// so several fragments deliberately accept out-of-range values (e.g. hour 29, day 39, month 19).
// ........................................

# Well-known English month abbreviations (SEP or SEPT).
#DEFINE MON_ABBREV  JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEPT?|OCT|NOV|DEC

# A name starting with 3 ASCII letters as above, but followed by other letters, possibly not English or ASCII.
# Detection of month/day/year patterns with non-English month names is only a coincidence if they share a common prefix.
# Locales for date patterns and language options could be explored further, but that is beyond the scope of this file.
# NOTE(review): in Java regex, \p{Alpha} is ASCII-only unless UNICODE_CHARACTER_CLASS is enabled -- confirm how these patterns are compiled.
# MAY takes no suffix; the other prefixes allow a bounded number of trailing letters.
#DEFINE MON_NAME JAN\p{Alpha}{0,6}|FEB\p{Alpha}{0,6}|MAR\p{Alpha}{0,2}|APR\p{Alpha}{0,3}|MAY|JUN\p{Alpha}{0,2}|JUL\p{Alpha}{0,2}|AUG\p{Alpha}{0,3}|SEP\p{Alpha}{0,6}|OCT\p{Alpha}{0,6}|NOV\p{Alpha}{0,6}|DEC\p{Alpha}{0,6}
// Disabled, looser alternative: any 3 capital letters followed by up to 8 word characters.
// #DEFINE MON_NAME   [A-Z]{3}\w{0,8}
# Ordinal suffix for a day of month, as in 19th, 22nd, 23rd, 1st.
#DEFINE DAY_ENUM th|nd|rd|st

#  Fixed length fields
// In all practicality, year is 1xxx or 2xxx.  Years 0001 to 0999 not really considered.
#DEFINE YEAR         [12]\d{3}
// Two-digit year with an optional leading apostrophe, e.g. 76 or '76
#DEFINE YY           '?\d\d

//  YEARYY lists two alternatives: a 2-digit year (optionally apostrophe-prefixed, e.g. '76) or a 4-digit year.
//  As written it does not list a 3-digit alternative.
//  This is only used for matching. XTemp still validates matches.
//
#DEFINE YEARYY       '?\d{2}|\d{4}
// Two-digit month: first digit 0 or 1 (accepts 00-19; range is validated later).
#DEFINE MM           [01]\d 
// Two-digit day of month: first digit 0-3 (accepts 00-39; range is validated later).
#DEFINE DD           [0-3]\d
// Single capital letter time zone designator, e.g. Z
#DEFINE SHORT_TZ     [A-Z]

// Two-digit hour, minute and second.  hh accepts 00-29; mm and ss accept 00-59.
#DEFINE hh    [0-2]\d
#DEFINE mm    [0-5]\d
#DEFINE ss    [0-5]\d


#   Variable length
// Day of month and month with an optional leading digit, i.e. one or two digits.
#DEFINE DOM         [0-3]?\d
#DEFINE MONTH       [01]?\d
// Time zone abbreviation of 3 to 5 capital letters.
#DEFINE LONG_TZ     [A-Z]{3,5}

// The word "of" in three casings, as in "DEC of 1990".
#DEFINE OF	of|Of|OF
# Do not use DSEP? (that is an optional separator.) Most field-separated patterns would be too noisy.
// DSEP1 and DSEP2 are the same character class (dash, slash or period) under two names.
// NOTE(review): presumably named separately so the two separators in one date can be compared
// for consistency (tests below expect mixed punctuation to FAIL) -- confirm against the validator.
#DEFINE DSEP1 [-/.]
#DEFINE DSEP2 [-/.]

// ........................................
// Month, Day, Year patterns, MDY
// ........................................
// FORM: DATE: MM/DD/YY
// Numeric month / day / 2-digit year separated by slashes.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b//\b");
// restored from the FORM comment and tests.  Confirm field names against the upstream file.
#RULE   MDY   01    \b<MONTH>/<DOM>/<YY>\b
#TEST   MDY   01    12/30/90
#TEST   MDY   01    DATE: 12/30/90
#TEST   MDY   01    13/30/90    FAIL bad MON
#TEST   MDY   01    12/32/90    FAIL bad DOM
#TEST   MDY   01    12/30/01
#TEST   MDY   01    12/30/00
#TEST   MDY   01    12/30/55
#TEST   MDY   01    12/30/15

// FORM: DATE: MM/DD/YYYY
// Numeric month / day / 4-digit year separated by slashes.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b//\b");
// restored from the FORM comment and tests.  Confirm field names against the upstream file.
#RULE   MDY   02    \b<MONTH>/<DOM>/<YEAR>\b
#TEST   MDY   02    12/30/1990
#TEST   MDY   02    DATE:  12/30/1990
#TEST   MDY   02    13/30/1990    FAIL bad MON
#TEST   MDY   02    12/32/1990    FAIL bad DOM
#TEST   MDY   02    12/30/2001
#TEST   MDY   02    12/30/0000    FAIL bad YYYY
#TEST   MDY   02    12/30/1955
#TEST   MDY   02    12/30/1915

// FORM: MMM DD, YYYY   or MMM DD YYYY,  MMM DD, YY, etc.
// Month name, then day, then 2- or 4-digit year; whitespace, period or comma between fields.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b[\s.]+[\s,.]+\b");
// restored from the FORM comment and tests.  Confirm field names against the upstream file.
#RULE   MDY   03    \b<MON_NAME>[\s.]+<DOM>[\s,.]+<YEARYY>\b
#TEST   MDY   03    DEC 30, 1990
#TEST   MDY   03    DEC 30 1990
#TEST   MDY   03    DEC.30 1990
#TEST   MDY   03    DEC. 30 1990
#TEST   MDY   03    DEC.30.1990
#TEST   MDY   03    DEC 30 90
#TEST   MDY   03    DEC 30 990         FAIL bad year
#TEST   MDY   03    DEC 00 1990        FAIL bad DOM
#TEST   MDY   03    DEC 01 2300        FAIL ambiguous; time appears to be 2300 where year is expected
#TEST   MDY   03    DECEMBER 30 1990
#TEST   MDY   03    DECMEBER 30 90
#TEST   MDY   03    DECIEMBRE 30 1990
#TEST   MDY   03    DECIEMBRE 00 1990  # FAIL no 00 day
#TEST   MDY   03    DECEMBER 01 2300   FAIL ambiguous; time appears to be 2300 where year is expected


// FORM: MMM, YYYY  or Month, YYYY   comma optional. 4-digit year required
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b[\s,.]+\b");
// restored from the FORM comment and tests.  Confirm field names against the upstream file.
#RULE   MDY   04    \b<MON_NAME>[\s,.]+<YEAR>\b
#TEST   MDY   04    DEC 1990
#TEST   MDY   04    DEC, 1990
#TEST   MDY   04    DEC. 1990
#TEST   MDY   04    DECEMBER, 1990
#TEST   MDY   04    DECIEMBRE, 1990

// FORM: MMM of YYYY
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b\s+\s+\b");
// restored from the FORM comment and tests.  Confirm field names against the upstream file.
#RULE   MDY   04a    \b<MON_NAME>\s+<OF>\s+<YEAR>\b
#TEST   MDY   04a    DEC of 1990
#TEST   MDY   04a    DECEMBER of 1990

// FORM: DD MMM YYYY  or DD MMM YY, comma optional before the year.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b\s+[\s,]+\b");
// restored from the tests.  Confirm field names against the upstream file.
#RULE   MDY   05    \b<DOM>\s+<MON_NAME>[\s,]+<YEARYY>\b
#TEST   MDY   05    30 DEC 1990
#TEST   MDY   05    30 DEC 90
#TEST   MDY   05    01 DEC 00
#TEST   MDY   05    01 DEC 02
#TEST   MDY   05    30 DECEMBER 1990
#TEST   MDY   05    30 DECMEBER 1990
#TEST   MDY   05    30 DECIEMBRE 1990

// FORM: Month DDth, YYYY   -- day carries an ordinal suffix; 4-digit year required.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b[\s.]+[\s,]+\b");
// restored from the tests.  Confirm field names against the upstream file.
#RULE   MDY   06a    \b<MON_NAME>[\s.]+<DOM><DAY_ENUM>[\s,]+<YEAR>\b
#TEST   MDY   06a    September 19th, 2017
#TEST   MDY   06a    September 19th, 17   # FAIL
#TEST   MDY   06a    September 19 th, 17  # FAIL
#TEST   MDY   06a    Sept. 19th, 2017
#TEST   MDY   06a    Sept 19th, 2017
#TEST   MDY   06a    Sept 1st, 2017
#TEST   MDY   06a    Sept 23rd, 2017
#TEST   MDY   06a    Sept 15th, 2017
#TEST   MDY   06a    Sept 22nd, 2017

// FORM: DDth Month, YYYY  or DDth of Month, YYYY   -- "of" is optional; 4-digit year required.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b\s+?\s*[\s,]+\b");
// restored from the tests.  Confirm field names against the upstream file.
#RULE   MDY   06b    \b<DOM><DAY_ENUM>\s+<OF>?\s*<MON_NAME>[\s,]+<YEAR>\b
#TEST   MDY   06b    19th September, 2017
#TEST   MDY   06b    19th of September, 2017


// FORM: DD MMM YYYY HHMM  or DD MMM YY HH:MM   -- date followed by a 24-hour time, colon optional.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b\s+[\s,]+ :?\b");
// restored from the tests.  Confirm field names against the upstream file.
#RULE   DMYT   01    \b<DOM>\s+<MON_NAME>[\s,]+<YEARYY> <hh>:?<mm>\b
#TEST   DMYT   01    30 DEC 1990 0400
#TEST   DMYT   01    30 DEC 90 0400
#TEST   DMYT   01    11 JUN 14 1815     06:15 PM, 11 JUNE 2014
#TEST   DMYT   01    25 March 2012 04:00
#TEST   DMYT   01    25 March, 2012 04:00



// FORM: DATE:  DD-MON-YYYY
// Day, month abbreviation and 2- or 4-digit year joined by dashes.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b--\b");
// restored from the FORM comment and tests.  The month field may have been MON_NAME rather than
// MON_ABBREV -- confirm against the upstream file.
#RULE   DMY   01    \b<DOM>-<MON_ABBREV>-<YEARYY>\b
#TEST   DMY   01    12-DEC-90
#TEST   DMY   01    12-DEC-1990

// FORM: DATE:  DD MON YYYY
// Day, month name and year with optional whitespace, so run-together forms like 12DEC1990 also match.
// NOTE(review): the <FIELD> references were missing from this copy (pattern read "\b\s*\s*\b");
// restored from the FORM comment and tests.  Confirm field names against the upstream file.
#RULE   DMY   02    \b<DOM>\s*<MON_NAME>\s*<YEARYY>\b
#TEST   DMY   02    12 DEC 90
#TEST   DMY   02    12 DEC 1990
#TEST   DMY   02    12DEC90
#TEST   DMY   02    12DEC1990
#TEST   DMY   02    12MARCH1999
#TEST   DMY   02    12FEBBRAIO1999
#TEST   DMY   02    12JUL1999
#TEST   DMY   02    12JULIO1999

// FORM: DATE:  YYYY-MM-DD as it appears in free text of documents.
//              The limitations of this pattern are related to how it was used.
//              This is a relatively modern format; was this format used in text in 1700s?
// NOTE(review): this copy of the rule read "\b\b" -- every field AND both separators were missing,
// which would match at any word boundary.  Restored from the FORM comment and tests; the separators
// are assumed to be the DSEP1/DSEP2 fields because no literal "-" survived in the pattern.
// Confirm the exact field names against the upstream file.
#RULE   YMD   01    \b<YEAR><DSEP1><MM><DSEP2><DOM>\b
#TEST   YMD   01    2001-11-11
#TEST   YMD   01    0001-04-34   # FAIL
#TEST   YMD   01    1001-04-30   # FAIL
#TEST   YMD   01    2001-04-30
#TEST   YMD   01    1990-04-30
#TEST   YMD   01    1790-04-30   # FAIL -- 1800 01 01 is earliest date for this pattern. 
#TEST   YMD   01    a2001-04-30  # FAIL
#TEST   YMD   01    c2001-04-30  # FAIL
#TEST   YMD   01    42001-04-30  # FAIL


// ........................................
// DATE TIME PATTERNS,  DTM
// ........................................

// NOTE(review): in this copy the DTM section had lost its line structure (rules, tests and comments
// were run together, and each pattern was split where a <DD> field had been) and all <FIELD>
// references were missing.  One directive per line is restored below; field names are restored
// from the FORM comments and tests.  Confirm against the upstream file.

// FORM: A|O|P|R DDHHMMZ MMM YY
// Military-style date-time group: day, hour, minute, one-letter zone, month abbreviation, 2-digit year.
#RULE   DTM   01   \b<DD><hh><mm><SHORT_TZ>\s*<MON_ABBREV>\s*<YY>\b
#TEST   DTM   01   A 301400Z DEC 90
#TEST   DTM   01   R 301400Z DEC 90
#TEST   DTM   01   A 351400Z DEC 90   # FAIL day out of range

// FORM: YYYYMMDDTHHMMZ
#RULE   DTM   02   \b<YEAR><MM><DD>T<hh><mm><SHORT_TZ>\b
#TEST   DTM   02   20101230T1400Z

// FORM: YYYY-MM-DDTHH:MM:SS  ... ISO Time.  ISO 8601 uses "-", not "/". But should be validated in normalization.
// DTM 04 is collapsed into this pattern.
#RULE   DTM   03   \b<YEAR><DSEP1><MM><DSEP2><DD>[T ]<hh>:<mm>:<ss>
#TEST   DTM   03   2010-12-30T14:00:01:12
#TEST   DTM   03   2010-12-30T14:00:02

// FORM: YYYY-MM-DDTHH:MM  ... ISO Time. See 03 above.
#RULE   DTM   03b  \b<YEAR><DSEP1><MM><DSEP2><DD>[T ]<hh>:<mm>
#TEST   DTM   03b  2010-12-30T14:01:11:12
#TEST   DTM   03b  2010-12-30T14:02:12
#TEST   DTM   03b  2010-12/30T14:03:13   # FAIL

// FORM: MM/DD/YY* HH:MM:SS.
// TODO: 12-hour clock time and detect PM/AM. This HH:MM is only 24 hour clock.
#RULE   DTM   05a  \b<MONTH><DSEP1><DOM><DSEP2><YEARYY>\s?<hh>:<mm>:<ss>
#TEST   DTM   05a  12-30-20 14:00:00:12     # extra data associated with value?
#TEST   DTM   05a  12-30-2020 14:00:01
#TEST   DTM   05a  12/30-2020 14:00:02      # FAIL Test mixed punctuation.
#TEST   DTM   05a  12/30/2020 14:00:02      # Test mixed punctuation.
#TEST   DTM   05a  12/30/20 14:00           # Test mixed punctuation.

// FORM: MM/DD/YY* HH:MM.
// TODO: 12-hour clock time and detect PM/AM. This HH:MM is only 24 hour clock.
#RULE   DTM   05b  \b<MONTH><DSEP1><DOM><DSEP2><YEARYY>\s?<hh>:<mm>
#TEST   DTM   05b  12-30-20 14:00:00:12     # extra data associated with value?
#TEST   DTM   05b  12-30-2020 14:01:01
#TEST   DTM   05b  12/30-2020 14:02:02      # FAIL Test mixed punctuation.
#TEST   DTM   05b  12/30/2020 14:03:02      # Test mixed punctuation.
#TEST   DTM   05b  12/30/20 14:04           # Test mixed punctuation.
#TEST   DTM   05b  12.30.20 14:04           # European convention for Date.
#TEST   DTM   05b  2.30.20 14:04            # FAIL No 30 FEB. European convention for Date. Non-zero padded
#TEST   DTM   05b  2.30/2020 14:04          # FAIL No 30 FEB., Mixed Separators. European convention for Date. Non-zero padded.
#TEST   DTM   05b  4.30.20 14:04            # European convention for Date. Non-zero padded
#TEST   DTM   05b  4.30/2020 14:04          # FAIL Mixed Separators. European convention for Date. Non-zero padded.

// 8-digit date is great when used in short spans of text; But as a general pattern, this matches any 8 digit number.
// Not very accurate on data or large texts.
// FORM: YYYYMMDD
//#RULE   DTM   xx   \b<YEAR><MM><DD>\b
//#TEST   DTM   xx   20101230




© 2015 - 2024 Weber Informatics LLC | Privacy Policy