All Downloads are FREE. Search and download functionalities are using the official Maven repository.

nt.opensextant-xponents-core.3.3.6.source-code.geocoord_patterns.cfg Maven / Gradle / Ivy

There is a newer version: 3.7.3
Show newest version
/**
 *                        NOTICE
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * **************************************************************************
 *                          NOTICE
 * This software was produced for the U. S. Government under Contract No.
 * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer
 * Software and Noncommercial Computer Software Documentation Clause
 * 252.227-7014 (JUN 1995)
 *
 * (c) 2009-2013 The MITRE Corporation. All Rights Reserved.
 * **************************************************************************
 */
// HISTORY
// =================
// 2012-07-19  MCU  added range test case for DD-01, DM-01, DMS-01
//                  "98 degrees" test: "98 44 009 N, 292 24 03 E  FAIL",  for example.
// 2012-08-08 MCU   removed notion that LONGITUDE is [01][0-7]\d   I don't remember where I got that.
//                  [01]?\d\d is sufficient, and validation will be done on all matches, so these patterns can be simplified.
//                   0\d\d
//                   \d{1,3}, etc.
// 2012-10-10 MCU   Looking at DMS pattern ambiguities.  Order matters.  Flexpat 1.1 now preserves order of appearance
//                  in this file.  The first rule to fire a match is considered the best match; others with the exact same span are duplicates.
// 
// 2012-11-26 MCU   DM and DMS patterns without hemisphere patterns are now supported.
//                  Truth data included as a CSV file. See OpenSextant GeocoderEval for source and scripts.
//
// 2013-01-05 MCU   Saw reports that had invisible 'guarded area marker char' (U+0096) as separator space between lat/lon 
//                  -- So latlonSep has range of allowable space or control chars U+0080 to U+00A0. 
// 
// 2019-12-17 MCU   Added UTM reference and additional test cases. More needed, to cover range of Lat-band widths
//
// Pattern guidelines:   use "\" not "\\"
// To really mean "\\" (a literal backslash), use the "slash" #DEFINE declared below.
//
//Hemispheres
#DEFINE	hemiLat	[NSns]
#DEFINE	hemiLon	[EWew]

// "-" as first char is not a range char.
#DEFINE	hemiLatSign	[-+]
#DEFINE	hemiLonSign	[-+]
#DEFINE hemiLatPre  [-+]|[NSns]\s?
#DEFINE hemiLonPre  [-+]|[EWew]\s?

//space and punctuation
#DEFINE slash   \\
#DEFINE degSym   [˚°º]

// LAT DM separator -- removed ".", as D.M is a decimal degree pattern.
// To specifically support D.M.S patterns best use a specific pattern for that one case.
// LAT SEPARATORS
#DEFINE dmLatSep   \s?DEG|[-˚°º:]|\s
#DEFINE msLatSep   [-'´’′:.\s]
//#DEFINE secLatSep  ["'\u00B4\u2019\u201D\u2033\u2032\s]{1,2}

// LON SEPARATORS -- same as LAT
#DEFINE dmLonSep   \s?DEG|[-˚°º:]|\s
#DEFINE msLonSep   [-'´’′:.\s]
//#DEFINE secLonSep   ["'\u00B4\u2019\u201D\u2033\u2032\s]{1,2}

#DEFINE secLatSep ["'´’”″′\s]{0,2}
#DEFINE secLonSep ["'´’”″′\s]{0,2}

// #DEFINE latlonSepNoDash      [;,/|=x\s\u0080-\u00A0]
// Allows a "-" dash between LAT and LON, where dash is not a hemi sign
// The pattern where a "Dash(-)" is used to separate a Lat/Lon will be its own pattern. 
//   For example 0156N-4555W uses a dash, but has hemisphere symbol as ordinal. 
//              +0156 - -4555 is problematic to detect
//
// Lat/lon separators: one of ; , / | = x, whitespace, or the code points U+0080, U+0096, U+00A0.
//   latlonSep   -- optional single separator char
//   latlonSep3  -- 1 to 3 separator chars (required)
//   latlonSep03 -- 0 to 3 separator chars
// NOTE(review): the classes below list three individual code points, whereas the 2013-01-05
//   HISTORY entry and the commented-out latlonSepNoDash describe the range U+0080-U+00A0.
//   None of the classes includes "-", despite the "Allows a dash" remark above. Confirm intent.
#DEFINE latlonSep     [;,/|=x\s\u0080\u0096\u00A0]?
#DEFINE latlonSep3    [;,/|=x\s\u0080\u0096\u00A0]{1,3}
#DEFINE latlonSep03   [;,/|=x\s\u0080\u0096\u00A0]{0,3}
// Separator between D, M and S fields: dash, dot, colon or whitespace.
#DEFINE dmsSep        [-.:\s]

// SPACE: Coordinate patterns can break across lines ~ HTML, word docs, word wrapping, etc.
//  RULE: Allow no more than 3 whitespace chars in the general case; In some cases optional space may be worth testing
//#DEFINE	space	\\s{1,3}
//#DEFINE optSpace        \\s{0,3}

// Parts of a decimal degree Latitude/Longitude
#DEFINE	decDegLat	\d?\d\.\d{1,20}
#DEFINE	decDegLon	[0-1]?\d?\d\.\d{1,20}

// degree decimal minute lat/lon;  either:
// MM.m..., 
//  M.m..., 
//  M-mm is MIN-SEC, not MIN.DECMIN
// MM-mmm... 
//  M-mmm... 
//                NOT M-mm or MM-mm which implies Min/Sec, not decimal
#DEFINE	decMinLat	[0-5]?\d\.\d{1,20}
#DEFINE decMinLat3      [0-5]?\d[-.]\d{3,20}
#DEFINE	decMinLon	[0-5]?\d\.\d{1,20}
#DEFINE decMinLon3      [0-5]?\d[-.]\d{3,20}

// Difference between DMS fixed-width and variable length fields could be accomplished by these greedy matchers 
//  \d{1,2} or \d?, for example.
//  if 2 digits exist, then \d{1,2} will consume both, before matching next item in pattern.
//  Fear:  too many variable or optional items will lead to a lot of false positives.
//  Later if this framework proves to work we could consolidate the variable length and fixed length patterns back to a common pattern.
// For now trying to make this more accurate and explicit.

// Parts of a DMS latitude/Longitude
#DEFINE	degLat	\d{1,2}
#DEFINE	degLon	[01]?\d?\d

//Parts of a Degrees Minutes Seconds fractional seconds
#DEFINE	fractMinLat	[-\.]\d{1,6}
#DEFINE	fractMinLon	[-\.]\d{1,6}
#DEFINE	fractSecLat	\.\d{1,6}
#DEFINE	fractSecLon	\.\d{1,6}
#DEFINE	fractSecLatOpt	\.?\d{1,6}
#DEFINE	fractSecLonOpt	\.?\d{1,6}
#DEFINE	fractMinLat3	\d{3,6}
#DEFINE	fractMinLon3	\d{3,6}

// Fixed-length patterns
#DEFINE dmsDegLat  [0-8]\d
#DEFINE dmsDegLon  [01]\d\d
#DEFINE dmsMinLat  [0-5]\d
#DEFINE dmsMinLon  [0-5]\d
#DEFINE dmsSecLat  [0-5]\d
#DEFINE dmsSecLon  [0-5]\d

#DEFINE	minLat	[0-5]?\d
#DEFINE	secLat	[0-5]?\d
#DEFINE	minLon	[0-5]?\d
#DEFINE	secLon	[0-5]?\d

#DEFINE latMarker Lat\w*[:=]?
#DEFINE lonMarker Lon\w*[:=]?

// Parts of an MGRS and UTM
// UTM and MGRS share the same longitudinal zone numbering (1-60), so both zone patterns
// allow a leading digit of 0-6. Out-of-range values that still fit the pattern (00, 61-69)
// are presumably rejected by post-match validation, as the MGRS FAIL tests for 00 and 63 suggest.
#DEFINE	UTMBand	        [A-HJ-NP-Z]
#DEFINE	UTMZone 	[0-6]?\d
#DEFINE	MGRSQuad	[A-HJ-NP-Z][A-HJ-NP-V]
#DEFINE MGRSZone        [0-6]?\d\s?[C-HJ-NP-X]

// http://en.wikipedia.org/wiki/Universal_Transverse_Mercator
// UTM Easting is from 167,000 to 833,000 m
//     Northing is from   0 to 10,000,000 m ( <= 9,999,999m)
#DEFINE UTMEasting \d{6}
#DEFINE UTMNorthing \d{3,7}

// MGRS precision is 1m.  Quad is 100,000m sq so resolution is 5 digits + 5 digits with optional whitespace
// 99999n 99999e  -- in MGRS we never see "m" units or N/E denoted explicitly
// Occasionally, newlines or whitespace are interspersed in offset
// minimal:
// dd
// ddddd ddddd  with an additional one or two white spaces.   The offsets start and end with numbers. Only whitespace between is optional. 
// ddddd dddddd  additional digit in Easting  -- trailing 6th digit is a typo; trim off
// dddddd ddddd  additional digit in Northing -- trailing 6th digit is a typo; trim off
// ddddddddddd   Typo introduces ambiguity -- only correct thing is to split on halfway point +/- 1 digit and emit two answers
// 
// "precise" pattern is too precise.
#DEFINE Easting_Northing_Precise    \d{2,5}\s?\d{2,5}
#DEFINE Easting_Northing    \d[\s\d]{0,10}\d


//---------------------------RULES------------------------------------------------

#  #RULE  FAM   ENUM    PATTERN | PATTERN-WITH-GROUPS
#  #TEST  FAM   ENUM    TEXT-WITH-COORD
#  #NEGATIVE FAM ENUM   TEXT-WITH-INVALID-COORD  --- UNUSED SO FAR.

//..........................................
//			    MGRS
//..........................................
//  FORM: ZZZQQ ee* nn*
//           Z Q E N       Zone (2-3 char), Quad (2 char), Easting (1-5 digits) Northing(1-5 digits)
//  http://en.wikipedia.org/wiki/Military_grid_reference_system
//  http://www.armystudyguide.com/content/Prep_For_Basic_Training/Prep_for_basic_land_navigation/locate-a-point-using-the-.shtml
// NOTE(review): as written, this pattern contains only word-boundary and whitespace tokens and
//   references none of the #DEFINE names above (e.g. MGRSZone, MGRSQuad, Easting_Northing), so it
//   cannot match the tests that follow. The define references look to have been stripped in
//   transit (possibly by HTML extraction) -- confirm against the upstream file before use.
//   The same appears true of the other #RULE lines in this file.
#RULE  MGRS    01      \b\s?\s{0,2}\b
#TEST  MGRS    01      4QFJ 50007000
#TEST  MGRS    01     38SMB4611036560
#TEST  MGRS    01     38SMB46113656
#TEST  MGRS    01      8SMB46113656
#TEST  MGRS    01     38SMB 4611036560
#TEST  MGRS    01     38SMB 46113656
#TEST  MGRS    01     38SMB 461136560    # asymmetric
#TEST  MGRS    01     38SMB 46110 3656
#TEST  MGRS    01     38SMB 4611 03656
#TEST  MGRS    01     38SMB 4611 03656 60
#TEST  MGRS    01     38SMB 4611 03656 4 km  ## random numeric text  following a northing.
#TEST  MGRS    01     38SMB 461 03656 4 km  ## random numeric text  following a northing.
#TEST  MGRS    01     38SMB 0461 0365
#TEST  MGRS    01     38SMB 00461 00365
#TEST  MGRS    01     38SMB 04610 03650
#TEST  MGRS    01     38SMB 04610 0365
#TEST  MGRS    01     38SMB 46110365 60
#TEST  MGRS    01     38SMB 46110365 60
#TEST  MGRS    01     38SMB 46110365 60000 # supposedly a large number follows -- pattern should not match.
#TEST  MGRS    01     38 SMB 4611 3656
#TEST  MGRS    01     42 RPR 4611 3656 FAIL 42 RP*  P not valid lat band in 42 longitudinal zone
#TEST  MGRS    01     42WDB 4611 3656   FAIL, D not a valid zone
#TEST  MGRS    01     63SNV 6567 7888   FAIL
#TEST  MGRS    01     00SNV 6567 7888   FAIL
#TEST  MGRS    01      0SNV 6567 7888   FAIL
#   MGRS date patterns -- sometimes dates are valid MGRS other times they match the pattern, but are invalid MGRS locations.
#TEST  MGRS    01      20 PER 1000 FAIL - appears to be a rate of some sort.
#TEST  MGRS    01      10 JAN 1994 FAIL - appears to be a date of some sort.
#TEST  MGRS    01      10 JAN 94 FAIL - appears to be a date of some sort.
#TEST  MGRS    01       1 JAN 94 FAIL - appears to be a date of some sort.
#TEST  MGRS    01      01 JAN 94 FAIL - appears to be a date of some sort.  past century
#TEST  MGRS    01      10 JAN 13 FAIL - appears to be a date of some sort.
#TEST  MGRS    01       1 JAN 13 FAIL - appears to be a date of some sort.
#TEST  MGRS    01       4 JUL 2008 FAIL - appears to be a date of some sort; and also invalid MGRS 
#TEST  MGRS    01      01 JAN 13 FAIL - appears to be a date of some sort.   current year, 2013
#TEST  MGRS    01      14 JAN 00 FAIL - appears to be a date of some sort.   year 2000
#TEST  MGRS    01      14 JAN 30 FAIL - appears to be a date of some sort.   year 1930?
#TEST  MGRS    01      14 EST 2030 FAIL - appears to be a date of some sort.   year 2030?
#TEST  MGRS    01      14 EST 2008 FAIL - appears to be a date of some sort
#TEST  MGRS    01      14 EDT 2008 FAIL - appears to be a date of some sort
#TEST  MGRS    01      61 EDT 2008 FAIL - appears to be a date of some sort
#TEST  MGRS    01       7JAN13 1500  FAIL - obvious date, Jan. 7th, 2013, 3PM
#TEST  MGRS    01      17MAR921100  TEST  is a date, but should pass if no MGRS filters are on.
#TEST  MGRS    01     38smb 00461 00365  FAIL - lower case MGRS grids should not match by default; it is so rare.

//..........................................
//			    UTM
//..........................................
// REFERENCE:  https://en.wikipedia.org/wiki/Universal_Transverse_Mercator_coordinate_system
// FORM: (?# ZZA DDDDDD DDDDDDD )
#RULE  UTM  01     \b\s?\s?[mE]{0,2}\s?[mN]{0,2}\b
#TEST  UTM  01     17N 699990 3333335
#TEST  UTM  01     17S 699990 3333335
#TEST  UTM  01     17N 630084 4833438  # Should be CN tower, as mentioned in wikipedia above.
#TEST  UTM  01     17T 699990 3333335
#TEST  UTM  01      7S 699990 3333335
#TEST  UTM  01     17N6999903333335
#TEST  UTM  01     17 6999903333335     fail ambiguous
#TEST  UTM  01     17N 699990m 3333335m
#TEST  UTM  01     17S 699990mE 3333335mN
#TEST  UTM  01     17N 699990Em 3333335Nm
#TEST  UTM  01     17X 699990e 3333335n     Pass, but questionable lat band may contradict Northing;  Northern Hemi, X lat
#TEST  UTM  01     17D 699990E 3333335N     Ditto,  Southern Hemi = D lat


//..........................................
//                    Degrees Decimal minutes
//..........................................
// TODO:  Clean up all test cases below for DM and DMS
//                       
//              00    DD MMH-mmm    DDD MMH-mmm
//              01a   DD MM H       DDD MM H
//              01a   DD MM.mmm H   DDD MM.mmm H
//              01a   DD DEG MM.m H DDD DEG MM.m H
//              01b H DD MM       H DDD MM
//              01b H DD MM.mmm   H DDD MM.mmm
//
//   Variables: "x" or "BY" are equivalent separators to ";/,|(sp)"
//              Deg symbol, ', ", or '' are all optional, but if present, they suggest pattern is a coordinate, not just a pair of numbers.
//              Lon DDD is 1-3 digits, Lat DD is 1-2 digits, leading '0' optional

// Pattern logic for dealing with fractional minutes;
//  work-in progress:
//  has separators?
//     YES => is separator a "-" dash throughout?
//        YES => if fractMin length is 2 chars, then this is DMS, not DM.m
//        NO =>  is fractMin prefixed with "-" and ...   (note left unfinished)
//
//

// FORM:  DDMMH-mmm
#RULE   DM      00      \b[-\s]?[-\s]?
#TEST   DM      00      4218N-009 10224W-003
#TEST   DM      00      4218N.009 10224W.003
#TEST   DM      00      42-18N-009 102-24W-003
#TEST   DM      00      42 18N-009 102 24W-003
#TEST   DM      00      42 18N-009102 24W-003    FAIL  same pattern test as above, but no separator.
#TEST   DM      00      78 44N-009, 092 24N-003  FAIL,  both hemispheres are N
#TEST   DM      00      98 44N-009, 292 24W-003  FAIL,  XY out of range

// VARIABLE LENGTH FIELDS
// FORM:  DDMMmmmHDDDMMmmmH
#RULE   DM      01a     \b\s???\s?\s???\s?
#TEST   DM      01a     42 18.009N 102 24.003W
#TEST   DM      01a     42 18.00 N 102 24.00W
#TEST   DM      01a     42 18-00 N 102 24-00W
#TEST   DM      01a     42-18-09N; 102-24-03W    # FAIL Ambiguous, but good test. This is valid DMS only. 
#TEST   DM      01a     42 9-00 N 102 6-00W       This pattern needs to be more explicit -- its own pattern?--- MM-mm used to represent MM.mm fractional minutes is tough, as it is just as easily MM-SS min/seconds; However MM-mmm is more obvious as fractional minutes.
#TEST   DM      01a     42 9.009 N 102 6.003W
#TEST   DM      01a     42 18-009N 102 24-003W
#TEST   DM      01a     42.18.009N 102.24.003W
#TEST   DM      01a     42-18-009N; 102-24-003W
#TEST   DM      01a     42° 18.009'N; 102° 24.003'W
#TEST   DM      01a     42° 18.009N; 102° 24.003W
#TEST   DM      01a     42°18.009N; 102°24.003W
#TEST   DM      01a     12° 18.009N; 2° 24.003W
#TEST   DM      01a     42-18-09N; 102-24-03W    # FAIL: DMS?, MM-SS is not decimal Min MM.SS it is Min Sec
#TEST   DM      01a     42-18-009N; 102-24-003W   # Clearly Decimal, as third slot is 3 digits, so it seems 18.009 Min, rather than 18min 009sec
#TEST   DM      01a     42.18.009N 102.24.003W
#TEST   DM      01a     42.18.009N x 102.24.003W
#TEST   DM      01a     42.18.9999 N 102.24.5555 W
#TEST   DM      01a     42°18'N 102°24'W
#TEST   DM      01a     42° 18'N 102° 24'W
#TEST   DM      01a     42 DEG 18.0N 102 DEG 24.0W
#TEST   DM      01a     01DEG 44 N 101DEG 44 E
#TEST   DM      01a     42:18n 102:24w
#TEST   DM      01a     42:18n - 102:24w
#TEST   DM      01a     42:00N-102:18E



// VARIABLE LENGTH FIELDS
// FORM:  DD MM.mmm H
// lat/lon separator -- cannot be a "-" dash here: with an optional separator and an optional leading hemisphere sign, a lone "-" could be either one.
// 
#RULE   DM      01b      \b\s?\s???\s?\s???
#TEST   DM      01b      N42 18.00 W102 24.00
#TEST   DM      01b      N 42 18.00 W 102 24.00
#TEST   DM      01b      N42 18-00 W102 24-00
#TEST   DM      01b      N42 18-00 x W102 24-00
#TEST   DM      01b      N 4218.0 W 10224.0
#TEST   DM      01b      N42.18.005 x W102.24.008
#TEST   DM      01b      N42 18-005 x W102 24-008
#TEST   DM      01b      +42° 18 -102° 24
#TEST   DM      01b      N42 18' W102 24'
#TEST   DM      01b      +42 18 -102 24  # FAIL
#TEST   DM      01b      N42.18.005 x W102.24.008
#TEST   DM      01b      N42:18 W102:24
#TEST   DM      01b      n42:18 w102:24
#TEST   DM      01b      N 01° 44' E 101° 22'
#TEST   DM      01b      N 01° 44' - E 101° 22'
#TEST   DM      01b      N 01° 44' - 101° 22'    Could possibly not PASS, as lat hemisphere is  North; ambiguous if lon hemi is West or if "-" dash 
#TEST   DM      01b      01° 44' - -101° 22'     A really obscure use of dash, next to a hemisphere sign. TESTING Only
#TEST	DM		01b      -1 12-00-32     FAIL: lopsided fields and/or precision. Use of "-" separator is inconsistent.
#TEST	DM		01b      -1-45 -12-00-32     FAIL


// COPY OF 01a/b to allow for fixed-width min, sec, but w/out delimiters.
// FORM:  DDMM.m*H
#RULE   DM      02a     \b\s?\s?\b
#TEST   DM      02a     4218.009N 10224.003W
#TEST   DM      02a     4209.009N 10206.003W
#TEST   DM      02a     4209.00N 10206.00W
#TEST   DM      02a     4209.0N 10206.0W
#TEST   DM      02a     4209.0N - 10206.0W  #FAIL dash not allowed as separator currently.
#TEST   DM      02a     4209.0N x 10206.0W

// FORM:  HDDMM.m*
#RULE   DM      02b      \b\s?\s?\b
#TEST   DM      02b      N4218.0 W10224.0
#TEST   DM      02b      N4218.009W10224.003
#TEST   DM      02b      N4218-0018 W10224-0444

// Desc: FIXED, NO DELIM, FRACTIONAL MINUTES -- 03a/b
#RULE   DM      03a      \b\s?\s?\b
#TEST   DM      03a      4218009N10224003W
#TEST   DM      03a      4218009N;10224003W
#TEST   DM      03a      4218009N 10224003W

#RULE   DM      03b      \b\s?\s?\b
#TEST   DM      03b      N4218009W10224003
#TEST   DM      03b      N4218009;W10224003
#TEST   DM      03b      N4218009 W10224003


// Finds DM or DM.m degree patterns that have no hemisphere.
// TODO: Revisit why optional hemisphere poses a problem.  For now we assume the lat/lon separator is not a dash
// 
#RULE   DM      03-deg      \b?\s*???\s*??\b
#TEST   DM      03-deg      42° 18, 102° 24
#TEST   DM      03-deg      42° 18' 102° 24'
#TEST   DM      03-deg      42°18 x 102° 24
#TEST   DM      03-deg      42˚ 18.44, 102˚ 24.11    # Vary deg symbol
#TEST   DM      03-deg      42° 18.44' 102° 24.11'
#TEST   DM      03-deg      42°18.44 x 102° 24.11
#TEST   DM      03-deg      (+42°18.44 x 102° 24.11)
#TEST   DM      03-deg      (+42°18.44 x +102° 24.11)
#TEST   DM      03-deg      (42°18.44 x -102° 24.11)


// desc: FIXED WIDTH FIELDS, NO SEP
#RULE   DM      04a      \b
#TEST   DM      04a      N4218 W10224

// FORM:  DDMMH DDMMH 
#RULE   DM      04b      \b\s*\s*
#TEST   DM      04b      4218N 10224W

// RANDOM PATTERNS:
// FORM: /DDMMHd/DDDMMHd/ where d=checksum value
#RULE   DM      05      /\d/\d/
#TEST   DM      05      /4218N4/10224W5/

#DEFINE  xySep  \s?[Xx/,]\s?

// DM with minimal symbols.  Required LAT/LON separator.  Otherwise this is a list of numbers with +/- signs.
// FORM: +/-DD MM.mmm x +/-DDD DD.DDD
#RULE   DM      06      \s\s
#TEST   DM      06      +42 18.0 x -102 24.0
#TEST   DM      06      +42 18.0 X -102 24.0
#TEST   DM      06      +42 18.0 / -102 24.0
#TEST   DM      06      +42 18.0, -102 24.0

// DOT patterns -- We're for now assuming 
//
//     DD.MM.SS  DDD.MM.SS  is DMS
//     DD.MM.mmm* DDD.MM.mmm* is DM
//
//   and Longitude in these patterns is 3-digits
// 
#RULE   DM      01a-dot  \b\.\.\s?*\.\.\s?
#TEST   DM      01a-dot  03.44.777N 111.10.044W
#TEST   DM      01a-dot  03.44.777 N 111.10.044 W
#TEST   DM      01a-dot  03.44.7778 N 111.10.0442 W

#RULE   DM      01b-dot  \b\s?\.\.*\s?\.\.
#TEST   DM      01b-dot  N03.44.777 W111.10.044
// Hemisphere-leading variants (space after the hemisphere letter); these belong to rule 01b-dot.
#TEST   DM      01b-dot  N 03.44.777 W 111.10.044 
#TEST   DM      01b-dot  N 03.44.7778 W 111.10.0442 


#DEFINE trivialSep \s{0,2}

//..........................................
//                    Degrees Minutes Seconds
//..........................................
//         01a          DD MM SS H    DDD MM SS H
//         01b        H DD MM SS    H DDD MM SS 
#RULE   DMS     01a  ?\s*\s?\s??\s??\s*\s?\s??\s?
// Beware unicode
#TEST   DMS     01a 42 18' 00N 102 24' 00W
#TEST   DMS     01a 42 18' 00”N 102 24' 00”W
#TEST   DMS     01a  2 8' 00"N 12 24' 00"W
#TEST   DMS     01a 42° 18' 00"N  102° 24' 00"W
#TEST   DMS     01a    42 18-00 N 102 24-00W
#TEST   DMS     01a    42 9-00 N 102 6-00W
#TEST   DMS     01a 34°22′24″N   49°14′27″E
#TEST   DMS     01a 34°22 24″N   49°14 27″E
#TEST   DMS     01a 34°22′24N   49°14′27E
#TEST   DMS     01a 34° 22′ 24″N   49° 14′ 27″E
#TEST   DMS     01a 34° 22' 24"N   49° 14' 27"E
#TEST   DMS     01a 34°22'24"N 49°14'27"E
#TEST   DMS     01a 34°22'24"N   49°14'27"E
#TEST   DMS     01a 34 22'24"N   49 14'27"E
#TEST   DMS     01a 34 22' 24"N 049 14' 27"E
#TEST   DMS     01a 34 22 24N 049 14 27E
#TEST   DMS     01a 34 22 24 N 049 14 27 E
#TEST   DMS     01a 34 22 24N 049 14 27E
#TEST   DMS     01a 34 22 24 N 49 14 27 E
#TEST   DMS     01a 98 44 009 N, 292 24 03 E  FAIL
#TEST   DMS     01a 27:37:45N/82:42:10W  // original
#TEST   DMS     01a 27 37' 45"N/82 42' 10"W  // original
#TEST   DMS     01a "27° 37' 45’’N, 82° 42' 10’’W",  // original
#TEST   DMS     01a "27° 37' 45’N, 82° 42' 10’W",    // single second hash sym
#TEST   DMS     01a "27° 37' 45’’N 82° 42' 10’’W",  // no lat/lon sep
#TEST   DMS     01a "27° 37 45N, 82° 42 10W"    // no min hash.
#TEST   DMS     01a 01°44'55.5"N 101°22'33.0"E
#TEST   DMS     01a 01° 44' 55.5"N 101° 22' 33.0"E
#TEST   DMS     01a 01° 44' 55.5"N 101° 22' 33"E // asymmetric in decimal seconds precision
#TEST   DMS     01a 42 18' 00" -102 24' 00"
#TEST   DMS     01a GEO:(42° 18' 00" 102° 24' 00")
#TEST   DMS     01a GEO:42° 18' 00" 102° 24' 00"
#TEST   DMS     01a 03-04-05 12-11-10   Ambiguous date or coordinate -- very possibly neither.
#TEST   DMS     01a 03-04-05 12:11:10	FAIL - is a date/time pattern
#TEST   DMS     01a 99-04-05 12:11:10	FAIL - is a date/time pattern - part of 1999-04...
#TEST   DMS     01a 03-04-58 12:11:10	FAIL - is a date/time pattern - DOB 1958-03-04
#TEST   DMS     01a 03-04-99 12:11:10	FAIL - is a date/time pattern - DOB 1958-03-04
#TEST   DMS     01a 14-04-33 12:11:10	FAIL - is a date/time pattern 
#TEST   DMS     01a Latitude: 41º58'46"N, Longitude: 87º54'20"W  
#TEST   DMS     01a location 28˚22′24″N 46˚14′27″E


// DMS 01 with hemisphere leading
#RULE   DMS     01b  \b\s?\s??\s?\s??\b
#TEST   DMS     01b N01°44'55.5" E101°22'33.0"
#TEST   DMS     01b N01° 44' 55.5" E101° 22' 33.0"
#TEST   DMS     01b N 01° 44' 55.5" E 101° 22' 33.0"
#TEST   DMS     01b N42 18' 00" W102 24' 00"
#TEST   DMS     01b N02 8' 00" W012 24' 00"
#TEST   DMS     01b N42° 18' 00"  W102° 24' 00"
#TEST   DMS     01b +42 18' 00" -102 24' 00"
#TEST   DMS     01b xx +42° 18' 00"  -102° 24' 00" xx

// DMS variation using ":" only. 
// Only if hemisphere sign is present will this match.
#RULE   DMS     01c  \b::\s?::\b
#TEST   DMS     01c x +42:18:00 -102:24:00 x

// DMS01 with fractional seconds, but no hemisphere symbol, just degree
#RULE   DMS     01-deg  \b?\s?\s???\s?\s??\b
#TEST   DMS     01-deg  01°44'55.5" 101°22'33.0"
#TEST   DMS     01-deg  01° 44' 55.5" 101° 22' 33.0"
#TEST   DMS     01-deg  01° 44' 55.5" 101° 22' 33"
#TEST   DMS     01-deg  01° 44' 55" 101° 22' 33.0"
#TEST   DMS     01-deg  01° 44' 55" -101° 22' 33.0"

#RULE   DMS      01a-dot  \b\.\.\s?\.\.\s?
#TEST   DMS      01a-dot  03.44.777N 111.10.044W
#TEST   DMS      01a-dot  03.44.777 N 111.10.044 W
#TEST   DMS      01a-dot  03.44.7778 N 111.10.0442 W

#RULE   DMS      01b-dot  \b\s?\.\.\s?\.\.
#TEST   DMS      01b-dot  N03.44.777 W111.10.044
// Hemisphere-leading variants (space after the hemisphere letter); these belong to rule 01b-dot.
#TEST   DMS      01b-dot  N 03.44.777 W 111.10.044 
#TEST   DMS      01b-dot  N 03.44.7778 W 111.10.0442 

// DMS 01 with dot as a separator.
// 
#RULE   DMS     02a  \b\.\.\.\.
#TEST   DMS     02a   01.44.55N 055.44.33E

#RULE   DMS     02b  \b\.\.\.\.
#TEST   DMS     02b   N01.44.55 E055.44.33


// DMS with NO SEPARATORS
#RULE   DMS     03a  \b\s?*\s?
#TEST   DMS     03a  421800N 1022400W
#TEST   DMS     03a  5113N 00425E      FAIL

#RULE   DMS     03b  \b
#TEST   DMS     03b  N421800 W1022400
#TEST   DMS     03b  N421800W1022400
#TEST   DMS     03b  N421800/W1022400

// DMS NO SEPARATORS, decimal seconds. Ambiguous -- is additional precision in Fractional Minutes or fractional seconds?
#RULE   DMS     04a  \b
#TEST   DMS     04a  4218001234N 10224001234W
#TEST   DMS     04a  4218001234N - 10224001234W
#TEST   DMS     04a  4218001234N x 10224001234W

#RULE   DMS     04b  \b
#TEST   DMS     04b  N4218001234 W10224001234
#TEST   DMS     04b  N4218001234   W10224001234




//..........................................
//                      Decimal Degrees
//..........................................
// FORM: DD-xx, Decimal Deg, Preceding Hemisphere (a) H DD.DDDDDD° HDDD.DDDDDD°, optional deg symbol
#RULE   DD      01      \b\s??\s??
#TEST   DD      01      N42.3, W102.4
#TEST   DD      01      N 42.3, W 102.4
#TEST   DD      01      N42.3°, W102.4°
#TEST   DD      01      N98.3°, W192.4°  #FAIL test out lon deg validation.
#TEST   DD      01      N98.3°, W292.4°  #FAIL test out lon deg validation.

// FORM: DD-xx, Decimal Deg, Postpending Hemisphere (a) DD.DDDDDD°H DDD.DDDDDD°H,   optional deg symbol
#RULE   DD      02      \b??\b
#TEST   DD      02      42.3N; 102.4W
#TEST   DD      02      42.3°N; 102.4°W

// FORM: DD-xx, Decimal Deg, Preceding Hemisphere, +/- with deg symbol, (a) {-|+|}DD.DDDDDD° {-|+|}DDD.DDDDDD°
#RULE   DD      03      \b??
#TEST   DD      03      +42.3°;-102.4° 
#TEST   DD      03      42.3° x -102.4°
#TEST   DD      03      42.3° x 102.4°

// FORM: DD-xx, Decimal Deg with explicit LAT: LON: markers, both hemisphere and degree symbols are optional
#RULE	DD	04	\s*???\s*???
#TEST   DD      04      Latitude: 42.3N x Longitude: 102.3W
#TEST   DD      04      Latitude: N42.3° x Longitude: W102.3°
#TEST   DD      04      Latitude: 42.3 Longitude: 102.3
#TEST   DD      04      Latitude= -42.3 Longitude= 102.3
#TEST   DD      04      Latitude= 42.3 Longitude= 102.3
#TEST   DD      04      Latitude: 42.3 Longitude: 102.3
#TEST   DD      04      Latitude -42.3 Longitude 102.3
#TEST   DD      04      Lat:-42.3 Lon:102.3
#TEST   DD      04      Lat= -42.3 Long= 102.3

// #RULE	DD	04a	\w+[:=]\s*??
// #TEST   DD      04a     COORD=-55.6, +134.700

//..........................................
//                      Degrees -- These patterns are so low-resolution that for now it seems useless to allow them to run by default.
//                      The resolution is approximately 100 KM x 100 KM.
//..........................................

// FORM: DD-xx, Decimal Deg, Preceding Hemisphere (a) HDD° HDDD°, required deg symbol
#RULE   DD      05      \b??
#TEST   DD      05      N42°, W102°
#TEST   DD      05      N42W102   #FAIL -- hemispheres are right, but text is too short and lack of separator leads to ambiguity.
#TEST   DD      05      -42°, 102°
#TEST   DD      05      N 42°, W 102°
#TEST   DD      05      -42°, +102°
#TEST   DD      05      -42°, 102°
#TEST   DD      05      -42°, 102°
#TEST   DD      05      +42°, -102°

#RULE   DD      06      \b?\s??\s?\b
#TEST   DD      06      42°N, 102°N        FAIL
#TEST   DD      06      42° N, 102° W
#TEST   DD      06      42 N, 102 W
#TEST   DD      06      42N x 102W
#TEST   DD      06      00 N 130 WA     FAIL

#RULE   DD      07      \b\s?\s?\b
#TEST   DD      07      N42, W102
#TEST   DD      07      N42 x W102
#TEST   DD      07      N 42 W 102

//#RULE   DD      08     \b?,\s??
//#TEST   DD      08     text 54.67, -117 text  # FAIL - no decimal longitude
//#TEST   DD      08     text 54.33, -117.81 text
//#TEST   DD      08     text +54.22, 117.71 text
//#TEST   DD      08     text +54.22, 045.71 text
//#TEST   DD      08     text +54.22, -045.71 text
//#TEST   DD      08     text 54.11,117.61 text
//#TEST   DD      08     YX = (54.11, 117.61) text   # Coordinate tuple ()





© 2015 - 2024 Weber Informatics LLC | Privacy Policy