
// com.github.sisyphsu.dateparser.DateParserBuilder Maven / Gradle / Ivy
package com.github.sisyphsu.dateparser;
import java.util.*;
/**
 * Predefines the built-in date parsing rules and lets callers add their own before
 * building a {@link DateParser}.
 *
 * <p>Two kinds of rule are kept:
 * <ul>
 *   <li><b>standard</b> rules - a regular expression only;</li>
 *   <li><b>customized</b> rules - a regular expression paired with a {@link RuleHandler}
 *       callback.</li>
 * </ul>
 * The built-in rules are registered once, in the static initializer, and are copied into
 * every new builder instance.
 *
 * @author sulin
 * @since 2019-09-12 14:34:29
 */
public final class DateParserBuilder {

    /** Lower-case English month names followed by their three-letter abbreviations. */
    static final String[] months = {
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
            // "may" appears twice (full name and abbreviation); register(String) drops the duplicate rules.
            "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    };

    /** Lower-case English day-of-week names followed by their three-letter abbreviations. */
    static final String[] weeks = {
            "monday",
            "tuesday",
            "wednesday",
            "thursday",
            "friday",
            "saturday",
            "sunday",
            "mon", "tue", "wed", "thu", "fri", "sat", "sun",
    };

    /** Built-in standard rules, in registration order, without duplicates. */
    static final List<String> STANDARD_RULES = new ArrayList<>();

    /** Built-in customized rules (regex only), in first-registration order. */
    static final List<String> CUSTOMIZED_RULES = new ArrayList<>();

    /** Handler for every regex listed in {@link #CUSTOMIZED_RULES}. */
    static final Map<String, RuleHandler> CUSTOMIZED_RULE_MAP = new HashMap<>();

    static {
        // NOTE(review): every capturing construct below has the shape "(?" + content, e.g.
        // "(?\\d{4})" or "(?%s)". java.util.regex.Pattern rejects that form, and addRule's
        // Javadoc says standard rules "should have some specified groupName", so these look
        // like named groups "(?<name>...)" whose "<name>" part was lost. The intended names
        // are not visible in this file, so the strings are left untouched - confirm against
        // the consumer of these rules (DateParser) before relying on them.

        // support day of week, like 'Mon' or 'Monday,'
        for (String week : weeks) {
            register(String.format("(?%s)\\W*", week));
        }

        for (String month : months) {
            // month-word at first, like 'may. 8th, 2009,' or 'may. 8th, 09'
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W+(?\\d{4})\\b", month));
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W+(?\\d{2})$", month));
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W+(?\\d{2})[^:\\d]", month));
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W*", month));

            // month-word at middle, like '8th, may, 2009,' or '8th-may-09'
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W+(?\\d{4})\\b", month));
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W+(?\\d{2})$", month));
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W+(?\\d{2})[^:\\d]", month));
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W*", month));

            // month-word at middle, like '2009-may-8th'
            register(String.format("(?\\d{4})\\W+(?%s)\\W+(?\\d{1,2})(?:th)?\\W*", month));
        }

        // yyyy-MM-dd, yyyy/MM/dd...
        register("(?\\d{4})\\W{1}(?\\d{1,2})\\W{1}(?\\d{1,2})[^\\d]?");

        // yyyy-MM, yyyy/MM...
        register("^(?\\d{4})\\W{1}(?\\d{1,2})$");

        // MM/dd/yyyy, dd/MM/yyyy
        register("(?\\d{1,2}\\W{1}\\d{1,2})\\W{1}(?\\d{4})[^\\d]?");

        // dd/MM/yy, MM/dd/yy
        register("(?\\d{1,2}[./]\\d{1,2})[./](?\\d{2})$");
        register("(?\\d{1,2}[./]\\d{1,2})[./](?\\d{2})[^:\\d]");

        // yyyy
        register(" ?(?\\d{4})$");
        // yyyyMM
        register("^(?\\d{4})(?\\d{2})$");
        // yyyyMMdd
        register("^(?\\d{4})(?\\d{2})(?\\d{2})$");
        // yyyyMMddhhmmss
        register("^(?\\d{4})(?\\d{2})(?\\d{2})(?\\d{2})(?\\d{2})(?\\d{2})$");

        // unixtime(10)
        register("^(?\\d{10})$");
        // millisecond(13)
        register("^(?\\d{13})$");
        // microsecond(16)
        register("^(?\\d{16})$");
        // nanosecond(19)
        register("^(?\\d{19})$");

        // at hh:mm:ss.SSSSZ
        register("\\W*(?:at )?(?\\d{1,2}):(?\\d{1,2})(?::(?\\d{1,2}))?(?:[.,](?\\d{1,9}))?(?z)?");

        // +08:00
        register(" ?(?[-+]\\d{2}:?(?:\\d{2})?)");

        // 12 o’clock
        register(" ?(?\\d{1,2}) o’clock\\W*");

        // am, pm
        register(" ?(?am|pm)\\W*");

        // (CEST) (GMT Daylight Time)
        register(" [(](?\\w+(?: \\w+)*)[)]");

        // support all languages' default TimeZone
        for (String zoneId : TimeZone.getAvailableIDs()) {
            final TimeZone zone = TimeZone.getTimeZone(zoneId);
            final RuleHandler handler = (cs, matcher, dt) -> dt.zone = zone;
            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale;
            // under e.g. a Turkish locale "Asia/Istanbul" would otherwise become "asia/ıstanbul".
            String zoneIdStr = zone.getID().toLowerCase(Locale.ROOT);
            // \Q...\E quotes the id so characters such as '+' or '[' are matched literally.
            register(String.format(" ?\\Q%s\\E", zoneIdStr), handler);
            register(String.format(" ?\\Q[%s]\\E", zoneIdStr), handler);
        }

        // support others no-standard 'timezone'
        register(" ?pdt", (cs, matcher, dt) -> dt.zone = TimeZone.getTimeZone("PST"));
        register(" ?cest", (cs, matcher, dt) -> dt.zone = TimeZone.getTimeZone("CET"));

        // MSK m=+0.000000001
        register(" msk m=[+-]\\d\\.\\d+");
    }

    /**
     * Registers a built-in standard rule; a regex that is already registered is ignored.
     *
     * @param re The rule's regular expression
     */
    static synchronized void register(String re) {
        if (!STANDARD_RULES.contains(re)) {
            STANDARD_RULES.add(re);
        }
    }

    /**
     * Registers a built-in customized rule. If the regex was registered before, it keeps its
     * original position in {@link #CUSTOMIZED_RULES} and only its handler is replaced.
     *
     * @param re      The rule's regular expression
     * @param handler The callback associated with the rule
     */
    static synchronized void register(String re, RuleHandler handler) {
        if (!CUSTOMIZED_RULE_MAP.containsKey(re)) {
            CUSTOMIZED_RULES.add(re);
        }
        CUSTOMIZED_RULE_MAP.put(re, handler);
    }

    private boolean preferMonthFirst = false;
    /** All rules of this builder, standard and customized, in registration order. */
    private final List<String> rules = new ArrayList<>();
    /** The subset of {@link #rules} that are standard rules. */
    private final Set<String> standardRules = new HashSet<>();
    /** Handlers of this builder's customized rules, keyed by regex. */
    private final Map<String, RuleHandler> customizedRuleMap = new HashMap<>();

    /**
     * Creates a builder pre-populated with the built-in rules: all standard rules first,
     * followed by all customized rules.
     */
    DateParserBuilder() {
        // predefined standard rules
        this.rules.addAll(DateParserBuilder.STANDARD_RULES);
        this.standardRules.addAll(DateParserBuilder.STANDARD_RULES);
        // predefined customized rules
        this.rules.addAll(DateParserBuilder.CUSTOMIZED_RULES);
        this.customizedRuleMap.putAll(DateParserBuilder.CUSTOMIZED_RULE_MAP);
    }

    /**
     * Mark this parser prefer mm/dd or not.
     *
     * @param preferMonthFirst True means prefer mm/dd, False means prefer dd/mm.
     * @return This
     */
    public DateParserBuilder preferMonthFirst(boolean preferMonthFirst) {
        this.preferMonthFirst = preferMonthFirst;
        return this;
    }

    /**
     * Add a standard rule which could parse the specified subsequence. The rule is appended
     * after the rules already present; a rule that is already a standard rule is ignored.
     *
     * @param rule Standard rule which should have some specified groupName
     * @return This
     */
    public DateParserBuilder addRule(String rule) {
        if (!standardRules.contains(rule)) {
            rules.add(rule);
            standardRules.add(rule);
        }
        return this;
    }

    /**
     * Add a customized rule which could parse any subsequence. If the same regex was added
     * as a customized rule before, only its handler is replaced.
     *
     * @param rule    The parsing rule in regex
     * @param handler The parsing callback
     * @return This
     */
    public DateParserBuilder addRule(String rule, RuleHandler handler) {
        if (!customizedRuleMap.containsKey(rule)) {
            rules.add(rule);
        }
        customizedRuleMap.put(rule, handler);
        return this;
    }

    /**
     * Build the final DateParser instance from the rules and preference collected so far.
     *
     * @return DateParser
     */
    public DateParser build() {
        // NOTE(review): the builder's own collections are handed over, not copies; whether
        // DateParser copies them is not visible here - avoid reusing the builder after build()
        // unless that is confirmed.
        return new DateParser(rules, standardRules, customizedRuleMap, preferMonthFirst);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy