All downloads are free. Search and download functionality uses the official Maven repository.

com.github.sisyphsu.dateparser.DateParserBuilder Maven / Gradle / Ivy

package com.github.sisyphsu.dateparser;

import java.util.*;

/**
 * Builder for {@link DateParser} that ships with a predefined set of parsing rules and lets
 * callers append their own.
 *
 * <p>Two kinds of rules exist:
 * <ul>
 *   <li><b>standard rules</b> - a regex only; registered through {@link #addRule(String)};</li>
 *   <li><b>customized rules</b> - a regex paired with a {@link RuleHandler} callback; registered
 *       through {@link #addRule(String, RuleHandler)}.</li>
 * </ul>
 *
 * <p>The predefined rules are collected once, in the static initializer, into class-level
 * registries. Every new builder starts with a copy of those registries, so rules added to one
 * builder instance do not affect other builders.
 *
 * @author sulin
 * @since 2019-09-12 14:34:29
 */
public final class DateParserBuilder {

    /** Lower-case English month names, full form first and then the three-letter form. */
    static final String[] months = {
            "january",
            "february",
            "march",
            "april",
            "may",
            "june",
            "july",
            "august",
            "september",
            "october",
            "november",
            "december",
            "jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep", "oct", "nov", "dec",
    };

    /** Lower-case English day-of-week names, full form first and then the three-letter form. */
    static final String[] weeks = {
            "monday",
            "tuesday",
            "wednesday",
            "thursday",
            "friday",
            "saturday",
            "sunday",
            "mon", "tue", "wed", "thu", "fri", "sat", "sun",
    };

    /** Predefined standard rules, in registration order and without duplicates. */
    static final List<String> STANDARD_RULES = new ArrayList<>();

    /** Predefined customized rules, in first-registration order and without duplicates. */
    static final List<String> CUSTOMIZED_RULES = new ArrayList<>();

    /** Handler of every predefined customized rule, keyed by the rule's regex. */
    static final Map<String, RuleHandler> CUSTOMIZED_RULE_MAP = new HashMap<>();

    static {
        // NOTE(review): the "(?...)" groups in the standard rules below look like named capturing
        // groups whose "<name>" part has been lost (java.util.regex spells them "(?<name>X)").
        // The literals are kept exactly as found because the expected group names are defined by
        // DateParser, which is not visible here - TODO confirm and restore them from upstream.

        // support day of week, like 'Mon' or 'Monday,'
        for (String week : weeks) {
            register(String.format("(?%s)\\W*", week));
        }

        // "may" occurs twice in `months`; register(String) drops the resulting duplicate rules.
        for (String month : months) {
            // month-word at first, like 'may. 8th, 2009,' or 'may. 8th, 09'
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W+(?\\d{4})\\b", month));
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W+(?\\d{2})$", month));
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W+(?\\d{2})[^:\\d]", month));
            register(String.format("(?%s)\\W+(?\\d{1,2})(?:th)?\\W*", month));

            // month-word at middle, like '8th, may, 2009,' or '8th-may-09'
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W+(?\\d{4})\\b", month));
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W+(?\\d{2})$", month));
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W+(?\\d{2})[^:\\d]", month));
            register(String.format("(?\\d{1,2})(?:th)?\\W+(?%s)\\W*", month));

            // month-word at middle, like '2009-may-8th'
            register(String.format("(?\\d{4})\\W+(?%s)\\W+(?\\d{1,2})(?:th)?\\W*", month));
        }

        // yyyy-MM-dd, yyyy/MM/dd...
        register("(?\\d{4})\\W{1}(?\\d{1,2})\\W{1}(?\\d{1,2})[^\\d]?");

        // yyyy-MM, yyyy/MM...
        register("^(?\\d{4})\\W{1}(?\\d{1,2})$");

        // MM/dd/yyyy, dd/MM/yyyy
        register("(?\\d{1,2}\\W{1}\\d{1,2})\\W{1}(?\\d{4})[^\\d]?");

        // dd/MM/yy, MM/dd/yy
        register("(?\\d{1,2}[./]\\d{1,2})[./](?\\d{2})$");
        register("(?\\d{1,2}[./]\\d{1,2})[./](?\\d{2})[^:\\d]");

        // yyyy
        register(" ?(?\\d{4})$");
        // yyyyMM
        register("^(?\\d{4})(?\\d{2})$");
        // yyyyMMdd
        register("^(?\\d{4})(?\\d{2})(?\\d{2})$");
        // yyyyMMddhhmmss
        register("^(?\\d{4})(?\\d{2})(?\\d{2})(?\\d{2})(?\\d{2})(?\\d{2})$");

        // unixtime(10)
        register("^(?\\d{10})$");

        // millisecond(13)
        register("^(?\\d{13})$");

        // microsecond(16)
        register("^(?\\d{16})$");

        // nanosecond(19)
        register("^(?\\d{19})$");

        // at hh:mm:ss.SSSSZ
        register("\\W*(?:at )?(?\\d{1,2}):(?\\d{1,2})(?::(?\\d{1,2}))?(?:[.,](?\\d{1,9}))?(?z)?");

        // +08:00
        register(" ?(?[-+]\\d{2}:?(?:\\d{2})?)");

        // 12 o’clock
        register(" ?(?\\d{1,2}) o’clock\\W*");

        // am, pm
        register(" ?(?am|pm)\\W*");

        // (CEST) (GMT Daylight Time)
        register(" [(](?\\w+(?: \\w+)*)[)]");

        // support all languages' default TimeZone
        for (String zoneId : TimeZone.getAvailableIDs()) {
            final TimeZone zone = TimeZone.getTimeZone(zoneId);
            final RuleHandler handler = (cs, matcher, dt) -> dt.zone = zone;

            // Locale.ROOT keeps the lower-casing independent of the JVM's default locale
            // (e.g. under a Turkish locale 'I' would otherwise become a dotless 'ı').
            String zoneIdStr = zone.getID().toLowerCase(Locale.ROOT);
            // \Q...\E quotes the zone id so characters such as '/' or '+' are matched literally.
            register(String.format(" ?\\Q%s\\E", zoneIdStr), handler);
            register(String.format(" ?\\Q[%s]\\E", zoneIdStr), handler);
        }

        // support others no-standard 'timezone'
        register(" ?pdt", (cs, matcher, dt) -> dt.zone = TimeZone.getTimeZone("PST"));
        register(" ?cest", (cs, matcher, dt) -> dt.zone = TimeZone.getTimeZone("CET"));

        // MSK m=+0.000000001
        register(" msk m=[+-]\\d\\.\\d+");
    }

    /**
     * Registers a predefined standard rule; a regex that is already registered is ignored.
     *
     * @param re The rule's regex
     */
    static synchronized void register(String re) {
        if (!STANDARD_RULES.contains(re)) {
            STANDARD_RULES.add(re);
        }
    }

    /**
     * Registers a predefined customized rule. Registering the same regex again keeps its
     * original position in {@link #CUSTOMIZED_RULES} but replaces its handler.
     *
     * @param re      The rule's regex
     * @param handler The callback associated with the regex
     */
    static synchronized void register(String re, RuleHandler handler) {
        if (!CUSTOMIZED_RULE_MAP.containsKey(re)) {
            CUSTOMIZED_RULES.add(re);
        }
        CUSTOMIZED_RULE_MAP.put(re, handler);
    }

    /** Whether an ambiguous day/month pair is read as mm/dd (true) or dd/mm (false). */
    private boolean preferMonthFirst = false;
    /** All rules of this builder, standard and customized, in registration order. */
    private final List<String> rules = new ArrayList<>();
    /** The subset of {@link #rules} that are standard rules. */
    private final Set<String> standardRules = new HashSet<>();
    /** Handlers of this builder's customized rules, keyed by regex. */
    private final Map<String, RuleHandler> customizedRuleMap = new HashMap<>();

    /**
     * Creates a builder pre-populated with every predefined rule: standard rules first, then
     * customized rules.
     */
    DateParserBuilder() {
        // predefined standard rules
        this.rules.addAll(DateParserBuilder.STANDARD_RULES);
        this.standardRules.addAll(DateParserBuilder.STANDARD_RULES);
        // predefined customized rules
        this.rules.addAll(DateParserBuilder.CUSTOMIZED_RULES);
        this.customizedRuleMap.putAll(DateParserBuilder.CUSTOMIZED_RULE_MAP);
    }

    /**
     * Mark this parser prefer mm/dd or not.
     *
     * @param preferMonthFirst True means prefer mm/dd, False means prefer dd/mm.
     * @return This
     */
    public DateParserBuilder preferMonthFirst(boolean preferMonthFirst) {
        this.preferMonthFirst = preferMonthFirst;
        return this;
    }

    /**
     * Add a standard rule which could parse the specified subsequence. A rule that is already
     * known as a standard rule is ignored.
     *
     * @param rule Standard rule which should have some specified groupName
     * @return This
     * @throws NullPointerException if {@code rule} is null
     */
    public DateParserBuilder addRule(String rule) {
        Objects.requireNonNull(rule, "rule");
        // Set.add reports whether the rule was new, which replaces a separate contains() check.
        if (standardRules.add(rule)) {
            rules.add(rule);
        }
        return this;
    }

    /**
     * Add a customized rule which could parse any subsequence. Adding a regex that is already
     * known as a customized rule keeps its position but replaces its handler.
     *
     * @param rule    The parsing rule in regex
     * @param handler The parsing callback
     * @return This
     * @throws NullPointerException if {@code rule} or {@code handler} is null
     */
    public DateParserBuilder addRule(String rule, RuleHandler handler) {
        Objects.requireNonNull(rule, "rule");
        Objects.requireNonNull(handler, "handler");
        if (!customizedRuleMap.containsKey(rule)) {
            rules.add(rule);
        }
        customizedRuleMap.put(rule, handler);
        return this;
    }

    /**
     * Build the final DateParser instance.
     *
     * <p>The parser receives snapshots of this builder's rules, so rules added to the builder
     * after this call do not leak into a parser that was already built.
     *
     * @return DateParser
     */
    public DateParser build() {
        return new DateParser(
                new ArrayList<>(rules),
                new HashSet<>(standardRules),
                new HashMap<>(customizedRuleMap),
                preferMonthFirst);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy