All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nlpcraft.server.nlp.enrichers.date.NCDateParser.scala Maven / Gradle / Ivy

There is a newer version: 0.8.2
Show newest version
/*
 * “Commons Clause” License, https://commonsclause.com/
 *
 * The Software is provided to you by the Licensor under the License,
 * as defined below, subject to the following condition.
 *
 * Without limiting other conditions in the License, the grant of rights
 * under the License will not include, and the License does not grant to
 * you, the right to Sell the Software.
 *
 * For purposes of the foregoing, “Sell” means practicing any or all of
 * the rights granted to you under the License to provide to third parties,
 * for a fee or other consideration (including without limitation fees for
 * hosting or consulting/support services related to the Software), a
 * product or service whose value derives, entirely or substantially, from
 * the functionality of the Software. Any license notice or attribution
 * required by the License must also include this Commons Clause License
 * Condition notice.
 *
 * Software:    NLPCraft
 * License:     Apache 2.0, https://www.apache.org/licenses/LICENSE-2.0
 * Licensor:    Copyright (C) 2018 DataLingvo, Inc. https://www.datalingvo.com
 *
 *     _   ____      ______           ______
 *    / | / / /___  / ____/________ _/ __/ /_
 *   /  |/ / / __ \/ /   / ___/ __ `/ /_/ __/
 *  / /|  / / /_/ / /___/ /  / /_/ / __/ /_
 * /_/ |_/_/ .___/\____/_/   \__,_/_/  \__/
 *        /_/
 */

package org.nlpcraft.server.nlp.enrichers.date

import java.util.{Locale, Calendar ⇒ C}
import scala.collection.JavaConverters._

/**
  * Date parser.
  */
object NCDateParser {
    // Forces an English default locale (for English calendar settings).
    // Note: this changes the JVM-wide default locale when this object is initialized.
    Locale.setDefault(Locale.forLanguageTag("EN"))

    // USA week: runs from Sunday to Saturday.
    private val FIRST_DAY_OF_WEEK = C.SUNDAY
    private val LAST_DAY_OF_WEEK = C.SATURDAY

    // `Calendar` month constants in calendar order.
    private val CAL_MONTHS = Seq(
        C.JANUARY, C.FEBRUARY, C.MARCH,
        C.APRIL, C.MAY, C.JUNE,
        C.JULY, C.AUGUST, C.SEPTEMBER,
        C.OCTOBER, C.NOVEMBER, C.DECEMBER
    )

    // `Calendar` month constant -> 1-based month number.
    private val MONTH_NUM_MAP: Map[Int, Int] = zipValueIndex(CAL_MONTHS)

    // 1-based month number -> `Calendar` month constant.
    private val NUM_MONTH_MAP: Map[Int, Int] = zipIndexValue(CAL_MONTHS)

    // Quarter number -> `Calendar` month constant the quarter begins with.
    private val QUARTERS_BEGIN = Map(1 → C.JANUARY, 2 → C.APRIL, 3 → C.JULY, 4 → C.OCTOBER)

    // 1-based month number -> quarter number.
    private val QUARTERS: Map[Int, Int] =
        NUM_MONTH_MAP.map { case (monthNum, calMonth) ⇒ monthNum → (calMonth / 3 + 1) }

    // `Calendar` month constant -> season number (1 - winter, 2 - spring, 3 - summer, 4 - autumn).
    private val SEASONS = Map(
        // Winter.
        C.DECEMBER → 1, C.JANUARY → 1, C.FEBRUARY → 1,
        // Spring.
        C.MARCH → 2, C.APRIL → 2, C.MAY → 2,
        // Summer.
        C.JUNE → 3, C.JULY → 3, C.AUGUST → 3,
        // Autumn.
        C.SEPTEMBER → 4, C.OCTOBER → 4, C.NOVEMBER → 4
    )

    // Season number -> `Calendar` month constant the season begins with.
    private val SEASONS_BEGIN = Map(1 → C.DECEMBER, 2 → C.MARCH, 3 → C.JUNE, 4 → C.SEPTEMBER)

    // USA week: `Calendar` day-of-week constants, Sunday first.
    private val WEEK_DAYS = Seq(
        C.SUNDAY, C.MONDAY, C.TUESDAY, C.WEDNESDAY, C.THURSDAY, C.FRIDAY, C.SATURDAY
    )

    // Relative weight of every period code; `calculate` compares the weights of the two
    // parts of a range to decide which bound to shift when the range cannot be used as is.
    private val PERIODS_WEIGHT = Map(
        "d" → 1, "dw" → 1,
        "w" → 2,
        "m" → 3,
        "q" → 4, "s" → 4,
        "y" → 5,
        "e" → 6,
        "c" → 7
    )

    // 1-based day number -> `Calendar` day-of-week constant (1 is Sunday).
    private val NUM_WEEK_DAYS_MAP: Map[Int, Int] = zipIndexValue(WEEK_DAYS)

    /**
      * Resolves a date function expression into a date range.
      *
      * Shapes of `f` that are recognized:
      *  - `:part` - range from `NCDateRange.MIN_VALUE` up to the date produced by `part`;
      *  - `part:` - range from the date produced by `part` up to `NCDateRange.MAX_VALUE`;
      *  - `part1:part2` - range between the dates produced by the two parts;
      *  - anything else is passed to `calculatePart` as a single part.
      *
      * @param f Date function expression.
      * @param base Base timestamp (epoch milliseconds) that the parts are resolved against.
      * @param inclFrom If `true` the lower bound is the start of its part, otherwise its end.
      * @param inclTo If `true` the upper bound is the end of its part, otherwise its start.
      * @return Resolved range together with the original expression and the period codes used.
      */
    def calculate(f: String, base: Long, inclFrom: Boolean = true, inclTo: Boolean = true): NCDateRange =
        if (f.startsWith(":")) {
            // Open start: everything up to the given date.
            val part = calculatePart(f.drop(1), base)
            val upper = mkTo(part, inclTo)

            NCDateRange(NCDateRange.MIN_VALUE, upper, f, (Seq(":") ++ part.periods).asJava)
        }
        else if (f.endsWith(":")) {
            // Open end: everything starting from the given date.
            val part = calculatePart(f.take(f.length - 1), base)
            val lower = mkFrom(part, inclFrom)

            // A start that lies after today is moved one shift period back.
            // NOTE(review): the comparison uses the wall clock, not `base` - confirm that is intended.
            val adjusted =
                if (lower > getTruncatedNow) shift(lower, getShiftPeriod(part.period), -1) else lower

            NCDateRange(adjusted, NCDateRange.MAX_VALUE, f, (part.periods :+ ":").asJava)
        }
        else
            f.split(":") match {
                // Between two dates.
                case Array(left, right) ⇒
                    val first = calculatePart(left, base)
                    val second = calculatePart(right, base)

                    val lower = mkFrom(first, inclFrom)
                    val upper = mkTo(second, inclTo)

                    val allPeriods = (first.periods :+ ":") ++ second.periods

                    def mkRange(from: Long, to: Long): NCDateRange = NCDateRange(from, to, f, allPeriods.asJava)

                    // Moves one of the bounds by its shift period: the lower bound goes back when
                    // its period is the heavier one, otherwise the upper bound goes forward.
                    // Example: 11m:1m, d-1:1m (but current January already finished.)
                    def byWeights: NCDateRange =
                        if (PERIODS_WEIGHT(first.period) > PERIODS_WEIGHT(second.period))
                            mkRange(shift(lower, getShiftPeriod(first.period), -1), upper)
                        else
                            mkRange(lower, shift(upper, getShiftPeriod(second.period), 1))

                    // Special cases where the first part has to take the year of the second one.
                    //
                    // Example 1: "from october to november of 2010"
                    // October should be processed as month of 2010 but not current year.
                    //
                    // Example 2: "from 25th october to 11th november of 2010"
                    // 25th October should be processed as day of 2010, but not current year.
                    def takesYearFromSecond: Boolean =
                        first.samePeriods("m") && second.samePeriods("my") ||
                        first.samePeriods("dm") && second.samePeriods("dmy")

                    // Everything but the plain case is resolved without guarantee.
                    if (lower < upper)
                        mkRange(lower, upper)
                    else if (lower > upper && takesYearFromSecond)
                        mkRange(changeYear(lower, upper), upper)
                    else
                        byWeights

                // Single part.
                case _ ⇒
                    val part = calculatePart(f, base)

                    NCDateRange(part.from, part.to, f, part.periods.asJava)
            }

    // Maps 1-based position to the element found at that position.
    private def zipIndexValue[T](seq: Seq[T]): Map[Int, T] =
        Iterator.from(1).zip(seq.iterator).toMap

    // Maps every element to its 1-based position (for repeated elements the last position wins).
    private def zipValueIndex[T](seq: Seq[T]): Map[T, Int] =
        seq.iterator.zip(Iterator.from(1)).toMap

    /**
      * Sets the given `field → value` pairs on the calendar, in the order they are passed.
      *
      * @param cal Calendar to modify (mutated in place).
      * @param pairs Calendar field constants with the values to set.
      * @return The same calendar instance, to allow chaining.
      */
    private def set(cal: C, pairs: (Int, Int)*): C = {
        pairs.foreach { case (field, value) ⇒ cal.set(field, value) }

        cal
    }

    /**
      * Adds the given `field → amount` pairs to the calendar, in the order they are passed.
      *
      * @param cal Calendar to modify (mutated in place).
      * @param pairs Calendar field constants with the (possibly negative) amounts to add.
      * @return The same calendar instance, to allow chaining.
      */
    private def add(cal: C, pairs: (Int, Int)*): C = {
        pairs.foreach { case (field, amount) ⇒ cal.add(field, amount) }

        cal
    }

    /**
      * Creates a calendar truncated to the start of the day (hour, minute, second and
      * millisecond set to zero).
      *
      * @param d Optional timestamp (epoch milliseconds); when absent the calendar keeps the
      *     time it was created with by `Calendar.getInstance()`.
      * @return New truncated calendar.
      */
    private def mkCalendar(d: Option[Long] = None): C = {
        val cal = C.getInstance()

        d.foreach(ms ⇒ cal.setTimeInMillis(ms))

        set(cal, C.HOUR_OF_DAY → 0, C.MINUTE → 0, C.SECOND → 0, C.MILLISECOND → 0)
    }

    // Current date truncated to the start of the day, in epoch milliseconds.
    private def getTruncatedNow: Long = {
        val today = mkCalendar()

        today.getTimeInMillis
    }

    // Start of a period: of the period with the given number when `opt` is defined,
    // otherwise of the period that contains date `d`.
    private def mkFrom(period: String, opt: Option[Int], d: Long): Long =
        opt.map(v ⇒ mkFrom(period, v, d)).getOrElse(mkFrom(period, d))

    // Winter (season 1) starts in previous year: moves the calendar one year back for it
    // and leaves the calendar untouched for any other season.
    private def adjustWinter(season: Int, cal: C) =
        season match {
            case 1 ⇒ add(cal, C.YEAR → -1)
            case _ ⇒ ()
        }

    /**
      * Start (at midnight) of the period of the given kind that contains date `d`.
      *
      * Period codes: `d` - day, `dw` - day of week (same as day here), `w` - week, `m` - month,
      * `q` - quarter, `s` - season, `y` - year, `e` - decade, `c` - century.
      * Any other code fails with `MatchError`.
      *
      * @param period Period code.
      * @param d Date (epoch milliseconds) that must belong to the resulting period.
      * @return Start of the period in epoch milliseconds.
      */
    private def mkFrom(period: String, d: Long): Long = {
        val cal = mkCalendar(Some(d))

        def set0(pairs: (Int, Int)*): C = set(cal, pairs: _*)

        // First year of the block of `years` years that contains the current year. Blocks
        // run from year xx..1 to year xx..0 (e.g. decade 2011-2020, century 2001-2100), the same
        // convention the numbered century form and the last-day-of-decade/century functions use.
        // Going through `year - 1` keeps the closing year (2020, 2100) inside its own block
        // instead of producing a start that lies after `d`.
        def blockStart(years: Int): Int = (cal.get(C.YEAR) - 1) / years * years + 1

        (period match {
            case "d" | "dw" ⇒ cal
            case "w" ⇒ set0(C.DAY_OF_WEEK → FIRST_DAY_OF_WEEK)
            case "m" ⇒ set0(C.DAY_OF_MONTH → 1)
            case "q" ⇒ set0(
                C.MONTH → QUARTERS_BEGIN(QUARTERS(MONTH_NUM_MAP(cal.get(C.MONTH)))),
                C.DAY_OF_MONTH → 1
            )
            case "y" ⇒ set0(
                C.MONTH → C.JANUARY,
                C.DAY_OF_YEAR → 1
            )
            case "e" ⇒ set0(
                C.YEAR → blockStart(10),
                C.MONTH → C.JANUARY,
                C.DAY_OF_YEAR → 1
            )
            case "c" ⇒ set0(
                C.YEAR → blockStart(100),
                C.MONTH → C.JANUARY,
                C.DAY_OF_YEAR → 1
            )
            case "s" ⇒
                val month = cal.get(C.MONTH)
                val s = SEASONS(month)

                // Winter starts in December of the previous year - unless the date already is
                // in December, in which case the winter it belongs to starts in the same year.
                if (month != C.DECEMBER)
                    adjustWinter(s, cal)

                set0(
                    C.MONTH → SEASONS_BEGIN(s),
                    C.DAY_OF_MONTH → 1
                )
        }).getTimeInMillis
    }

    /**
      * Start (at midnight) of the period with number `v`, resolved relative to date `d`.
      *
      * Meaning of `v` per period code: `d` - day of month, `w` - week of month, `m` - month
      * number (1-12), `q` - quarter number, `y` - year, `e` - decade number inside the century
      * that contains `d`, `c` - century number, `dw` - day of week number (1 is Sunday),
      * `s` - season number. Any other code fails with `MatchError`; month, quarter, weekday and
      * season numbers missing in the lookup tables fail with `NoSuchElementException`.
      *
      * @param period Period code.
      * @param v Number of the period.
      * @param d Date (epoch milliseconds) that provides the fields not covered by `period`.
      * @return Start of the period in epoch milliseconds.
      */
    private def mkFrom(period: String, v: Int, d: Long): Long = {
        val cal = mkCalendar(Some(d))

        def set0(pairs: (Int, Int)*): C = set(cal, pairs: _*)

        (period match {
            case "d" ⇒ set0(C.DAY_OF_MONTH → v)
            case "w" ⇒ set0(
                C.DAY_OF_WEEK → FIRST_DAY_OF_WEEK,
                C.WEEK_OF_MONTH → v
            )
            case "m" ⇒ set0(
                C.MONTH → NUM_MONTH_MAP(v),
                C.DAY_OF_MONTH → 1
            )
            case "q" ⇒ set0(
                C.MONTH → QUARTERS_BEGIN(v),
                C.DAY_OF_MONTH → 1
            )
            case "y" ⇒ set0(
                C.YEAR → v,
                C.MONTH → C.JANUARY,
                C.DAY_OF_MONTH → 1
            )
            case "e" ⇒
                // Centuries run from year xx01 to year (xx+1)00 (see the "c" case), so the base
                // of the current century is derived from `year - 1`: for year 2000 it is 1900,
                // not 2000, because 2000 is the closing year of the century that began in 1901.
                set0(
                    C.YEAR → ((cal.get(C.YEAR) - 1) / 100 * 100 + (v - 1) * 10 + 1),
                    C.MONTH → C.JANUARY,
                    C.DAY_OF_YEAR → 1
                )
            case "c" ⇒ set0(
                C.YEAR → ((v - 1) * 100 + 1),
                C.MONTH → C.JANUARY,
                C.DAY_OF_MONTH → 1
            )
            case "dw" ⇒ set0(C.DAY_OF_WEEK → NUM_WEEK_DAYS_MAP(v))
            case "s" ⇒
                // Winter is always taken as the one that started in December of the previous year.
                adjustWinter(v, cal)

                set0(
                    C.MONTH → SEASONS_BEGIN(v),
                    C.DAY_OF_MONTH → 1
                )
        }).getTimeInMillis
    }

    /**
      * End of a period that starts at `from`: the start moved forward by the length of one
      * period of the given kind.
      *
      * @param period Period code; an unknown code fails with `MatchError`.
      * @param from Period start in epoch milliseconds (truncated to the start of the day).
      * @return Period end in epoch milliseconds.
      */
    private def mkTo(period: String, from: Long): Long = {
        // Calendar field and amount that make up one period.
        val (field, amount) = period match {
            case "d" | "dw" ⇒ (C.DAY_OF_YEAR, 1)
            case "w" ⇒ (C.WEEK_OF_YEAR, 1)
            case "m" ⇒ (C.MONTH, 1)
            case "q" | "s" ⇒ (C.MONTH, 3)
            case "y" ⇒ (C.YEAR, 1)
            case "e" ⇒ (C.YEAR, 10)
            case "c" ⇒ (C.YEAR, 100)
        }

        add(mkCalendar(Some(from)), field → amount).getTimeInMillis
    }

    /**
      * Moves a date by `n` steps of the given period kind.
      *
      * @param d Date in epoch milliseconds (truncated to the start of the day).
      * @param period Period code; an unknown code fails with `MatchError`.
      * @param n Number of steps, negative to move back.
      * @return Moved date in epoch milliseconds.
      */
    private def shift(d: Long, period: String, n: Int): Long = {
        // Calendar field and amount that correspond to `n` steps.
        // A step of "dw" is a week and a step of "s" is a year.
        // NOTE(review): "q" steps by 3 months while "s" steps by a whole year - confirm intended.
        val (field, amount) = period match {
            case "d" ⇒ (C.DAY_OF_YEAR, n)
            case "w" | "dw" ⇒ (C.WEEK_OF_YEAR, n)
            case "m" ⇒ (C.MONTH, n)
            case "q" ⇒ (C.MONTH, 3 * n)
            case "y" | "s" ⇒ (C.YEAR, n)
            case "e" ⇒ (C.YEAR, 10 * n)
            case "c" ⇒ (C.YEAR, 100 * n)
        }

        add(mkCalendar(Some(d)), field → amount).getTimeInMillis
    }

    // Parses a (possibly signed) integer; an empty string means "no value".
    // Non-empty text that is not an integer fails with `NumberFormatException`.
    private def parseInt(s: String): Option[Int] =
        if (s.isEmpty) None else Some(s.toInt)

    // Plus or minus.
    private def isSign(ch: Char) =
        ch match {
            case '+' | '-' ⇒ true
            case _ ⇒ false
        }

    // Any character `Character.isDigit` accepts.
    private def isDigit(ch: Char) = Character.isDigit(ch)

    // Everything that is neither a sign nor a digit (not only alphabetic characters).
    private def isLetter(ch: Char) = !(isSign(ch) || isDigit(ch))

    // Sign or digit.
    private def isSignOrDigit(ch: Char) = isDigit(ch) || isSign(ch)

    // Period by which a date of the given period kind is shifted: month, quarter and season
    // are shifted by a year, day of week by a week, everything else by itself.
    private def getShiftPeriod(period: String) =
        if (period == "dw")
            "w"
        else if (period == "m" || period == "q" || period == "s")
            "y"
        else
            period // d, w, y, e, c

    /**
      * Resolves a comma-separated chain of date functions against a base timestamp.
      *
      * Functions are applied left to right. Every function (except `now`, which always uses
      * `base`) starts from the `from` bound of the result accumulated so far, and the period
      * codes of all applied functions are collected in the result.
      *
      * Recognized functions:
      *  - `now` - the day of `base`;
      *  - `$dw`, `$dm`, `$dq`, `$dy`, `$de`, `$dc`, `$ds` - last day of the current week, month,
      *    quarter, year, decade, century and season;
      *  - text starting with a digit (`4d`, `4d+2`) - numbered period with an optional shift;
      *  - anything else (`m`, `m+2`, `m2`) - current period with an optional shift, or a duration.
      *
      * @param fns Comma-separated date functions.
      * @param base Base timestamp in epoch milliseconds.
      * @return Range, last period code and the period codes of the preceding functions.
      */
    private[date] def calculatePart(fns: String, base: Long): PartResult =
        fns.split(",").map(_.trim).foldLeft(PartResult(base, base, "", Seq.empty[String])) { (res, fn) ⇒
            val resFrom = res.from

            // Rest of the function text after the given leading pieces.
            def after(heads: String*): String = fn.drop(heads.map(_.length).sum)

            // Numeric period with optional shift.
            // 4d; 4d+2
            def periodRange(): PartResult = {
                val d1 = fn.takeWhile(isDigit)
                val period = after(d1).takeWhile(isLetter)
                val d2Opt = after(period, d1).takeWhile(isSignOrDigit)

                val from = mkFrom(period, parseInt(d1), resFrom)
                val to = mkTo(period, from)

                parseInt(d2Opt) match {
                    case Some(d2) ⇒
                        val periodShift = getShiftPeriod(period)

                        PartResult(shift(from, periodShift, d2), shift(to, periodShift, d2), period, res.periods)
                    case None ⇒ PartResult(from, to, period, res.periods)
                }
            }

            // Duration (2d etc) and current period (d etc) with optional shift.
            // m, m+2, m2 (not m2+2)
            def durationRange(): PartResult = {
                val period = fn.takeWhile(isLetter)
                val d1Opt = after(period).takeWhile(isDigit)
                val d2Opt = after(period).takeWhile(isSignOrDigit)

                parseInt(d1Opt) match {
                    // m2 (shift impossible)
                    case Some(d1) ⇒
                        val from = resFrom
                        val to = shift(from, period, d1)

                        PartResult(from, to, period, res.periods)
                    // m
                    case None ⇒
                        val from = mkFrom(period, None, resFrom)
                        val to = mkTo(period, from)

                        parseInt(d2Opt) match {
                            case Some(d2) ⇒
                                PartResult(shift(from, period, d2), shift(to, period, d2), period, res.periods)
                            case None ⇒ PartResult(from, to, period, res.periods)
                        }
                }
            }

            // One-day range that starts at the calendar's date (the calendar is moved one day forward).
            def mkDayResult(c: C): PartResult =
                PartResult(c.getTimeInMillis, add(c, C.DAY_OF_YEAR → 1).getTimeInMillis, "d", res.periods)

            def now(): PartResult = mkDayResult(mkCalendar(Some(base)))

            // One-day range for the date `move` turns the current `from` date into.
            def lastDay(move: C ⇒ C): PartResult = mkDayResult(move(mkCalendar(Some(resFrom))))

            def ldw(): PartResult = lastDay((c: C) ⇒ set(c, C.DAY_OF_WEEK → LAST_DAY_OF_WEEK))
            def ldm(): PartResult = lastDay((c: C) ⇒ set(c, C.DAY_OF_MONTH → c.getActualMaximum(C.DAY_OF_MONTH)))
            def ldy(): PartResult = lastDay((c: C) ⇒ set(c, C.DAY_OF_YEAR → c.getActualMaximum(C.DAY_OF_YEAR)))

            // Last day of the current block of `years` years (block ends with a year divisible by `years`).
            def ldYears(years: Int): PartResult = lastDay((c: C) ⇒ {
                val curYear = c.get(C.YEAR)
                val rem = curYear % years
                val delta = if (rem == 0) 0 else years - rem

                // Shouldn't be in one function call (last day is relative)
                set(c, C.YEAR → (curYear + delta))
                set(c, C.DAY_OF_YEAR → c.getActualMaximum(C.DAY_OF_YEAR))
            })

            def lde(): PartResult = ldYears(10)
            def ldc(): PartResult = ldYears(100)

            // Moves the calendar to the last day of the given month of its current year.
            def toLastDayOfMonth(c: C, calMonth: Int): C = {
                // The day is pinned to 1 before the month is switched: the calendar is lenient,
                // so e.g. May 31 switched to June would silently roll over to July 1.
                set(c, C.DAY_OF_MONTH → 1)

                // Shouldn't be in one function call (last day is relative)
                set(c, C.MONTH → calMonth)
                set(c, C.DAY_OF_MONTH → c.getActualMaximum(C.DAY_OF_MONTH))
            }

            // Last day of the current quarter.
            def ldq(): PartResult = lastDay((c: C) ⇒ {
                val quarter = QUARTERS(MONTH_NUM_MAP(c.get(C.MONTH)))

                // Quarter N ends with month number 3 * N.
                toLastDayOfMonth(c, NUM_MONTH_MAP(quarter * 3))
            })

            // Last day of the current season.
            def lds(): PartResult = lastDay((c: C) ⇒ {
                // `SEASONS` is keyed by calendar month constants (unlike `QUARTERS`, which is
                // keyed by 1-based month numbers), so it is looked up with the raw month.
                val month = c.get(C.MONTH)

                val lastMonth = SEASONS(month) match {
                    case 1 ⇒ C.FEBRUARY
                    case 2 ⇒ C.MAY
                    case 3 ⇒ C.AUGUST
                    case 4 ⇒ C.NOVEMBER
                }

                // Winter that is entered in December ends in February of the following year.
                if (month == C.DECEMBER) {
                    set(c, C.DAY_OF_MONTH → 1)
                    add(c, C.YEAR → 1)
                }

                toLastDayOfMonth(c, lastMonth)
            })

            fn match {
                case "now" ⇒ now()

                case "$dw" ⇒ ldw()
                case "$dm" ⇒ ldm()
                case "$dq" ⇒ ldq()
                case "$dy" ⇒ ldy()
                case "$de" ⇒ lde()
                case "$dc" ⇒ ldc()
                case "$ds" ⇒ lds()

                case _ if fn.head.isDigit ⇒ periodRange() // 4d; 4d+2

                case _ ⇒ durationRange() // m, m+2, m2 (not m2+2)
            }
        }

    // Creates date based on 'base' with year taken from 'yd'.
    private def changeYear(base: Long, yd: Long): Long = {
        val cal = mkCalendar(Some(yd))
        val year = cal.get(C.YEAR)

        // The calendar is reused: it is re-pointed at `base` and only the year is replaced.
        cal.setTimeInMillis(base)

        set(cal, C.YEAR → year).getTimeInMillis
    }

    // Lower bound taken from a part: its start when inclusive, its end otherwise.
    private def mkFrom(range: PartResult, incl: Boolean): Long =
        if (!incl) range.to else range.from
    // Upper bound taken from a part: its end when inclusive, its start otherwise.
    private def mkTo(range: PartResult, incl: Boolean): Long =
        if (!incl) range.from else range.to

    /**
      * Result of resolving one date function inside a chain.
      *
      * @param from Range start in epoch milliseconds.
      * @param to Range end in epoch milliseconds.
      * @param period Period code of the last applied function (empty when none was applied).
      * @param parents Period codes of the functions applied before the last one.
      */
    case class PartResult(from: Long, to: Long, period: String, parents: Seq[String]) {
        /** Period codes of the whole chain: the parents followed by own period, if there is one. */
        def periods: Seq[String] = if (!period.isEmpty) parents :+ period else parents

        /**
          * Checks whether the chain consists of exactly the given period characters,
          * regardless of their order.
          *
          * @param ps Expected period codes concatenated into one string.
          */
        def samePeriods(ps: String): Boolean = {
            val actual = periods.mkString.sorted
            val expected = ps.sorted

            actual == expected
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy