All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.storehaus.algebra.query.CalendarTimeQueryRange.scala Maven / Gradle / Ivy

There is a newer version: 0.15.0-RC1
Show newest version
/*
 * Copyright 2013 Twitter Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. You may obtain
 * a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.storehaus.algebra.query

import com.twitter.scalding.{RichDate, DateRange, Duration, AbsoluteDuration, Years, Months, Weeks, Days, Hours, Minutes}
import java.util.TimeZone
import scala.annotation.tailrec

case class CalendarBucket(typeIndx: Int, startTime: Long)

/** A query strategy for time with named buckets.
 */
class CalendarTimeStrategy(strategyTimezone: TimeZone = TimeZone.getTimeZone("UTC")) extends QueryStrategy[DateRange, Long, CalendarBucket] {

  private val allBuckets = {
    implicit val tz = strategyTimezone
    List(Years(1), Months(1), Days(1), Hours(1), Minutes(1))
  }

  def bucketLength(bucket: CalendarBucket) = {
    val endTime = indexToDuration(bucket.typeIndx).addTo(RichDate(bucket.startTime)).timestamp
    endTime - bucket.startTime
  }

  protected def indexToDuration(indx: Int) = allBuckets(indx)
  protected def durationToName(x: Duration): String = x.getClass.getName.split('.').reverse.head

  private def tsToBuckets(msSinceEpoch: Long) =  {
    val richDate = RichDate(msSinceEpoch)
    allBuckets.zipWithIndex.map { case (duration, indx) =>
      CalendarBucket(indx, duration.floorOf(richDate).timestamp)
    }.toSet
  }

  private def len(dr: DateRange) = AbsoluteDuration.fromMillisecs(dr.end.timestamp - dr.start.timestamp + 1L)

  private def outsideRange(filterDR: DateRange, child: DateRange) =
    (child.start >= filterDR.end || child.end <= filterDR.start)

  def query(dr: DateRange): Set[CalendarBucket] = extract(dr, Set(dr), 0, allBuckets, Set[CalendarBucket]())

  @tailrec
  private def extract(filterDr: DateRange, drSet: Set[DateRange], curIndx: Int, remainingDurations: List[Duration], acc: Set[CalendarBucket]): Set[CalendarBucket] = {
    remainingDurations match {
      case Nil => acc
      case head :: tail =>
      // expand the DR
        val expandedOut = drSet.map{ dr =>
                            DateRange(head.floorOf(dr.start), head.floorOf(dr.end) + head)
                                .each(head)
                                .filter(!outsideRange(filterDr, _))
                                .filter(len(_).toMillisecs > 1L)
                                .toSet
                            }.foldLeft(Set[DateRange]()){_ ++ _}
        // Things which only partially fit in this time range
        val others = expandedOut.filter(!filterDr.contains(_))
        // Things which fit fully in this time range
        val fullyInRange = expandedOut
                              .filter(filterDr.contains(_))
                              .map(x => CalendarBucket(curIndx, x.start.timestamp))
        extract(filterDr, others, curIndx + 1, tail, acc ++ fullyInRange)
    }
  }

  def index(ts: Long):Set[CalendarBucket] = tsToBuckets(ts)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy