All downloads are free. The search and download features use the official Maven repository.

org.apache.hadoop.hive.ql.udf.UDFDateFloor Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf;

import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.NoSuchElementException;

import org.apache.hadoop.hive.common.type.TimestampTZ;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.serde2.io.TimestampLocalTZWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.joda.time.Chronology;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.Period;
import org.joda.time.ReadableDuration;
import org.joda.time.chrono.ISOChronology;

import com.facebook.presto.hive.$internal.com.google.common.collect.ImmutableMap;

/**
 * UDFDateFloor.
 *
 * Abstract class that converts a timestamp to a timestamp with a given granularity.
 */
public abstract class UDFDateFloor extends UDF {

  /** Granularity every input is floored to; resolved once from the constructor argument. */
  private final QueryGranularity granularity;
  /** Reusable holder that {@link #evaluate(TimestampWritable)} fills in and returns. */
  private final TimestampWritable resultTS;
  /** Reusable holder that {@link #evaluate(TimestampLocalTZWritable)} fills in and returns. */
  private final TimestampLocalTZWritable resultTSLTZ;

  /**
   * Creates a floor function for the named granularity.
   *
   * @param granularity granularity name, resolved through
   *        {@code QueryGranularity.fromString(String)}
   */
  public UDFDateFloor(String granularity) {
    this.granularity = QueryGranularity.fromString(granularity);
    resultTS = new TimestampWritable();
    resultTSLTZ = new TimestampLocalTZWritable();
  }

  /**
   * Floors a timestamp to this function's granularity.
   *
   * <p>The input's wall-clock fields, as read in the JVM default time zone, are re-labelled as
   * UTC, truncated, and then re-labelled back into the default time zone.
   *
   * @param t timestamp to floor; may be {@code null}
   * @return the shared result writable holding the floored value, or {@code null} when
   *         {@code t} is {@code null}; the same writable instance is returned on every call
   */
  public TimestampWritable evaluate(TimestampWritable t) {
    if (t == null) {
      return null;
    }
    // Read the epoch millis in the default zone, then keep the same fields but label them UTC.
    final DateTime inDefaultZone = new DateTime(t.getTimestamp().getTime());
    final long wallClockAsUtc = inDefaultZone.withZoneRetainFields(DateTimeZone.UTC).getMillis();

    final long flooredUtc = granularity.truncate(wallClockAsUtc);

    // Reverse the re-labelling: keep the floored fields and attach the default zone again.
    final DateTime flooredInUtc = new DateTime(flooredUtc, DateTimeZone.UTC);
    resultTS.setTime(flooredInUtc.withZoneRetainFields(DateTimeZone.getDefault()).getMillis());
    return resultTS;
  }

  /**
   * Floors a timestamp-with-local-time-zone value to this function's granularity.
   *
   * <p>The local date-time of the input is treated as if it were UTC, truncated, and the
   * resulting local date-time is attached to the input's original zone again.
   *
   * @param t value to floor; may be {@code null}
   * @return the shared result writable holding the floored value, or {@code null} when
   *         {@code t} is {@code null}; the same writable instance is returned on every call
   */
  public TimestampLocalTZWritable evaluate(TimestampLocalTZWritable t) {
    if (t == null) {
      return null;
    }
    final ZonedDateTime zoned = t.getTimestampTZ().getZonedDateTime();
    // Same wall-clock fields, interpreted at offset UTC.
    final long wallClockAsUtc =
        zoned.toLocalDateTime().toInstant(ZoneOffset.UTC).toEpochMilli();

    final long flooredUtc = granularity.truncate(wallClockAsUtc);

    // Turn the floored instant back into wall-clock fields and re-attach the input's zone.
    final LocalDateTime flooredWallClock =
        LocalDateTime.ofInstant(Instant.ofEpochMilli(flooredUtc), ZoneOffset.UTC);
    resultTSLTZ.set(new TimestampTZ(flooredWallClock.atZone(zoned.getZone())));
    return resultTSLTZ;
  }

  /*
   * The granularity code below, which computes the result for the granularity functions,
   * was adapted from Druid.
   */

  /**
   * Calendar-based granularities keyed by their upper-case name. Each one is a
   * {@link PeriodGranularity} built without an explicit origin or time zone.
   */
  private static final Map<String, PeriodGranularity> CALENDRIC_GRANULARITIES = ImmutableMap.of(
          "YEAR", new PeriodGranularity(new Period("P1Y"), null, null),
          "MONTH", new PeriodGranularity(new Period("P1M"), null, null),
          "QUARTER", new PeriodGranularity(new Period("P3M"), null, null),
          "WEEK", new PeriodGranularity(new Period("P1W"), null, null));

  /**
   * Base contract for a granularity: a way to bucket epoch-millisecond values.
   */
  private static abstract class QueryGranularity {

    /** Returns the value that follows {@code offset} by one granularity step. */
    public abstract long next(long offset);

    /** Returns the start of the bucket that contains {@code offset}. */
    public abstract long truncate(long offset);

    /** Converts an epoch-millisecond value into a Joda {@link DateTime}. */
    public abstract DateTime toDateTime(long offset);

    /** Returns the bucket starts covering the range from {@code start} up to {@code end}. */
    public abstract Iterable<Long> iterable(final long start, final long end);

    /**
     * Resolves a granularity by name, ignoring case.
     *
     * <p>Names present in {@code CALENDRIC_GRANULARITIES} yield the shared calendar
     * granularity; any other name must match a {@link MillisIn} constant and yields a
     * fixed-length {@link DurationGranularity} with origin 0.
     *
     * @param str granularity name; must not be {@code null}
     * @return the matching granularity
     * @throws IllegalArgumentException if the name matches no known granularity
     */
    public static QueryGranularity fromString(String str) {
      // Locale.ROOT keeps the lookup independent of the JVM default locale
      // (e.g. the Turkish dotted/dotless 'i' would otherwise break "minute").
      final String name = str.toUpperCase(Locale.ROOT);
      // ImmutableMap never holds null values, so a null result means "not calendric".
      final QueryGranularity calendric = CALENDRIC_GRANULARITIES.get(name);
      if (calendric != null) {
        return calendric;
      }
      return new DurationGranularity(convertValue(str), 0);
    }

    /** Fixed-length granularities and their length in milliseconds. */
    private enum MillisIn {
      SECOND(1000), MINUTE(60 * 1000), FIFTEEN_MINUTE(15 * 60 * 1000), THIRTY_MINUTE(
              30 * 60 * 1000), HOUR(3600 * 1000), DAY(24 * 3600 * 1000);

      private final long millis;

      MillisIn(final long millis) {
        this.millis = millis;
      }
    }

    /**
     * Converts a granularity description into a length in milliseconds.
     *
     * @param o a {@link MillisIn} name (case-insensitive), a {@link ReadableDuration}, or a
     *        {@link Number} of milliseconds
     * @return the length in milliseconds
     * @throws IllegalArgumentException if {@code o} is an unknown name or an unsupported type
     */
    private static long convertValue(Object o) {
      if (o instanceof String) {
        final String name = ((String) o).toUpperCase(Locale.ROOT);
        try {
          return MillisIn.valueOf(name).millis;
        } catch (IllegalArgumentException e) {
          // Re-throw with the offending value so the failure is diagnosable.
          throw new IllegalArgumentException("Granularity not recognized: " + o, e);
        }
      } else if (o instanceof ReadableDuration) {
        return ((ReadableDuration) o).getMillis();
      } else if (o instanceof Number) {
        return ((Number) o).longValue();
      }
      throw new IllegalArgumentException("Granularity not recognized: " + o);
    }
  }

  private static abstract class BaseQueryGranularity extends QueryGranularity {
    public abstract long next(long offset);

    public abstract long truncate(long offset);

    public DateTime toDateTime(long offset) {
      return new DateTime(offset, DateTimeZone.UTC);
    }

    public Iterable iterable(final long start, final long end) {
      return new Iterable() {
        @Override
        public Iterator iterator() {
          return new Iterator() {
            long curr = truncate(start);

            long next = BaseQueryGranularity.this.next(curr);

            @Override
            public boolean hasNext() {
              return curr < end;
            }

            @Override
            public Long next() {
              if (!hasNext()) {
                throw new NoSuchElementException();
              }

              long retVal = curr;

              curr = next;
              next = BaseQueryGranularity.this.next(curr);

              return retVal;
            }

            @Override
            public void remove() {
              throw new UnsupportedOperationException();
            }
          };
        }
      };
    }
  }

  private static class PeriodGranularity extends BaseQueryGranularity {
    /** Length of one bucket. */
    private final Period period;
    /** ISO chronology in the requested zone, or in UTC when no zone was given. */
    private final Chronology chronology;
    /** Epoch milliseconds that multi-period buckets are aligned to. */
    private final long origin;
    /** Whether the caller supplied an explicit origin. */
    private final boolean hasOrigin;
    /** Whether more than one field of {@link #period} is positive. */
    private final boolean isCompound;

    /**
     * Creates a period-based granularity.
     *
     * @param period length of one bucket
     * @param origin alignment point for buckets, or {@code null} to use 1970-01-01T00:00 in
     *        the chronology's zone
     * @param tz time zone for the calendar arithmetic, or {@code null} for UTC
     */
    public PeriodGranularity(Period period, DateTime origin, DateTimeZone tz) {
      this.period = period;
      if (tz != null) {
        this.chronology = ISOChronology.getInstance(tz);
      } else {
        this.chronology = ISOChronology.getInstanceUTC();
      }
      if (origin != null) {
        this.hasOrigin = true;
        this.origin = origin.getMillis();
      } else {
        this.hasOrigin = false;
        // default to origin in given time zone when aligning multi-period granularities
        final DateTime epochUtc = new DateTime(0, DateTimeZone.UTC);
        this.origin = epochUtc.withZoneRetainFields(chronology.getZone()).getMillis();
      }
      this.isCompound = isCompoundPeriod(period);
    }

    /** Interprets {@code t} as epoch milliseconds in this granularity's time zone. */
    @Override
    public DateTime toDateTime(long t) {
      final DateTimeZone zone = chronology.getZone();
      return new DateTime(t, zone);
    }

    /** Returns {@code t} advanced by exactly one period using the chronology's arithmetic. */
    @Override
    public long next(long t) {
      final long advanced = chronology.add(period, t, 1);
      return advanced;
    }

    /**
     * Floors {@code t} to the start of the bucket that contains it.
     *
     * <p>Compound periods are delegated to {@code truncateMillisPeriod}, falling back to
     * {@code truncateCompoundPeriod}. Otherwise the largest positive field of the period
     * decides the unit. A single unit without explicit origin is floored on the calendar
     * field itself; a multiple of the unit, or any period with an explicit origin, is aligned
     * on multiples counted from the origin.
     *
     * @param t epoch milliseconds to floor
     * @return epoch milliseconds of the bucket start; {@code t} itself when the period has no
     *         positive field or is a single millisecond
     */
    @Override
    public long truncate(long t) {
      if (isCompound) {
        try {
          return truncateMillisPeriod(t);
        } catch (UnsupportedOperationException e) {
          // The period could not be treated as a fixed millisecond length; step from origin.
          return truncateCompoundPeriod(t);
        }
      }

      final int years = period.getYears();
      if (years > 0) {
        if (years <= 1 && !hasOrigin) {
          return chronology.year().roundFloor(t);
        }
        final int elapsed = chronology.years().getDifference(t, origin);
        final long aligned = chronology.years().add(origin, elapsed - elapsed % years);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.years().add(aligned, -years) : aligned;
      }

      final int months = period.getMonths();
      if (months > 0) {
        if (months <= 1 && !hasOrigin) {
          return chronology.monthOfYear().roundFloor(t);
        }
        final int elapsed = chronology.months().getDifference(t, origin);
        final long aligned = chronology.months().add(origin, elapsed - elapsed % months);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.months().add(aligned, -months) : aligned;
      }

      final int weeks = period.getWeeks();
      if (weeks > 0) {
        if (weeks <= 1 && !hasOrigin) {
          final long startOfDay = chronology.dayOfWeek().roundFloor(t);
          // default to Monday as beginning of the week
          return chronology.dayOfWeek().set(startOfDay, 1);
        }
        // align on multiples from origin
        final int elapsed = chronology.weeks().getDifference(t, origin);
        final long aligned = chronology.weeks().add(origin, elapsed - elapsed % weeks);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.weeks().add(aligned, -weeks) : aligned;
      }

      final int days = period.getDays();
      if (days > 0) {
        if (days <= 1 && !hasOrigin) {
          final long startOfHour = chronology.hourOfDay().roundFloor(t);
          return chronology.hourOfDay().set(startOfHour, 0);
        }
        // align on multiples from origin
        final int elapsed = chronology.days().getDifference(t, origin);
        final long aligned = chronology.days().add(origin, elapsed - elapsed % days);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.days().add(aligned, -days) : aligned;
      }

      final int hours = period.getHours();
      if (hours > 0) {
        if (hours <= 1 && !hasOrigin) {
          final long startOfMinute = chronology.minuteOfHour().roundFloor(t);
          return chronology.minuteOfHour().set(startOfMinute, 0);
        }
        // align on multiples from origin
        final long elapsed = chronology.hours().getDifferenceAsLong(t, origin);
        final long aligned = chronology.hours().add(origin, elapsed - elapsed % hours);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.hours().add(aligned, -hours) : aligned;
      }

      final int minutes = period.getMinutes();
      if (minutes > 0) {
        if (minutes <= 1 && !hasOrigin) {
          final long startOfSecond = chronology.secondOfMinute().roundFloor(t);
          return chronology.secondOfMinute().set(startOfSecond, 0);
        }
        // align on multiples from origin
        final long elapsed = chronology.minutes().getDifferenceAsLong(t, origin);
        final long aligned = chronology.minutes().add(origin, elapsed - elapsed % minutes);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.minutes().add(aligned, -minutes) : aligned;
      }

      final int seconds = period.getSeconds();
      if (seconds > 0) {
        if (seconds <= 1 && !hasOrigin) {
          return chronology.millisOfSecond().set(t, 0);
        }
        // align on multiples from origin
        final long elapsed = chronology.seconds().getDifferenceAsLong(t, origin);
        final long aligned = chronology.seconds().add(origin, elapsed - elapsed % seconds);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.seconds().add(aligned, -seconds) : aligned;
      }

      final int millis = period.getMillis();
      if (millis > 1) {
        // Unlike the larger units, the explicit-origin flag is not consulted here.
        final long elapsed = chronology.millis().getDifferenceAsLong(t, origin);
        final long aligned = chronology.millis().add(origin, elapsed - elapsed % millis);
        // always round down to the previous period (for timestamps prior to origin)
        return t < aligned ? chronology.millis().add(aligned, -millis) : aligned;
      }

      // Single-millisecond period, or no positive field at all: nothing to floor.
      return t;
    }

    /**
     * Tells whether a period has more than one positive field.
     *
     * @param period period to inspect
     * @return {@code true} if at least two of the period's field values are greater than zero
     */
    private static boolean isCompoundPeriod(Period period) {
      int positiveFields = 0;
      for (int fieldValue : period.getValues()) {
        if (fieldValue > 0 && ++positiveFields > 1) {
          // A second positive field was found; no need to look further.
          return true;
        }
      }
      return false;
    }

    /**
     * Floors {@code t} by stepping one period at a time away from the origin until the bucket
     * containing {@code t} is reached.
     *
     * @param t epoch milliseconds to floor
     * @return start of the bucket containing {@code t}
     */
    private long truncateCompoundPeriod(long t) {
      if (t >= origin) {
        // Walk forward: stop at the last bucket start that is not after t.
        long bucketStart = origin;
        long following = chronology.add(period, bucketStart, 1);
        while (t >= following) {
          bucketStart = following;
          following = chronology.add(period, bucketStart, 1);
        }
        return bucketStart;
      }

      // Walk backward: stop at the first bucket start that is not after t.
      long bucketStart = chronology.add(period, origin, -1);
      while (t < bucketStart) {
        bucketStart = chronology.add(period, bucketStart, -1);
      }
      return bucketStart;
    }

    /**
     * Floors {@code t} by treating the period as a fixed number of milliseconds, aligned to
     * the origin.
     *
     * @param t epoch milliseconds to floor
     * @return start of the fixed-length bucket containing {@code t}
     * @throws UnsupportedOperationException if the chronology's day or hour field is not
     *         precise. NOTE(review): {@code toStandardDuration} may also throw this for periods
     *         with month or year fields; confirm against the Joda-Time documentation.
     */
    private long truncateMillisPeriod(final long t) {
      // toStandardDuration assumes days are always 24h, and hours are always 60 minutes,
      // which may not always be the case, e.g if there are daylight saving changes.
      if (!chronology.days().isPrecise() || !chronology.hours().isPrecise()) {
        throw new UnsupportedOperationException(
                "Period cannot be converted to milliseconds as some fields mays vary in length with chronology "
                        + chronology.toString());
      }

      final long bucketMillis = period.toStandardDuration().getMillis();
      final long remainder = t % bucketMillis - origin % bucketMillis;
      // Java's % keeps the sign of the dividend, so shift a negative remainder into range.
      return remainder < 0 ? t - (remainder + bucketMillis) : t - remainder;
    }

    /**
     * Two period granularities are equal when they are of the same class and agree on the
     * explicit-origin flag, origin, chronology and period.
     */
    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }

      final PeriodGranularity other = (PeriodGranularity) o;
      return hasOrigin == other.hasOrigin
          && origin == other.origin
          && chronology.equals(other.chronology)
          && period.equals(other.period);
    }

    /** Hash over the same fields that {@code equals} compares. */
    @Override
    public int hashCode() {
      final int periodHash = period.hashCode();
      final int chronologyHash = chronology.hashCode();
      final int originHash = Long.hashCode(origin);
      final int flagHash = hasOrigin ? 1 : 0;
      return 31 * (31 * (31 * periodHash + chronologyHash) + originHash) + flagHash;
    }

    /** Describes the period, time zone and origin; the origin prints as "null" when not explicit. */
    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder("PeriodGranularity{");
      text.append("period=").append(period);
      text.append(", timeZone=").append(chronology.getZone());
      text.append(", origin=");
      if (hasOrigin) {
        text.append(origin);
      } else {
        text.append("null");
      }
      return text.append('}').toString();
    }
  }

  /**
   * Granularity whose buckets all have the same length in milliseconds, aligned to an origin
   * that is stored reduced modulo the bucket length.
   */
  private static class DurationGranularity extends BaseQueryGranularity {
    /** Bucket length in milliseconds. */
    private final long length;

    /** Alignment offset in milliseconds, already reduced modulo {@link #length}. */
    private final long origin;

    /**
     * @param millis bucket length in milliseconds
     * @param origin alignment point in epoch milliseconds; only its remainder modulo
     *        {@code millis} is kept
     */
    public DurationGranularity(long millis, long origin) {
      length = millis;
      this.origin = origin % millis;
    }

    /** Returns {@code t} advanced by one bucket length. */
    @Override
    public long next(long t) {
      return t + length;
    }

    /** Returns the start of the fixed-length bucket that contains {@code t}. */
    @Override
    public long truncate(final long t) {
      final long bucket = length;
      final long remainder = t % bucket - origin % bucket;
      // Java's % keeps the sign of the dividend, so shift a negative remainder into range.
      return remainder < 0 ? t - (remainder + bucket) : t - remainder;
    }

    /** Returns the bucket length in milliseconds. */
    public long getDurationMillis() {
      return length;
    }

    /** Equal when the other object is of the same class with the same length and origin. */
    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }

      final DurationGranularity other = (DurationGranularity) o;
      return length == other.length && origin == other.origin;
    }

    /** Hash over the same fields that {@code equals} compares. */
    @Override
    public int hashCode() {
      return 31 * Long.hashCode(length) + Long.hashCode(origin);
    }

    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder("DurationGranularity{");
      text.append("length=").append(length);
      text.append(", origin=").append(origin);
      return text.append('}').toString();
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy