water.parser.ParseTime Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of h2o-core Show documentation
H2O Core
There is a newer version: 3.8.2.9
package water.parser;

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
import water.util.Log;

import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

public abstract class ParseTime {
  // Deduce if we are looking at a Date/Time value, or not.
  // If so, return time as msec since Jan 1, 1970 or Long.MIN_VALUE.

  // I tried java.util.SimpleDateFormat, but it just throws too many
  // exceptions, including ParseException, NumberFormatException, and
  // ArrayIndexOutOfBoundsException... and the Piece de resistance: a
  // ClassCastException deep in the SimpleDateFormat code:
  // "sun.util.calendar.Gregorian$Date cannot be cast to sun.util.calendar.JulianCalendar$Date"

  public static final boolean isTime(BufferedString str) {
    return attemptTimeParse(str) != Long.MIN_VALUE;
  }

  private static final byte MMS[][][] = new byte[][][] {
    {"jan".getBytes(),"january"  .getBytes()},
    {"feb".getBytes(),"february" .getBytes()},
    {"mar".getBytes(),"march"    .getBytes()},
    {"apr".getBytes(),"april"    .getBytes()},
    {"may".getBytes(),"may"      .getBytes()},
    {"jun".getBytes(),"june"     .getBytes()},
    {"jul".getBytes(),"july"     .getBytes()},
    {"aug".getBytes(),"august"   .getBytes()},
    {"sep".getBytes(),"september".getBytes()},
    {"oct".getBytes(),"october"  .getBytes()},
    {"nov".getBytes(),"november" .getBytes()},
    {"dec".getBytes(),"december" .getBytes()}
  };

  public static long attemptTimeParse( BufferedString str ) {
    try {
      long t0 = attemptYearFirstTimeParse(str); // "yyyy-MM-dd" and time if present
      if( t0 != Long.MIN_VALUE ) return t0;
      long t2 = attemptDayFirstTimeParse(str); // "dd-MMM-yy" and time if present
      if( t2 != Long.MIN_VALUE ) return t2;
    } catch( org.joda.time.IllegalFieldValueException | // Not time at all
             org.joda.time.IllegalInstantException      // Parsed as time, but falls into e.g. a daylight-savings hour hole
             ie ) { } //FIXME should collect errors and report at end of parse
    return Long.MIN_VALUE;
  }
  // Tries to parse "yyyy-MM-dd [HH:mm:ss.SSS aa]"
  private static long attemptYearFirstTimeParse(BufferedString str) {
    final byte[] buf = str.getBuffer();
    int i=str.getOffset();
    final int end = i+str.length();
    while( i < end && buf[i] == ' ' ) i++;
    if   ( i < end && buf[i] == '"' ) i++;
    if( (end-i) != 10 && (end-i) < 19 ) return Long.MIN_VALUE;
    int yyyy=0, MM=0, dd=0;

    // Parse date
    yyyy = digit(yyyy,buf[i++]);
    yyyy = digit(yyyy,buf[i++]);
    yyyy = digit(yyyy,buf[i++]);
    yyyy = digit(yyyy,buf[i++]);
    if( buf[i++] != '-' ) return Long.MIN_VALUE;
    MM = digit(MM,buf[i++]);
    MM = digit(MM,buf[i++]);
    if( MM < 1 || MM > 12 ) return Long.MIN_VALUE;
    if( buf[i++] != '-' ) return Long.MIN_VALUE;
    dd = digit(dd,buf[i++]);
    dd = digit(dd,buf[i++]);
    if( dd < 1 || dd > 31 ) return Long.MIN_VALUE;
    if( i==end )
      return new DateTime(yyyy,MM,dd,0,0,0, getTimezone()).getMillis();

    //Parse time
    if( buf[i++] != ' ' ) return Long.MIN_VALUE;
    return parseTime(buf, i, end, yyyy, MM, dd);
  }

  // Tries to parse "dd[-]MMM[-]yy[yy][:' '][HH:mm:ss.SSS aa]"
  // where MMM is a text representation of the month (e.g. Jul or July)
  private static long attemptDayFirstTimeParse(BufferedString str) {
    final byte[] buf = str.getBuffer();
    int i=str.getOffset();
    final int end = i+str.length();
    while( i < end && buf[i] == ' ' ) i++;
    if   ( i < end && buf[i] == '"' ) i++;
    if( (end-i) < 8 ) return Long.MIN_VALUE;
    int yyyy=0, MM=0, dd=0;

    // Parse date
    dd = digit(dd,buf[i++]);
    if( isDigit(buf[i]) ) dd = digit(dd,buf[i++]);
    if( dd < 1 || dd > 31 ) return Long.MIN_VALUE;
    if( buf[i] == '-' ) i++;
    if( !isChar(buf[i]) ) return Long.MIN_VALUE;
    //convert MMM or full month name to MM
    byte[] MMM = null;
    OUTER: for( ; MM= end) continue INNER;
        for (int j = 0; j < MMM.length; j++)
          if (MMM[j] != Character.toLowerCase(buf[i + j]))
            continue INNER;
        if (buf[i + MMM.length] == '-' || isDigit(buf[i + MMM.length])) break OUTER;
      }
    }
    if( MM == MMS.length ) return Long.MIN_VALUE; // No matching month
    i += MMM.length;             // Skip month bytes
    MM++;                       // 1-based month
    if( buf[i] == '-' ) i++;
    yyyy = digit(yyyy,buf[i++]);    // 2-digit year
    yyyy = digit(yyyy,buf[i++]);
    if( end-i>=2 && buf[i] != '"' && buf[i] != ' ' && buf[i] != ':') {
      yyyy = digit(yyyy,buf[i++]);  // 4-digit year
      yyyy = digit(yyyy,buf[i++]);
    } else { //POSIX 2004 & 2008 says 69-99 -> 1900s, 00-68 -> 2000s
      if (yyyy >= 69)
        yyyy += 1900;
      else
        yyyy += 2000;
    }
    if( i 23 ) return Long.MIN_VALUE;
    if( buf[i] != ':' && buf[i] != '.' ) return Long.MIN_VALUE;
    ++i;
    mm = digit(mm,buf[i++]);
    mm = digit(mm,buf[i++]);
    if( mm < 0 || mm > 59 ) return Long.MIN_VALUE;
    if( buf[i] != ':' && buf[i] != '.' ) return Long.MIN_VALUE;
    ++i;
    ss = digit(ss,buf[i++]);
    ss = digit(ss,buf[i++]);
    if( ss < 0 || ss > 59 ) return Long.MIN_VALUE;
    if( i 999 ) return Long.MIN_VALUE;
      if ((end - i) >= 6) // nanoseconds are included
        i += 6; // ignore
    }
    if( i 12) return Long.MIN_VALUE;
      // convert 1-12 hours into 0-23
      if (buf[i] == 'P') // PM
        if (HH < 12) HH += 12;
      else // AM
        if (HH == 12) HH = 0;
      i += 2;
    } else return Long.MIN_VALUE;

    if( i '9' ) return -1;
    return x*10+(c-'0');
  }
  private static boolean isDigit(byte b) {
    if (b < '0' || b > '9') return false;
    else return true;
  }
  private static boolean isChar(byte b) {
    if (b < 'A' || (b >'Z' && b < 'a')  || b > 'z') return false;
    else return true;
  }

  private static DateTimeZone _timezone;

  public static void setTimezone(String tz) {
    Set idSet = DateTimeZone.getAvailableIDs();
    if(idSet.contains(tz))
      _timezone = DateTimeZone.forID(tz);
    else
      Log.err("Attempted to set unrecognized timezone: "+ tz);
  }

  public static DateTimeZone getTimezone() {
    return _timezone == null ? DateTimeZone.getDefault() : _timezone;
  }

  public static String listTimezones() {
    DateTimeFormatter offsetFormatter = new DateTimeFormatterBuilder().appendTimeZoneOffset(null, true, 2, 4).toFormatter();
    Set idSet = DateTimeZone.getAvailableIDs();
    Map tzMap = new TreeMap();
    Iterator it = idSet.iterator();
    String id, cid, offset, key, output;
    DateTimeZone tz;
    int i = 0;
    long millis = System.currentTimeMillis();


    // collect canonical and alias IDs into a map
    while (it.hasNext()) {
      id = it.next();
      tz = DateTimeZone.forID(id);
      cid = tz.getID();
      offset = offsetFormatter.withZone(tz).print(tz.getStandardOffset(millis));
      key = offset + " " + cid;
      if (id == cid) { // Canonical ID
        if (!tzMap.containsKey(key)) tzMap.put(key, "");
      }  else {// alias ID
        if (!tzMap.containsKey(key))
          tzMap.put(key, "");
        tzMap.put(key,  tzMap.get(key) + ", " + id);
      }
    }

    // assemble result
    output = "StandardOffset CanonicalID, Aliases\n";
    for (Map.Entry e : tzMap.entrySet())
      output += e.getKey() + e.getValue()+"\n";

    return output;
  }

  /**
   * Factory to create a formatter from a strptime pattern string.
   * This models the commonly supported features of strftime from POSIX
   * (where it can).
   * 
   * The format may contain locale specific output, and this will change as
   * you change the locale of the formatter.
   * Call DateTimeFormatter.withLocale(Locale) to switch the locale.
   * For example:
   * 
   * DateTimeFormat.forPattern(pattern).withLocale(Locale.FRANCE).print(dt);
   * 
   *
   * @param pattern  pattern specification
   * @return the formatter
   *  @throws IllegalArgumentException if the pattern is invalid
   */
  public static DateTimeFormatter forStrptimePattern(String pattern) {
    if (pattern == null || pattern.length() == 0)
      throw new IllegalArgumentException("Empty date time pattern specification");

    DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
    parseToBuilder(builder, pattern);
    DateTimeFormatter formatter = builder.toFormatter();

    return formatter;
  }

  /**
   * Parses the given pattern and appends the rules to the given
   * DateTimeFormatterBuilder. See strptime man page for valid patterns.
   *
   * @param pattern  pattern specification
   * @throws IllegalArgumentException if the pattern is invalid
   */
  private static void parseToBuilder(DateTimeFormatterBuilder builder, String pattern) {
    int length = pattern.length();
    int[] indexRef = new int[1];

    for (int i=0; i 1900s, 00-68 -> 2000s
            builder.appendTwoDigitYear(2019);
            break;
          case 'Y':
            builder.appendYear(4,4);
            break;
          case 'z':
            builder.appendTimeZoneOffset(null, "z", false, 2, 2);
            break;
          case 'Z':
            break;  //for output only, accepted and ignored for input
          default:  // No match, ignore
            builder.appendLiteral('\'');
            builder.appendLiteral(token);
            Log.warn(token + "is not acceptted as a parse token, treating as a literal");
        }
      } else {
        if (c == '\'') {
          String sub = token.substring(1);
          if (sub.length() > 0) {
            // Create copy of sub since otherwise the temporary quoted
            // string would still be referenced internally.
            builder.appendLiteral(new String(sub));
          }
        } else throw new IllegalArgumentException("Unexpected token encountered parsing format string:" + c);
      }
    }
  }

  /**
   * Parses an individual token.
   *
   * @param pattern  the pattern string
   * @param indexRef  a single element array, where the input is the start
   *  location and the output is the location after parsing the token
   * @return the parsed token
   */
  private static String parseToken(String pattern, int[] indexRef) {
    StringBuilder buf = new StringBuilder();

    int i = indexRef[0];
    int length = pattern.length();

    char c = pattern.charAt(i);
    if (c == '%' && i + 1 < length && pattern.charAt(i+1) != '%') {
      //Grab pattern tokens
      c = pattern.charAt(++i);
      //0 is ignored for input, and this ignores alternative religious eras
      if ((c == '0' || c == 'E') && i + 1 >= length) c = pattern.charAt(++i);
      buf.append('%');
      buf.append(c);
    } else { // Grab all else as text
      buf.append('\'');  // mark literals with ' in first place
      buf.append(c);
      for (i++; i < length;i++) {
        c = pattern.charAt(i);
        if (c == '%' ) { // consume literal % otherwise break
          if (i + 1 < length && pattern.charAt(i + 1) == '%') i++;
          else { i--; break; }
        }
        buf.append(c);
      }
    }

    indexRef[0] = i;
    return buf.toString();
  }
}