
// water.parser.ParseTime (source listing: Maven / Gradle / Ivy)
package water.parser;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.joda.time.format.DateTimeFormatterBuilder;
import water.util.Log;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
public abstract class ParseTime {
// Deduce if we are looking at a Date/Time value, or not.
// If so, return time as msec since Jan 1, 1970 or Long.MIN_VALUE.
// I tried java.util.SimpleDateFormat, but it just throws too many
// exceptions, including ParseException, NumberFormatException, and
// ArrayIndexOutOfBoundsException... and the Piece de resistance: a
// ClassCastException deep in the SimpleDateFormat code:
// "sun.util.calendar.Gregorian$Date cannot be cast to sun.util.calendar.JulianCalendar$Date"
public static final boolean isTime(BufferedString str) {
return attemptTimeParse(str) != Long.MIN_VALUE;
}
private static final byte MMS[][][] = new byte[][][] {
{"jan".getBytes(),"january" .getBytes()},
{"feb".getBytes(),"february" .getBytes()},
{"mar".getBytes(),"march" .getBytes()},
{"apr".getBytes(),"april" .getBytes()},
{"may".getBytes(),"may" .getBytes()},
{"jun".getBytes(),"june" .getBytes()},
{"jul".getBytes(),"july" .getBytes()},
{"aug".getBytes(),"august" .getBytes()},
{"sep".getBytes(),"september".getBytes()},
{"oct".getBytes(),"october" .getBytes()},
{"nov".getBytes(),"november" .getBytes()},
{"dec".getBytes(),"december" .getBytes()}
};
public static long attemptTimeParse( BufferedString str ) {
try {
long t0 = attemptYearFirstTimeParse(str); // "yyyy-MM-dd" and time if present
if( t0 != Long.MIN_VALUE ) return t0;
long t2 = attemptDayFirstTimeParse(str); // "dd-MMM-yy" and time if present
if( t2 != Long.MIN_VALUE ) return t2;
} catch( org.joda.time.IllegalFieldValueException | // Not time at all
org.joda.time.IllegalInstantException // Parsed as time, but falls into e.g. a daylight-savings hour hole
ie ) { } //FIXME should collect errors and report at end of parse
return Long.MIN_VALUE;
}
// Tries to parse "yyyy-MM-dd [HH:mm:ss.SSS aa]"
private static long attemptYearFirstTimeParse(BufferedString str) {
final byte[] buf = str.getBuffer();
int i=str.getOffset();
final int end = i+str.length();
while( i < end && buf[i] == ' ' ) i++;
if ( i < end && buf[i] == '"' ) i++;
if( (end-i) != 10 && (end-i) < 19 ) return Long.MIN_VALUE;
int yyyy=0, MM=0, dd=0;
// Parse date
yyyy = digit(yyyy,buf[i++]);
yyyy = digit(yyyy,buf[i++]);
yyyy = digit(yyyy,buf[i++]);
yyyy = digit(yyyy,buf[i++]);
if( buf[i++] != '-' ) return Long.MIN_VALUE;
MM = digit(MM,buf[i++]);
MM = digit(MM,buf[i++]);
if( MM < 1 || MM > 12 ) return Long.MIN_VALUE;
if( buf[i++] != '-' ) return Long.MIN_VALUE;
dd = digit(dd,buf[i++]);
dd = digit(dd,buf[i++]);
if( dd < 1 || dd > 31 ) return Long.MIN_VALUE;
if( i==end )
return new DateTime(yyyy,MM,dd,0,0,0, getTimezone()).getMillis();
//Parse time
if( buf[i++] != ' ' ) return Long.MIN_VALUE;
return parseTime(buf, i, end, yyyy, MM, dd);
}
// Tries to parse "dd[-]MMM[-]yy[yy][:' '][HH:mm:ss.SSS aa]"
// where MMM is a text representation of the month (e.g. Jul or July)
private static long attemptDayFirstTimeParse(BufferedString str) {
final byte[] buf = str.getBuffer();
int i=str.getOffset();
final int end = i+str.length();
while( i < end && buf[i] == ' ' ) i++;
if ( i < end && buf[i] == '"' ) i++;
if( (end-i) < 8 ) return Long.MIN_VALUE;
int yyyy=0, MM=0, dd=0;
// Parse date
dd = digit(dd,buf[i++]);
if( isDigit(buf[i]) ) dd = digit(dd,buf[i++]);
if( dd < 1 || dd > 31 ) return Long.MIN_VALUE;
if( buf[i] == '-' ) i++;
if( !isChar(buf[i]) ) return Long.MIN_VALUE;
//convert MMM or full month name to MM
byte[] MMM = null;
OUTER: for( ; MM= end) continue INNER;
for (int j = 0; j < MMM.length; j++)
if (MMM[j] != Character.toLowerCase(buf[i + j]))
continue INNER;
if (buf[i + MMM.length] == '-' || isDigit(buf[i + MMM.length])) break OUTER;
}
}
if( MM == MMS.length ) return Long.MIN_VALUE; // No matching month
i += MMM.length; // Skip month bytes
MM++; // 1-based month
if( buf[i] == '-' ) i++;
yyyy = digit(yyyy,buf[i++]); // 2-digit year
yyyy = digit(yyyy,buf[i++]);
if( end-i>=2 && buf[i] != '"' && buf[i] != ' ' && buf[i] != ':') {
yyyy = digit(yyyy,buf[i++]); // 4-digit year
yyyy = digit(yyyy,buf[i++]);
} else { //POSIX 2004 & 2008 says 69-99 -> 1900s, 00-68 -> 2000s
if (yyyy >= 69)
yyyy += 1900;
else
yyyy += 2000;
}
if( i 23 ) return Long.MIN_VALUE;
if( buf[i] != ':' && buf[i] != '.' ) return Long.MIN_VALUE;
++i;
mm = digit(mm,buf[i++]);
mm = digit(mm,buf[i++]);
if( mm < 0 || mm > 59 ) return Long.MIN_VALUE;
if( buf[i] != ':' && buf[i] != '.' ) return Long.MIN_VALUE;
++i;
ss = digit(ss,buf[i++]);
ss = digit(ss,buf[i++]);
if( ss < 0 || ss > 59 ) return Long.MIN_VALUE;
if( i 999 ) return Long.MIN_VALUE;
if ((end - i) >= 6) // nanoseconds are included
i += 6; // ignore
}
if( i 12) return Long.MIN_VALUE;
// convert 1-12 hours into 0-23
if (buf[i] == 'P') // PM
if (HH < 12) HH += 12;
else // AM
if (HH == 12) HH = 0;
i += 2;
} else return Long.MIN_VALUE;
if( i '9' ) return -1;
return x*10+(c-'0');
}
private static boolean isDigit(byte b) {
if (b < '0' || b > '9') return false;
else return true;
}
private static boolean isChar(byte b) {
if (b < 'A' || (b >'Z' && b < 'a') || b > 'z') return false;
else return true;
}
private static DateTimeZone _timezone;
public static void setTimezone(String tz) {
Set idSet = DateTimeZone.getAvailableIDs();
if(idSet.contains(tz))
_timezone = DateTimeZone.forID(tz);
else
Log.err("Attempted to set unrecognized timezone: "+ tz);
}
public static DateTimeZone getTimezone() {
return _timezone == null ? DateTimeZone.getDefault() : _timezone;
}
public static String listTimezones() {
DateTimeFormatter offsetFormatter = new DateTimeFormatterBuilder().appendTimeZoneOffset(null, true, 2, 4).toFormatter();
Set idSet = DateTimeZone.getAvailableIDs();
Map tzMap = new TreeMap();
Iterator it = idSet.iterator();
String id, cid, offset, key, output;
DateTimeZone tz;
int i = 0;
long millis = System.currentTimeMillis();
// collect canonical and alias IDs into a map
while (it.hasNext()) {
id = it.next();
tz = DateTimeZone.forID(id);
cid = tz.getID();
offset = offsetFormatter.withZone(tz).print(tz.getStandardOffset(millis));
key = offset + " " + cid;
if (id == cid) { // Canonical ID
if (!tzMap.containsKey(key)) tzMap.put(key, "");
} else {// alias ID
if (!tzMap.containsKey(key))
tzMap.put(key, "");
tzMap.put(key, tzMap.get(key) + ", " + id);
}
}
// assemble result
output = "StandardOffset CanonicalID, Aliases\n";
for (Map.Entry e : tzMap.entrySet())
output += e.getKey() + e.getValue()+"\n";
return output;
}
/**
* Factory to create a formatter from a strptime pattern string.
* This models the commonly supported features of strftime from POSIX
* (where it can).
*
* The format may contain locale specific output, and this will change as
* you change the locale of the formatter.
* Call DateTimeFormatter.withLocale(Locale) to switch the locale.
* For example:
*
* DateTimeFormat.forPattern(pattern).withLocale(Locale.FRANCE).print(dt);
*
*
* @param pattern pattern specification
* @return the formatter
* @throws IllegalArgumentException if the pattern is invalid
*/
public static DateTimeFormatter forStrptimePattern(String pattern) {
if (pattern == null || pattern.length() == 0)
throw new IllegalArgumentException("Empty date time pattern specification");
DateTimeFormatterBuilder builder = new DateTimeFormatterBuilder();
parseToBuilder(builder, pattern);
DateTimeFormatter formatter = builder.toFormatter();
return formatter;
}
/**
* Parses the given pattern and appends the rules to the given
* DateTimeFormatterBuilder. See strptime man page for valid patterns.
*
* @param pattern pattern specification
* @throws IllegalArgumentException if the pattern is invalid
*/
private static void parseToBuilder(DateTimeFormatterBuilder builder, String pattern) {
int length = pattern.length();
int[] indexRef = new int[1];
for (int i=0; i 1900s, 00-68 -> 2000s
builder.appendTwoDigitYear(2019);
break;
case 'Y':
builder.appendYear(4,4);
break;
case 'z':
builder.appendTimeZoneOffset(null, "z", false, 2, 2);
break;
case 'Z':
break; //for output only, accepted and ignored for input
default: // No match, ignore
builder.appendLiteral('\'');
builder.appendLiteral(token);
Log.warn(token + "is not acceptted as a parse token, treating as a literal");
}
} else {
if (c == '\'') {
String sub = token.substring(1);
if (sub.length() > 0) {
// Create copy of sub since otherwise the temporary quoted
// string would still be referenced internally.
builder.appendLiteral(new String(sub));
}
} else throw new IllegalArgumentException("Unexpected token encountered parsing format string:" + c);
}
}
}
/**
* Parses an individual token.
*
* @param pattern the pattern string
* @param indexRef a single element array, where the input is the start
* location and the output is the location after parsing the token
* @return the parsed token
*/
private static String parseToken(String pattern, int[] indexRef) {
StringBuilder buf = new StringBuilder();
int i = indexRef[0];
int length = pattern.length();
char c = pattern.charAt(i);
if (c == '%' && i + 1 < length && pattern.charAt(i+1) != '%') {
//Grab pattern tokens
c = pattern.charAt(++i);
//0 is ignored for input, and this ignores alternative religious eras
if ((c == '0' || c == 'E') && i + 1 >= length) c = pattern.charAt(++i);
buf.append('%');
buf.append(c);
} else { // Grab all else as text
buf.append('\''); // mark literals with ' in first place
buf.append(c);
for (i++; i < length;i++) {
c = pattern.charAt(i);
if (c == '%' ) { // consume literal % otherwise break
if (i + 1 < length && pattern.charAt(i + 1) == '%') i++;
else { i--; break; }
}
buf.append(c);
}
}
indexRef[0] = i;
return buf.toString();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy