
com.oracle.truffle.js.parser.date.DateParser Maven / Gradle / Ivy
Show all versions of js-language Show documentation
/*
* Copyright (c) 2018, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* The Universal Permissive License (UPL), Version 1.0
*
* Subject to the condition set forth below, permission is hereby granted to any
* person obtaining a copy of this software, associated documentation and/or
* data (collectively the "Software"), free of charge and under any and all
* copyright rights in the Software, and any and all patent rights owned or
* freely licensable by each licensor hereunder covering either (i) the
* unmodified Software as contributed to or provided by such licensor, or (ii)
* the Larger Works (as defined below), to deal in both
*
* (a) the Software, and
*
* (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
* one is included with the Software each a "Larger Work" to which the Software
* is contributed by such licensors),
*
* without restriction, including without limitation the rights to copy, create
* derivative works of, display, perform, and distribute the Software and make,
* use, sell, offer for sale, import, export, have made, and have sold the
* Software and the Larger Work(s), and to sublicense the foregoing rights on
* either these or other terms.
*
* This license is subject to the following condition:
*
* The above copyright notice and either this complete permission notice or at a
* minimum a reference to the UPL must be included in all copies or substantial
* portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.oracle.truffle.js.parser.date;
import static java.lang.Character.DECIMAL_DIGIT_NUMBER;
import static java.lang.Character.LOWERCASE_LETTER;
import static java.lang.Character.OTHER_PUNCTUATION;
import static java.lang.Character.SPACE_SEPARATOR;
import static java.lang.Character.UPPERCASE_LETTER;
import java.util.HashMap;
import java.util.Locale;
import com.oracle.truffle.js.runtime.JSContext;
import com.oracle.truffle.js.runtime.JSRealm;
// @formatter:off
/**
* JavaScript date parser. This class first tries to parse a date string
* according to the extended ISO 8601 format specified in ES5 15.9.1.15.
* If that fails, it falls back to legacy mode in which it accepts a range
* of different formats.
*
* This class is neither thread-safe nor reusable. Calling the
* parse() method more than once will yield undefined results.
*/
public class DateParser {
/** Constant for index position of parsed year value. */
public static final int YEAR = 0;
/** Constant for index position of parsed month value. */
public static final int MONTH = 1;
/** Constant for index position of parsed day value. */
public static final int DAY = 2;
/** Constant for index position of parsed hour value. */
public static final int HOUR = 3;
/** Constant for index position of parsed minute value. */
public static final int MINUTE = 4;
/** Constant for index position of parsed second value. */
public static final int SECOND = 5;
/** Constant for index position of parsed millisecond value. */
public static final int MILLISECOND = 6;
/** Constant for index position of parsed time zone offset value. */
public static final int TIMEZONE = 7;
private enum Token {
UNKNOWN, NUMBER, SEPARATOR, PARENTHESIS, NAME, SIGN, END
}
private final String string;
private final int length;
private final Integer[] fields;
private int pos = 0;
private Token token;
private int tokenLength;
private Name nameValue;
private int numValue;
private int currentField = YEAR;
private int yearSign = 0;
private boolean namedMonth = false;
private final JSRealm realm;
private final boolean extraLenient; //necessary for Temporal
private static final HashMap names = new HashMap<>();
static {
addName("monday", Name.DAY_OF_WEEK, 0);
addName("tuesday", Name.DAY_OF_WEEK, 0);
addName("wednesday", Name.DAY_OF_WEEK, 0);
addName("thursday", Name.DAY_OF_WEEK, 0);
addName("friday", Name.DAY_OF_WEEK, 0);
addName("saturday", Name.DAY_OF_WEEK, 0);
addName("sunday", Name.DAY_OF_WEEK, 0);
addName("january", Name.MONTH_NAME, 1);
addName("february", Name.MONTH_NAME, 2);
addName("march", Name.MONTH_NAME, 3);
addName("april", Name.MONTH_NAME, 4);
addName("may", Name.MONTH_NAME, 5);
addName("june", Name.MONTH_NAME, 6);
addName("july", Name.MONTH_NAME, 7);
addName("august", Name.MONTH_NAME, 8);
addName("september", Name.MONTH_NAME, 9);
addName("october", Name.MONTH_NAME, 10);
addName("november", Name.MONTH_NAME, 11);
addName("december", Name.MONTH_NAME, 12);
addName("am", Name.AM_PM, 0);
addName("pm", Name.AM_PM, 12);
addName("z", Name.TIMEZONE_ID, 0);
addName("gmt", Name.TIMEZONE_ID, 0);
addName("ut", Name.TIMEZONE_ID, 0);
addName("utc", Name.TIMEZONE_ID, 0);
addName("est", Name.TIMEZONE_ID, -5 * 60);
addName("edt", Name.TIMEZONE_ID, -4 * 60);
addName("cst", Name.TIMEZONE_ID, -6 * 60);
addName("cdt", Name.TIMEZONE_ID, -5 * 60);
addName("mst", Name.TIMEZONE_ID, -7 * 60);
addName("mdt", Name.TIMEZONE_ID, -6 * 60);
addName("pst", Name.TIMEZONE_ID, -8 * 60);
addName("pdt", Name.TIMEZONE_ID, -7 * 60);
addName("t", Name.TIME_SEPARATOR, 0);
}
/**
* Construct a new DateParser instance for parsing the given string.
* @param string the string to be parsed
*/
public DateParser(final JSRealm realm, final String string, boolean extraLenient) {
this.string = string;
this.length = string.length();
this.fields = new Integer[TIMEZONE + 1];
this.realm = realm;
this.extraLenient = extraLenient;
}
/**
* Try parsing the given string as date according to the extended ISO 8601 format
* specified in ES5 15.9.1.15. Fall back to legacy mode if that fails.
* This method returns true if the string could be parsed.
* @return true if the string could be parsed as date
*/
public boolean parse() {
return parseEcmaDate() || parseLegacyDate();
}
/**
* Try parsing the date string according to the rules laid out in ES5 15.9.1.15.
* The date string must conform to the following format:
*
* [('-'|'+')yy]yyyy[-MM[-dd]][Thh:mm[:ss[.sss]][Z|(+|-)hh:mm]]
*
* If the string does not contain a time zone offset, the TIMEZONE field
* is set to 0 (GMT).
* @return true if string represents a valid ES5 date string.
*/
public boolean parseEcmaDate() {
if (token == null) {
token = next();
}
while (token != Token.END) {
switch (token) {
case NUMBER:
if (currentField == YEAR && yearSign != 0) {
// 15.9.1.15.1 Extended year must have six digits
if (tokenLength != 6) {
return false;
}
if (numValue == 0 && yearSign == -1) {
// The representation of the year 0 as -000000 is invalid
return false;
}
numValue *= yearSign;
} else if (!checkEcmaField(currentField, numValue)) {
return false;
}
if (!skipEcmaDelimiter()) {
return false;
}
if (currentField < TIMEZONE) {
set(currentField++, numValue);
}
break;
case NAME:
if (nameValue == null) {
return false;
}
switch (nameValue.type) {
case Name.TIME_SEPARATOR:
if (currentField == YEAR || currentField > HOUR) {
return false;
}
currentField = HOUR;
break;
case Name.TIMEZONE_ID:
if (!nameValue.key.equals("z") || !setTimezone(nameValue.value, false)) {
return false;
}
break;
default:
return false;
}
break;
case SIGN:
if (peek() == -1) {
// END after sign - wrong!
return false;
}
if (currentField == YEAR) {
yearSign = numValue;
} else if (currentField < SECOND || !setTimezone(readTimeZoneOffset(), true)) {
// Note: Spidermonkey won't parse timezone unless time includes seconds and milliseconds
return false;
}
break;
default:
return false;
}
token = next();
}
return patchResult(true);
}
/**
* Try parsing the date using a fuzzy algorithm that can handle a variety of formats.
*
* Numbers separated by ':' are treated as time values, optionally followed by a
* millisecond value separated by '.'. Other number values are treated as date values.
* The exact sequence of day, month, and year values to apply is determined heuristically.
*
* English month names and selected time zone names as well as AM/PM markers are recognized
* and handled properly. Additionally, numeric time zone offsets such as (+|-)hh:mm or
* (+|-)hhmm are recognized. If the string does not contain a time zone offset
* the TIMEZONEfield is left undefined, meaning the local time zone should be applied.
*
* English weekday names are recognized but ignored. All text in parentheses is ignored as well.
* All other text causes parsing to fail.
*
* @return true if the string could be parsed
*/
public boolean parseLegacyDate() {
if (currentField > DAY) {
return false;
}
if (token == null) {
token = next();
}
while (token != Token.END) {
switch (token) {
case NUMBER:
if (skipDelimiter(':')) {
// A number followed by ':' is parsed as time
if (!setTimeField(numValue)) {
return false;
}
// consume remaining time tokens
do {
token = next();
if (!((token == Token.NUMBER && setTimeField(numValue))
|| ((token == Token.END || token == Token.SEPARATOR) && setTimeField(0)))) {
return false;
}
} while (isSet(SECOND) ? (skipDelimiter('.') || skipDelimiter(':')) : skipDelimiter(':'));
} else {
// Parse as date token
if (!setDateField(numValue)) {
return false;
}
skipDelimiter('-');
}
break;
case NAME:
if (nameValue == null) {
return false;
}
switch (nameValue.type) {
case Name.AM_PM:
if (!setAmPm(nameValue.value)) {
return false;
}
break;
case Name.MONTH_NAME:
if (!setMonth(nameValue.value)) {
return false;
}
break;
case Name.TIMEZONE_ID:
if (!setTimezone(nameValue.value, false)) {
return false;
}
break;
case Name.TIME_SEPARATOR:
return false;
default:
break;
}
if (nameValue.type != Name.TIMEZONE_ID) {
skipDelimiter('-');
}
break;
case SIGN:
if (peek() == -1) {
// END after sign - wrong!
return false;
}
if (!setTimezone(readTimeZoneOffset(), true)) {
return false;
}
break;
case PARENTHESIS:
if (!skipParentheses()) {
return false;
}
break;
case SEPARATOR:
break;
default:
return false;
}
token = next();
}
return patchResult(false);
}
/**
* Get the parsed date and time fields as an array of Integers.
*
* If parsing was successful, all fields are guaranteed to be set except for the
* TIMEZONE field which may be null, meaning that local time zone
* offset should be applied.
*
* @return the parsed date fields
*/
public Integer[] getDateFields() {
return fields;
}
private boolean isSet(final int field) {
return fields[field] != null;
}
private Integer get(final int field) {
return fields[field];
}
private void set(final int field, final int value) {
fields[field] = value;
}
private int peek() {
return pos < length ? string.charAt(pos) : -1;
}
// Skip delimiter if followed by a number. Used for ISO 8601 formatted dates
private boolean skipNumberDelimiter(final char c) {
if (pos < length - 1 && string.charAt(pos) == c
&& Character.getType(string.charAt(pos + 1)) == DECIMAL_DIGIT_NUMBER) {
token = null;
pos++;
return true;
}
return false;
}
private boolean skipDelimiter(final char c) {
if (pos < length && string.charAt(pos) == c) {
token = null;
pos++;
return true;
}
return false;
}
private Token next() {
if (pos >= length) {
tokenLength = 0;
return Token.END;
}
final char c = string.charAt(pos);
final int type = Character.getType(c);
if (c > 0x80 && type != SPACE_SEPARATOR) {
tokenLength = 1;
pos++;
return Token.UNKNOWN; // We only deal with ASCII here
}
switch (type) {
case DECIMAL_DIGIT_NUMBER:
numValue = readNumber(9);
if (pos < length && isAsciiDigit(string.charAt(pos))) {
return Token.UNKNOWN; // number longer than 9 digits
}
return Token.NUMBER;
case SPACE_SEPARATOR:
case OTHER_PUNCTUATION:
tokenLength = 1;
pos++;
return Token.SEPARATOR;
case UPPERCASE_LETTER:
case LOWERCASE_LETTER:
nameValue = readName();
return Token.NAME;
default:
tokenLength = 1;
pos++;
switch (c) {
case '(':
return Token.PARENTHESIS;
case '-':
case '+':
numValue = c == '-' ? -1 : 1;
return Token.SIGN;
default:
return Character.isWhitespace(c) ? Token.SEPARATOR : Token.UNKNOWN;
}
}
}
private boolean checkLegacyField(final int field, final int value) {
switch (field) {
case HOUR:
return isHour(value);
case MINUTE:
case SECOND:
return isMinuteOrSecond(value);
case MILLISECOND:
return checkMilliseconds(value);
default:
// skip validation on other legacy fields as we don't know what's what
return true;
}
}
private boolean checkEcmaField(final int field, final int value) {
switch (field) {
case YEAR:
return tokenLength == 4;
case MONTH:
return tokenLength == 2 && isMonth(value);
case DAY:
return tokenLength == 2 && isDay(value);
case HOUR:
return tokenLength == 2 && isHour(value);
case MINUTE:
case SECOND:
return tokenLength == 2 && isMinuteOrSecond(value);
case MILLISECOND:
return checkMilliseconds(value);
default:
return true;
}
}
private boolean checkMilliseconds(final int value) {
if (value < 0) {
return false;
}
// convert numValue to milliseconds (i.e. to length 3)
int currentLength = tokenLength;
while (currentLength < 3) {
numValue *= 10;
currentLength++;
}
while (currentLength > 3) {
numValue /= 10;
currentLength--;
}
return true;
}
private boolean skipEcmaDelimiter() {
switch (currentField) {
case YEAR:
case MONTH:
return skipNumberDelimiter('-') || peek() == 'T' || peek() == -1;
case DAY:
return peek() == 'T' || peek() == -1;
case HOUR:
case MINUTE:
return skipNumberDelimiter(':') || endOfTime();
case SECOND:
return skipNumberDelimiter('.') || endOfTime();
default:
return true;
}
}
private boolean endOfTime() {
final int c = peek();
return c == -1 || c == 'Z' || c == '-' || c == '+' || c == ' ';
}
private static boolean isAsciiLetter(final char ch) {
return ('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z');
}
private static boolean isAsciiDigit(final char ch) {
return '0' <= ch && ch <= '9';
}
private int readNumber(final int maxDigits) {
final int start = pos;
int n = 0;
final int max = Math.min(length, pos + maxDigits);
while (pos < max && isAsciiDigit(string.charAt(pos))) {
n = n * 10 + string.charAt(pos++) - '0';
}
tokenLength = pos - start;
return n;
}
private Name readName() {
final int start = pos;
final int limit = Math.min(pos + 3, length);
// first read up to the key length
while (pos < limit && isAsciiLetter(string.charAt(pos))) {
pos++;
}
final String key = string.substring(start, pos).toLowerCase(Locale.ENGLISH);
final Name name = names.get(key);
// then advance to end of name
while (pos < length && isAsciiLetter(string.charAt(pos))) {
pos++;
}
tokenLength = pos - start;
// make sure we have the full name or a prefix
if (name != null && name.matches(string, start, tokenLength)) {
return name;
}
return null;
}
private int readTimeZoneOffset() {
final int sign = string.charAt(pos - 1) == '+' ? 1 : -1;
int hours = readNumber(2);
boolean delimiter = skipDelimiter(':');
int minutes = readNumber(2);
if (!delimiter && tokenLength == 1) {
// three digits in hmm format (the second digit of hours belongs to minutes)
minutes += 10 * (hours % 10);
hours /= 10;
}
return sign * (60 * hours + minutes);
}
private boolean skipParentheses() {
int parenCount = 1;
while (pos < length && parenCount != 0) {
final char c = string.charAt(pos++);
if (c == '(') {
parenCount++;
} else if (c == ')') {
parenCount--;
}
}
return true;
}
private static int getDefaultValue(final int field) {
switch (field) {
case MONTH:
case DAY:
return 1;
default:
return 0;
}
}
private static boolean isDay(final int n) {
return 1 <= n && n <= 31;
}
private static boolean isMonth(final int n) {
return 1 <= n && n <= 12;
}
private static boolean isHour(final int n) {
return 0 <= n && n <= 24;
}
private static boolean isMinuteOrSecond(final int n) {
return 0 <= n && n < 60;
}
private boolean setMonth(final int m) {
if (!isSet(MONTH)) {
namedMonth = true;
set(MONTH, m);
return true;
}
return false;
}
private boolean setDateField(final int n) {
for (int field = YEAR; field != HOUR; field++) {
if (!isSet(field)) {
// no validation on legacy date fields
set(field, n);
return true;
}
}
return false;
}
private boolean setTimeField(final int n) {
for (int field = HOUR; field != TIMEZONE; field++) {
if (!isSet(field)) {
if (checkLegacyField(field, n)) {
set(field, n);
return true;
}
return false;
}
}
return false;
}
private boolean setTimezone(final int offset, final boolean asNumericOffset) {
if (!isSet(TIMEZONE) || (asNumericOffset && get(TIMEZONE) == 0)) {
set(TIMEZONE, offset);
return true;
}
return false;
}
private boolean setAmPm(final int offset) {
if (!isSet(HOUR)) {
return false;
}
int hour = get(HOUR);
if (hour >= 0 && hour <= 12) {
if (hour == 12) {
// 12:30 am == 00:30; 12:30 pm == 12:30 (24h)
hour = 0;
}
set(HOUR, hour + offset);
}
return true;
}
private boolean patchResult(final boolean strict) {
// sanity checks - make sure we have something
if (!isSet(YEAR) && !isSet(HOUR)) {
return false;
}
if (isSet(HOUR) && !isSet(MINUTE)) {
return false;
}
JSContext context = realm.getContext();
if (context.isOptionV8CompatibilityMode() && !extraLenient) {
if (!isSet(YEAR) && !isSet(DAY) && !isSet(MONTH)) {
return false;
}
}
boolean dateOnly = !isSet(HOUR);
// fill in default values for unset fields except timezone
for (int field = YEAR; field <= TIMEZONE; field++) {
if (get(field) == null) {
if (field == TIMEZONE && !isUTCDefaultTimezone(dateOnly, strict)) {
// When the UTC offset representation is absent,
// date-only forms are interpreted as a UTC time and
// date-time forms are interpreted as a local time (= empty TIMEZONE).
continue;
}
final int value = getDefaultValue(field);
set(field, value);
}
}
if (!strict) {
// swap year, month, and day if it looks like the right thing to do
if (isDay(get(YEAR))) {
final int d = get(YEAR);
set(YEAR, get(DAY));
if (namedMonth) {
// d-m-y
set(DAY, d);
} else {
// m-d-y
final int d2 = get(MONTH);
set(MONTH, d);
set(DAY, d2);
}
}
// sanity checks now that we know what's what
if (!isMonth(get(MONTH)) || !isDay(get(DAY))) {
return false;
}
// add 1900 or 2000 to year if it's between 0 and 100
final int year = get(YEAR);
if (year >= 0 && year < 100) {
set(YEAR, year >= 50 ? 1900 + year : 2000 + year);
}
} else {
// 24 hour value is only allowed if all other time values are zero
if (get(HOUR) == 24 &&
(get(MINUTE) != 0 || get(SECOND) != 0 || get(MILLISECOND) != 0)) {
return false;
}
}
// set month to 0-based
set(MONTH, get(MONTH) - 1);
return true;
}
private boolean isUTCDefaultTimezone(boolean dateOnly, boolean strict) {
return (strict || realm.getContext().getLanguageOptions().useUTCForLegacyDates()) && dateOnly;
}
private static void addName(final String str, final int type, final int value) {
final Name name = new Name(str, type, value);
names.put(name.key, name);
}
private static class Name {
final String name;
final String key;
final int value;
final int type;
static final int DAY_OF_WEEK = -1;
static final int MONTH_NAME = 0;
static final int AM_PM = 1;
static final int TIMEZONE_ID = 2;
static final int TIME_SEPARATOR = 3;
Name(final String name, final int type, final int value) {
assert name != null;
assert name.equals(name.toLowerCase(Locale.ENGLISH));
this.name = name;
// use first three characters as lookup key
this.key = name.substring(0, Math.min(3, name.length()));
this.type = type;
this.value = value;
}
public boolean matches(final String str, final int offset, final int len) {
return name.regionMatches(true, 0, str, offset, len);
}
@Override
public String toString() {
return name;
}
}
}