org.apache.tika.utils.DateUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tika-core Show documentation
Show all versions of tika-core Show documentation
This is the core Apache Tika™ toolkit library from which all other modules inherit functionality. It also
includes the core facades for the Tika API.
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.utils;
import java.text.DateFormat;
import java.text.DateFormatSymbols;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
/**
 * Date related utility methods and constants.
 *
 * <p>The static {@code formatDate*} methods work on a {@link Calendar} that is
 * created (or cloned) per call. The instance method {@link #tryToParse(String)}
 * relies on per-instance {@link SimpleDateFormat} objects, which are not
 * synchronized, so a single {@code DateUtils} instance must not be used for
 * parsing from several threads at once.
 */
public class DateUtils {

    /**
     * The UTC time zone. Not sure if {@link TimeZone#getTimeZone(String)}
     * understands "UTC" in all environments, but it'll fall back to GMT
     * in such cases, which is in practice equivalent to UTC.
     */
    public static final TimeZone UTC = TimeZone.getTimeZone("UTC");

    /**
     * Custom time zone used to interpret date values without a time
     * component in a way that most likely falls within the same day
     * regardless of in which time zone it is later interpreted. For
     * example, the "2012-02-17" date would map to "2012-02-17T12:00:00Z"
     * (instead of the default "2012-02-17T00:00:00Z"), which would still
     * map to "2012-02-17" if interpreted in say Pacific time (while the
     * default mapping would result in "2012-02-16" for UTC-8).
     */
    public static final TimeZone MIDDAY = TimeZone.getTimeZone("GMT-12:00");

    /**
     * So we can return Date objects for these, this is the
     * list (in preference order) of the various ISO-8601
     * variants that we try when processing a date based
     * property.
     */
    private final List<DateFormat> iso8601InputFormats = loadDateFormats();

    /**
     * Builds a {@link SimpleDateFormat} for the given pattern using US date
     * format symbols.
     *
     * @param format   the {@link SimpleDateFormat} pattern
     * @param timezone time zone to apply to the format, or {@code null} to keep
     *                 the format's default time zone
     * @return a new date format instance
     */
    private static DateFormat createDateFormat(String format, TimeZone timezone) {
        final SimpleDateFormat sdf =
                new SimpleDateFormat(format, new DateFormatSymbols(Locale.US));
        if (timezone != null) {
            sdf.setTimeZone(timezone);
        }
        return sdf;
    }

    /**
     * Creates the ordered list of input formats tried by
     * {@link #tryToParse(String)}. A fresh set of format objects is created
     * for every {@code DateUtils} instance.
     *
     * @return the formats, most preferred first
     */
    private static List<DateFormat> loadDateFormats() {
        List<DateFormat> dateFormats = new ArrayList<>();

        // yyyy-mm-ddThh...
        dateFormats.add(createDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", UTC)); // UTC/Zulu
        dateFormats.add(createDateFormat("yyyy-MM-dd'T'HH:mm:ssZ", null));  // With timezone
        dateFormats.add(createDateFormat("yyyy-MM-dd'T'HH:mm:ss", null));   // Without timezone

        // yyyy-mm-dd hh...
        dateFormats.add(createDateFormat("yyyy-MM-dd' 'HH:mm:ss'Z'", UTC)); // UTC/Zulu
        dateFormats.add(createDateFormat("yyyy-MM-dd' 'HH:mm:ssZ", null));  // With timezone
        dateFormats.add(createDateFormat("yyyy-MM-dd' 'HH:mm:ss", null));   // Without timezone

        // Date without time, set to Midday UTC
        dateFormats.add(createDateFormat("yyyy-MM-dd", MIDDAY)); // Normal date format
        dateFormats.add(createDateFormat("yyyy:MM:dd", MIDDAY)); // Image (IPTC/EXIF) format

        return dateFormats;
    }

    /**
     * Returns a ISO 8601 representation of the given date. This method
     * is thread safe and non-blocking.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date given date
     * @return ISO 8601 date string, including timezone details
     */
    public static String formatDate(Date date) {
        Calendar calendar = GregorianCalendar.getInstance(UTC, Locale.US);
        calendar.setTime(date);
        return doFormatDate(calendar);
    }

    /**
     * Returns a ISO 8601 representation of the given date. This method
     * is thread safe and non-blocking. The passed calendar is not modified.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date given date
     * @return ISO 8601 date string, including timezone details
     */
    public static String formatDate(Calendar date) {
        // Switch a private copy into UTC before formatting, so the caller's
        // calendar keeps its own time zone.
        Calendar utcCalendar = (Calendar) date.clone();
        utcCalendar.setTimeZone(UTC);
        return doFormatDate(utcCalendar);
    }

    /**
     * Returns a ISO 8601 representation of the given date, which is
     * in an unknown timezone. This method is thread safe and non-blocking.
     *
     * @see <a href="https://issues.apache.org/jira/browse/TIKA-495">TIKA-495</a>
     * @param date given date
     * @return ISO 8601 date string, without timezone details
     */
    public static String formatDateUnknownTimezone(Date date) {
        // Create the Calendar object in the system timezone
        Calendar calendar = GregorianCalendar.getInstance(TimeZone.getDefault(), Locale.US);
        calendar.setTime(date);

        // Have it formatted.
        // NOTE(review): formatDate(Calendar) formats in UTC, so the fields in
        // the result are UTC fields rather than system-zone fields - confirm
        // that this is the intended behaviour.
        String formatted = formatDate(calendar);

        // Strip the timezone details (the trailing 'Z') before returning
        return formatted.substring(0, formatted.length() - 1);
    }

    /**
     * Formats the calendar's current field values as
     * {@code yyyy-MM-ddTHH:mm:ssZ}. The fields are read in whatever time zone
     * the calendar is set to; callers are expected to pass a UTC calendar.
     *
     * @param calendar calendar to read the fields from
     * @return the formatted date string
     */
    private static String doFormatDate(Calendar calendar) {
        return String.format(
                Locale.ROOT,
                "%04d-%02d-%02dT%02d:%02d:%02dZ",
                calendar.get(Calendar.YEAR),
                calendar.get(Calendar.MONTH) + 1, // Calendar months are zero-based
                calendar.get(Calendar.DAY_OF_MONTH),
                calendar.get(Calendar.HOUR_OF_DAY),
                calendar.get(Calendar.MINUTE),
                calendar.get(Calendar.SECOND));
    }

    /**
     * Tries to parse the date string; returns null if no parse was possible.
     *
     * This is not thread safe! Wrap in synchronized or create a new
     * {@link DateUtils} for each thread.
     *
     * @param dateString the text to parse; may be {@code null} or too short
     *                   to be a date, in which case {@code null} is returned
     * @return the parsed date, or {@code null} if none of the supported
     *         formats matched
     */
    public Date tryToParse(String dateString) {
        if (dateString == null) {
            return null;
        }

        // Java doesn't like timezones in the form ss+hh:mm
        // It only likes the hhmm form, without the colon.
        // The length guard keeps short input from indexing out of bounds.
        int n = dateString.length();
        if (n >= 6
                && dateString.charAt(n - 3) == ':'
                && (dateString.charAt(n - 6) == '+' || dateString.charAt(n - 6) == '-')) {
            dateString = dateString.substring(0, n - 3) + dateString.substring(n - 2);
        }

        for (DateFormat df : iso8601InputFormats) {
            try {
                return df.parse(dateString);
            } catch (java.text.ParseException ignored) {
                // This format did not match; fall through to the next one.
            }
        }
        return null;
    }
}