All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.lockss.daemon.PublicationDate Maven / Gradle / Ivy

The newest version!
/*
 * $Id$
 */

/*

Copyright (c) 2000-2011 Board of Trustees of Leland Stanford Jr. University,
all rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
STANFORD UNIVERSITY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

Except as contained in this notice, the name of Stanford University shall not
be used in advertising or otherwise to promote the sale, use or other dealings
in this Software without prior written authorization from Stanford University.

*/
package org.lockss.daemon;

import java.text.ParseException;
import java.util.Calendar;
import java.util.Date;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;

import org.lockss.util.StringUtil;

/**
 * This class implements a journal or book publication date. The date
 * consists of a year, and optionally either a season or quarter, or
 * a month and optionally a day.  The elements can either be in narrative
 * form (e.g. 15 January, 2010), or structured (15/Jan/2010).
 * 

* Seasons and quarters are not handled by standard date formats, so * special handling is provided for English, French, German, Italian, * Portuguese, Russian, and Spanish seasons and quarters. *

* Parsing is permissive, and fail-soft. It attempts to extract as much * information as it recognizes, and ignores what it can not.. * * @author Philip Gust * @version 1.0 */ public class PublicationDate { /** The season (1-4) */ private int season = 0; /** The quarter (1-4) */ private int quarter = 0; /** The month (1-12) */ private int month = 0; /** The day of month (1-31) */ private int day = 0; /** the year (1000 - current year+1) */ private int year = 0; /** english seasons */ static private final Map en_seasons = new LinkedHashMap(); { en_seasons.put("spring", 1); en_seasons.put("1S", 1); en_seasons.put("S1", 1); en_seasons.put("summer", 2); en_seasons.put("2S", 2); en_seasons.put("S2", 2); en_seasons.put("fall", 3); en_seasons.put("3S", 3); en_seasons.put("S3", 3); en_seasons.put("winter", 4); en_seasons.put("4S", 4); en_seasons.put("S4", 4); } /** french seasons */ static private final Map fr_seasons = new LinkedHashMap(); { fr_seasons.put("printemps", 1); // spring fr_seasons.put("d'été", 2); // summer season fr_seasons.put("été", 2); // summer fr_seasons.put("d'automne", 3); // autumn quarter fr_seasons.put("l'automne", 3); // autumn fr_seasons.put("automne", 3); // autumn season fr_seasons.put("d'hiver", 4); // winter season fr_seasons.put("hiver", 4); // winter } /** german seasons */ static private final Map de_seasons = new LinkedHashMap(); { de_seasons.put("Frühling", 1); // spring season de_seasons.put("Frühjahr", 1); // spring de_seasons.put("Sommersaison", 2); // summer season de_seasons.put("Sommer", 2); // summer de_seasons.put("Herbst-Quartal", 3); // fall quarter de_seasons.put("Herbst-Saison", 3); // fall season de_seasons.put("Herbst", 3); // fall de_seasons.put("Winterquartier", 4); // winter quarter de_seasons.put("Wintersaison", 4); // winter season de_seasons.put("Winter", 4); // winter } /** italian seasons */ static private final Map it_seasons = new LinkedHashMap(); { it_seasons.put("primavera", 1); // spring it_seasons.put("estiva", 2); // summer season it_seasons.put("estate", 2); // summer it_seasons.put("autunno", 3); // fall it_seasons.put("invernale", 4); // winter quarter it_seasons.put("inverno", 4); // winter } /** portuguese seasons */ static private final Map pt_seasons = new LinkedHashMap(); { pt_seasons.put("primavera", 1); // spring pt_seasons.put("verão", 2); // summer pt_seasons.put("outono", 3); // fall pt_seasons.put("inverno", 4); // winter } /** russian */ static private final Map ru_seasons = new LinkedHashMap(); { ru_seasons.put("весенний", 1); // spring season ru_seasons.put("весной", 1); // spring ru_seasons.put("Летний", 2); // summer season ru_seasons.put("Летом", 2); // summer ru_seasons.put("осень", 3); // fall season ru_seasons.put("осенью", 3); // fall ru_seasons.put("зимой", 4); // winter quarter ru_seasons.put("зимний", 4); // winter season ru_seasons.put("зима", 4); // winter } /** spanish */ static private final Map es_seasons = new LinkedHashMap(); { es_seasons.put("primavera", 1); // spring es_seasons.put("verano", 2); // summer es_seasons.put("otoño", 3); // fall es_seasons.put("invierno", 4); // winter } /** seasons */ static private final Map> seasons = new LinkedHashMap>(); { seasons.put("de", de_seasons); seasons.put("en", en_seasons); seasons.put("es", es_seasons); seasons.put("fr", fr_seasons); seasons.put("it", it_seasons); seasons.put("pt", pt_seasons); seasons.put("ru", ru_seasons); } /** english quarters */ static private final Map en_quarters = new LinkedHashMap(); { en_quarters.put("first quarter", 1); en_quarters.put("1st quarter", 1); en_quarters.put("quarter 1", 1); en_quarters.put("quarter one", 1); en_quarters.put("1Q", 1); en_quarters.put("Q1", 1); en_quarters.put("second quarter", 1); en_quarters.put("2nd quarter", 1); en_quarters.put("quarter 2", 1); en_quarters.put("quarter Two", 1); en_quarters.put("2Q", 2); en_quarters.put("Q2", 2); en_quarters.put("third quarter", 1); en_quarters.put("3rd quarter", 1); en_quarters.put("quarter 3", 1); en_quarters.put("quarter three", 1); en_quarters.put("3Q", 3); en_quarters.put("Q3", 3); en_quarters.put("fourth quarter", 1); en_quarters.put("4th quarter", 1); en_quarters.put("quarter 4", 1); en_quarters.put("quarter four", 1); en_quarters.put("4Q", 4); en_quarters.put("Q4", 4); } /** french quarters */ static private final Map fr_quarters = new LinkedHashMap(); { fr_quarters.put("premier trimestre",1); // first quarter fr_quarters.put("1er trimestre",1); // 1st quarter fr_quarters.put("trimestre 1",1); // quarter 1 fr_quarters.put("quart", 1); // quarter one fr_quarters.put("deuxième trimestre",2); // second quarter fr_quarters.put("2e trimestre",2); // 2nd quarter fr_quarters.put("2 er trimestre",2); // quarter 2 fr_quarters.put("quart deux",2); // quarter two fr_quarters.put("au troisième trimestre",3);// third quarter fr_quarters.put("3ème trimestre",3); // 3rd quarter fr_quarters.put("3 e trimestre",3); // quarter 3 fr_quarters.put("trois quart",3); // quarter three fr_quarters.put("quatrième trimestre",4); // fourth quarter fr_quarters.put("4e trimestre",4); // 4th quarter fr_quarters.put("4 e trimestre",4); // quarter 4 fr_quarters.put("quatre trimestres",4); // quarter four } /** german quarters */ static private final Map de_quarters = new LinkedHashMap(); { de_quarters.put("ersten Quartal",1); // first quarter de_quarters.put("1. Quartal",1); // 1st quarter de_quarters.put("Quartal 1",1); // quarter 1 de_quarters.put("Quartal ein",1); // quarter one de_quarters.put("zweiten Quartal",2); // second quarter de_quarters.put("2. Quartal",2); // 2nd quarter de_quarters.put("Quartal 2 ",2); // quarter 2 de_quarters.put("Quartal zwei",2); // quarter two de_quarters.put("dritten Quartal",3); // third quarter de_quarters.put("3. Quartal",3); // 3rd quarter de_quarters.put("Quartal 3",3); // quarter 3 de_quarters.put("Quartal drei",3); // quarter three de_quarters.put("vierten Quartal",4); // fourth quarter de_quarters.put("4. Quartal",4); // 4th quarter de_quarters.put("Quartal 4",4); // quarter 4 de_quarters.put("vierte Quartal",4); // quarter four } /** italian quarters */ static private final Map it_quarters = new LinkedHashMap(); { it_quarters.put("primer trimestre",1); // first quarter it_quarters.put("1 º trimestre",1); // 1st quarter it_quarters.put("trimestre 1",1); // quarter 1 it_quarters.put("quarto", 1); // quarter one it_quarters.put("secondo trimestre",2); // second quarter it_quarters.put("2 º trimestre",2); // 2nd quarter it_quarters.put("quartiere 2",2); // quarter 2 it_quarters.put("trimestre due",2); // quarter two it_quarters.put("terzo trimestre",3); // third quarter it_quarters.put("3 º trimestre",3); // 3rd quarter it_quarters.put("tre quarti",3); // quarter three it_quarters.put("nel quarto trimestre",4); // fourth quarter it_quarters.put("4 º trimestre",4); // 4th quarter it_quarters.put("quartiere 4",4); // quarter 4 it_quarters.put("trimestre quattro",4); // quarter four } /** portuguese quarters */ static private final Map pt_quarters = new LinkedHashMap(); { pt_quarters.put("primeiro trimestre",1); // first quarter pt_quarters.put("1 º trimestre",1); // 1st quarter pt_quarters.put("trimestre 1",1); // quarter 1 pt_quarters.put("quarto", 1); // quarter one pt_quarters.put("segundo trimestre",2); // second quarter pt_quarters.put("2 º trimestre",2); // 2nd quarter pt_quarters.put("2 º trimestre",2); // quarter 2 pt_quarters.put("trimestre de dois",2); // quarter two pt_quarters.put("terceiro trimestre",3); // third quarter pt_quarters.put("3 º trimestre",3); // 3rd quarter pt_quarters.put("quarto 3",3); // quarter 3 pt_quarters.put("três quartos",3); // quarter three pt_quarters.put("quarto trimestre",4); // fourth quarter pt_quarters.put("4 º trimestre",4); // 4th quarter pt_quarters.put("4 trimestre",4); // quarter 4 pt_quarters.put("trimestre quatro",4); // quarter four } /** russian quarters */ static private final Map ru_quarters = new LinkedHashMap(); { ru_quarters.put("первом квартале",1); // first quarter ru_quarters.put("1-й квартал",1); // 1st quarter ru_quarters.put("1 квартал",1); // quarter 1 ru_quarters.put("один квартал",1); // quarter one ru_quarters.put("во втором квартале",2); // second quarter ru_quarters.put("2-й квартал",2); // 2nd quarter ru_quarters.put("2 квартале",2); // quarter 2 ru_quarters.put("два квартала",2); // quarter two ru_quarters.put("третьем квартале",3); // third quarter ru_quarters.put("3-й квартал",3); // 3rd quarter ru_quarters.put("квартал 3",3); // quarter 3 ru_quarters.put("три четверти",3); // quarter three ru_quarters.put("четвертом квартале",4); // fourth quarter ru_quarters.put("4-й квартал",4); // 4th quarter ru_quarters.put("4 квартале",4); // quarter 4 ru_quarters.put("четвертом квартале",4); // quarter four } /** spanish quarters */ static private final Map es_quarters = new LinkedHashMap(); { es_quarters.put("primer trimestre",1); // first quarter es_quarters.put("1 º trimestre",1); // 1st quarter es_quarters.put("trimestre 1",1); // quarter 1 es_quarters.put("cuarto", 1); // quarter one es_quarters.put("segundo trimestre",2); // second quarter es_quarters.put("2 º trimestre",2); // 2nd quarter es_quarters.put("trimestre 2",2); // quarter 2 es_quarters.put("trimestre dos",2); // quarter two es_quarters.put("tercer trimestre",3); // third quarter es_quarters.put("3 º trimestre",3); // 3rd quarter es_quarters.put("trimestre 3",3); // quarter 3 es_quarters.put("de tres cuartos",3); // quarter three es_quarters.put("cuarto trimestre",4); // fourth quarter es_quarters.put("4 º trimestre",4); // 4th quarter es_quarters.put("trimestre 4",4); // quarter 4 es_quarters.put("cuatro trimestres",4); // quarter four } /** quarters */ static private final Map> quarters = new LinkedHashMap>(); { quarters.put("de", de_quarters); quarters.put("en", en_quarters); quarters.put("es", es_quarters); quarters.put("fr", fr_quarters); quarters.put("it", it_quarters); quarters.put("pt", pt_quarters); quarters.put("ru", ru_quarters); } /** * Construct a publication date by parsing a string formatted in * the default locale. * * @param dateStr the date string * @throws ParseException if the date cannot be parsed */ public PublicationDate(String dateStr) throws ParseException { this(dateStr, Locale.getDefault()); } /** * Construct a publication date by parsing a string formatted in * the specified locale. * * @param s the date string * @param locale the locale for interpreting the date string * @throws ParseException if the date cannot be parsed */ public PublicationDate(String s, Locale locale) throws ParseException { // normalize by stripping accents and lower-casing String dateStr = StringUtil.toUnaccented(s).toLowerCase(locale); // get season Map myseasons = seasons.get(locale.getLanguage()); if (myseasons != null) { // match season phrases as whole words. for (Map.Entry entry : myseasons.entrySet()) { String key = StringUtil.toUnaccented(entry.getKey()).toLowerCase(locale); String match = key + " "; if (!dateStr.startsWith(match)) { match = key + ","; if (!dateStr.startsWith(match)) { match = " " + key; if (!dateStr.endsWith(match)) { match = " " + key + " "; if (!dateStr.contains(match)) { match = " " + key + ","; if (!dateStr.contains(match)) { continue; } } } } } dateStr = dateStr.replace(match," "); season = entry.getValue(); break; } } // get quarter Map myquarters = quarters.get(locale.getLanguage()); if (myquarters != null) { // match quarter phrases as whole words. for (Map.Entry entry : myquarters.entrySet()) { String key = StringUtil.toUnaccented(entry.getKey()).toLowerCase(locale); String match = key + " "; if (!dateStr.startsWith(match)) { match = key + ","; if (!dateStr.startsWith(match)) { match = " " + key; if (!dateStr.endsWith(match)) { match = " " + key + " "; if (!dateStr.contains(match)) { match = " " + key + ","; if (!dateStr.contains(match)) { continue; } } } } } dateStr = dateStr.replace(match," "); quarter = entry.getValue(); break; } } Calendar c = Calendar.getInstance(locale); // earliest and latest recognizable years // first year; limits to 4-digit years int firstYear = 1000; // year after current year (some pubs come out in advance) int lastYear = c.get(Calendar.YEAR)+1; // get month names Map months = c.getDisplayNames(Calendar.MONTH, Calendar.ALL_STYLES, locale); // process words from date string after removing quarter or season for (StringTokenizer tok = new StringTokenizer(dateStr); tok.hasMoreElements(); ) { // remove extraneous trailing punctuation String word= tok.nextToken(); if (month == 0) { // get month after eliminating any trailing punctuation. String w = word.replaceAll("\\p{Punct}+$", ""); for (Map.Entry entry : months.entrySet()) { // strip punctuation because keys for a few locales // include trailing punctuation with their abbreviations if (StringUtil.toUnaccented( entry.getKey()).replaceAll( "\\p{Punct}+$", "").equalsIgnoreCase(w)) { month = entry.getValue()+1; break; } } if (month != 0) { continue; } } // bit of a kludge: also covers "28/nov./2010 where // './' is treated as a single delimiter; luckily '.' // is also a date delimiter so this works. String[] wds = word.split("[./-]+"); if (wds.length == 1) { try { // crude trick to strip ordinal indicators from numbers // by removing all non-digit characters before parsing int num = Integer.parseInt(word.replaceAll("[^\\p{Digit}]", "")); if (year == 0) { if ((num >= firstYear) && (num <= lastYear)) { year = num; continue; } } if (day == 0) { if ((num >= 1) && (num <= 31)) { day = num; continue; } } } catch (NumberFormatException ex) { // not a number } } int w[] = new int[3]; int len = Math.min(wds.length, 3); int monthIndex = -1; // parse initial fields for date elements for (int i = 0; i < len; i++) { Integer n = null; for (Map.Entry entry : months.entrySet()) { if (StringUtil.toUnaccented( entry.getKey()).replaceAll( "\\p{Punct}+$", "").equalsIgnoreCase(wds[i])) { n = entry.getValue(); break; } } if (n != null) { w[i] = 1+n; monthIndex = i; } else { try { // crude trick to strip ordinal indicators from numbers // by removing all non-digit characters before parsing w[i] = Integer.parseInt(wds[i].replaceAll("[^\\p{Digit}]", "")); } catch (NumberFormatException ex) { } } } if (w[0] > 0) { if ((w[0] >= firstYear) && (w[0] <= lastYear)) { // parse year and season, year and quarter, // year and month, or year month and day year = w[0]; if (w[1] != 0) { if (wds[1].equalsIgnoreCase("S"+w[1])) { season = w[1]; } else if (wds[1].equalsIgnoreCase("Q"+w[1])) { quarter = w[1]; } else if ((w[1] >= 1) && (w[1] <= 12)) { month = w[1]; if ((w[2] >= 1) && (w[2] <= 31)) { day = w[2]; } } } } else if (w[2] == 0) { // parse month and year, season and year, or quarter and year if ((w[1] >= firstYear) && (w[1] <= lastYear)) { year = w[1]; if (w[0] != 0) { if (wds[0].equalsIgnoreCase("S"+w[0])) { season = w[0]; } else if (wds[0].equalsIgnoreCase("Q"+w[0])) { quarter = w[0]; } else if ((w[0] >= 1) && (w[0] <= 12)) { month = w[0]; } } } } else if ((w[2] >= firstYear) && (w[2] <= lastYear)) { year = w[2]; if ((monthIndex == 0) || ("en".equalsIgnoreCase(locale.getLanguage()) && "US".equalsIgnoreCase(locale.getCountry()))) { // parse month day year in US only if ((w[0] >= 1) && (w[0] <= 12)) { month = w[0]; if ((w[1] >= 1) && (w[1] <= 31)) { day = w[1]; } } } else { // parse day month and year if ((w[1] >= 1) && (w[1] < 12)) { month = w[1]; if ((w[0] >= 1) && (w[0] <= 31)) { day = w[0]; } } } } } } // There must be at least a year. It would be good to catch // other cases as well, but this is what is curently being done. if (year == 0) { throw new ParseException(s, 0); } } /** * Construct a publication date from the specified date * @param aDate the date */ public PublicationDate(Date aDate) { this(aDate, "YMD"); } /** * Construct a publication date from the specified date * and mask. The mask length indicates how many elements * of the date to include: one element for year, two * elements for year and month, and three elements for * year month and day. * @param aDate the date * @param mask a mask string of 1-3 characters */ public PublicationDate(Date aDate, String mask) { if ((mask != null) && mask.length() >= 1) { Calendar cal = Calendar.getInstance(); cal.setTime(aDate); year = cal.get(Calendar.YEAR); if (mask.length() >= 2) { month = cal.get(Calendar.MONTH); if (mask.length() >= 3) { day = cal.get(Calendar.DAY_OF_MONTH); } } } } /** * Static for parsing a publication date string for the * default locale. * @param pubDateStr the date string * @param locale the locale * @throws ParseException if unrecognized date string */ static public PublicationDate parse(String pubDateStr) throws ParseException { return new PublicationDate(pubDateStr); } /** * Static for parsing a publication date string for the * specified locale. * @param pubDateStr the date string * @param locale the locale * @throws ParseException if unrecognized date string */ static public PublicationDate parse(String pubDateStr, Locale locale) throws ParseException { return new PublicationDate(pubDateStr, locale); } /** * Return string representation of the publication date in extended * ISO-8601 format: YYYY, YYYY-MM, YYYY-MM-DD, YYYY-QQ, YYYY-SS, where * YYYY, MM, and DD are numeric, and QQ is like "Q1" for the first quarter * and SS is like "S1" for the first season. * * @return the publication date string in extended ISO-8601 format */ public String toString() { String s = Integer.toString(year); if (quarter > 0) { s += "-Q" + quarter; } else if (season > 0) { s += "-S" + season; } else if (month > 0) { s += String.format("-%02d", month); if (day > 0) { s += String.format("-%02d", day); } } return s; } /** * Get the year for this date. * @return the year */ public int getYear() { return year; } /** * Get the 1-based month for this date. * @return the month or 0 if not specified */ public int getMonth() { return month; } /** * Get the 1-based day of month for this date. * @return the day of month or 0 if not specified */ public int getDayOfMonth() { return day; } /** * Get the 1-based quarter for this date. * @return the quarter if not specified */ public int getQuarter() { return quarter; } /** * Get the 1-based season for this date. * @return the season if not specified */ public int getSeason() { return season; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy