org.lockss.daemon.OpenUrlResolver Maven / Gradle / Ivy
Show all versions of lockss-core Show documentation
/*
Copyright (c) 2000-2018 Board of Trustees of Leland Stanford Jr. University,
all rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
STANFORD UNIVERSITY BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Except as contained in this notice, the name of Stanford University shall not
be used in advertising or otherwise to promote the sale, use or other dealings
in this Software without prior written authorization from Stanford University.
*/
package org.lockss.daemon;
import static org.lockss.metadata.SqlConstants.*;
import com.jcabi.aspects.*;
import java.io.IOException;
import java.lang.reflect.Method;
import java.net.HttpURLConnection;
import java.sql.*;
import java.text.ParseException;
import java.util.*;
import org.apache.commons.lang3.StringEscapeUtils;
import org.lockss.app.LockssDaemon;
import org.lockss.config.*;
import org.lockss.daemon.AuParamType.InvalidFormatException;
import org.lockss.db.DbException;
import org.lockss.exporter.biblio.*;
import org.lockss.metadata.MetadataDbManager;
import org.lockss.plugin.*;
import org.lockss.plugin.AuUtil.AuProxyInfo;
import org.lockss.plugin.PluginManager.CuContentReq;
import org.lockss.plugin.PrintfConverter.UrlListConverter;
import org.lockss.plugin.definable.DefinableArchivalUnit;
import org.lockss.proxy.ProxyManager;
import org.lockss.util.*;
import org.lockss.util.urlconn.*;
/**
* This class implements an OpenURL resolver that locates an article matching
* properties corresponding to OpenURL keys. Both OpenURL 1.0 and the earlier
* OpenURL 0.1 syntax are supported. Queries can be made by:
*
* - URL
* - DOI
*
* - ISSN/volume/issue/page
* - ISSN/volume/issue/article-number
* - ISSN/volume/issue/author
* - ISSN/volume/issue/article-title
* - ISSN/date/page
* - ISSN/date/article-number
* - ISSN/date/author
* - ISSN/date/article-title
*
* - journal-title/volume/issue/page
* - journal-title/volume/issue/article-number
* - journal-title/volume/issue/author
* - journal-title/volume/issue/article-title
* - journal-title/date/page
* - journal-title/date/article-number
* - journal-title/date/author
* - journal-title/date/article-title
*
* - ISBN/page
* - ISBN/chapter-author
* - ISBN/chapter-title
*
* - book-title/page
* - book-title/chapter-author
* - book-title/chapter-title
*
* - book-publisher/book-title/page
* - book-publisher/book-title/chapter-author
* - book-publisher/book-title/chapter-title
*
* - SICI
* - BICI
*
*
* Note: the TDB of the current configuration is used to resolve journal or
* if the entry is not in the metadata database, or if the query gives a
* journal or book title but no ISSN or ISBN. If there are multiple entries
* for the journal or book title, one of them is selected. OpenURL 1.0 allows
* specifying a book publisher, so if both publisher and title are specified,
* there is a good chance that the match will be unique.
*
* @author Philip Gust
* @version 1.0
*/
@Loggable(value = Loggable.TRACE, prepend = true)
public class OpenUrlResolver {
private static final Logger log = Logger.getLogger();
/** the LOCKSS daemon */
private final LockssDaemon daemon;
/** the PluginManager */
private final PluginManager pluginMgr;
/** the ProxyManager */
private final ProxyManager proxyMgr;
/** maximum redirects for looking up DOI url */
private static final int MAX_REDIRECTS = 10;
/** prefix for config properties */
public static final String PREFIX = Configuration.PREFIX + "openUrlResolver.";
/**
* Determines the maximum number of OpenUrlResolver publishers+providers
* that publish the same article when querying the metadata database.This
* number will certainly be very small (< 10)
*
*/
public static final String PARAM_MAX_PUBLISHERS_PER_ARTICLE = PREFIX
+ "max_publishers_per_article";
/**
* Default value of OpenUrlResolver max_publishers_per_article default
* configuration parameter.
*/
public static final int DEFAULT_MAX_PUBLISHERS_PER_ARTICLE = 10;
public static String PARAM_NEVER_PROXY =
org.lockss.servlet.ServeContent.PARAM_NEVER_PROXY;
public static boolean DEFAULT_NEVER_PROXY =
org.lockss.servlet. ServeContent.DEFAULT_NEVER_PROXY;
static final class FeatureEntry {
final String auFeatureKey;
final OpenUrlInfo.ResolvedTo resolvedTo;
public FeatureEntry(String auFeatureKey,
OpenUrlInfo.ResolvedTo resolvedTo) {
this.auFeatureKey = auFeatureKey;
this.resolvedTo = resolvedTo;
}
}
private static final String FEATURE_URLS =
DefinableArchivalUnit.KEY_AU_FEATURE_URL_MAP;
private static final String START_URLS =
DefinableArchivalUnit.KEY_AU_START_URL;
/**
* Keys to search for a matching journal feature. The order of the keys
* is the order they will be tried, from article, to issue, to volume,
* to title, to publisher.
*/
static final FeatureEntry[] auJournalFeatures = {
// FEATURE_URLS + "/au_abstract",
new FeatureEntry(FEATURE_URLS + "/au_article",OpenUrlInfo.ResolvedTo.ARTICLE),
new FeatureEntry(FEATURE_URLS + "/au_issue", OpenUrlInfo.ResolvedTo.ISSUE),
new FeatureEntry(FEATURE_URLS + "/au_volume", OpenUrlInfo.ResolvedTo.VOLUME),
new FeatureEntry(START_URLS, OpenUrlInfo.ResolvedTo.VOLUME),
new FeatureEntry(FEATURE_URLS + "/au_title", OpenUrlInfo.ResolvedTo.TITLE),
new FeatureEntry(FEATURE_URLS + "/au_publisher", OpenUrlInfo.ResolvedTo.PUBLISHER),
};
/**
* Keys to search for a matching book feature. The order of the keys is the
* the order they will be tried, from chapter, to volume, to title, to
* publisher.
*/
private static final FeatureEntry[] auBookAuFeatures = {
new FeatureEntry(FEATURE_URLS + "/au_chapter", OpenUrlInfo.ResolvedTo.CHAPTER),
new FeatureEntry(FEATURE_URLS + "/au_volume", OpenUrlInfo.ResolvedTo.VOLUME),
new FeatureEntry(START_URLS, OpenUrlInfo.ResolvedTo.VOLUME),
new FeatureEntry(FEATURE_URLS + "/au_title", OpenUrlInfo.ResolvedTo.TITLE),
new FeatureEntry(FEATURE_URLS + "/au_publisher", OpenUrlInfo.ResolvedTo.PUBLISHER),
};
/** The name of the TDB au_feature key selector */
static final String AU_FEATURE_KEY = "au_feature_key";
// pre-defined OpenUrlInfo for no url
public static final OpenUrlInfo OPEN_URL_INFO_NONE =
new OpenUrlInfo(null, null, OpenUrlInfo.ResolvedTo.NONE);
/**
* Information returned by OpenUrlResolver includes the resolvedUrl
* and the resolvedTo enumeration.
*/
public static final class OpenUrlInfo
implements Iterable {
static public enum ResolvedTo {
PUBLISHER, // resolved to a publisher
TITLE, // resolved to a tite of a serial (e.g. a journal or
// a book series) or other pubication
VOLUME, // resolved to a volume of a serial or other pubication,
// or the title of an individual book
CHAPTER, // resolved to a chapter of a book or other publication
ISSUE, // resolved to an issue of a serial or other publication
ARTICLE, // resolved to an article of a serial, book, or other pubication
OTHER, // resolved to an element of a publication
NONE, // not resolved if URL is null, or not in cache if has URL
};
private String resolvedUrl;
private String proxySpec;
private ResolvedTo resolvedTo;
private BibliographicItem resolvedBibliographicItem = null;
private OpenUrlInfo nextInfo = null;
private OpenUrlInfo(String resolvedUrl,
String proxySpec,
ResolvedTo resolvedTo) {
this.resolvedUrl = resolvedUrl;
this.resolvedTo = resolvedTo;
this.proxySpec = proxySpec;
}
protected static OpenUrlInfo newInstance(
String resolvedUrl, String proxySpec, ResolvedTo resolvedTo) {
return ((resolvedTo == ResolvedTo.NONE) && (resolvedUrl == null))
? OPEN_URL_INFO_NONE
: new OpenUrlInfo(resolvedUrl, proxySpec, resolvedTo);
}
protected static OpenUrlInfo newInstance(String resolvedUrl) {
return (resolvedUrl == null)
? OPEN_URL_INFO_NONE
: new OpenUrlInfo(resolvedUrl, null, OpenUrlInfo.ResolvedTo.OTHER);
}
protected static OpenUrlInfo newInstance(String resolvedUrl,
String proxySpec) {
return (resolvedUrl == null)
? OPEN_URL_INFO_NONE
: new OpenUrlInfo(resolvedUrl, proxySpec, OpenUrlInfo.ResolvedTo.OTHER);
}
public boolean isResolved() {
return resolvedTo != null && resolvedTo != ResolvedTo.NONE;
}
public boolean isNotResolved() {
return resolvedTo == null || resolvedTo == ResolvedTo.NONE;
}
public String getProxySpec() {
return proxySpec;
}
public String getProxyHost() {
if (proxySpec == null) {
return null;
}
int i = proxySpec.indexOf(':');
return (i < 0) ? proxySpec : proxySpec.substring(0,i);
}
public int getProxyPort() {
if (proxySpec == null) {
return 0;
}
int i = proxySpec.indexOf(':');
try {
return (i < 0) ? 0 : Integer.parseInt(proxySpec.substring(i+1));
} catch (NumberFormatException ex) {
return 0;
}
}
public String getResolvedUrl() {
return resolvedUrl;
}
public ResolvedTo getResolvedTo() {
return resolvedTo;
}
public BibliographicItem getBibliographicItem() {
return resolvedBibliographicItem;
}
@Override
public Iterator iterator() {
return new Iterator() {
OpenUrlInfo nextInfo = OpenUrlInfo.this;
@Override
public boolean hasNext() {
return nextInfo != null;
}
@Override
public OpenUrlInfo next() {
if (nextInfo == null) {
throw new NoSuchElementException();
}
OpenUrlInfo curInfo = nextInfo;
nextInfo = curInfo.nextInfo;
return curInfo;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
public void add(OpenUrlInfo nextInfo) {
if (nextInfo == null) {
throw new IllegalArgumentException("nextInfo cannot be null");
}
last().nextInfo = nextInfo;
}
public int size() {
int count = 1;
OpenUrlInfo info = this;
while (info.nextInfo != null) { info = info.nextInfo; count++; }
return count;
}
public OpenUrlInfo last() {
OpenUrlInfo info = this;
while (info.nextInfo != null) { info = info.nextInfo; }
return info;
}
public OpenUrlInfo next() {
return nextInfo;
}
public boolean hasNext() {
return nextInfo != null;
}
public String getOpenUrlQuery() {
// don't use publisher or title url
// because we don't preserve them
if ( resolvedUrl != null
&& resolvedTo != null
&& !resolvedTo.equals(ResolvedTo.PUBLISHER)
&& !resolvedTo.equals(ResolvedTo.TITLE)) {
return "rft_id=" + UrlUtil.encodeQueryArg(resolvedUrl);
}
if (resolvedBibliographicItem != null) {
return OpenUrlResolver
.getOpenUrlQueryForBibliographicItem(resolvedBibliographicItem);
}
return null;
}
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("[OpenUrlInfo: ");
sb.append(resolvedTo);
if (resolvedTo != ResolvedTo.NONE) {
if (resolvedUrl != null) {
sb.append(", url: ");
sb.append(resolvedUrl);
}
if (resolvedBibliographicItem != null) {
sb.append(", bib: ");
sb.append(resolvedBibliographicItem);
}
}
if (nextInfo != null) {
sb.append(", next: ");
sb.append(nextInfo);
}
sb.append("]");
return sb.toString();
}
}
/**
* Create a resolver for the specified database manager.
*
* @param daemon the LOCKSS daemon
*/
public OpenUrlResolver(LockssDaemon daemon) {
if (daemon == null) {
throw new IllegalArgumentException("LOCKSS daemon not specified");
}
this.daemon = daemon;
this.pluginMgr = daemon.getPluginManager();
this.proxyMgr = daemon.getProxyManager();
}
/**
* Get an parameter either without or with the "rft." prefix.
*
* @param params the parameters
* @param key the key
* @return the value or null
if not present
*/
private String getRftParam(Map params, String key) {
String value = params.get(key);
if (value == null) {
value = params.get("rft." + key);
}
return value;
}
/**
* Get date based on date, ssn (season), and quarter rft parameters.
*
* @param params the parameters
* @return a normalized date string of the form YYYY{-MM{-DD}}
* or YYYY-Qn for nth quarter, or YYYY-Sn for nth season for
* n between 1 and 4.
*/
private String getRftDate(Map params) {
String ssn = getRftParam(params, "ssn"); // spring, summer, fall, winter
String quarter = getRftParam(params, "quarter"); // 1, 2, 3, 4
String date = getRftParam(params, "date"); // YYYY{-MM{-DD}}
// fill in month if only year specified
if ((date != null) && (date.indexOf('-') < 0)) {
if (quarter != null) {
// fill in month based on quarter
switch (quarter) {
case "1": date += "-Q1"; break;
case "2": date += "-Q2"; break;
case "3": date += "-Q3"; break;
case "4": date += "-Q4"; break;
default: log.warning("Invalid quarter: " + quarter);
}
} else if (ssn != null) {
// fill in month based on season
switch (ssn) {
case "spring": date += "-S1"; break;
case "summer": date += "-S2"; break;
case "fall": date += "-S3"; break;
case "winter": date += "-S4"; break;
default: log.warning("Invalid ssn: " + ssn);
}
}
}
return date;
}
/**
* Returns the TdbTitle corresponding to the specified OpenUrl params.
*
* @param params the OpenURL parameters
* @return a TdbTitle or null
if not found
*/
public TdbTitle resolveTdbTitleForOpenUrl(Map params) {
Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
if (tdb != null) {
// get TdbTitle for ISBN
String isbn = getRftParam(params, "isbn");
if (isbn != null) {
Collection tdbAus = tdb.getTdbAusByIsbn(isbn);
return tdbAus.isEmpty() ? null : tdbAus.iterator().next().getTdbTitle();
}
// get TdbTitle for ISSN
String issn = getRftParam(params, "issn");
if (issn != null) {
return tdb.getTdbTitleByIssn(issn);
}
// get TdbTitle for BICI
String bici = getRftParam(params, "bici");
if (bici != null) {
int i = bici.indexOf('(');
if (i > 0) {
isbn = bici.substring(0,i);
Collection tdbAus = tdb.getTdbAusByIsbn(isbn);
return tdbAus.isEmpty() ? null : tdbAus.iterator().next().getTdbTitle();
}
}
// get TdbTitle for SICI
String sici = getRftParam(params, "sici");
if (sici != null) {
int i = sici.indexOf('(');
if (i > 0) {
issn = sici.substring(0,i);
return tdb.getTdbTitleByIssn(issn);
}
}
// get TdbTitle for journal pubisher and title
String publisher = getRftParam(params, "publisher");
String title = getRftParam(params, "jtitle");
if (title == null) {
title = getRftParam(params, "title");
}
if (title != null) {
Collection tdbTitles;
if (publisher != null) {
TdbPublisher tdbPublisher = tdb.getTdbPublisher(publisher);
tdbTitles = (tdbPublisher == null)
? Collections.emptyList()
:tdbPublisher.getTdbTitlesLikeName(title);
} else {
tdbTitles = tdb.getTdbTitlesByName(title);
}
return tdbTitles.isEmpty() ? null : tdbTitles.iterator().next();
}
// get TdbTitle for book pubisher and title
String btitle = getRftParam(params, "btitle");
if (btitle != null) {
Collection tdbAus;
if (publisher != null) {
TdbPublisher tdbPublisher = tdb.getTdbPublisher(publisher);
tdbAus = (tdbPublisher == null)
? Collections.emptyList()
:tdbPublisher.getTdbAusLikeName(title);
} else {
tdbAus = tdb.getTdbAusByName(title);
}
return tdbAus.isEmpty() ? null : tdbAus.iterator().next().getTdbTitle();
}
}
return null;
}
/**
* Resolve an OpenURL from a set of parameter keys and values.
*
* @param params the OpenURL parameters
* @return a url or null
if not found
*/
public OpenUrlInfo resolveOpenUrl(Map params) {
final String DEBUG_HEADER = "resolveOpenUrl(): ";
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "params = " + params);
OpenUrlInfo resolvedDirectly = OPEN_URL_INFO_NONE;
if (params.containsKey("rft_id")) {
String rft_id = params.get("rft_id");
// handle rft_id that is an HTTP or HTTPS URL
if (UrlUtil.isHttpOrHttpsUrl(rft_id)) {
resolvedDirectly = resolveFromUrl(rft_id);
if (resolvedDirectly.isResolved()) {
return resolvedDirectly;
}
if (log.isDebug3())
log.debug3(DEBUG_HEADER + "Failed to resolve from URL: " + rft_id);
} else if (rft_id.startsWith("info:doi/")) {
String doi = rft_id.substring("info:doi/".length());
resolvedDirectly = resolveFromDOI(doi);
if (resolvedDirectly.isResolved()) {
return resolvedDirectly;
}
if (log.isDebug3())
log.debug3(DEBUG_HEADER + "Failed to resolve from DOI: " + doi);
}
}
if (params.containsKey("id")) {
// handle OpenURL 0.1 DOI specification
String id = params.get("id");
if (id.startsWith("doi:")) {
String doi = id.substring("doi:".length());
resolvedDirectly = resolveFromDOI(doi);
if (resolvedDirectly.isResolved()) {
return resolvedDirectly;
}
log.debug3("Failed to resolve from DOI: " + doi);
}
}
if (params.containsKey("doi")) {
String doi = params.get("doi");
resolvedDirectly = resolveFromDOI(doi);
if (resolvedDirectly.isResolved()) {
return resolvedDirectly;
}
log.debug3("Failed to resolve from DOI: " + doi);
}
String pub = getRftParam(params, "pub");
String spage = getRftParam(params, "spage");
String artnum = getRftParam(params, "artnum");
String author = getRftParam(params, "au");
String atitle = getRftParam(params, "atitle");
String isbn = getRftParam(params, "isbn");
String eisbn = getRftParam(params, "eisbn");
String edition = getRftParam(params, "edition");
String date = getRftDate(params);
String volume = getRftParam(params, "volume");
Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
String anyIsbn = (eisbn != null) ? eisbn : isbn;
if (anyIsbn != null) {
OpenUrlInfo resolved = resolveFromIsbn(
anyIsbn, pub, date, volume, edition, artnum, spage, author, atitle);
if (resolved.isResolved()) {
log.debug3(
"Located url "
+ ((resolved.resolvedUrl == null) ? "" : resolved.resolvedUrl)
+ " for book ISBN " + anyIsbn);
return resolved;
}
log.debug3("Failed to resolve from ISBN: " + isbn);
}
String eissn = getRftParam(params, "eissn");
String issn = getRftParam(params, "issn");
String issue = getRftParam(params, "issue");
// process a journal based on EISSN or ISSN
String anyIssn = (eissn != null) ? eissn : issn;
if (anyIssn != null) {
// allow returning one result per publisher because
// the item may be available from multiple publishers
OpenUrlInfo resolved = resolveFromIssn(
anyIssn, pub, date, volume, issue, spage, artnum, author, atitle);
if (resolved.isResolved()) {
if (log.isDebug3()) {
String title = getRftParam(params, "jtitle");
if (title == null) {
title = getRftParam(params, "title");
}
log.debug3("Located url "
+ ((resolved.resolvedUrl == null) ? "" : resolved.resolvedUrl)
+ " for article \"" + atitle + "\""
+ ", ISSN " + anyIssn
+ ", title \"" + title + "\"");
}
return resolved;
}
log.debug3("Failed to resolve from ISSN: " + anyIssn);
}
String bici = params.get("rft.bici");
if (bici != null) {
// get cached URL from book book ICI code
OpenUrlInfo resolved = null;
try {
resolved = resolveFromBici(bici);
if (resolved.isResolved()) {
log.debug3(
"Located url "
+ ((resolved.resolvedUrl == null) ? "" : resolved.resolvedUrl)
+ "for bici " + bici);
return resolved;
}
} catch (ParseException ex) {
log.warning(ex.getMessage());
}
log.debug3("Failed to resolve from BICI: " + bici);
}
String sici = params.get("rft.sici");
// get cached URL from serial ICI code
if (sici != null) {
OpenUrlInfo resolved = null;
try {
resolved = resolveFromSici(sici);
if (resolved.isResolved()) {
log.debug3(
"Located url "
+ ((resolved.resolvedUrl == null) ? "" : resolved.resolvedUrl)
+ "for sici " + sici);
return resolved;
}
} catch (ParseException ex) {
log.warning(ex.getMessage());
}
log.debug3("Failed to resolve from SICI: " + sici);
}
// process a journal or book based on its title
String title = getRftParam(params, "title");
boolean isbook = false;
if (title == null) {
title = params.get("rft.btitle");
isbook = title != null;
}
if (title == null) {
title = params.get("rft.jtitle");
}
if (title != null) {
if (tdb == null) {
// TODO: need to search metadata database only
// for articles if no title database is specified
} else {
// only search the named publisher
TdbPublisher tdbPub = null;
if (pub != null) {
tdbPub = tdb.getTdbPublisher(pub);
// report no match if no matching publisher
if (tdbPub == null) {
return resolvedDirectly;
}
}
if (isbook) {
// search as though it is a book title
Collection tdbAus;
if (tdbPub != null) {
tdbAus = tdbPub.getTdbAusLikeName(title);
} else {
tdbAus = tdb.getTdbAusLikeName(title);
}
OpenUrlInfo resolved = null;
Collection noTdbAus = new ArrayList();
for (TdbAu tdbAu : tdbAus) {
// search for book through its ISBN to ensure that both
// metadata database and title database are consulted
String id = tdbAu.getIsbn();
if (id != null) {
// try resolving from ISBN
// note: non-standard treatment of 'artnum' as chapter identifier
String tdbPubName = tdbAu.getPublisherName();
OpenUrlInfo info = resolveFromIsbn(
id, tdbPubName, date, volume, issue,
artnum, spage, author, atitle);
if (info.isResolved()) {
if (log.isDebug3()) {
log.debug3("Located url "
+ ((info.resolvedUrl == null) ? "" : info.resolvedUrl)
+ " for article \"" + atitle + "\""
+ ", ISBN " + id
+ ", title \"" + title + "\""
+ ", publisher \""
+ tdbAu.getPublisherName() + "\"");
}
if (resolved == null) {
resolved = info;
} else {
resolved.add(info);
}
}
} else {
// add to list of titles with no ISBN
noTdbAus.add(tdbAu);
}
}
if (resolved != null) {
return resolved;
}
// search matching titles without ISBNs
resolved =resolveBookFromTdbAus(
noTdbAus, date, volume, edition, artnum, spage);
if (resolved.isResolved()) {
if (log.isDebug3()) {
log.debug3( "Located url " + resolved.resolvedUrl
+ ", title \"" + title + "\"");
}
}
return resolved;
} else {
// search as though it is a journal title
Collection tdbTitles;
if (tdbPub != null) {
// find title from specified publisher
tdbTitles = tdbPub.getTdbTitlesByName(title);
// find "like" titles if no exact matches
if (tdbTitles.isEmpty()) {
tdbTitles = tdbPub.getTdbTitlesLikeName(title);
}
} else {
// find title from any publisher
tdbTitles = tdb.getTdbTitlesByName(title);
// find "like" titles if no exact matches
if (tdbTitles.isEmpty()) {
tdbTitles = tdb.getTdbTitlesLikeName(title);
}
}
OpenUrlInfo resolved = null;
Collection noTdbTitles = new ArrayList();
for (TdbTitle tdbTitle : tdbTitles) {
// search for journal through its ISSN to ensure that both
// metadata database and title database are consulted
String id = tdbTitle.getIssn();
if (id != null) {
// try resolving from ISSN
String tdbPubName = tdbTitle.getPublisherName();
OpenUrlInfo info =
resolveFromIssn(id, tdbPubName,
date, volume, issue, spage, artnum, author, atitle);
if (info.isResolved()) {
if (log.isDebug3()) {
log.debug3("Located url "
+ ((info.resolvedUrl == null) ? "" : info.resolvedUrl)
+ " for article \"" + atitle + "\""
+ ", ISSN " + id
+ ", title \"" + title + "\""
+ ", publisher \"" + tdbPubName + "\"");
}
if (resolved == null) {
resolved = info;
} else {
resolved.add(info);
}
}
} else {
// add to list of titles with no ISBN or ISSN
noTdbTitles.add(tdbTitle);
}
}
if (resolved != null) {
return resolved;
}
// search matching titles without ISSNs
for (TdbTitle noTdbTitle : noTdbTitles) {
Collection tdbAus = noTdbTitle.getTdbAus();
OpenUrlInfo info =
resolveJournalFromTdbAus(tdbAus,date,volume,issue, spage, artnum);
if (info.isResolved()) {
if (log.isDebug3()) {
log.debug3( "Located url "
+ ((resolved.resolvedUrl == null) ? "" : resolved.resolvedUrl)
+ ", title \"" + title + "\"");
}
if (resolved == null) {
resolved = info;
} else {
resolved.add(info);
}
}
}
if (resolved != null) {
return resolved;
}
}
}
OpenUrlInfo resolved =
OpenUrlInfo.newInstance(null, null,
isbook ? OpenUrlInfo.ResolvedTo.VOLUME
: OpenUrlInfo.ResolvedTo.TITLE);
// create bibliographic item with only title properties
resolved.resolvedBibliographicItem =
new BibliographicItemImpl()
.setPublisherName(pub)
.setPublicationTitle(title);
return resolved;
} else if (pub != null) {
OpenUrlInfo resolved = OpenUrlInfo.newInstance(
null, null, OpenUrlInfo.ResolvedTo.PUBLISHER);
// create bibliographic item with only publisher properties
resolved.resolvedBibliographicItem =
new BibliographicItemImpl()
.setPublisherName(pub);
return resolved;
}
return resolvedDirectly;
}
/**
* Resolve serials article based on the SICI descriptor. For an article
* "Who are These Independent Information Brokers?", Bulletin of the
* American Society for Information Science, Feb-Mar 1995, Vol. 21, no 3,
* page 12, the SICI would be: 0095-4403(199502/03)21:3<12:WATIIB>2.0.TX;2-J
*
* @param sici a string representing the serials article SICI
* @return the article url or null
if not resolved
* @throws ParseException if error parsing SICI
*/
public OpenUrlInfo resolveFromSici(String sici) throws ParseException {
int i = sici.indexOf('(');
if (i < 0) {
// did not find end of date section
throw new ParseException("Missing start of date section", 0);
}
// validate ISSN after normalizing to remove punctuation
String issn = sici.substring(0,i).replaceFirst("-", "");
if (!MetadataUtil.isIssn(issn)) {
// ISSN is 8 characters
throw new ParseException("Malformed ISSN", 0);
}
// skip over date section (199502/03)
int j = sici.indexOf(')',i+1);
if (j < 0) {
// did not find end of date section
throw new ParseException("Missing end of date section", i+1);
}
// get volume and issue between end of
// date section and start of article section
i = j+1; // advance to start of volume
j = sici.indexOf('<',i);
if (j < 0) {
// did not find start of issue section
throw new ParseException("Missing start of issue section", i);
}
// get volume delimiter
int k = sici.indexOf(':', i);
if ((k < 0) || (k >= j)) {
// no volume delimiter before start of issue section
throw new ParseException("Missing volume delimiter", i);
}
String volume = sici.substring(i,k);
String issue = sici.substring(k+1,j);
// get end of issue section
i = j+1;
k = sici.indexOf('>', i+1);
if (k < 0) {
// did not find end of issue section
throw new ParseException("Missing end of issue section", i+1);
}
j = sici.indexOf(':',i+1);
if ((j < 0) || (j >= k)) {
throw new ParseException("Missing page delimiter", i+1);
}
String spage = sici.substring(i,j);
// get the cached URL from the parsed paramaters
// note: no publisher with sici
OpenUrlInfo resolved =
resolveFromIssn(issn, null, null, volume, issue, spage, null, null, null);
if ((resolved.isResolved()) && log.isDebug()) {
// report on the found article
Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
String jTitle = null;
if (tdb != null) {
TdbTitle title = tdb.getTdbTitleByIssn(issn);
if (title != null) {
jTitle = title.getName();
}
}
if (log.isDebug3()) {
String s = "Located cachedURL "
+ ((resolved.resolvedUrl == null) ? "" : resolved.resolvedUrl)
+ " for ISSN " + issn
+ ", volume: " + volume
+ ", issue: " + issue
+ ", start page: " + spage;
if (jTitle != null) {
s += ", journal title \"" + jTitle + "\"";
}
log.debug3(s);
}
}
return OPEN_URL_INFO_NONE;
}
/**
* Resolve a book chapter based on the BICI descriptor. For an item "English
* as a World Language", Chapter 10, in "The English Language: A Historical
* Introduction", 1993, pp. 234-261, ISBN 0-521-41620-5, the BICI would be
* 0521416205(1993)(10;EAAWL;234-261)2.2.TX;1-1
*
* @param bici a string representing the book chapter BICI
* @return the article url or null
if not resolved
* @throws ParseException if error parsing BICI
*/
public OpenUrlInfo resolveFromBici(String bici) throws ParseException {
int i = bici.indexOf('(');
if (i < 0) {
// did not find end of date section
throw new ParseException("Missing start of date section", 0);
}
String isbn = bici.substring(0,i).replaceAll("-", "");
// match ISBN-10 or ISBN-13 with 0-9 or X checksum character
if (!MetadataUtil.isIsbn(isbn, false)) {
// ISSB is 10 or 13 characters
throw new ParseException("Malformed ISBN", 0);
}
// skip over date section (1993)
int j = bici.indexOf(')',i+1);
if (j < 0) {
// did not find end of date section
throw new ParseException("Missing end of date section", i+5);
}
String date = bici.substring(i+1, j);
// get volume and issue between end of
// date section and start of article section
if (bici.charAt(j+1) != '(') {
// did not find start of chapter section
throw new ParseException("Missing start of chapter section", j+1);
}
i = j+2; // advance to start of chapter
j = bici.indexOf(')',i);
if (j < 0) {
// did not find end of chapter section
throw new ParseException("Missing end of chapter section", i);
}
// get chapter number delimiter
int k = bici.indexOf(';', i);
if ((k < 0) || (k >= j)) {
// no chapter number delimiter before end of chapter section
throw new ParseException("Missing chapter number delimiter", i);
}
String chapter = bici.substring(i,k);
// get end of chapter section
i = k+1;
k = bici.indexOf(';', i+1);
if ((k < 0) || (k >= j)) {
// no chapter abbreviation delimiter before end of chapter section
throw new ParseException("Missing chapter abbreviation delimiter", i);
}
// extract the start page
String spage = bici.substring(k+1,j);
if (spage.indexOf('-') > 0) {
spage = spage.substring(0, spage.indexOf('-'));
}
// (isbn, date, volume, edition, chapter, spage, author, title)
// note: no publisher specified with bici
OpenUrlInfo resolved =
resolveFromIsbn(isbn, null, date, null, null, chapter, spage, null, null);
if ((resolved.isResolved()) && log.isDebug()) {
Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
String bTitle = null;
if (tdb != null) {
Collection tdbAus = tdb.getTdbAusByIsbn(isbn);
if (!tdbAus.isEmpty()) {
bTitle = tdbAus.iterator().next().getPublicationTitle();
}
}
if (log.isDebug3()) {
String s = "Located cachedURL "
+ ((resolved.resolvedUrl == null) ? "" : resolved.resolvedUrl)
+ " for ISBN " + isbn
+ ", year: " + date
+ ", chapter: " + chapter
+ ", start page: " + spage;
if (bTitle != null) {
s += ", book title \"" + bTitle + "\"";
}
log.debug3(s);
}
}
return OPEN_URL_INFO_NONE;
}
/**
* Resolves from a url.
*
* @param aUrl the URL
* @return a resolved URL
*/
public OpenUrlInfo resolveFromUrl(String aUrl) {
return resolveFromUrl(aUrl, null); // no proxy specified
}
/**
* Resolves from a url. If URL is not in cache, returned OpenUrlInfo
* resolvedTo indicator is ResolvedTo.NONE.
*
* @param aUrl the URL
* @param proxySpec a proxy string of the form "host:port"
* @return OpenURLInfo with resolved URL
*/
public OpenUrlInfo resolveFromUrl(String aUrl, String proxySpec) {
String url = resolveUrl(aUrl, proxySpec);
if (url != null) {
CachedUrl cu = pluginMgr.findCachedUrl(url, CuContentReq.PreferContent);
if (cu != null) {
return OpenUrlInfo.newInstance(url, proxySpec);
}
}
return OPEN_URL_INFO_NONE;
}
/**
* Validates a URL and resolve it by following indirects, and stopping
* early if a URL that is in the LOCKSS cache is found.
*
* @param aUrl the URL
* @param auProxySpec an AU proxy spec of the form "host:port"
* @return a resolved URL
*/
String resolveUrl(String aUrl, String auProxySpec) { // protected for testing
final String DEBUG_HEADER = "resolveUrl(): ";
if (log.isDebug2()) {
log.debug2(DEBUG_HEADER + "aUrl = " + aUrl);
log.debug2(DEBUG_HEADER + "auProxySpec = " + auProxySpec);
}
if (isNeverProxy()) {
if (pluginMgr.findCachedUrl(aUrl) != null) {
return aUrl;
} else {
return null;
}
}
String url = aUrl;
try {
final LockssUrlConnectionPool connectionPool =
proxyMgr.getQuickConnectionPool();
// get proxy host and port for the proxy spec or the current config
AuProxyInfo proxyInfo = AuUtil.getAuProxyInfo(auProxySpec);
String proxyHost = proxyInfo.getHost();
int proxyPort = proxyInfo.getPort();
for (int i = 0; i < MAX_REDIRECTS; i++) {
if (log.isDebug3()) log.debug3(DEBUG_HEADER + " i = " + i);
// no need to look further if content already cached
if (pluginMgr.findCachedUrl(url) != null) {
if (log.isDebug2()) log.debug2(DEBUG_HEADER + " url = '" + url + "'");
return url;
}
LockssUrlConnection conn = null;
try {
// test URL by opening connection
conn = UrlUtil.openConnection(url, connectionPool);
if (log.isDebug3()) log.debug3(DEBUG_HEADER + " conn = " + conn);
conn.setFollowRedirects(false);
conn.setRequestProperty("user-agent", LockssDaemon.getUserAgent());
if (!StringUtil.isNullString(proxyHost) && (proxyPort > 0)) {
try {
conn.setProxy(proxyHost, proxyPort);
} catch (UnsupportedOperationException ex) {
log.warning("Unsupported connection request proxy: "
+ proxyHost + ":" + proxyPort);
}
}
conn.execute();
// if not redirected, validate based on response code
String url2 = conn.getResponseHeaderValue("Location");
if (log.isDebug3())
log.debug3(DEBUG_HEADER + " url2 = '" + url2 + "'");
if (url2 == null) {
int response = conn.getResponseCode();
if (log.isDebug3())
log.debug3(DEBUG_HEADER + " response code: " + response);
if (response == HttpURLConnection.HTTP_OK) {
if (log.isDebug2())
log.debug2(DEBUG_HEADER + " url = '" + url + "'");
return url;
}
return null;
}
// resolve redirected URL and try again
url = UrlUtil.resolveUri(url, url2);
log.debug3(i + " resolved to: " + url);
} finally {
IOUtil.safeRelease(conn);
}
}
} catch (IOException ex) {
log.error("resolving from URL:" + aUrl + " with URL: " + url, ex);
}
return null;
}
/**
* Return the article URL from a DOI, using either the MDB or TDB.
* @param doi the DOI
* @return the article url
*/
public OpenUrlInfo resolveFromDOI(String doi) {
if (!MetadataUtil.isDoi(doi)) {
return OPEN_URL_INFO_NONE;
}
OpenUrlInfo resolved = OPEN_URL_INFO_NONE;
try {
// resolve from database manager
MetadataDbManager dbMgr = daemon.getMetadataDbManager();
resolved = resolveFromDoi(dbMgr, doi);
} catch (IllegalArgumentException ex) {
}
if (resolved.isNotResolved()) {
// use DOI International resolver for DOI
resolved = resolveFromUrl("http://dx.doi.org/" + doi);
}
return resolved;
}
/**
* Return the OpenUrl query string for the specified auid.
*
* @param auid the auid
* @return the OpenUrl query string, or null if not available
*/
public String getOpenUrlQueryForAuid(String auid) {
TdbAu tdbau = TdbUtil.getTdbAu(auid);
if (tdbau != null) {
return getOpenUrlQueryForBibliographicItem(tdbau);
}
// Try returning an OpenURL with the starting URL
// corresponding to the AU with a SpiderCrawlSpec;
// by convention, the first URL is the manifest page
// (not for OAICrawlSpec or other types of CrawlSpec)
ArchivalUnit au = pluginMgr.getAuFromId(auid);
if (au != null) {
Collection urls = au.getAccessUrls();
if (urls.size() > 0) {
return "rft_id=" + urls.iterator().next();
}
}
return null;
}
/**
* Return the OpenUrl query string for the specified bibliographic item.
*
* @param bibitem the BibliographicItem
* @return the OpenUrl query string, or null if not available
*/
@Loggable(value = Loggable.TRACE, prepend = true)
static public String getOpenUrlQueryForBibliographicItem(
BibliographicItem bibitem) {
StringBuffer sb = new StringBuffer();
String isbn = bibitem.getIsbn();
if (!StringUtil.isNullString(isbn)) {
sb.append("&isbn=");
sb.append(UrlUtil.encodeQueryArg(MetadataUtil.formatIsbn(isbn)));
}
String issn = bibitem.getIssn();
if (!StringUtil.isNullString(issn)) {
sb.append("&issn=");
sb.append(UrlUtil.encodeQueryArg(MetadataUtil.formatIssn(issn)));
}
String publisher = bibitem.getPublisherName();
if (!StringUtil.isNullString(publisher)) {
sb.append("&publisher=");
sb.append(UrlUtil.encodeQueryArg(publisher));
}
String title = bibitem.getPublicationTitle();
if (!StringUtil.isNullString(title)) {
String pubType = bibitem.getPublicationType();
if ( !StringUtil.isNullString(pubType)
&& pubType.startsWith("book")) {
sb.append("&btitle");
} else {
sb.append("&jtitle=");
}
sb.append(UrlUtil.encodeQueryArg(title));
}
String year = bibitem.getStartYear();
if ( !StringUtil.isNullString(year)
&& year.equals(bibitem.getYear())) {
sb.append("&year=");
sb.append(UrlUtil.encodeQueryArg(year));
}
String volume = bibitem.getStartVolume();
if ( !StringUtil.isNullString(volume)
&& volume.equals(bibitem.getVolume())) {
sb.append("&volume=");
sb.append(UrlUtil.encodeQueryArg(volume));
}
String issue = bibitem.getStartIssue();
if ( !StringUtil.isNullString(issue)
&& volume.equals(bibitem.getIssue())) {
sb.append("&issue=");
sb.append(UrlUtil.encodeQueryArg(issue));
}
return (sb.length() == 0) ? null : sb.substring(1);
}
/**
* Return the article URL from a DOI using the MDB.
* @param dbMgr the database manager
* @param doi the DOI
* @return the OpenUrlInfo
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private OpenUrlInfo resolveFromDoi(MetadataDbManager dbMgr, String doi) {
final String DEBUG_HEADER = "resolveFromDoi(): ";
if (log.isDebug2()) log.debug2(DEBUG_HEADER + "doi = " + doi);
Connection conn = null;
try {
conn = dbMgr.getConnection();
// Find in the database the Archival Unit identifier and URL linked to the
// passed DOI.
Map result = dbMgr.getAuUrlForDoi(conn, doi);
// Check whether the links were found.
if (result != null && !result.isEmpty()) {
// Yes: Get the Archival Unit identifier.
String auid = result.get("auid");
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "auid = " + auid);
// Make sure that the Archival Unit has been created.
ArchivalUnit au = pluginMgr.getAuFromId(auid);
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "au = " + au);
// Get the URL linked to the DOI.
String url = result.get("url");
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "url = " + url);
OpenUrlInfo resolved = resolveFromUrl(url);
return resolved;
}
} catch (DbException dbe) {
log.error("Getting DOI:" + doi, dbe);
} finally {
MetadataDbManager.safeRollbackAndClose(conn);
}
return OPEN_URL_INFO_NONE;
}
/**
* Return article URL from an ISSN, date, volume, issue, spage, and author.
* The first author will only be used when the starting page is not given.
*
* @param issn the issn
* @param pub the publisher
* @param date the publication date
* @param volume the volume
* @param issue the issue
* @param spage the starting page
* @param artnum the article number
* @param author the first author's full name
* @param atitle the article title
* @return the article URL
*/
@Loggable(value = Loggable.TRACE, prepend = true, trim = false)
public OpenUrlInfo resolveFromIssn(
String issn, String pub, String date, String volume, String issue,
String spage, String artnum, String author, String atitle) {
OpenUrlInfo resolved = null;
// try resolving from the metadata database first
try {
MetadataDbManager dbMgr = daemon.getMetadataDbManager();
OpenUrlInfo aResolved = resolveFromIssn(dbMgr, issn, pub, date,
volume, issue, spage, artnum, author, atitle);
if (aResolved.isResolved()) {
return aResolved;
}
} catch (IllegalArgumentException ex) {
}
// get list of TdbTitles for issn
Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
Collection titles;
if (tdb == null) {
titles = Collections.emptyList();
} else if (pub != null) {
TdbPublisher tdbPub = tdb.getTdbPublisher(pub);
titles = (tdbPub == null)
? Collections.emptyList() : tdbPub.getTdbTitlesByIssn(issn);
} else {
titles = tdb.getTdbTitlesByIssn(issn);
}
// try resolving from the title database
for (TdbTitle title : titles) {
OpenUrlInfo aResolved = null;
// resolve title, volume, AU, or issue TOC from TDB
Collection tdbAus = title.getTdbAus();
aResolved = resolveJournalFromTdbAus(tdbAus, date, volume, issue, spage, artnum);
if (aResolved.isNotResolved()) {
aResolved = OpenUrlInfo.newInstance(
null,null, OpenUrlInfo.ResolvedTo.TITLE);
// create bibliographic item with only title properties
aResolved.resolvedBibliographicItem =
new BibliographicItemImpl()
.setPublisherName(title.getPublisherName())
.setPublicationTitle(title.getName())
.setProprietaryIds(title.getProprietaryIds())
.setCoverageDepth(title.getCoverageDepth())
.setPrintIssn(title.getPrintIssn())
.setEissn(title.getEissn())
.setIssnL(title.getIssnL());
if (resolved == null) {
resolved = aResolved;
} else {
resolved.add(aResolved);
}
} else {
if (resolved != null) {
// add ahead of any fall-back OpenUrlInfo records
aResolved.add(resolved);
}
resolved = aResolved;
}
}
return (resolved == null) ? OPEN_URL_INFO_NONE : resolved;
}
/**
* Return article URL from an ISSN, date, volume, issue, spage, and author.
* The first author will only be used when the starting page is not given.
*
* @param dbMgr the database manager
* @param issns a list of alternate ISSNs for the title
* @param pub the publisher
* @param date the publication date
* @param volume the volume
* @param issue the issue
* @param spage the starting page
* @param artnum the article number
* @param author the first author's full name
* @param atitle the article title
* @return the article URL
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private OpenUrlInfo resolveFromIssn(
MetadataDbManager dbMgr,
String issn, String pub, String date, String volume, String issue,
String spage, String artnum, String author, String atitle) {
final String DEBUG_HEADER = "resolveFromIssn(): ";
if (log.isDebug2()) {
log.debug2(DEBUG_HEADER + "issn = " + issn);
log.debug2(DEBUG_HEADER + "pub = " + pub);
log.debug2(DEBUG_HEADER + "date = " + date);
log.debug2(DEBUG_HEADER + "volume = " + volume);
log.debug2(DEBUG_HEADER + "issue = " + issue);
log.debug2(DEBUG_HEADER + "spage = " + spage);
log.debug2(DEBUG_HEADER + "artnum = " + artnum);
log.debug2(DEBUG_HEADER + "author = " + author);
log.debug2(DEBUG_HEADER + "atitle = " + atitle);
}
// true if properties specified a journal item
boolean hasJournalSpec =
(date != null) || (volume != null) || (issue != null);
if (log.isDebug3())
log.debug3(DEBUG_HEADER + "hasJournalSpec = " + hasJournalSpec);
// true if properties specify an article
boolean hasArticleSpec = (spage != null) || (artnum != null)
|| (author != null) || (atitle != null);
if (log.isDebug3())
log.debug3(DEBUG_HEADER + "hasArticleSpec = " + hasArticleSpec);
Connection conn = null;
OpenUrlInfo resolved = null;
try {
conn = dbMgr.getConnection();
StringBuilder select = new StringBuilder("select distinct ");
StringBuilder from = new StringBuilder(" from ");
StringBuilder where = new StringBuilder(" where ");
ArrayList args = new ArrayList();
// return all related values for debugging purposes
select.append("u." + URL_COLUMN);
select.append(",pb." + PUBLISHER_NAME_COLUMN);
select.append(",n1." + NAME_COLUMN + " as publication_name");
select.append(",i." + ISSN_COLUMN);
select.append(",bi." + VOLUME_COLUMN);
select.append(",bi." + ISSUE_COLUMN);
select.append(",bi." + START_PAGE_COLUMN);
select.append(",bi." + END_PAGE_COLUMN);
select.append(",bi." + ITEM_NO_COLUMN);
select.append(",n2." + NAME_COLUMN + " as article_name");
select.append(",pv2." + PROVIDER_NAME_COLUMN);
from.append(MD_ITEM_TABLE + " mi1"); // publication md_item
from.append("," + MD_ITEM_TABLE + " mi2"); // article md_item
from.append("," + ISSN_TABLE + " i");
from.append("," + PUBLICATION_TABLE + " pu");
from.append("," + PUBLISHER_TABLE + " pb");
from.append("," + MD_ITEM_NAME_TABLE + " n1"); // publication name
from.append("," + MD_ITEM_NAME_TABLE + " n2"); // article name
from.append("," + URL_TABLE + " u");
from.append("," + BIB_ITEM_TABLE + " bi");
from.append("," + PROVIDER_TABLE + " pv2");
from.append("," + AU_MD_TABLE + " am2");
where.append("mi2." + PARENT_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and i." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and i." + ISSN_COLUMN + " = ?");
args.add(MetadataUtil.toUnpunctuatedIssn(issn)); // strip punctuation
where.append(" and pu." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and pb." + PUBLISHER_SEQ_COLUMN + "=");
where.append("pu." + PUBLISHER_SEQ_COLUMN);
if (pub != null) {
// match publisher if specified
where.append(" and pb." + PUBLISHER_NAME_COLUMN + "= ?");
args.add(pub);
}
where.append(" and n1." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and n1." + NAME_TYPE_COLUMN + "='primary'");
where.append(" and u." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi2." + MD_ITEM_SEQ_COLUMN);
where.append(" and u." + FEATURE_COLUMN + "='Access'");
where.append(" and bi." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi2." + MD_ITEM_SEQ_COLUMN);
where.append(" and n2." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi2." + MD_ITEM_SEQ_COLUMN);
where.append(" and n2." + NAME_TYPE_COLUMN + "='primary'");
where.append(" and mi2." + AU_MD_SEQ_COLUMN + "=");
where.append("am2." + AU_MD_SEQ_COLUMN);
where.append(" and am2." + PROVIDER_SEQ_COLUMN + "=");
where.append("pv2." + PROVIDER_SEQ_COLUMN);
if (hasJournalSpec) {
// can specify an issue by a combination of date, volume and issue;
// how these combine varies, so do the most liberal match possible
// and filter based on multiple results
if (date != null) {
// enables query "2009" to match "2009-05-10" in database
where.append(" and mi2." + DATE_COLUMN);
where.append(" like ? escape '\\'");
args.add(date.replace("\\","\\\\").replace("%","\\%") + "%");
}
if (volume != null) {
where.append(" and bi." + VOLUME_COLUMN + " = ?");
args.add(volume);
}
if (issue != null) {
where.append(" and bi." + ISSUE_COLUMN + " = ?");
args.add(issue);
}
}
// handle start page, author, and article title as
// equivalent ways to specify an article within an issue
if (hasArticleSpec) {
// accept any of the three
where.append(" and ( ");
if (spage != null) {
where.append("bi." + START_PAGE_COLUMN + " = ?");
args.add(spage);
}
if (artnum != null) {
if (spage != null) {
where.append(" or ");
}
where.append("bi." + ITEM_NO_COLUMN + " = ?");
args.add(artnum);
}
if (atitle != null) {
if ((spage != null) || (artnum != null)) {
where.append(" or ");
}
where.append("upper(n2." + NAME_COLUMN);
where.append(") like ? escape '\\'");
args.add(atitle.toUpperCase().replace("%","\\%") + "%");
}
if (author != null) {
if ((spage != null) || (artnum != null) || (atitle != null)) {
where.append(" or ");
}
from.append("," + AUTHOR_TABLE + " au");
// add the author query to the query
addAuthorQuery(author, where, args);
}
where.append(")");
}
String qstr = select.toString() + from.toString() + where.toString();
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "qstr = " + qstr);
// only one value expected; any more and the query was under-specified
int maxPublishersPerArticle = getMaxPublishersPerArticle();
if (log.isDebug3()) log.debug3(DEBUG_HEADER
+ "maxPublishersPerArticle = " + maxPublishersPerArticle);
String[][] results = new String[maxPublishersPerArticle+1][11];
int count = resolveFromQuery(conn, qstr, args, results);
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "count = " + count);
if (count <= maxPublishersPerArticle) {
// ensure at most one result per publisher+provider in case
// more than one publisher+provider publishes the same serial
Set pubs = new HashSet();
for (int i = 0; i < count; i++) {
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "i = " + i);
// combine publisher and provider columns to determine uniqueness
String unique = results[i][1] + results[i][10];
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "unique = " + unique);
if (!pubs.add(unique)) {
if (log.isDebug3())
log.debug3(DEBUG_HEADER + "'" + unique + "' is not unique");
resolved = OPEN_URL_INFO_NONE;
if (log.isDebug2())
log.debug2(DEBUG_HEADER + "resolved = " + resolved);
return resolved;
}
OpenUrlInfo info = OpenUrlInfo.newInstance(results[i][0], null,
OpenUrlInfo.ResolvedTo.ARTICLE);
if (resolved == null) {
resolved = info;
} else {
resolved.add(info);
}
}
}
} catch (DbException dbe) {
log.error("Exception caught getting ISSN:" + issn, dbe);
} finally {
MetadataDbManager.safeRollbackAndClose(conn);
}
if (log.isDebug3()) log.debug3(DEBUG_HEADER + "resolved = " + resolved);
return (resolved == null) ? OPEN_URL_INFO_NONE : resolved;
}
/**
* Returns the maximumn number of publishers per article to allow when
* querying the metadata database.
*
* @return the maximum number of publishers per article to allow
*/
static private int getMaxPublishersPerArticle() {
int maxpubs = ConfigManager.getCurrentConfig()
.getInt(PARAM_MAX_PUBLISHERS_PER_ARTICLE,
DEFAULT_MAX_PUBLISHERS_PER_ARTICLE);
return (maxpubs <= 0) ? DEFAULT_MAX_PUBLISHERS_PER_ARTICLE : maxpubs;
}
// Doesn't account for noproxy query param in ServeContent request
static private boolean isNeverProxy() {
return ConfigManager.getCurrentConfig().getBoolean(PARAM_NEVER_PROXY,
DEFAULT_NEVER_PROXY);
}
/**
* Resolve query if a single URL matches.
*
* @param conn the connection
* @param query the query
* @param args the args
* @param results the results
* @return the number of results returned
* @throws DbException
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private int resolveFromQuery(Connection conn, String query,
List args, String[][] results) throws DbException {
final String DEBUG_HEADER = "resolveFromQuery(): ";
log.debug3(DEBUG_HEADER + "query: " + query);
MetadataDbManager dbMgr = daemon.getMetadataDbManager();
PreparedStatement stmt = dbMgr.prepareStatement(conn, query);
int count = 0;
try {
for (int i = 0; i < args.size(); i++) {
log.debug3(DEBUG_HEADER + " query arg: " + args.get(i));
stmt.setString(i + 1, args.get(i));
}
stmt.setMaxRows(results.length); // only need 2 to to determine if unique
ResultSet resultSet = dbMgr.executeQuery(stmt);
for ( ; count < results.length && resultSet.next(); count++) {
for (int i = 0; i < results[count].length; i++) {
results[count][i] = resultSet.getString(i+1);
}
}
} catch (SQLException sqle) {
throw new DbException("Cannot resolve from query", sqle);
}
return count;
}
/**
* Return article URL from a TdbTitle, date, volume, and issue.
*
* @param tdbAus a collection of TdbAus that match an ISSNs
* @param date the publication date
* @param volume the volume
* @param issue the issue
* @param spage the start page
* @param artnum the article number
* @return the article URL
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private OpenUrlInfo resolveJournalFromTdbAus(
Collection tdbAus,
String date, String volume, String issue, String spage, String artnum) {
// get the year from the date
String year = null;
if (date != null) {
try {
year = Integer.toString(PublicationDate.parse(date).getYear());
} catch (ParseException ex) {}
}
log.debug3("resolveJournalFromTdbAus: year=" + year + ": " + tdbAus);
// list of AUs that match volume and year specified
ArrayList foundTdbAuList = new ArrayList();
// list of AUs that do not match volume and year specified
ArrayList notFoundTdbAuList = new ArrayList();
// find a TdbAu that matches the date, and volume
for (TdbAu tdbAu : tdbAus) {
// if neither year or volume specified, pick any TdbAu
if ((volume == null) && (year == null)) {
notFoundTdbAuList.add(tdbAu);
continue;
}
// if volume specified, see if this TdbAu matches
if (volume != null) {
if (!tdbAu.includesVolume(volume)) {
notFoundTdbAuList.add(tdbAu);
continue;
}
}
// if year specified, see if this TdbAu matches
if (year != null) {
if (!tdbAu.includesYear(year)) {
notFoundTdbAuList.add(tdbAu);
continue;
}
}
foundTdbAuList.add(tdbAu);
}
log.debug3("foundTdbAuList: " + foundTdbAuList);
// look for URL that is cached from list of matching AUs
for (TdbAu tdbau : foundTdbAuList) {
String aYear = year;
if (aYear == null) {
aYear = tdbau.getStartYear();
}
String aVolume = volume;
if (aVolume == null) {
aVolume = tdbau.getStartVolume();
}
String anIssue = issue;
if (anIssue == null) {
anIssue = tdbau.getStartIssue();
}
OpenUrlInfo aResolved =
getJournalUrl(tdbau, aYear, aVolume, anIssue, spage, artnum);
if (aResolved.resolvedUrl != null) {
if ( pluginMgr.findCachedUrl(aResolved.resolvedUrl) != null) {
// found the URL if in cache
return aResolved;
}
// even though getJournalUrl() checks that page exists,
// we can't rely on resolved URL being usable if the TdbAu is down
if (!tdbau.isDown()) {
return aResolved;
}
log.debug2( "discarding URL " + aResolved.resolvedUrl
+ " because tdbau is down: " + tdbau.getName());
}
}
// use tdbau that is not down from notFoundTdbAuList to find the
// title or publisher URL, since that is all we can return at this point
for (TdbAu tdbau : notFoundTdbAuList) {
if (!tdbau.isDown()) {
OpenUrlInfo aResolved = getJournalUrl(tdbau, null, null, null, null, null);
return aResolved;
}
log.debug2("discarding URL because tdbau is down: " + tdbau.getName());
}
// pick any AU to use for resolving the title as a last resort
if (!notFoundTdbAuList.isEmpty()) {
OpenUrlInfo aResolved =
getJournalUrl(notFoundTdbAuList.get(0), null, null, null, null, null);
aResolved.resolvedUrl = null;
return aResolved;
}
return OPEN_URL_INFO_NONE;
}
/**
* Return the type entry parameter map for the specified Plugin and TdbAu.
* @param plugin the plugin
* @param tdbau the AU
* @return the parameter map
*/
private static TypedEntryMap getParamMap(Plugin plugin, TdbAu tdbau) {
TypedEntryMap paramMap = new TypedEntryMap();
List descrs = plugin.getAuConfigDescrs();
for (ConfigParamDescr descr : descrs) {
String key = descr.getKey();
String sval = tdbau.getParam(key);
if (sval == null) {
sval = tdbau.getPropertyByName(key);
if (sval == null) {
sval = tdbau.getAttr(key);
}
}
if (sval != null) {
try {
Object val = descr.getValueOfType(sval);
paramMap.setMapElement(key, val);
} catch (InvalidFormatException ex) {
log.warning("invalid value for key: " + key + " value: " + sval, ex);
}
}
}
// add entries for attributes that do not correspond to AU params
for (Map.Entry entry : tdbau.getAttrs().entrySet()) {
if (!paramMap.containsKey(entry.getKey())) {
paramMap.setMapElement(entry.getKey(), entry.getValue());
}
}
return paramMap;
}
/**
* Return the type entry parameter map for the specified AU.
* @param au the AU
* @return the parameter map
*/
/* for later use (pjg)
private static TypedEntryMap getParamMap(ArchivalUnit au) {
TypedEntryMap paramMap = new TypedEntryMap();
Configuration config = au.getConfiguration();
Plugin plugin = au.getPlugin();
for (ConfigParamDescr descr : plugin.getAuConfigDescrs()) {
String key = descr.getKey();
if (config.containsKey(key)) {
try {
Object val = descr.getValueOfType(config.get(key));
paramMap.setMapElement(key, val);
} catch (Exception ex) {
log.error("Error configuring: " + key + " " + ex.getMessage());
}
}
}
return paramMap;
}
*/
/**
* Gets the book URL for an AU indicated by the DefinablePlugin
* and parameter definitions specified by the TdbAu.
*
* @param plugin the DefinablePlugin
* @param tdbau the TdbAu
* @param year the year
* @param volumeName the volume name
* @param issue the issue
* @return the issue URL
*/
/* for later use (pjg)
private static String getBooklUrl(
ArchivalUnit au, String volumeName, String year, String edition) {
TypedEntryMap paramMap = getParamMap(au);
Plugin plugin = au.getPlugin();
String url = getBookUrl(plugin, paramMap, volumeName, year, edition);
return url;
}
*/
/**
* Gets the book URL for a TdbAU indicated by the DefinablePlugin
* and parameter definitions specified by the TdbAu.
* @param tdbau the TdbAu
* @param year the year
* @param volumeName the volume name
* @param edition the edition
* @param chapter the chapter
* @param spage the start page
* @return the starting URL
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private OpenUrlInfo getBookUrl(
TdbAu tdbau, String year, String volumeName,
String edition, String chapter, String spage) {
String pluginKey = PluginManager.pluginKeyFromId(tdbau.getPluginId());
Plugin plugin = pluginMgr.getPlugin(pluginKey);
OpenUrlInfo resolved = null;
if (plugin != null) {
log.debug3( "getting book feature url for plugin: "
+ plugin.getClass().getName());
// get starting URL from a DefinablePlugin
TypedEntryMap paramMap = getParamMap(plugin, tdbau);
// add volume with type and spelling of existing element
paramMap.setMapElement("volume", volumeName);
paramMap.setMapElement("volume_str",volumeName);
paramMap.setMapElement("volume_name", volumeName);
paramMap.setMapElement("year", year);
if (!StringUtil.isNullString(year)) {
try {
paramMap.setMapElement("au_short_year",
String.format("%02d", NumberUtil.parseInt(year)%100));
} catch (NumberFormatException ex) {
log.info( "Error parsing year '" + year
+ "' as an int -- not setting au_short_year");
}
}
paramMap.setMapElement("edition", edition);
paramMap.setMapElement("chapter", chapter);
paramMap.setMapElement("page", spage);
// auFeatureKey selects feature from a map of values
// for the same feature (e.g. au_feature_urls/au_year)
paramMap.setMapElement("auFeatureKey", tdbau.getAttr(AU_FEATURE_KEY));
String isbn = tdbau.getAttr("isbn");
if (isbn != null) {
paramMap.setMapElement("isbn", isbn);
}
String eisbn = tdbau.getAttr("eisbn");
if (eisbn != null) {
paramMap.setMapElement("eisbn", eisbn);
}
resolved = getBookUrl(tdbau, plugin, paramMap);
if (resolved.isResolved()) {
resolved.resolvedBibliographicItem = tdbau;
log.debug3("Resolved book url from plugin: " + resolved.resolvedUrl);
}
} else {
log.debug3("No plugin found for key: " + pluginKey);
}
return resolved;
}
/**
* Find the most specific bool feature URL that can be determined from
* the supplied parameters.
* @param tdbAu TdbAu describing AU to be accessed
* @param plugin the plugin
* @param paramMap param map containing properties and AU config params
* from TdbAu, plus possibly issn
, eissn
,
* feature_key
, volume
,
* volume_str
, volume_name
,
* year
, au_short_year
, issue
,
* article
, page
, item
(article
* number).
* @return a feature URL
*/
@Loggable(value = Loggable.TRACE, prepend = true)
public OpenUrlInfo getBookUrl(TdbAu tdbAu, Plugin plugin,
TypedEntryMap paramMap) {
OpenUrlInfo resolved = getPluginUrl(tdbAu, plugin,
auBookAuFeatures, paramMap);
return resolved;
}
/* *
* Gets the issue URL for an AU indicated by the DefinablePlugin
* and parameter definitions specified by the TdbAu.
*
* @param plugin the DefinablePlugin
* @param tdbau the TdbAu
* @param year the year
* @param volumeName the volume name
* @param issue the issue
* @return the issue URL
*/
/* for later use (pjg)
private static String getJournalUrl(
ArchivalUnit au, String year, String volumeName, String issue) {
TypedEntryMap paramMap = getParamMap(au);
Plugin plugin = au.getPlugin();
String url = getJournalUrl(plugin, paramMap, year, volumeName, issue);
return url;
}
*/
/**
* Get starting url from TdbAu.
* @param tdbau the TdbAu
* @param year the year
* @param volumeName the volume name
* @param issue the issue
* @param spage the start page
* @param artnum the article number
* @return the starting URL
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private OpenUrlInfo getJournalUrl(
TdbAu tdbau, String year, String volumeName, String issue,
String spage, String artnum) {
String pluginKey = PluginManager.pluginKeyFromId(tdbau.getPluginId());
Plugin plugin = pluginMgr.getPlugin(pluginKey);
OpenUrlInfo resolved = OPEN_URL_INFO_NONE;
if (plugin != null) {
log.debug3("getting journal feature url for plugin: "
+ plugin.getClass().getName());
// get starting URL from a DefinablePlugin
// add volume with type and spelling of existing element
TypedEntryMap paramMap = getParamMap(plugin, tdbau);
paramMap.setMapElement("volume", volumeName);
paramMap.setMapElement("volume_str", volumeName);
paramMap.setMapElement("volume_name", volumeName);
paramMap.setMapElement("year", year);
String issn = tdbau.getPrintIssn();
if (issn != null) {
paramMap.setMapElement("issn", issn);
}
String eissn = tdbau.getEissn();
if (eissn != null) {
paramMap.setMapElement("eissn", eissn);
}
if (!StringUtil.isNullString(year)) {
try {
paramMap.setMapElement("au_short_year",
String.format("%02d", NumberUtil.parseInt(year)%100));
} catch (NumberFormatException ex) {
log.info("Error parsing year '" + year
+ "' as an integer -- not setting au_short_year");
}
}
paramMap.setMapElement("issue", issue);
paramMap.setMapElement("article", spage);
paramMap.setMapElement("page", spage);
paramMap.setMapElement("item", artnum); // for journals without page numbers
// AU_FEATURE_KEY selects feature from a map of values
// for the same feature (e.g. au_feature_urls/au_year)
paramMap.setMapElement(AU_FEATURE_KEY, tdbau.getAttr(AU_FEATURE_KEY));
resolved = getJournalUrl(tdbau, plugin, paramMap);
if (resolved.isResolved()) {
if (resolved.resolvedTo == OpenUrlInfo.ResolvedTo.TITLE) {
// create bibliographic item with only title properties
resolved.resolvedBibliographicItem =
new BibliographicItemImpl()
.setPublisherName(tdbau.getPublisherName())
.setPublicationTitle(tdbau.getPublicationTitle())
.setProprietaryIds(tdbau.getProprietaryIds())
.setCoverageDepth(tdbau.getCoverageDepth())
.setPrintIssn(tdbau.getPrintIssn())
.setEissn(tdbau.getEissn())
.setIssnL(tdbau.getIssnL());
} else {
resolved.resolvedBibliographicItem = tdbau;
}
log.debug3("Resolved journal url from plugin: " + resolved.resolvedUrl);
}
} else {
log.debug3("No plugin found for key: " + pluginKey);
}
return resolved;
}
/**
* Find the most specific journal feature URL that can be determined from
* the supplied parameters.
* @param tdbAu TdbAu describing AU to be accessed
* @param plugin the plugin
* @param paramMap param map containing properties and AU config params
* from TdbAu, plus possibly issn
, eissn
,
* feature_key
, volume
,
* volume_str
, volume_name
,
* year
, au_short_year
, issue
,
* article
, page
, item
(article
* number).
* @return a feature URL
*/
public OpenUrlInfo getJournalUrl(TdbAu tdbAu,
Plugin plugin,
TypedEntryMap paramMap) {
OpenUrlInfo resolved = getPluginUrl(tdbAu, plugin,
auJournalFeatures, paramMap);
return resolved;
}
public OpenUrlInfo getPluginUrl(Plugin plugin,
FeatureEntry[] pluginEntries,
TypedEntryMap paramMap) {
return getPluginUrl(null, plugin, pluginEntries, paramMap);
}
/**
* Get the URL for the specified key from the plugin.
* @param plugin the plugin
* @param pluginEntries array of FeatureEntry to try
* @param paramMap parameters for feature printfs
* @return OpenUrlInfo for the first FeatureEntry that evaluates without
* error
*/
@Loggable(value = Loggable.TRACE, prepend = true)
OpenUrlInfo getPluginUrl(TdbAu tdbAu,
Plugin plugin,
FeatureEntry[] pluginEntries,
TypedEntryMap paramMap) {
ArchivalUnit au = null;
if (tdbAu != null) {
String auid = tdbAu.getAuId(pluginMgr);
au = pluginMgr.getAuFromId(auid);
}
ExternalizableMap map;
// get printf pattern for pluginKey property
try {
Method method =
plugin.getClass().getMethod("getDefinitionMap", (new Class[0]));
Object obj = method.invoke(plugin);
if (!(obj instanceof ExternalizableMap)) {
return OPEN_URL_INFO_NONE;
}
map = (ExternalizableMap)obj;
} catch (Exception ex) {
log.error("getDefinitionMap", ex);
return OPEN_URL_INFO_NONE;
}
String proxySpec = null;
try {
proxySpec = paramMap.getString(ConfigParamDescr.CRAWL_PROXY.getKey());
} catch (NoSuchElementException ex) {
// no crawl_proxy param specified
}
for (FeatureEntry pluginEntry : pluginEntries) {
// locate object value for plugin key path
String pluginKey = pluginEntry.auFeatureKey;
String[] pluginKeyPath = pluginKey.split("/");
Object obj = map.getMapElement(pluginKeyPath[0]);
for (int i = 1; (i < pluginKeyPath.length); i++) {
if (obj instanceof Map) {
obj = ((Map)obj).get(pluginKeyPath[i]);
} else {
// all path elements except last one must be a map;
obj = null;
break;
}
}
if (obj instanceof Map) {
// match TDB AU_FEATURE_KEY value to key in map
String auFeatureKey = "*"; // default entry
try {
auFeatureKey = paramMap.getString(AU_FEATURE_KEY);
} catch (NoSuchElementException ex) {}
// entry may have multiple keys; '*' is the default entry
Object val = null;
for (Map.Entry entry : ((Map)obj).entrySet()) {
String key = entry.getKey();
if ( key.equals(auFeatureKey)
|| key.startsWith(auFeatureKey + ";")
|| key.endsWith(";" + auFeatureKey)
|| (key.indexOf(";" + auFeatureKey + ";") >= 0)) {
val = entry.getValue();
break;
}
}
obj = val;
pluginKey += "/" + auFeatureKey;
}
if (obj == null) {
log.debug("unknown plugin key: " + pluginKey);
continue;
}
Collection printfStrings = null;
if (obj instanceof String) {
// get single pattern for start url
printfStrings = Collections.singleton((String)obj);
} else if (obj instanceof Collection) {
printfStrings = (Collection)obj;
} else {
log.debug( "unknown type for plugin key: " + pluginKey
+ ": " + obj.getClass().getName());
continue;
}
log.debug3( "Trying plugin key: " + pluginKey
+ " for plugin: " + plugin.getPluginId()
+ " with " + printfStrings.size() + " printf strings");
// set up converter for use with feature URL printf strings
UrlListConverter converter =
PrintfConverter.newUrlListConverter(plugin, paramMap);
converter.setAllowUntypedArgs(true);
for (String s : printfStrings) {
String url = null;
// terminal value in maps may be a string, printf string or the
// name of a FeatureUrlHelperFactory
FeatureUrlHelper helper = null;
if (!s.startsWith("\"")) {
try {
FeatureUrlHelperFactory fact =
plugin.newAuxClass(s, FeatureUrlHelperFactory.class);
if (fact != null) {
helper = fact.createFeatureUrlHelper(plugin);
}
} catch (Exception e) {
log.error("Can't create FeatureUrlHelper for " +
plugin.getPluginName(), e);
}
}
if (helper != null) {
try {
List urls = helper.getFeatureUrls(au,
pluginEntry.resolvedTo,
paramMap);
if ((urls != null) && !urls.isEmpty()) {
// if multiple urls match, the first one will do
url = urls.get(0);
}
} catch (PluginException |
RuntimeException |
IOException e) {
log.error("Error in FeatureUrlHelper(" + plugin + ", " + paramMap,
e);
}
} else {
s = StringEscapeUtils.unescapeHtml4(s);
try {
List urls = converter.getUrlList(s);
if ((urls != null) && !urls.isEmpty()) {
// if multiple urls match, the first one will do
url = urls.get(0);
}
} catch (Throwable ex) {
log.debug("invalid conversion for " + s, ex);
continue;
}
}
// validate URL: either it's cached, or it can be reached
if (!StringUtil.isNullString(url)) {
log.debug3("Resolving from url: " + url);
url = resolveUrl(url, proxySpec);
if (url != null) {
return OpenUrlInfo.newInstance(url, proxySpec,
pluginEntry.resolvedTo);
}
}
}
}
return OPEN_URL_INFO_NONE;
}
/**
* Return the book URL from TdbTitle and edition.
*
* @param tdbAus a collection of TdbAus that match an ISBN
* @param date the publication date
* @param volume the volume
* @param edition the edition
* @param chapter the chapter
* @param spage the start page
* @return the book URL
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private OpenUrlInfo resolveBookFromTdbAus(
Collection tdbAus, String date,
String volume, String edition, String chapter, String spage) {
// get the year from the date
String year = null;
if (date != null) {
try {
year = Integer.toString(PublicationDate.parse(date).getYear());
} catch (ParseException ex) {}
}
// list of AUs that match volume and year specified
ArrayList foundTdbAuList = new ArrayList();
// list of AUs that do not match volume, edition, and year specified
ArrayList notFoundTdbAuList = new ArrayList();
for (TdbAu tdbAu : tdbAus) {
// if none of year, volume, or edition specified, pick any TdbAu
if ((volume == null) && (year == null) && (edition == null)) {
notFoundTdbAuList.add(tdbAu);
continue;
}
// if volume specified, see if this TdbAu matches
if (volume != null) {
if (!tdbAu.includesVolume(volume)) {
notFoundTdbAuList.add(tdbAu);
continue;
}
}
// if year specified, see if this TdbAu matches
if (year != null) {
if (!tdbAu.includesYear(year)) {
notFoundTdbAuList.add(tdbAu);
continue;
}
}
// get the plugin id for the TdbAu that matches the specified edition
if (edition != null) {
String auEdition = tdbAu.getEdition();
if ((auEdition != null) && !edition.equals(auEdition)) {
notFoundTdbAuList.add(tdbAu);
continue;
}
}
foundTdbAuList.add(tdbAu);
}
OpenUrlInfo resolved = null;
// look for URL that is cached from list of matching AUs
for (TdbAu tdbau : foundTdbAuList) {
String aYear = year;
if (aYear == null) {
aYear = tdbau.getStartYear();
}
String aVolume = volume;
if (aVolume == null) {
aVolume = tdbau.getStartVolume();
}
String anEdition = edition;
if (edition == null) {
anEdition = tdbau.getEdition();
}
OpenUrlInfo aResolved = getBookUrl(
tdbau, year, aVolume, anEdition, chapter, spage);
if (aResolved.resolvedUrl != null) {
// found the URL if in cache
if (pluginMgr.findCachedUrl(aResolved.resolvedUrl) != null) {
if (resolved == null) {
resolved = aResolved;
} else {
resolved.add(aResolved);
}
}
// not a viable URL if the AU is down
// note: even though getBookUrl() checks that page exists,
// we can't rely on it being usable if TdbAu is down
else if (!tdbau.isDown()) {
if (resolved == null) {
resolved = aResolved;
} else {
resolved.add(aResolved);
}
} else {
log.debug2( "discarding URL " + aResolved.resolvedUrl
+ " because tdbau is down: " + tdbau.getName());
}
}
}
if (resolved != null) {
return resolved;
}
// use tdbau that is not down from notFoundTdbAuList to find the
// title or publisher URL, since that is all we can return at this point
for (TdbAu tdbau : notFoundTdbAuList) {
if (!tdbau.isDown()) {
OpenUrlInfo aResolved = getBookUrl(tdbau,
tdbau.getStartYear(), tdbau.getStartVolume(),
tdbau.getStartIssue(), null, null);
if (aResolved.isResolved()) {
if (resolved == null) {
resolved = aResolved;
} else {
resolved.add(aResolved);
}
}
} else {
log.debug2("discarding URL because tdbau is down: " + tdbau.getName());
}
}
if (resolved != null) {
return resolved;
}
// pick any AU to use for resolving the title as a last resort
if (!notFoundTdbAuList.isEmpty()) {
OpenUrlInfo aResolved =
OpenUrlInfo.newInstance(null, null, OpenUrlInfo.ResolvedTo.VOLUME);
aResolved.resolvedBibliographicItem = notFoundTdbAuList.get(0);
return aResolved;
}
return OPEN_URL_INFO_NONE;
}
/**
* Return the article URL from an ISBN, edition, spage, and author.
* The first author will only be used when the starting page is not given.
* "Volume" is used to hold edition information in the database manager
* schema for books. First author can be used in place of start page.
*
* @param isbn the isbn
* @param pub the publisher
* @param date the date
* @param volume the volume
* @param edition the edition
* @param chapter the chapter
* @param spage the start page
* @param author the first author
* @param atitle the chapter title
* @return the article URL
*/
public OpenUrlInfo resolveFromIsbn(
String isbn, String pub, String date, String volume, String edition,
String chapter, String spage, String author, String atitle) {
// only go to database manager if requesting individual article/chapter
try {
// resolve from database manager
MetadataDbManager dbMgr = daemon.getMetadataDbManager();
OpenUrlInfo aResolved = resolveFromIsbn(
dbMgr, isbn, pub, date, volume, edition,
chapter, spage, author, atitle);
if (aResolved.isResolved()) {
return aResolved;
}
} catch (IllegalArgumentException ex) {
}
// resolve from TDB
Tdb tdb = ConfigManager.getCurrentConfig().getTdb();
// get list of TdbTitles for issn
Collection tdbAus;
if (tdb == null) {
tdbAus = Collections.emptyList();
} else if (pub != null) {
TdbPublisher tdbPub = tdb.getTdbPublisher(pub);
tdbAus = (tdbPub == null)
? Collections.emptyList() : tdbPub.getTdbAusByIsbn(isbn);
} else {
tdbAus = tdb.getTdbAusByIsbn(isbn);
}
OpenUrlInfo resolved =
resolveBookFromTdbAus(tdbAus, date, volume, edition, chapter, spage);
return resolved;
}
/**
* Return the article URL from an ISBN, edition, start page, author, and
* article title using the metadata database.
*
* The algorithm matches the ISBN and optionally the edition, and either
* the start page, author, or article title. The reason for matching on any
* of the three is that typos in author and article title are always
* possible so we want to be more forgiving in matching an article.
*
* If none of the three are specified, the URL for the book table of contents
* is returned.
*
* @param dbMgr the database manager
* @param isbn the isbn
* @param pub the publisher
* @param String date the date
* @param String volumeName the volumeName
* @param edition the edition
* @param chapter the chapter
* @param spage the start page
* @param author the first author
* @param atitle the chapter title
* @return the url
*/
@Loggable(value = Loggable.TRACE, prepend = true)
private OpenUrlInfo resolveFromIsbn(
MetadataDbManager dbMgr, String isbn, String pub,
String date, String volume, String edition,
String chapter, String spage, String author, String atitle) {
final String DEBUG_HEADER = "resolveFromIsbn(): ";
OpenUrlInfo resolved = null;
Connection conn = null;
// error if input ISBN is not a ISBN-10 or ISBN-13
String strippedIsbn10 = MetadataUtil.toUnpunctuatedIsbn10(isbn);
String strippedIsbn13 = MetadataUtil.toUnpunctuatedIsbn13(isbn);
if ((strippedIsbn10 == null) && (strippedIsbn13 == null)) {
return OPEN_URL_INFO_NONE;
}
boolean hasBookSpec =
(date != null) || (volume != null) || (edition != null);
boolean hasArticleSpec = (chapter != null) || (spage != null)
|| (author != null) || (atitle != null);
try {
conn = dbMgr.getConnection();
StringBuilder select = new StringBuilder("select distinct ");
StringBuilder from = new StringBuilder(" from ");
StringBuilder where = new StringBuilder(" where ");
ArrayList args = new ArrayList();
// return all related values for debugging purposes
select.append("u." + URL_COLUMN);
select.append(",pb." + PUBLISHER_NAME_COLUMN);
select.append(",n1." + NAME_COLUMN + " as book_title");
select.append(",i." + ISBN_COLUMN);
select.append(",bi." + VOLUME_COLUMN);
select.append(",bi." + ISSUE_COLUMN + " as edition");
select.append(",bi." + START_PAGE_COLUMN);
select.append(",bi." + END_PAGE_COLUMN);
select.append(",bi." + ITEM_NO_COLUMN + " as chapt_no");
select.append(",n2." + NAME_COLUMN + " as chapt_title");
select.append(",pv2." + PROVIDER_NAME_COLUMN);
from.append(MD_ITEM_TABLE + " mi1"); // publication md_item
from.append("," + MD_ITEM_TABLE + " mi2"); // article md_item
from.append("," + ISBN_TABLE + " i");
from.append("," + PUBLICATION_TABLE + " pu");
from.append("," + PUBLISHER_TABLE + " pb");
from.append("," + MD_ITEM_NAME_TABLE + " n1"); // publication name
from.append("," + MD_ITEM_NAME_TABLE + " n2"); // article name
from.append("," + URL_TABLE + " u");
from.append("," + BIB_ITEM_TABLE + " bi");
from.append("," + PROVIDER_TABLE + " pv2");
from.append("," + AU_MD_TABLE + " am2");
where.append("mi2." + PARENT_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and i." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and i." + ISBN_COLUMN);
if ((strippedIsbn10 != null) && (strippedIsbn13 != null)) {
// check both ISBN-10 and ISBN-13 forms
where.append(" in (?,?)");
args.add(strippedIsbn10);
args.add(strippedIsbn13);
} else {
// can't convert to ISBN-10 or ISBN-13 because input isbn
// is not well formed, so use whichever one is available
where.append(" = ?");
args.add((strippedIsbn13 != null) ? strippedIsbn13 : strippedIsbn10);
}
where.append(" and pu." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and pb." + PUBLISHER_SEQ_COLUMN + "=");
where.append("pu." + PUBLISHER_SEQ_COLUMN);
if (pub != null) {
// match publisher if specified
where.append(" and pb." + PUBLISHER_NAME_COLUMN + "= ?");
args.add(pub);
}
where.append(" and n1." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi1." + MD_ITEM_SEQ_COLUMN);
where.append(" and n1." + NAME_TYPE_COLUMN + "='primary'");
where.append(" and u." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi2." + MD_ITEM_SEQ_COLUMN);
where.append(" and u." + FEATURE_COLUMN + "='Access'");
where.append(" and bi." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi2." + MD_ITEM_SEQ_COLUMN);
where.append(" and n2." + MD_ITEM_SEQ_COLUMN + "=");
where.append("mi2." + MD_ITEM_SEQ_COLUMN);
where.append(" and n2." + NAME_TYPE_COLUMN + "='primary'");
where.append(" and mi2." + AU_MD_SEQ_COLUMN + "=");
where.append("am2." + AU_MD_SEQ_COLUMN);
where.append(" and am2." + PROVIDER_SEQ_COLUMN + "=");
where.append("pv2." + PROVIDER_SEQ_COLUMN);
if (hasBookSpec) {
// can specify an issue by a combination of date, volume and edition;
// how these combine varies, so do the most liberal match possible
// and filter based on multiple results
if (date != null) {
// enables query "2009" to match "2009-05-10" in database
where.append(" and mi2." + DATE_COLUMN);
where.append(" like ? escape '\\'");
args.add(date.replace("\\","\\\\").replace("%","\\%") + "%");
}
if (volume != null) {
where.append(" and bi." + VOLUME_COLUMN + " = ?");
args.add(volume);
}
if (edition != null) {
where.append(" and bi." + ISSUE_COLUMN + " = ?");
args.add(edition);
}
}
// handle start page, author, and article title as
// equivalent ways to specify an article within an issue
if (hasArticleSpec) {
// accept any of the three
where.append(" and ( ");
if (spage != null) {
where.append("bi." + START_PAGE_COLUMN + " = ?");
args.add(spage);
}
if (chapter != null) {
if (spage != null) {
where.append(" or ");
}
where.append("bi." + ITEM_NO_COLUMN + " = ?");
args.add(chapter);
}
if (atitle != null) {
if ((spage != null) || (chapter != null)) {
where.append(" or ");
}
where.append("upper(n2." + NAME_COLUMN);
where.append(") like ? escape '\\'");
args.add(atitle.toUpperCase().replace("%","\\%") + "%");
}
if (author != null) {
if ((spage != null) || (chapter != null) || (atitle != null)) {
where.append(" or ");
}
from.append("," + AUTHOR_TABLE + " au");
// add the author query to the query
addAuthorQuery(author, where, args);
}
where.append(")");
}
String qstr = select.toString() + from.toString() + where.toString();
int maxPublishersPerArticle = getMaxPublishersPerArticle();
String[][] results = new String[maxPublishersPerArticle+1][11];
int count = resolveFromQuery(conn, qstr, args, results);
log.debug3(DEBUG_HEADER + "count = " + count);
if (count <= maxPublishersPerArticle) {
// ensure at most one result per publisher+provider in case
// more than one publisher+provider publishes the same book
Set pubs = new HashSet();
for (int i = 0; i < count; i++) {
// combine publisher and provider columns to determine uniqueness
if (!pubs.add(results[i][1] + results[i][10])) {
return OPEN_URL_INFO_NONE;
}
OpenUrlInfo info = OpenUrlInfo.newInstance(results[i][0], null,
OpenUrlInfo.ResolvedTo.CHAPTER);
if (resolved == null) {
resolved = info;
} else {
resolved.add(info);
}
}
}
} catch (DbException dbe) {
log.error("Getting ISBN:" + isbn, dbe);
} finally {
MetadataDbManager.safeRollbackAndClose(conn);
}
return (resolved == null) ? OPEN_URL_INFO_NONE : resolved;
}
/**
* Add author query to the query buffer and argument list.
* @param author the author
* @param where the query buffer
* @param args the argument list
*/
private void addAuthorQuery(String author, StringBuilder where,
List args) {
where.append("mi2." + MD_ITEM_SEQ_COLUMN + " = ");
where.append("au." + MD_ITEM_SEQ_COLUMN + " and (");
String authorUC = author.toUpperCase();
// match single author
where.append("upper(au.");
where.append(AUTHOR_NAME_COLUMN);
where.append(") = ?");
args.add(authorUC);
// escape escape character and then wildcard characters
String authorEsc = authorUC.replace("\\", "\\\\").replace("%","\\%");
// match last name of author
// (last, first name separated by ',')
where.append(" or upper(au.");
where.append(AUTHOR_NAME_COLUMN);
where.append(") like ? escape '\\'");
args.add(authorEsc+",%");
// match last name of author
// (first last name separated by ' ')
where.append(" or upper(au.");
where.append(AUTHOR_NAME_COLUMN);
where.append(") like ? escape '\\'");
args.add("% " + authorEsc);
where.append(")");
}
/**
* Provides the database manager.
*
* @return a DbManager with the database manager.
*/
// private MetadataDbManager getMetadataDbManager() {
// return (MetadataDbManager)LockssApp
// .getManager(MetadataDbManager.getManagerKey());
// }
}