com.googlecode.fascinator.redbox.sru.SRUClient Maven / Gradle / Ivy
/*
* The Fascinator - ReDBox/Mint SRU Client
* Copyright (C) 2012 Queensland Cyber Infrastructure Foundation (http://www.qcif.edu.au/)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package com.googlecode.fascinator.redbox.sru;
import com.googlecode.fascinator.common.BasicHttpClient;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.DocumentFactory;
import org.dom4j.Node;
import org.dom4j.io.SAXReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A light-weight SRU client implementation. Originally written for the purpose
* of searching the National Library of Australia's Party Infrastructure Project
* (PIP) via GET (ie. not POST or SOAP, both of which PIP also supports).
* More information/documentation for PIP is available on the NLA wiki.
*
* @author Greg Pendlebury
*
* Credit for some of inspiration has to go to another light-weight implementation
* available under LGPL we looked at before we started coding:
* SRUClient from 'Sinciput'.
*
*/
public class SRUClient {
/** Logging **/
private static Logger log = LoggerFactory.getLogger(SRUClient.class);
/** A SAX Reader for XML parsing **/
private SAXReader saxReader;
/** Namespaces for XML parsing **/
private Map namespaces;
/** Default URL is for the NLA **/
private String baseUrl = "http://www.nla.gov.au/apps/srw/search/peopleaustralia";
/** Default Schema is for EAC-CPF records from the NLA **/
private String recordSchema = "urn:isbn:1-931666-33-4";
/** Version parameter for the query **/
private String sruVersion = "1.1";
/** Request a particular response packing **/
private String responsePacking = "xml";
/** Unit testing only. Fake search response **/
private String testingResponseString;
/**
* Default Constructor. Connect to the NLA unless otherwise instructed. This
* will rely on the more complicated constructor defaulting to the searching
* for EAC-CPF records as well.
*
*/
public SRUClient() {
saxInit();
}
/**
* Constructor indicating the base URL for the SRU interface.
*
* @param baseUrl The Base URL for the SRU interface. Required.
* @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4')
* @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed.
*/
public SRUClient(String baseUrl) throws MalformedURLException {
this(baseUrl, null, null, null);
}
/**
* Constructor indicating the base URL and metadata schema.
*
* @param baseUrl The Base URL for the SRU interface. Required.
* @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4')
* @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed.
*/
public SRUClient(String baseUrl, String schema)
throws MalformedURLException {
this(baseUrl, schema, null, null);
}
/**
* Constructor indicating the base URL, metadata schema and format
* packing for responses.
*
* @param baseUrl The Base URL for the SRU interface. Required.
* @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4')
* @param packing The SRU 'recordPacking' to use. NULL values will default to 'xml'
* @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed.
*/
public SRUClient(String baseUrl, String schema, String packing)
throws MalformedURLException {
this(baseUrl, schema, packing, null);
}
/**
* This constructor is where the real work happens. All the constructors
* above provide wrappers of this one based on how much you want to deviate
* from the defaults (which assume you are connecting to the NLA.
*
* @param baseUrl The Base URL for the SRU interface. Required.
* @param version The SRU 'version' to use. NULL values will default to v1.1
* @param schema The SRU 'recordSchema' to use. NULL values will default to EAC-CPC ('urn:isbn:1-931666-33-4')
* @param packing The SRU 'recordPacking' to use. NULL values will default to 'xml'
* @throws MalformedURLException Will be thrown if the 'baseUrl' provided is not well formed.
*/
public SRUClient(String baseUrl, String schema, String packing,
String version) throws MalformedURLException {
// Make sure our URL is valid first
try {
URL url = new URL(baseUrl);
this.baseUrl = baseUrl;
} catch (MalformedURLException ex) {
log.error("Invalid URL passed to constructor: ", ex);
throw ex;
}
// Start with the default NLA parameters if nothing has been configured
// NLA = EAC-CPF
if (schema != null) {
recordSchema = schema;
}
// NLA = 1.1
if (version != null) {
sruVersion = version;
}
// NLA = xml
if (packing != null) {
responsePacking = packing;
}
saxInit();
}
/**
* Used to change the 'recordSchema' after instantiation. All outgoing
* requests sent after this call will use the new schema.
*
* @param newSchema The new schema to use.
*/
public void setRecordSchema(String newSchema) {
recordSchema = newSchema;
}
/**
* Used to change the 'version' after instantiation. All outgoing
* requests sent after this call will use the new version.
*
* @param newVersion The new version to use.
*/
public void setVersion(String newVersion) {
sruVersion = newVersion;
}
/**
* Used to change 'recordPacking' after instantiation. All outgoing
* requests sent after this call will use the new format.
*
* @param newPacking The new packing format to use.
*/
public void setPacking(String newPacking) {
responsePacking = newPacking;
}
/**
* Simple init for the SAX Reader.
*
*/
private void saxInit() {
namespaces = new HashMap();
DocumentFactory docFactory = new DocumentFactory();
docFactory.setXPathNamespaceURIs(namespaces);
saxReader = new SAXReader(docFactory);
}
/**
* Used in unit testing to indicate a package resource to use as search
* responses, rather then submitting a real SRU query.
*
* @param fileName The name of a resource 'file' to use as simulated search result.
* @throws IOException If encoding/access issues occur accessing the resource.
*/
public void testResponseResource(String fileName) throws IOException {
ByteArrayOutputStream out = new ByteArrayOutputStream();
IOUtils.copy(getClass().getResourceAsStream("/" + fileName), out);
testingResponseString = out.toString("UTF-8");
}
/**
* Parse an XML document from a string
*
* @param xmlData The String to parse
* @return Document The parsed XML Object. Null if any problems occur.
*/
public Document parseXml(String xmlData) {
try {
byte[] bytes = xmlData.getBytes("utf-8");
ByteArrayInputStream in = new ByteArrayInputStream(bytes);
return saxReader.read(in);
} catch (UnsupportedEncodingException ex) {
log.error("Input is not UTF-8", ex);
return null;
} catch (DocumentException ex) {
log.error("Failed to parse XML", ex);
return null;
}
}
/**
* Parse an XML String response and populate a response Object.
*
* @param xmlData The XML String returned from the search
* @return SRUResponse An instantiated response object
*/
public SRUResponse getResponseObject(String xmlData) {
// Parsing
Document xmlResponse = parseXml(xmlData);
if (xmlResponse == null) {
log.error("Can't get results after XML parsing failed.");
return null;
}
// Processing
SRUResponse response = null;
try {
response = new SRUResponse(xmlResponse);
} catch (SRUException ex) {
log.error("Error processing XML response:", ex);
}
return response;
}
/**
* Parse an XML String response and get a List Object containing all of
* the SRU search results.
*
* @param xmlData The XML String returned from the search
* @return List A List containing a DOM4J Node for each search result
*/
public List getResultList(String xmlData) {
SRUResponse response = getResponseObject(xmlData);
if (response == null) {
log.error("Unable to get results from response XML.");
return null;
}
return response.getResults();
}
/**
* Basic wrapper for safely encoding Strings used in URLs.
*
* @param value The String to be used in the URL
* @return String A safely encoded version of 'value' for use in URLs.
*/
private String encode(String value) {
try {
return URLEncoder.encode(value, "UTF-8");
} catch (UnsupportedEncodingException ex) {
log.error("Error UTF-8 encoding value '{}'", value, ex);
return "";
}
}
/**
* Get the requested URL and return the GetMethod Object afterwards.
* To access more info use its method: eg. GetMethod.getStatusCode() and
* GetMethod.getResponseBodyAsString()
*
* Internally wraps a Fascinator BasicHttpClient Object, so any configured
* proxy details from the system will be used automatically.
*
* @param url The URL to retrieve
* @return GetMethod The instantiated and executed GetMethod Object.
* @throws IOException If any network errors occur accessing the URL. Note
* this does not cover HTTP errors returned from the web server; use the
* returned Object to check for these.
*/
private GetMethod getUrl(String url) throws IOException {
BasicHttpClient client = new BasicHttpClient(url);
GetMethod get = new GetMethod(url);
client.executeMethod(get);
return get;
}
/**
* Generate a basic search URL for this SRU interface.
*
* @param query The query String to perform against the SRU interface.
* @return String A URL that can be retrieved to execute this search.
*/
public String generateSearchUrl(String query) {
return this.generateSearchUrl(query, null, null, null, null);
}
/**
* Generate a search URL for this SRU interface. No sorting or pagination.
*
* @param query The query String to perform against the SRU interface. Required.
* @param operation The 'operation' perform. If null this will default to 'searchRetrieve'.
* @return String A URL that can be retrieved to execute this search.
*/
public String generateSearchUrl(String query, String operation) {
return this.generateSearchUrl(query, operation, null, null, null);
}
/**
* Generate a search URL for this SRU interface. No pagination.
*
* @param query The query String to perform against the SRU interface. Required.
* @param operation The 'operation' perform. If null this will default to 'searchRetrieve'.
* @param sortKeys Sorting. Optional, with no default.
* @return String A URL that can be retrieved to execute this search.
*/
public String generateSearchUrl(String query, String operation,
String sortKeys) {
return this.generateSearchUrl(query, operation, sortKeys, null, null);
}
/**
* Generate a search URL for this SRU interface. This is the actual
* implementation method wrapped by the methods above with most parameters
* as optional.
*
* @param query The query String to perform against the SRU interface. Required.
* @param operation The 'operation' perform. If null this will default to 'searchRetrieve'.
* @param sortKeys Sorting. Optional, with no default.
* @param startRecord Starting record number. Optional, with no default.
* @param maxRecords Maximum rows to return. Optional, with no default.
* @return String A URL that can be retrieved to execute this search.
*/
public String generateSearchUrl(String query, String operation,
String sortKeys, String startRecord, String maxRecords) {
String searchUrl = baseUrl;
if (query == null) {
log.error("Cannot generate a search URL without a search! 'query' parameter is required.");
return null;
}
if (operation == null) {
operation = "searchRetrieve";
}
// URL basics
searchUrl += "?version=" + encode(sruVersion);
searchUrl += "&recordSchema=" + encode(recordSchema);
searchUrl += "&recordPacking=" + encode(responsePacking);
// Search basics
searchUrl += "&operation=" + encode(operation);
searchUrl += "&query=" + encode(query);
// Optional extras on search. Sorting and pagination
if (sortKeys != null) {
searchUrl += "&sortKeys=" + encode(sortKeys);
}
if (startRecord != null) {
searchUrl += "&startRecord=" + encode(startRecord);
}
if (maxRecords != null) {
searchUrl += "&maximumRecords=" + encode(maxRecords);
}
return searchUrl;
}
/**
* Perform a basic search and return the response body.
*
* @param query The query String to perform against the SRU interface.
* @return String The response body return from the SRU interface.
*/
public String getSearchResponse(String query) {
return getSearchResponse(query, null, null, null, null);
}
/**
* Perform a search and return the response body. No sorting or pagination.
*
* @param query The query String to perform against the SRU interface. Required.
* @param operation The 'operation' perform. If null this will default to 'searchRetrieve'.
* @return String The response body return from the SRU interface.
*/
public String getSearchResponse(String query, String operation) {
return getSearchResponse(query, operation, null, null, null);
}
/**
* Perform a search and return the response body. No pagination.
*
* @param query The query String to perform against the SRU interface. Required.
* @param operation The 'operation' perform. If null this will default to 'searchRetrieve'.
* @param sortKeys Sorting. Optional, with no default.
* @return String The response body return from the SRU interface.
*/
public String getSearchResponse(String query, String operation,
String sortKeys) {
return getSearchResponse(query, operation, sortKeys, null, null);
}
/**
* Perform a search and return the response body. This is the actual
* implementation method wrapped by the methods above with most parameters
* as optional.
*
* @param query The query String to perform against the SRU interface. Required.
* @param operation The 'operation' perform. If null this will default to 'searchRetrieve'.
* @param sortKeys Sorting. Optional, with no default.
* @param startRecord Starting record number. Optional, with no default.
* @param maxRecords Maximum rows to return. Optional, with no default.
* @return String The response body return from the SRU interface.
*/
public String getSearchResponse(String query, String operation,
String sortKeys, String startRecord, String maxRecords) {
// Get a search URL to execute first
String searchUrl = generateSearchUrl(query, operation,
sortKeys, startRecord, maxRecords);
if (searchUrl == null) {
log.error("Invalid search URL. Cannot perform search.");
return null;
}
// Unit testing... don't perform a real search
if (testingResponseString != null) {
return testingResponseString;
}
// Perform the search
GetMethod get = null;
try {
get = getUrl(searchUrl);
int status = get.getStatusCode();
if (status != 200) {
String text = get.getStatusText();
log.error("Error access SRU interface, status code '{}' returned with message: {}", status, text);
return null;
}
} catch (IOException ex) {
log.error("Error during search: ", ex);
return null;
}
// Return our results body
String response = null;
try {
response = get.getResponseBodyAsString();
} catch (IOException ex) {
log.error("Error accessing response body: ", ex);
return null;
}
return response;
}
/**
* Make sure that the SAX Reader is aware of the XML namespaces used by
* the NLA when parsing their.
*
*/
private void nlaNamespaces() {
if (!namespaces.containsKey("srw")) {
namespaces.put("srw", "http://www.loc.gov/zing/srw/");
}
if (!namespaces.containsKey("eac")) {
namespaces.put("eac", "urn:isbn:1-931666-33-4");
}
}
/**
* Search for a record from the National Library of Australia with the
* provided identifier. If multiple records match this identifier only the
* first will be returned.
*
* @param id The identifier to search for
* @return String The record matching this identifier. Null if not found
*/
private Node nlaGetRecordNodeById(String id) {
nlaNamespaces();
// Run a search
String query = "rec.identifier=\""+id+"\"";
String rawXml = getSearchResponse(query);
// Get the results nodes
List results = getResultList(rawXml);
if (results.isEmpty()) {
log.warn("This identifier matches no records.");
return null;
}
if (results.size() > 1) {
log.warn("This identifier matches multiple records! Returning only the first.");
}
// Return first(only?) record
if ("xml".equals(responsePacking)) {
return results.get(0).selectSingleNode("*[1]");
} else {
return results.get(0);
}
}
/**
* Search for a record from the National Library of Australia with the
* provided identifier. If multiple records match this identifier only the
* first will be returned.
*
* @param id The identifier to search for
* @return String The record matching this identifier. Null if not found
*/
public String nlaGetRecordById(String id) {
Node node = nlaGetRecordNodeById(id);
if (node == null) {
return null;
}
if ("xml".equals(responsePacking)) {
return node.asXML();
} else {
return node.getText();
}
}
/**
* Search for a record from the National Library of Australia with the
* provided identifier. If multiple records match this identifier only the
* first will be returned.
*
* @param id The identifier to search for
* @return String The record matching this identifier. Null if not found
*/
public String nlaGetNationalId(String id) {
Node node = nlaGetRecordNodeById(id);
if (node == null) {
return null;
}
List otherIds = node.selectNodes("eac:control/eac:otherRecordId");
for (Node idNode : otherIds) {
String otherId = idNode.getText();
if (otherId.startsWith("http://nla.gov.au")) {
return otherId;
}
}
return null;
}
/**
* Search for a record from the National Library of Australia with the
* provided identifier. Process and return their Identity record.
*
* @param id The identifier to search for
* @return NLAIdentity A processed Identity
* @throws SRUException If processing the Identity fails.
*/
public NLAIdentity nlaGetIdentityById(String id) throws SRUException {
Node node = nlaGetRecordNodeById(id);
return new NLAIdentity(node);
}
/**
* Search for a records from the National Library of Australia and parse
* the resultant XML is a wrapper object.
*
* @param search The search to submit to the NLA
* @return SRUResponse A parsed response
*/
public SRUResponse nlaGetResponseBySearch(String search) {
return nlaGetResponseBySearch(search, null, null);
}
/**
* Search for a records from the National Library of Australia and parse
* the resultant XML is a wrapper object.
*
* @param search The search to submit to the NLA
* @param startRecord Starting record number. Optional, with no default.
* @param maxRecords Maximum rows to return. Optional, with no default.
* @return SRUResponse A parsed response
*/
public SRUResponse nlaGetResponseBySearch(String search, String startRecord,
String maxRecords) {
nlaNamespaces();
// Search NLA
String xmlResponse = getSearchResponse(search, null, null,
startRecord, maxRecords);
if (xmlResponse == null) {
log.error("Searching NLA failed!");
return null;
}
// Parse results
return getResponseObject(xmlResponse);
}
/**
* Search for records from the National Library of Australia. Process
* and return their Identity records. It is important to note that if any
* Identity fails to process it will not appear in the List.
*
* @param search The search to submit to the NLA
* @return List A list of processed Identities
*/
public List nlaGetIdentitiesBySearch(String search) {
return nlaGetIdentitiesBySearch(search, null, null);
}
/**
* Search for records from the National Library of Australia. Process
* and return their Identity records. It is important to note that if any
* Identity fails to process it will not appear in the List.
*
* @param search The search to submit to the NLA
* @param startRecord Starting record number. Optional, with no default.
* @param maxRecords Maximum rows to return. Optional, with no default.
* @return List A list of processed Identities
*/
public List nlaGetIdentitiesBySearch(String search,
String startRecord, String maxRecords) {
SRUResponse response = nlaGetResponseBySearch(search);
if (response == null) {
log.error("Searching NLA failed!");
return null;
}
// Process Identities
return NLAIdentity.convertNodesToIdentities(response.getResults());
}
}