
// org.dspace.search.Harvest -- source listing from the dspace-api artifact (Maven / Gradle / Ivy).
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.search;
import java.sql.SQLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.logging.log4j.Logger;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DCDate;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.discovery.DiscoverQuery;
import org.dspace.discovery.DiscoverResult;
import org.dspace.discovery.IndexableObject;
import org.dspace.discovery.SearchServiceException;
import org.dspace.discovery.SearchUtils;
import org.dspace.discovery.indexobject.IndexableDSpaceObject;
import org.dspace.discovery.indexobject.IndexableItem;
import org.dspace.eperson.Group;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
/**
* Utility class for extracting information about items, possibly just within a
* certain community or collection, that have been created, modified or
* withdrawn within a particular range of dates.
*
* @author Robert Tansley
*/
public class Harvest {
    /**
     * log4j logger
     */
    private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(Harvest.class);

    protected static final AuthorizeService authorizeService = AuthorizeServiceFactory.getInstance()
                                                                                      .getAuthorizeService();
    protected static final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
    protected static final ItemService itemService = ContentServiceFactory.getInstance().getItemService();

    /**
     * Not instantiable: this class only exposes static helpers.
     */
    private Harvest() { }

    /**
     * Obtain information about items that have been created, modified or
     * withdrawn within a given date range. You can also specify 'offset' and
     * 'limit' so that a big harvest can be split up into smaller sections.
     * <p>
     * Note that dates are passed in the standard ISO8601 format used by DSpace
     * (and OAI-PMH).
     * <p>
     * A {@link SearchServiceException} raised by the search is logged and not
     * rethrown; whatever was collected up to that point (possibly nothing) is
     * returned.
     * <p>
     * FIXME: Assumes all in_archive items have public metadata
     *
     * @param context     DSpace context
     * @param scope       a {@code Collection}, {@code Community}, or {@code null}
     *                    indicating the scope is all of DSpace
     * @param startDate   start of date range, or {@code null}
     * @param endDate     end of date range, or {@code null}
     * @param offset      for a partial harvest, the point in the overall list of
     *                    matching items to start at. 0 means just start at the
     *                    beginning.
     * @param limit       the number of matching items to return in a partial harvest.
     *                    Specify 0 to return the whole list (or the rest of the list if
     *                    an offset was specified.)
     * @param items       if {@code true} the {@code item} field of each
     *                    {@code HarvestedItemInfo} object is filled out
     * @param collections if {@code true} the {@code collectionHandles}
     *                    field of each {@code HarvestedItemInfo} object is
     *                    filled out
     * @param withdrawn   if {@code true}, information about withdrawn items is
     *                    included
     * @param nonAnon     whether items without anonymous access should be included.
     *                    Note that the anonymous-access check needs the {@code item}
     *                    field, so it is only applied when {@code items} is {@code true}.
     * @return List of {@code HarvestedItemInfo} objects
     * @throws SQLException             if database error
     * @throws java.text.ParseException If the date is not in a supported format
     */
    public static List<HarvestedItemInfo> harvest(Context context, DSpaceObject scope,
                                                  String startDate, String endDate, int offset, int limit,
                                                  boolean items, boolean collections, boolean withdrawn,
                                                  boolean nonAnon) throws SQLException, ParseException {
        DiscoverQuery discoverQuery = new DiscoverQuery();
        discoverQuery.addFilterQueries("search.resourcetype:" + IndexableItem.TYPE);

        // instanceof is false for null, so a null scope adds no location filter.
        if (scope instanceof Community) {
            discoverQuery.addFilterQueries("location:m" + scope.getID());
        } else if (scope instanceof Collection) {
            discoverQuery.addFilterQueries("location:l" + scope.getID());
        }

        if (startDate != null && endDate != null) {
            discoverQuery.addFilterQueries("lastModified:[" + new DCDate(startDate).toString()
                                               + " TO " + new DCDate(endDate).toString() + "]");
        } else if (startDate != null) {
            discoverQuery.addFilterQueries("lastModified:[" + new DCDate(startDate).toString() + " TO *]");
        } else if (endDate != null) {
            discoverQuery.addFilterQueries("lastModified:[* TO " + new DCDate(endDate).toString() + " ]");
        }

        if (!withdrawn) {
            discoverQuery.addFilterQueries("archived: true OR withdrawn: false");
        } else {
            discoverQuery.addFilterQueries("archived: true OR withdrawn: true");
        }

        // Order by item ID, so that for a given harvest the order will be
        // consistent. This is so that big harvests can be broken up into
        // several smaller operations (e.g. for OAI resumption tokens.)
        discoverQuery.setSortField("search.resourceid", DiscoverQuery.SORT_ORDER.asc);

        List<HarvestedItemInfo> infoObjects = new ArrayList<>();

        // Count of items read from the result set that match the selection criteria.
        // Note: until 'index > offset' the records are not added to the output list.
        int index = 0;
        // Count of items added to the output list.
        int itemCounter = 0;

        try {
            DiscoverResult discoverResult = SearchUtils.getSearchService().search(context, discoverQuery);

            // Process results of query into HarvestedItemInfo objects
            Iterator<IndexableObject> dsoIterator = discoverResult.getIndexableObjects().iterator();
            while (dsoIterator.hasNext() && ((limit == 0) || (itemCounter < limit))) {
                // The query is restricted to the item resource type, so every hit is an IndexableItem.
                IndexableItem indexableItem = (IndexableItem) dsoIterator.next();
                Item indexedItem = indexableItem.getIndexedObject();

                HarvestedItemInfo itemInfo = new HarvestedItemInfo();
                itemInfo.context = context;
                itemInfo.handle = indexedItem.getHandle();
                itemInfo.itemID = indexableItem.getID();
                itemInfo.datestamp = indexedItem.getLastModified();
                itemInfo.withdrawn = indexedItem.isWithdrawn();

                if (collections) {
                    // Read the collections from the indexed item: itemInfo.item is not set yet
                    // (and is never set when 'items' is false), so it must not be dereferenced here.
                    fillCollections(itemInfo, indexedItem);
                }

                if (items) {
                    // Add the item reference
                    itemInfo.item = itemService.find(context, itemInfo.itemID);
                }

                // Short-circuit order matters: anonAccessAllowed() needs itemInfo.item and is
                // only reached when it is non-null, non-anonymous items are not wanted and the
                // item is not a requested withdrawn one.
                boolean include = nonAnon
                    || (itemInfo.item == null)
                    || (withdrawn && itemInfo.withdrawn)
                    || anonAccessAllowed(context, itemInfo);

                if (include) {
                    index++;
                    if (index > offset) {
                        infoObjects.add(itemInfo);
                        itemCounter++;
                    }
                }
            }
        } catch (SearchServiceException e) {
            log.error(e.getMessage(), e);
        }

        return infoObjects;
    }

    /**
     * Get harvested item info for a single item. The {@code item} field in the
     * returned {@code HarvestedItemInfo} object is always filled out.
     *
     * @param context     DSpace context
     * @param handle      Prefix-less Handle of item
     * @param collections if {@code true} the {@code collectionHandles}
     *                    field of the {@code HarvestedItemInfo} object is filled
     *                    out
     * @return {@code HarvestedItemInfo} object for the single item, or
     *         {@code null} if the handle does not resolve to an item
     * @throws SQLException if database error
     */
    public static HarvestedItemInfo getSingle(Context context, String handle,
                                              boolean collections) throws SQLException {
        DSpaceObject resolved = handleService.resolveToObject(context, handle);

        // A handle may also identify a community or collection; treat anything that is
        // not an item (including an unresolved handle) as "no such item" instead of
        // failing with a ClassCastException.
        if (!(resolved instanceof Item)) {
            return null;
        }
        Item item = (Item) resolved;

        // Fill out OAI info item object
        HarvestedItemInfo itemInfo = new HarvestedItemInfo();
        itemInfo.context = context;
        itemInfo.item = item;
        itemInfo.handle = handle;
        itemInfo.withdrawn = item.isWithdrawn();
        itemInfo.datestamp = item.getLastModified();
        itemInfo.itemID = item.getID();

        // Get the sets
        if (collections) {
            fillCollections(itemInfo, item);
        }

        return itemInfo;
    }

    /**
     * Fill out the {@code collectionHandles} field of the HarvestedItemInfo object
     * with the handles of every collection the given item belongs to.
     *
     * @param itemInfo HarvestedItemInfo object to fill out
     * @param item     the item whose collections are read; if {@code null} the
     *                 field is set to an empty list
     * @throws SQLException if database error
     */
    private static void fillCollections(HarvestedItemInfo itemInfo, Item item) throws SQLException {
        itemInfo.collectionHandles = new ArrayList<>();
        if (item == null) {
            return;
        }

        List<Collection> owningCollections = item.getCollections();
        for (Collection collection : owningCollections) {
            itemInfo.collectionHandles.add(collection.getHandle());
        }
    }

    /**
     * Does the item allow anonymous access? That is, is one of the groups
     * authorized to READ the item named {@link Group#ANONYMOUS}?
     *
     * @param context  DSpace context
     * @param itemInfo harvested item info whose {@code item} field must be set
     * @return {@code true} if a group authorized to READ the item is the anonymous group
     * @throws SQLException if database error
     */
    private static boolean anonAccessAllowed(Context context, HarvestedItemInfo itemInfo) throws SQLException {
        List<Group> authorizedGroups = authorizeService.getAuthorizedGroups(context, itemInfo.item, Constants.READ);

        for (Group authorizedGroup : authorizedGroups) {
            // Constant-first comparison so a group without a name cannot cause an NPE.
            if (Group.ANONYMOUS.equals(authorizedGroup.getName())) {
                return true;
            }
        }

        return false;
    }
}