All downloads are free. Search and download functionality uses the official Maven repository.

org.dspace.search.Harvest Maven / Gradle / Ivy

The newest version!
/**
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 * http://www.dspace.org/license/
 */
package org.dspace.search;

import java.sql.SQLException;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.logging.log4j.Logger;
import org.dspace.authorize.factory.AuthorizeServiceFactory;
import org.dspace.authorize.service.AuthorizeService;
import org.dspace.content.Collection;
import org.dspace.content.Community;
import org.dspace.content.DCDate;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.ItemService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.discovery.DiscoverQuery;
import org.dspace.discovery.DiscoverResult;
import org.dspace.discovery.IndexableObject;
import org.dspace.discovery.SearchServiceException;
import org.dspace.discovery.SearchUtils;
import org.dspace.discovery.indexobject.IndexableDSpaceObject;
import org.dspace.discovery.indexobject.IndexableItem;
import org.dspace.eperson.Group;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;

/**
 * Utility class for extracting information about items, possibly just within a
 * certain community or collection, that have been created, modified or
 * withdrawn within a particular range of dates.
 *
 * @author Robert Tansley
 */
public class Harvest {
    /**
     * log4j logger
     */
    private static final Logger log = org.apache.logging.log4j.LogManager.getLogger(Harvest.class);

    protected static final AuthorizeService authorizeService = AuthorizeServiceFactory.getInstance()
                                                                                      .getAuthorizeService();
    protected static final HandleService handleService = HandleServiceFactory.getInstance().getHandleService();
    protected static final ItemService itemService = ContentServiceFactory.getInstance().getItemService();

    /**
     * Default constructor
     */
    private Harvest() { }

    /**
     * Obtain information about items that have been created, modified or
     * withdrawn within a given date range. You can also specify 'offset' and
     * 'limit' so that a big harvest can be split up into smaller sections.
     * 

* Note that dates are passed in the standard ISO8601 format used by DSpace * (and OAI-PMH). *

* FIXME: Assumes all in_archive items have public metadata * * @param context DSpace context * @param scope a Collection, Community, or null indicating the scope is * all of DSpace * @param startDate start of date range, or null * @param endDate end of date range, or null * @param offset for a partial harvest, the point in the overall list of * matching items to start at. 0 means just start at the * beginning. * @param limit the number of matching items to return in a partial harvest. * Specify 0 to return the whole list (or the rest of the list if * an offset was specified.) * @param items if true the item field of each * HarvestedItemInfo object is filled out * @param collections if true the collectionHandles * field of each HarvestedItemInfo object is * filled out * @param withdrawn If true, information about withdrawn items is * included * @param nonAnon If items without anonymous access should be included or not * @return List of HarvestedItemInfo objects * @throws SQLException if database error * @throws java.text.ParseException If the date is not in a supported format */ public static List harvest(Context context, DSpaceObject scope, String startDate, String endDate, int offset, int limit, boolean items, boolean collections, boolean withdrawn, boolean nonAnon) throws SQLException, ParseException { DiscoverQuery discoverQuery = new DiscoverQuery(); discoverQuery.addFilterQueries("search.resourcetype:" + IndexableItem.TYPE); if (scope != null) { if (scope instanceof Community) { discoverQuery.addFilterQueries("location:m" + scope.getID()); } else if (scope instanceof Collection) { discoverQuery.addFilterQueries("location:l" + scope.getID()); } } if (startDate != null && endDate != null) { discoverQuery.addFilterQueries("lastModified:[" + new DCDate(startDate).toString() + " TO " + new DCDate(endDate).toString() + "]"); } else if (startDate != null) { discoverQuery.addFilterQueries("lastModified:[" + new DCDate(startDate).toString() + " TO *]"); } else if 
(endDate != null) { discoverQuery.addFilterQueries("lastModified:[* TO " + new DCDate(endDate).toString() + " ]"); } if (!withdrawn) { discoverQuery.addFilterQueries("archived: true OR withdrawn: false"); } else { discoverQuery.addFilterQueries("archived: true OR withdrawn: true"); } // Order by item ID, so that for a given harvest the order will be // consistent. This is so that big harvests can be broken up into // several smaller operations (e.g. for OAI resumption tokens.) discoverQuery.setSortField("search.resourceid", DiscoverQuery.SORT_ORDER.asc); List infoObjects = new ArrayList<>(); // Count of items read from the record set that match the selection criteria. // Note : Until 'index > offset' the records are not added to the output set. int index = 0; // Count of items added to the output set. int itemCounter = 0; try { DiscoverResult discoverResult = SearchUtils.getSearchService().search(context, discoverQuery); // Process results of query into HarvestedItemInfo objects Iterator dsoIterator = discoverResult.getIndexableObjects().iterator(); while (dsoIterator.hasNext() && ((limit == 0) || (itemCounter < limit))) { // the query is limited to ITEM IndexableDSpaceObject indexableDSpaceObject = (IndexableDSpaceObject) dsoIterator.next(); HarvestedItemInfo itemInfo = new HarvestedItemInfo(); itemInfo.context = context; itemInfo.handle = indexableDSpaceObject.getIndexedObject().getHandle(); itemInfo.itemID = indexableDSpaceObject.getID(); itemInfo.datestamp = ((IndexableItem) indexableDSpaceObject).getIndexedObject().getLastModified(); itemInfo.withdrawn = ((IndexableItem) indexableDSpaceObject).getIndexedObject().isWithdrawn(); if (collections) { // Add collections data fillCollections(itemInfo); } if (items) { // Add the item reference itemInfo.item = itemService.find(context, itemInfo.itemID); } if (nonAnon || (itemInfo.item == null) || (withdrawn && itemInfo.withdrawn)) { index++; if (index > offset) { infoObjects.add(itemInfo); itemCounter++; } } else { // 
We only want items that allow for anonymous access. if (anonAccessAllowed(context, itemInfo)) { index++; if (index > offset) { infoObjects.add(itemInfo); itemCounter++; } } } } } catch (SearchServiceException e) { log.error(e.getMessage(), e); } return infoObjects; } /** * Get harvested item info for a single item. item field in * returned HarvestedItemInfo object is always filled out. * * @param context DSpace context * @param handle Prefix-less Handle of item * @param collections if true the collectionHandles * field of the HarvestedItemInfo object is filled * out * @return HarvestedItemInfo object for the single item, or * null * @throws SQLException if database error */ public static HarvestedItemInfo getSingle(Context context, String handle, boolean collections) throws SQLException { // FIXME: Assume Handle is item Item i = (Item) handleService.resolveToObject(context, handle); if (i == null) { return null; } // Fill out OAI info item object HarvestedItemInfo itemInfo = new HarvestedItemInfo(); itemInfo.context = context; itemInfo.item = i; itemInfo.handle = handle; itemInfo.withdrawn = i.isWithdrawn(); itemInfo.datestamp = i.getLastModified(); itemInfo.itemID = i.getID(); // Get the sets if (collections) { fillCollections(itemInfo); } return itemInfo; } /** * Fill out the containers field of the HarvestedItemInfo object * * @param itemInfo HarvestedItemInfo object to fill out * @throws SQLException if database error */ private static void fillCollections(HarvestedItemInfo itemInfo) throws SQLException { // Get the collection Handles from DB List collections = itemInfo.item.getCollections(); itemInfo.collectionHandles = new ArrayList<>(); for (Collection collection : collections) { itemInfo.collectionHandles.add(collection.getHandle()); } } /** * Does the item allow anonymous access ? ie. authorizedGroups must include id=0. 
*/ private static boolean anonAccessAllowed(Context context, HarvestedItemInfo itemInfo) throws SQLException { List authorizedGroups = authorizeService.getAuthorizedGroups(context, itemInfo.item, Constants.READ); for (Group authorizedGroup : authorizedGroups) { if (authorizedGroup.getName().equals(Group.ANONYMOUS)) { return true; } } return false; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy