
// org.dspace.app.itemimport.ItemImport (Maven / Gradle / Ivy)
// Part of the dspace-api artifact.
/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.app.itemimport;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.nio.file.Files;
import java.sql.SQLException;
import java.time.Instant;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
import org.apache.commons.cli.ParseException;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.tika.Tika;
import org.dspace.app.itemimport.factory.ItemImportServiceFactory;
import org.dspace.app.itemimport.service.ItemImportService;
import org.dspace.authorize.AuthorizeException;
import org.dspace.content.Collection;
import org.dspace.content.factory.ContentServiceFactory;
import org.dspace.content.service.CollectionService;
import org.dspace.core.Constants;
import org.dspace.core.Context;
import org.dspace.eperson.EPerson;
import org.dspace.eperson.factory.EPersonServiceFactory;
import org.dspace.eperson.service.EPersonService;
import org.dspace.handle.factory.HandleServiceFactory;
import org.dspace.handle.service.HandleService;
import org.dspace.scripts.DSpaceRunnable;
import org.dspace.utils.DSpace;
/**
* Import items into DSpace. The conventional use is upload files by copying
* them. DSpace writes the item's bitstreams into its assetstore. Metadata is
* also loaded to the DSpace database.
*
* A second use assumes the bitstream files already exist in a storage
* resource accessible to DSpace. In this case the bitstreams are 'registered'.
* That is, the metadata is loaded to the DSpace database and DSpace is given
* the location of the file which is subsumed into DSpace.
*
* The distinction is controlled by the format of lines in the 'contents' file.
* See comments in processContentsFile() below.
*
* Modified by David Little, UCSD Libraries 12/21/04 to
* allow the registration of files (bitstreams) into DSpace.
*/
public class ItemImport extends DSpaceRunnable {
// Name of the sub-directory (below the import service's temp work dir) into which
// readZip() unpacks the archive.
public static String TEMP_DIR = "importSAF";
// Prefix of the temporary mapfile created by setMapFile() and the name under which
// process() hands the mapfile to the handler.
public static String MAPFILE_FILENAME = "mapfile";
// Type passed to handler.writeFilestream() when process() stores the mapfile.
public static String MAPFILE_BITSTREAM_TYPE = "importSAFMapfile";
// Set by -p; forwarded to addItems()/replaceItems().
protected boolean template = false;
// "add", "replace" or "delete", selected by -a / -r / -d; null when none was given.
protected String command = null;
// Directory returned by itemImportService.unzip() in readZip().
protected String sourcedir = null;
// Path of the mapfile: the -m value when resuming, otherwise a fresh temp file (see setMapFile()).
protected String mapfile = null;
// Not read or written by the code visible in this class.
protected String eperson = null;
// Raw values of the -c option (handles or UUIDs); null when -c was not given.
protected String[] collections = null;
// Set by -v (test run).
protected boolean isTest = false;
// Set by -x.
protected boolean isExcludeContent = false;
// Set by -R; validate() only accepts it together with the "add" command.
protected boolean isResume = false;
// Set by -w.
protected boolean useWorkflow = false;
// Set by -n, but only when -w is also present.
protected boolean useWorkflowSendEmail = false;
// Set by -q.
protected boolean isQuiet = false;
// True when -c was given on the command line.
protected boolean commandLineCollections = false;
// Set to true by setZip(); gates the temp file/directory cleanup in internalRun().
protected boolean zip = false;
// True when the archive is addressed by URL (-u) rather than by uploaded file name (-z).
protected boolean remoteUrl = false;
// Value of -z or -u.
protected String zipfilename = null;
// Set by validateZip() once the archive's MIME type was detected as application/zip.
protected boolean zipvalid = false;
// Set by -h; internalRun() then only prints the help text.
protected boolean help = false;
// Directory the archive is unpacked into (see readZip()).
protected File workDir = null;
// Local copy of the archive (see readZip()).
protected File workFile = null;
// Service singletons obtained once from their factories.
protected static final CollectionService collectionService =
ContentServiceFactory.getInstance().getCollectionService();
protected static final EPersonService epersonService =
EPersonServiceFactory.getInstance().getEPersonService();
protected static final HandleService handleService =
HandleServiceFactory.getInstance().getHandleService();
/**
 * Look up the configuration bean registered for this script.
 *
 * @return the service registered under the name "import"
 */
@Override
public ItemImportScriptConfiguration getScriptConfiguration() {
    DSpace dspace = new DSpace();
    return dspace.getServiceManager().getServiceByName("import", ItemImportScriptConfiguration.class);
}
/**
 * Translate the parsed command line into the option fields of this script.
 *
 * @throws ParseException declared by the overridden method; not thrown by this implementation
 */
@Override
public void setup() throws ParseException {
    help = commandLine.hasOption('h');

    // When several commands are given the precedence is -d, then -r, then -a.
    if (commandLine.hasOption('d')) {
        command = "delete";
    } else if (commandLine.hasOption('r')) {
        command = "replace";
    } else if (commandLine.hasOption('a')) {
        command = "add";
    }

    // -n (send e-mail) is only honoured together with -w (workflow).
    final boolean workflowRequested = commandLine.hasOption('w');
    if (workflowRequested) {
        useWorkflow = true;
    }
    if (workflowRequested && commandLine.hasOption('n')) {
        useWorkflowSendEmail = true;
    }

    if (commandLine.hasOption('v')) {
        isTest = true;
        handler.logInfo("**Test Run** - not actually importing items.");
    }

    isExcludeContent = commandLine.hasOption('x');

    if (commandLine.hasOption('p')) {
        template = true;
    }

    if (!commandLine.hasOption('c')) {
        handler.logInfo("No collections given. Assuming 'collections' file inside item directory");
    } else {
        collections = commandLine.getOptionValues('c');
        commandLineCollections = true;
    }

    if (commandLine.hasOption('R')) {
        isResume = true;
        handler.logInfo("**Resume import** - attempting to import items not already imported");
    }

    if (commandLine.hasOption('q')) {
        isQuiet = true;
    }

    setZip();
}
/**
 * Run the import.
 *
 * Prints the help text and returns when -h was given. Otherwise a batch-edit
 * context is opened, the options are validated, the acting EPerson and the
 * destination collections are resolved, the archive is unpacked and the
 * selected command is executed. The context is completed on success and
 * aborted on any failure, including failures during option validation.
 * Temporary files are removed and timing information is logged once the
 * import phase has been entered.
 *
 * @throws Exception if validation fails (original exception is rethrown) or if
 *                   unpacking/importing/committing fails (wrapped, with cause)
 */
@Override
public void internalRun() throws Exception {
    if (help) {
        printHelp();
        return;
    }
    Instant startTime = Instant.now();
    Context context = new Context(Context.Mode.BATCH_EDIT);

    List<Collection> mycollections;
    try {
        setMapFile();
        validate(context);
        setEPerson(context);
        mycollections = resolveCollections(context);
    } catch (Exception e) {
        // Do not leave the context open when the options turn out to be unusable.
        context.abort();
        throw e;
    }

    ItemImportService itemImportService = ItemImportServiceFactory.getInstance()
        .getItemImportService();
    try {
        itemImportService.setTest(isTest);
        itemImportService.setExcludeContent(isExcludeContent);
        itemImportService.setResume(isResume);
        itemImportService.setUseWorkflow(useWorkflow);
        itemImportService.setUseWorkflowSendEmail(useWorkflowSendEmail);
        itemImportService.setQuiet(isQuiet);
        itemImportService.setHandler(handler);
        try {
            context.turnOffAuthorisationSystem();
            readZip(context, itemImportService);
            process(context, itemImportService, mycollections);
            // complete all transactions
            context.complete();
        } catch (Exception e) {
            context.abort();
            throw new Exception(
                "Error committing changes to database: " + e.getMessage() + ", aborting most recent changes", e);
        }
        if (isTest) {
            handler.logInfo("***End of Test Run***");
        }
    } finally {
        if (zip) {
            // if zip file was valid then clean sourcedir
            if (zipvalid && sourcedir != null && new File(sourcedir).exists()) {
                FileUtils.deleteDirectory(new File(sourcedir));
            }
            // clean workdir
            if (workDir != null && workDir.exists()) {
                FileUtils.deleteDirectory(workDir);
            }
            // conditionally clean workFile if import was done in the UI or via a URL and it still exists
            if (workFile != null && workFile.exists()) {
                workFile.delete();
            }
        }
        Instant endTime = Instant.now();
        long elapsedMillis = endTime.toEpochMilli() - startTime.toEpochMilli();
        handler.logInfo("Started: " + DateTimeFormatter.ISO_INSTANT.format(startTime));
        handler.logInfo("Ended: " + DateTimeFormatter.ISO_INSTANT.format(endTime));
        handler.logInfo(
            "Elapsed time: " + (elapsedMillis / 1000) + " secs (" + elapsedMillis + " msecs)");
    }
}

/**
 * Resolve the -c arguments to collections, logging each one.
 *
 * @param context the DSpace context
 * @return the resolved collections in command-line order, or {@code null} when
 *         the command is "delete" or no collections were given on the command
 *         line (the import service is then called with {@code null}, exactly
 *         as before)
 * @throws SQLException if a lookup fails
 * @throws IllegalArgumentException if an argument does not identify a collection
 */
private List<Collection> resolveCollections(Context context) throws SQLException {
    if ("delete".equals(command) || !commandLineCollections) {
        return null;
    }
    handler.logInfo("Destination collections:");
    List<Collection> resolved = new ArrayList<>();
    for (int i = 0; i < collections.length; i++) {
        Collection collection = resolveCollection(context, collections[i]);
        if (collection == null || collection.getType() != Constants.COLLECTION) {
            throw new IllegalArgumentException("Cannot resolve "
                + collections[i] + " to collection");
        }
        resolved.add(collection);
        // the first collection given is the owning collection
        handler.logInfo((i == 0 ? "Owning " : "") + "Collection: " + collection.getName());
    }
    return resolved;
}

/**
 * Resolve a single collection identifier, which is either a handle (contains '/') or a UUID.
 *
 * @param context the DSpace context
 * @param identifier handle or UUID string, may be {@code null}
 * @return the collection, or {@code null} when the identifier is {@code null},
 *         unknown, or a handle that points to something other than a collection
 * @throws SQLException if a lookup fails
 * @throws IllegalArgumentException if a non-handle identifier is not a valid UUID
 */
private Collection resolveCollection(Context context, String identifier) throws SQLException {
    if (identifier == null) {
        return null;
    }
    if (identifier.indexOf('/') != -1) {
        // Check the type before casting: a handle may point to an item or community.
        Object resolved = handleService.resolveToObject(context, identifier);
        return resolved instanceof Collection ? (Collection) resolved : null;
    }
    UUID uuid;
    try {
        uuid = UUID.fromString(identifier);
    } catch (IllegalArgumentException e) {
        throw new IllegalArgumentException("Cannot resolve " + identifier + " to collection", e);
    }
    return collectionService.find(context, uuid);
}
/**
 * Check that the parsed options form a usable combination and record where
 * the archive comes from (-z uploaded file, or -u remote URL; -z wins when
 * both are present).
 *
 * @param context the DSpace context (not used by this implementation)
 * @throws UnsupportedOperationException if no archive or no command was
 *         given, or if the resume option is combined with a command other
 *         than add or used without a mapfile
 */
protected void validate(Context context) {
    final boolean uploadedZip = commandLine.hasOption('z');
    final boolean urlZip = !uploadedZip && commandLine.hasOption('u');
    if (uploadedZip) {
        zipfilename = commandLine.getOptionValue('z');
    }
    if (urlZip) {
        remoteUrl = true;
        zipfilename = commandLine.getOptionValue('u');
    }
    if (StringUtils.isBlank(zipfilename)) {
        throw new UnsupportedOperationException("Must run with either name of zip file or url of zip file");
    }

    if (null == command) {
        handler.logError("Must run with either add, replace, or remove (run with -h flag for details)");
        throw new UnsupportedOperationException("Must run with either add, replace, or remove");
    }

    // Both remaining checks only concern resumed imports.
    if (isResume) {
        if (!"add".equals(command)) {
            handler.logError("Resume option only works with the --add command (run with -h flag for details)");
            throw new UnsupportedOperationException("Resume option only works with the --add command");
        }
        if (StringUtils.isBlank(mapfile)) {
            handler.logError("The mapfile does not exist. ");
            throw new UnsupportedOperationException("The mapfile does not exist");
        }
    }
}
/**
 * Execute the selected command (add, replace or delete) and afterwards hand
 * the resulting mapfile to the handler.
 *
 * The local mapfile is deleted once it has been offered to the handler,
 * whether or not writing it succeeded.
 *
 * @param context the DSpace context
 * @param itemImportService the service performing the import
 * @param collections destination collections, or {@code null} when none were
 *                    resolved from the command line
 * @throws Exception if the import or the transfer of the mapfile fails
 */
protected void process(Context context, ItemImportService itemImportService,
                       List<Collection> collections) throws Exception {
    readMapfile(context);
    if ("add".equals(command)) {
        itemImportService.addItems(context, collections, sourcedir, mapfile, template);
    } else if ("replace".equals(command)) {
        itemImportService.replaceItems(context, collections, sourcedir, mapfile, template);
    } else if ("delete".equals(command)) {
        itemImportService.deleteItems(context, mapfile);
    }
    // write input stream on handler
    File mapFile = new File(mapfile);
    try (InputStream mapfileInputStream = new FileInputStream(mapFile)) {
        handler.writeFilestream(context, MAPFILE_FILENAME, mapfileInputStream, MAPFILE_BITSTREAM_TYPE);
    } finally {
        mapFile.delete();
    }
}
/**
 * Fetch the ZIP archive in SAF format, validate it and unpack it.
 *
 * The archive is read twice: once to check its MIME type and, only if that
 * succeeds, a second time to copy it to {@link #workFile}. Both streams are
 * closed in every case. The archive is then unpacked into {@link #workDir}
 * and {@link #sourcedir} is set to the directory reported by the service.
 *
 * @param context the DSpace context; its current user's ID is part of the temp paths
 * @param itemImportService the service providing the temp directory and unzip support
 * @throws IllegalArgumentException if the handler has no file for the given name
 * @throws Exception if reading, validating or unpacking the archive fails
 */
protected void readZip(Context context, ItemImportService itemImportService) throws Exception {
    // validate zip file before anything is written to disk
    try (InputStream validationStream = openZipStream(context)) {
        validateZip(validationStream);
    }

    String tempWorkDir = itemImportService.getTempWorkDir();
    String userId = String.valueOf(context.getCurrentUser().getID());
    // Use only the last path segment: zipfilename may be a URL or a name
    // containing separators or "..", which must not steer where we write.
    String safeName = new File(zipfilename).getName();

    workFile = new File(tempWorkDir + File.separator + safeName + "-" + userId);
    try (InputStream zipStream = openZipStream(context)) {
        FileUtils.copyInputStreamToFile(zipStream, workFile);
    }

    workDir = new File(tempWorkDir + File.separator + TEMP_DIR + File.separator + userId);
    sourcedir = itemImportService.unzip(workFile, workDir.getAbsolutePath());
}

/**
 * Open a fresh stream on the archive, from the remote URL or from the handler.
 *
 * @param context the DSpace context
 * @return a new stream the caller must close
 * @throws IllegalArgumentException if the handler has no file for the given name
 * @throws Exception if the stream cannot be opened
 */
private InputStream openZipStream(Context context) throws Exception {
    if (remoteUrl) {
        // manage zip via remote url
        return new URL(zipfilename).openStream();
    }
    // manage zip via upload
    Optional<InputStream> uploaded = handler.getFileStream(context, zipfilename);
    return uploaded.orElseThrow(() -> new IllegalArgumentException(
        "Error reading file, the file couldn't be found for filename: " + zipfilename));
}
/**
 * Confirm that the supplied stream has the MIME type {@code application/zip}
 * and record the result in {@link #zipvalid}.
 *
 * @param inputStream stream positioned at the start of the archive; it is not closed here
 * @throws UnsupportedOperationException if the detected MIME type is not application/zip
 * @throws IllegalArgumentException if the stream cannot be read (the I/O error is kept as cause)
 */
protected void validateZip(InputStream inputStream) {
    Tika tika = new Tika();
    String mimeType;
    try {
        mimeType = tika.detect(inputStream);
    } catch (IOException e) {
        throw new IllegalArgumentException(
            "There was an error while reading the zip file: " + zipfilename, e);
    }
    if (!"application/zip".equals(mimeType)) {
        handler.logError("A valid zip file must be supplied. The provided file has mimetype: " + mimeType);
        throw new UnsupportedOperationException("A valid zip file must be supplied");
    }
    zipvalid = true;
}
/**
 * When resuming, fetch the existing mapfile from the handler into a local
 * temporary file and point {@link #mapfile} at that copy.
 *
 * Does nothing when the import is not a resume. If the handler has no file
 * under the given name, {@link #mapfile} is left unchanged.
 *
 * @param context the DSpace context
 * @throws UnsupportedOperationException if the mapfile cannot be read or
 *         copied (the underlying error is kept as cause)
 */
protected void readMapfile(Context context) {
    if (!isResume) {
        return;
    }
    try {
        Optional<InputStream> optionalFileStream = handler.getFileStream(context, mapfile);
        if (optionalFileStream.isPresent()) {
            // try-with-resources: the stream is released even if the temp file cannot be created
            try (InputStream mapfileStream = optionalFileStream.get()) {
                File tempFile = File.createTempFile(mapfile, "temp");
                tempFile.deleteOnExit();
                FileUtils.copyInputStreamToFile(mapfileStream, tempFile);
                mapfile = tempFile.getAbsolutePath();
            }
        }
    } catch (IOException | AuthorizeException e) {
        throw new UnsupportedOperationException("The mapfile does not exist", e);
    }
}
/**
 * Decide which mapfile to use: the -m value when resuming with an explicit
 * mapfile, otherwise a newly created temporary file.
 *
 * @throws IOException if the temporary file cannot be created
 */
protected void setMapFile() throws IOException {
    final boolean resumeWithGivenMapfile = isResume && commandLine.hasOption('m');
    mapfile = resumeWithGivenMapfile
        ? commandLine.getOptionValue('m')
        : Files.createTempFile(MAPFILE_FILENAME, "temp").toString();
}
/**
 * Mark this run as a zip-based import. This implementation always sets the
 * flag; internalRun() only cleans up its temporary files when it is set.
 */
protected void setZip() {
    this.zip = true;
}
/**
 * Look up the EPerson this script runs as and make it the context's current user.
 *
 * @param context the DSpace context to update
 * @throws SQLException if the lookup fails
 * @throws UnsupportedOperationException if no EPerson matches the identifier
 */
protected void setEPerson(Context context) throws SQLException {
    EPerson importer = epersonService.find(context, this.getEpersonIdentifier());
    if (importer != null) {
        context.setCurrentUser(importer);
        return;
    }
    // check eperson
    String notFound = "EPerson cannot be found: " + this.getEpersonIdentifier();
    handler.logError(notFound);
    throw new UnsupportedOperationException(notFound);
}
}