// fedora.client.utility.ingest.Ingest
// (source listing from the fcrepo-client artifact)
/*
* -----------------------------------------------------------------------------
*
* License and Copyright: The contents of this file are subject to the
* Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of
* the License at
* http://www.fedora-commons.org/licenses.
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
* the specific language governing rights and limitations under the License.
*
* The entire file consists of original code.
* Copyright © 2008 Fedora Commons, Inc.
*
* Copyright © 2002-2007 The Rector and Visitors of the University of
* Virginia and Cornell University
* All rights reserved.
*
* -----------------------------------------------------------------------------
*/
package fedora.client.utility.ingest;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.StringTokenizer;
import fedora.client.FedoraClient;
import fedora.client.utility.AutoFinder;
import fedora.client.utility.export.AutoExporter;
import fedora.server.access.FedoraAPIA;
import fedora.server.management.FedoraAPIM;
import fedora.server.types.gen.ComparisonOperator;
import fedora.server.types.gen.Condition;
import fedora.server.types.gen.FieldSearchQuery;
import fedora.server.types.gen.FieldSearchResult;
import fedora.server.types.gen.ObjectFields;
import fedora.server.types.gen.RepositoryInfo;
import fedora.utilities.FileComparator;
/**
* Title: Ingest.java
* Description: A utility class to initiate ingest of one or more objects.
* This class provides static utility methods, and it is also called by
* command line utilities.
*/
public class Ingest {
public static String LAST_PATH;
private static FileComparator _FILE_COMPARATOR = new FileComparator();
/**
 * Ingests a single object from a local file into the target repository.
 *
 * The file's path is stored in LAST_PATH before the ingest is attempted,
 * so that callers (such as main) can report which file was being
 * processed when a failure occurred.
 *
 * @param file the file holding the serialized object
 * @param ingestFormat the format identifier passed on to the ingestor
 * @param targetRepoAPIA API-A handle of the target repository
 * @param targetRepoAPIM API-M handle of the target repository
 * @param logMessage the log message; if null, a message naming the
 *        original path of the file is used instead
 * @return the value returned by AutoIngestor.ingestAndCommit
 *         (main reports it as the ingested PID)
 * @throws Exception if the file cannot be opened or the ingest fails
 */
public static String oneFromFile(File file,
                                 String ingestFormat,
                                 FedoraAPIA targetRepoAPIA,
                                 FedoraAPIM targetRepoAPIM,
                                 String logMessage)
        throws Exception {
    LAST_PATH = file.getPath();
    FileInputStream in = new FileInputStream(file);
    try {
        return AutoIngestor.ingestAndCommit(targetRepoAPIA,
                                            targetRepoAPIM,
                                            in,
                                            ingestFormat,
                                            getMessage(logMessage, file));
    } finally {
        // Always release the file handle, whether or not the ingestor
        // closed the stream itself; closing twice is harmless.
        try {
            in.close();
        } catch (Exception ignored) {
            // best effort: a failure to close must not mask the ingest result
        }
    }
}
/**************************************************************************
Ingest from directory
**************************************************************************/
/**
 * Ingests objects of several types from a directory tree.
 *
 * The requested types are read case-insensitively from fTypes. Each
 * requested type triggers one full pass over the directory, always in
 * the order D, M, O regardless of the order of the letters in fTypes.
 *
 * @param dir the root directory to traverse
 * @param ingestFormat the format identifier passed on to the ingestor
 * @param fTypes any combination of the letters D, M and O
 * @param targetRepoAPIA API-A handle of the target repository
 * @param targetRepoAPIM API-M handle of the target repository
 * @param logMessage the log message; if null, the original path of each
 *        file is used in its log message
 * @param log the stream that per-file results are logged to
 * @param c the counter that successes and failures are added to
 * @throws Exception if a pass over the directory fails
 */
public static void multiFromDirectory(File dir,
                                      String ingestFormat,
                                      String fTypes,
                                      FedoraAPIA targetRepoAPIA,
                                      FedoraAPIM targetRepoAPIM,
                                      String logMessage,
                                      PrintStream log,
                                      IngestCounter c) throws Exception {
    final String requested = fTypes.toUpperCase();
    // Fixed processing order, independent of how the caller listed the types.
    final char[] passOrder = { 'D', 'M', 'O' };
    for (int i = 0; i < passOrder.length; i++) {
        final char type = passOrder[i];
        if (requested.indexOf(type) >= 0) {
            multiFromDirectory(dir, ingestFormat, type, targetRepoAPIA,
                               targetRepoAPIM, logMessage, log, c);
        }
    }
}
/**
 * Maps an object-type code to the text that is searched for in a file
 * to decide whether the file holds an object of that type.
 *
 * @param fType one of 'D', 'M' or 'O' (upper case only)
 * @return the search text for the given type
 * @throws RuntimeException if fType is none of the recognized codes
 */
private static String getSearchString(char fType) {
    switch (fType) {
        case 'D':
            return "FedoraBDefObject";
        case 'M':
            return "FedoraBMechObject";
        case 'O':
            return "FedoraObject";
        default:
            throw new RuntimeException("Unrecognized fType: " + fType);
    }
}
/**
 * Ingests all objects of a single type from a directory tree.
 *
 * Looks up the search text for the given type and delegates to the
 * recursive traversal.
 *
 * @param dir the root directory to traverse
 * @param ingestFormat the format identifier passed on to the ingestor
 * @param fType the object type code: 'D', 'M' or 'O'
 * @param targetRepoAPIA API-A handle of the target repository
 * @param targetRepoAPIM API-M handle of the target repository
 * @param logMessage the log message; if null, the original path of each
 *        file is used in its log message
 * @param log the stream that per-file results are logged to
 * @param c the counter that successes and failures are added to
 * @throws Exception if the traversal fails
 * @throws RuntimeException if fType is not a recognized type code
 */
public static void multiFromDirectory(File dir,
                                      String ingestFormat,
                                      char fType,
                                      FedoraAPIA targetRepoAPIA,
                                      FedoraAPIM targetRepoAPIM,
                                      String logMessage,
                                      PrintStream log,
                                      IngestCounter c) throws Exception {
    multiFromDirectory(dir,
                       ingestFormat,
                       fType,
                       getSearchString(fType),
                       targetRepoAPIA,
                       targetRepoAPIM,
                       logMessage,
                       log,
                       c);
}
/**
 * Recursively ingests every file under dir whose content contains
 * searchString.
 *
 * Entries are processed in the order given by the shared FileComparator.
 * Subdirectories are descended into; for each regular file that matches,
 * an ingest is attempted. A failed ingest is counted and logged, and the
 * traversal continues with the next file.
 *
 * @param dir the directory to traverse
 * @param ingestFormat the format identifier passed on to the ingestor
 * @param fType the object type code, used only for logging here
 * @param searchString the text a file must contain to be ingested
 * @param targetRepoAPIA API-A handle of the target repository
 * @param targetRepoAPIM API-M handle of the target repository
 * @param logMessage the log message; if null, the original path of each
 *        file is used in its log message
 * @param log the stream that per-file results are logged to
 * @param c the counter that successes and failures are added to
 * @throws java.io.IOException if the contents of dir cannot be listed
 * @throws Exception if a file cannot be inspected for a match
 */
private static void multiFromDirectory(File dir,
                                       String ingestFormat,
                                       char fType,
                                       String searchString,
                                       FedoraAPIA targetRepoAPIA,
                                       FedoraAPIM targetRepoAPIM,
                                       String logMessage,
                                       PrintStream log,
                                       IngestCounter c) throws Exception {
    File[] files = dir.listFiles();
    if (files == null) {
        // listFiles() yields null when the path is not a directory or an
        // I/O error occurs; fail with a clear message instead of an NPE.
        throw new java.io.IOException("Unable to list contents of directory: "
                + dir.getPath());
    }
    Arrays.sort(files, _FILE_COMPARATOR);
    for (int i = 0; i < files.length; i++) {
        if (files[i].isDirectory()) {
            multiFromDirectory(files[i], ingestFormat, fType, searchString,
                               targetRepoAPIA, targetRepoAPIM,
                               logMessage, log, c);
        } else if (matches(files[i], searchString)) {
            try {
                String pid = oneFromFile(files[i], ingestFormat,
                                         targetRepoAPIA, targetRepoAPIM,
                                         logMessage);
                c.successes++;
                IngestLogger.logFromFile(log, files[i], fType, pid);
            } catch (Exception e) {
                // failed... just log it and continue
                c.failures++;
                IngestLogger.logFailedFromFile(log, files[i], fType, e);
            }
        }
    }
}
/**
 * Tells whether at least one line of the file contains the search string.
 *
 * Reading stops at the first matching line. The reader is always closed;
 * an error while closing is ignored.
 *
 * @param file the file to scan line by line
 * @param searchString the text to look for within each line
 * @return true if some line contains searchString, false otherwise
 * @throws Exception if the file cannot be opened or read
 */
private static boolean matches(File file, String searchString) throws Exception {
    final BufferedReader reader = new BufferedReader(new FileReader(file));
    boolean found = false;
    try {
        String text;
        // The short-circuit keeps us from reading past the first hit.
        while (!found && (text = reader.readLine()) != null) {
            found = text.indexOf(searchString) >= 0;
        }
    } finally {
        try {
            reader.close();
        } catch (Exception ignored) {
            // best effort: nothing useful can be done if close fails
        }
    }
    return found;
}
/**************************************************************************
Ingest from repository
**************************************************************************/
/**
 * Copies one object from a source repository into a target repository.
 *
 * The object is exported from the source into memory and then ingested
 * into the target using the same format.
 *
 * @param sourceRepoAPIA API-A handle of the source repository
 * @param sourceRepoAPIM API-M handle of the source repository
 * @param sourceExportFormat the format used for both export and ingest
 * @param pid the id of the object in the source repository
 * @param targetRepoAPIA API-A handle of the target repository
 * @param targetRepoAPIM API-M handle of the target repository
 * @param logMessage the log message; if null, a message naming the
 *        source pid is made up
 * @return the value returned by AutoIngestor.ingestAndCommit
 * @throws Exception if the export or the ingest fails
 */
public static String oneFromRepository(FedoraAPIA sourceRepoAPIA,
                                       FedoraAPIM sourceRepoAPIM,
                                       String sourceExportFormat,
                                       String pid,
                                       FedoraAPIA targetRepoAPIA,
                                       FedoraAPIM targetRepoAPIM,
                                       String logMessage)
        throws Exception {
    // EXPORT from source repository.
    // The export context is "migrate" because the intent of an ingest
    // from a repository is to move an object from one repository to
    // another. The "migrate" option ensures that URLs that were relative
    // to the exporting repository are made relative to the importing one.
    final ByteArrayOutputStream exported = new ByteArrayOutputStream();
    AutoExporter.export(sourceRepoAPIA, sourceRepoAPIM, pid,
                        sourceExportFormat, "migrate", exported);

    // INGEST into target repository.
    final String effectiveMessage = (logMessage != null)
            ? logMessage
            : "Ingested from source repository with pid " + pid;
    final ByteArrayInputStream serialized =
            new ByteArrayInputStream(exported.toByteArray());
    return AutoIngestor.ingestAndCommit(targetRepoAPIA,
                                        targetRepoAPIM,
                                        serialized,
                                        sourceExportFormat,
                                        effectiveMessage);
}
/**
 * Copies objects of several types from a source repository into a
 * target repository.
 *
 * The requested types are read case-insensitively from fTypes. Each
 * requested type is processed in turn, always in the order D, M, O
 * regardless of the order of the letters in fTypes.
 *
 * @param sourceProtocol passed through to the per-type method
 * @param sourceHost passed through to the per-type method
 * @param sourcePort passed through to the per-type method
 * @param sourceRepoAPIA API-A handle of the source repository
 * @param sourceRepoAPIM API-M handle of the source repository
 * @param sourceExportFormat the format used for both export and ingest
 * @param fTypes any combination of the letters D, M and O
 * @param targetRepoAPIA API-A handle of the target repository
 * @param targetRepoAPIM API-M handle of the target repository
 * @param logMessage the log message; if null, one is made up per object
 * @param log the stream that per-object results are logged to
 * @param c the counter that successes and failures are added to
 * @throws Exception if searching the source repository fails
 */
public static void multiFromRepository(String sourceProtocol,
                                       String sourceHost,
                                       int sourcePort,
                                       FedoraAPIA sourceRepoAPIA,
                                       FedoraAPIM sourceRepoAPIM,
                                       String sourceExportFormat,
                                       String fTypes,
                                       FedoraAPIA targetRepoAPIA,
                                       FedoraAPIM targetRepoAPIM,
                                       String logMessage,
                                       PrintStream log,
                                       IngestCounter c) throws Exception {
    final String requested = fTypes.toUpperCase();
    // Fixed processing order, independent of how the caller listed the types.
    final char[] passOrder = { 'D', 'M', 'O' };
    for (int i = 0; i < passOrder.length; i++) {
        final char type = passOrder[i];
        if (requested.indexOf(type) >= 0) {
            multiFromRepository(sourceProtocol, sourceHost, sourcePort,
                                sourceRepoAPIA, sourceRepoAPIM,
                                sourceExportFormat, type, targetRepoAPIA,
                                targetRepoAPIM, logMessage, log, c);
        }
    }
}
/**
 * Copies all objects of a single type from a source repository into a
 * target repository.
 *
 * The source repository is searched for objects whose fType property
 * equals the given type code. Results are fetched chunk by chunk; each
 * object found is copied with oneFromRepository. A failed copy is counted
 * and logged, and processing continues with the next object.
 *
 * @param sourceProtocol not used by this method
 * @param sourceHost not used by this method
 * @param sourcePort not used by this method
 * @param sourceRepoAPIA API-A handle of the source repository
 * @param sourceRepoAPIM API-M handle of the source repository
 * @param sourceExportFormat the format used for both export and ingest
 * @param fType the object type code to search for
 * @param targetRepoAPIA API-A handle of the target repository
 * @param targetRepoAPIM API-M handle of the target repository
 * @param logMessage the log message; if null, one is made up per object
 * @param log the stream that per-object results are logged to
 * @param c the counter that successes and failures are added to
 * @throws Exception if searching the source repository fails
 */
public static void multiFromRepository(String sourceProtocol,
                                       String sourceHost,
                                       int sourcePort,
                                       FedoraAPIA sourceRepoAPIA,
                                       FedoraAPIM sourceRepoAPIM,
                                       String sourceExportFormat,
                                       char fType,
                                       FedoraAPIA targetRepoAPIA,
                                       FedoraAPIM targetRepoAPIM,
                                       String logMessage,
                                       PrintStream log,
                                       IngestCounter c) throws Exception {
    // prepare the FieldSearch query: fType eq <type code>
    Condition cond = new Condition();
    cond.setProperty("fType");
    cond.setOperator(ComparisonOperator.fromValue("eq"));
    cond.setValue(String.valueOf(fType));
    FieldSearchQuery query = new FieldSearchQuery();
    query.setConditions(new Condition[] { cond });
    query.setTerms(null);
    String[] resultFields = new String[] { "pid" };

    // get the first chunk of search results
    // (100 is presumably the maximum chunk size -- confirm against AutoFinder)
    FieldSearchResult result = AutoFinder.findObjects(sourceRepoAPIA,
                                                      resultFields,
                                                      100,
                                                      query);
    while (result != null) {
        ObjectFields[] ofs = result.getResultList();
        // ingest all objects from this chunk of search results;
        // a missing list is treated as an empty chunk
        if (ofs != null) {
            for (int i = 0; i < ofs.length; i++) {
                String pid = ofs[i].getPid();
                try {
                    String newPID = oneFromRepository(sourceRepoAPIA,
                                                      sourceRepoAPIM,
                                                      sourceExportFormat,
                                                      pid,
                                                      targetRepoAPIA,
                                                      targetRepoAPIM,
                                                      logMessage);
                    c.successes++;
                    IngestLogger.logFromRepos(log, pid, fType, newPID);
                } catch (Exception e) {
                    // failed... just log it and continue
                    c.failures++;
                    IngestLogger.logFailedFromRepos(log, pid, fType, e);
                }
            }
        }
        // get the next chunk of search results, if any. A result without
        // a list session (or without a token) is the last chunk.
        String token = null;
        if (result.getListSession() != null) {
            token = result.getListSession().getToken();
        }
        if (token != null) {
            result = AutoFinder.resumeFindObjects(sourceRepoAPIA, token);
        } else {
            result = null;
        }
    }
}
/**
 * Picks the log message for a file ingest.
 *
 * @param logMessage the caller-supplied message, or null
 * @param file the file being ingested
 * @return logMessage if it is not null (even if empty); otherwise a
 *         generated message naming the file's path
 */
private static String getMessage(String logMessage, File file) {
    return (logMessage == null)
            ? "Ingested from local file " + file.getPath()
            : logMessage;
}
// fixme: this isn't ingest-specific... it doesn't belong here
/**
 * Renders a duration as text such as "2 hours, 1 minute, 5 seconds".
 *
 * Fractions of a second are dropped. Hours and minutes appear only when
 * they are greater than zero; seconds appear when greater than zero, or
 * when both hours and minutes are exactly zero (so zero yields
 * "0 seconds").
 *
 * @param millis the duration in milliseconds
 * @return the textual form of the duration
 */
public static String getDuration(long millis) {
    final long totalSeconds = millis / 1000;
    final long hours = totalSeconds / 3600;
    final long minutes = (totalSeconds % 3600) / 60;
    final long seconds = totalSeconds % 60;

    final StringBuffer text = new StringBuffer();
    if (hours > 0) {
        text.append(hours).append(hours > 1 ? " hours" : " hour");
    }
    if (minutes > 0) {
        if (hours > 0) {
            text.append(", ");
        }
        text.append(minutes).append(minutes > 1 ? " minutes" : " minute");
    }
    final boolean noLargerUnits = (hours == 0 && minutes == 0);
    if (seconds > 0 || noLargerUnits) {
        if (hours > 0 || minutes > 0) {
            text.append(", ");
        }
        // anything other than exactly one second is plural, including zero
        text.append(seconds).append(seconds != 1 ? " seconds" : " second");
    }
    return text.toString();
}
/**
 * Print error message and show usage for command-line interface.
 *
 * Writes the full usage text followed by the given error message to
 * standard error, then terminates the JVM with exit status 1. This
 * method therefore never returns to its caller.
 *
 * @param msg the error message shown after the usage text
 */
public static void badArgs(String msg) {
    System.err.println("Command: fedora-ingest");
    System.err.println();
    System.err.println("Summary: Ingests one or more objects into a Fedora repository, from either");
    System.err.println(" the local filesystem or another Fedora repository.");
    System.err.println();
    System.err.println("Syntax:");
    System.err.println(" fedora-ingest f[ile] INPATH FORMAT THST:TPRT TUSR TPSS TPROTOCOL [LOG]");
    System.err.println(" fedora-ingest d[ir] INPATH FORMAT FTYPS THST:TPRT TUSR TPSS TPROTOCOL [LOG]");
    System.err.println(" fedora-ingest r[epos] SHST:SPRT SUSR SPSS PID|FTYPS THST:TPRT TUSR TPSS SPROTOCOL TPROTOCOL [LOG]");
    System.err.println();
    System.err.println("Where:");
    System.err.println(" INPATH is the local file or directory name that is ingest source.");
    System.err.println(" FORMAT is a string value (either 'foxml1.0' or 'metslikefedora1')");
    System.err.println(" which indicates the XML format of the ingest file(s)");
    System.err.println(" FTYPS is any combination of the characters O, D, and M, specifying");
    System.err.println(" which Fedora object type(s) should be ingested. O=data objects,");
    System.err.println(" D=behavior definitions, and M=behavior mechanisms.");
    System.err.println(" PID is the id of the object to ingest from the source repository.");
    System.err.println(" SHST/THST is the source or target repository's hostname.");
    System.err.println(" SPRT/TPRT is the source or target repository's port number.");
    System.err.println(" SUSR/TUSR is the id of the source or target repository user.");
    System.err.println(" SPSS/TPSS is the password of the source or target repository user.");
    System.err.println(" SPROTOCOL is the protocol to communicate with source repository (http or https)");
    System.err.println(" TPROTOCOL is the protocol to communicate with target repository (http or https)");
    System.err.println(" LOG is the optional log message. If unspecified, the log message");
    System.err.println(" will indicate the source filename or repository of the object(s).");
    System.err.println();
    System.err.println("Examples:");
    System.err.println("fedora-ingest f obj1.xml foxml1.0 myrepo.com:8443 jane jpw https");
    System.err.println();
    System.err.println(" Ingests obj1.xml (encoded in foxml1.0 format) from the");
    // host:port below must agree with the example command line above
    System.err.println(" current directory into the repository at myrepo.com:8443");
    System.err.println(" as user 'jane' with password 'jpw' using the secure https protocol (SSL).");
    System.err.println(" The logmessage will be system-generated, indicating");
    System.err.println(" the source path+filename.");
    System.err.println();
    System.err.println("fedora-ingest d c:\\archive foxml1.0 M myrepo.com:80 jane janepw http \"\"");
    System.err.println();
    System.err.println(" Traverses entire directory structure of c:\\archive, and ingests ");
    System.err.println(" any file that looks like a behavior mechanism object (M). ");
    System.err.println(" It assumes all files will be in the XML format 'foxml1.0'");
    System.err.println(" and will fail on ingests of files that are not of this format.");
    System.err.println(" All log messages will be the quoted string.");
    System.err.println();
    System.err.println("fedora-ingest d c:\\archive foxml1.0 ODM myrepo.com:80 jane janepw http \"for jane\"");
    System.err.println();
    System.err.println(" Same as above, but ingests all three types of objects (O,D,M).");
    System.err.println();
    System.err.println("fedora-ingest r jrepo.com:8081 mike mpw demo:1 myrepo.com:8443 jane jpw http https \"\"");
    System.err.println();
    System.err.println(" Ingests the object whose pid is 'demo:1' from the source repository");
    // source and target below must agree with the example command line above
    System.err.println(" 'jrepo.com:8081' into the target repository 'myrepo.com:8443'.");
    System.err.println(" The object will be exported from the source repository in the default");
    System.err.println(" export format configured at the source." );
    System.err.println(" All log messages will be empty.");
    System.err.println();
    System.err.println("fedora-ingest r jrepo.com:8081 mike mpw O myrepo.com:8443 jane jpw http https \"\"");
    System.err.println();
    System.err.println(" Same as above, but ingests all data objects (type O).");
    System.err.println();
    System.err.println("ERROR : " + msg);
    System.exit(1);
}
/**
 * Prints the outcome of a multi-object ingest to standard output.
 *
 * A warning with the failure count is shown when at least one object
 * failed; otherwise a success line with the total. The location of the
 * detailed log follows in either case.
 *
 * @param counter holds the failure count and the total
 * @param logFile the file the detailed log was written to
 */
private static void summarize(IngestCounter counter, File logFile) {
    final boolean anyFailed = counter.failures > 0;
    final String headline = anyFailed
            ? "WARNING: " + counter.failures + " of " + counter.getTotal() + " objects failed. Check log."
            : "SUCCESS: All " + counter.getTotal() + " objects were ingested.";
    System.out.println();
    System.out.println(headline);
    System.out.println();
    System.out.println("A detailed log is at " + logFile.getPath());
}
/**
 * Command-line interface for doing ingests.
 *
 * The first letter of the first argument selects the mode: f (single
 * file), d (directory tree) or r (another repository). A wrong argument
 * count or an unknown mode prints the usage text via badArgs, which
 * terminates the JVM. Any other failure is reported on standard error,
 * followed by the last attempted file (if any) on standard output; in
 * that case this method returns normally.
 *
 * @param args the command-line arguments, as described by badArgs
 */
public static void main(String[] args) {
    try {
        if (args.length < 1) {
            Ingest.badArgs("No arguments entered!");
        }
        // An empty first argument has no mode letter to inspect.
        if (args[0].length() == 0) {
            Ingest.badArgs("First argument must start with f, d, or r.");
        }
        char kind = args[0].toLowerCase().charAt(0);
        if (kind == 'f') {
            ingestFileFromArgs(args);
        } else if (kind == 'd') {
            ingestDirectoryFromArgs(args);
        } else if (kind == 'r') {
            ingestRepositoryFromArgs(args);
        } else {
            Ingest.badArgs("First argument must start with f, d, or r.");
        }
    } catch (Exception e) {
        System.err.print("Error : ");
        if (e.getMessage() == null) {
            e.printStackTrace();
        } else {
            System.err.print(e.getMessage());
        }
        System.err.println();
        if (Ingest.LAST_PATH != null) {
            System.out.println("(Last attempted file was " + Ingest.LAST_PATH + ")");
        }
    }
}

/**
 * Handles: fedora-ingest f[ile] INPATH FORMAT THST:TPRT TUSR TPSS PROTOCOL [LOG]
 */
private static void ingestFileFromArgs(String[] args) throws Exception {
    if (args.length < 7 || args.length > 8) {
        // badArgs prints the usage and exits; nothing after it runs
        Ingest.badArgs("Wrong number of arguments for file ingest.");
    }
    File f = new File(args[1]);
    String ingestFormat = args[2];
    String logMessage = (args.length == 8) ? args[7] : null;
    String protocol = args[6];
    String[] hp = splitHostPort(args[3]);
    FedoraClient fc = new FedoraClient(buildBaseURL(protocol, hp), args[4], args[5]);
    FedoraAPIA targetRepoAPIA = fc.getAPIA();
    FedoraAPIM targetRepoAPIM = fc.getAPIM();
    String pid = Ingest.oneFromFile(f, ingestFormat, targetRepoAPIA,
                                    targetRepoAPIM, logMessage);
    if (pid == null) {
        System.out.println("ERROR: ingest failed for file: " + args[1]);
    } else {
        System.out.println("Ingested PID: " + pid);
    }
}

/**
 * Handles: fedora-ingest d[ir] INPATH FORMAT FTYPS THST:TPRT TUSR TPSS PROTOCOL [LOG]
 */
private static void ingestDirectoryFromArgs(String[] args) throws Exception {
    if (args.length < 8 || args.length > 9) {
        // badArgs prints the usage and exits; nothing after it runs
        Ingest.badArgs("Wrong number of arguments (" + args.length + ") for directory ingest.");
    }
    File d = new File(args[1]);
    String ingestFormat = args[2];
    String logMessage = (args.length == 9) ? args[8] : null;
    String protocol = args[7];
    String[] hp = splitHostPort(args[4]);
    FedoraClient fc = new FedoraClient(buildBaseURL(protocol, hp), args[5], args[6]);
    FedoraAPIA targetRepoAPIA = fc.getAPIA();
    FedoraAPIM targetRepoAPIM = fc.getAPIM();

    IngestCounter counter = new IngestCounter();
    String logRootName = "ingest-from-dir";
    File logFile = IngestLogger.newLogFile(logRootName);
    PrintStream log = new PrintStream(new FileOutputStream(logFile), true, "UTF-8");
    try {
        IngestLogger.openLog(log, logRootName);
        Ingest.multiFromDirectory(d,
                                  ingestFormat,
                                  args[3],
                                  targetRepoAPIA,
                                  targetRepoAPIM,
                                  logMessage, log, counter);
        IngestLogger.closeLog(log, logRootName);
    } finally {
        // Release the log file even if the ingest aborted part-way;
        // closing an already closed PrintStream has no effect.
        log.close();
    }
    summarize(counter, logFile);
}

/**
 * Handles: fedora-ingest r[epos] SHST:SPRT SUSR SPSS PID|FTYPS THST:TPRT TUSR TPSS SPROTOCOL TPROTOCOL [LOG]
 */
private static void ingestRepositoryFromArgs(String[] args) throws Exception {
    if (args.length < 10 || args.length > 11) {
        Ingest.badArgs("Wrong number of arguments for repository ingest.");
    }
    String logMessage = (args.length == 11) ? args[10] : null;

    // Source repository
    String[] shp = splitHostPort(args[1]);
    String sourceHost = shp[0];
    String sourcePort = shp[1];
    String sourceProtocol = args[8];
    FedoraClient sfc = new FedoraClient(buildBaseURL(sourceProtocol, shp), args[2], args[3]);
    FedoraAPIA sourceRepoAPIA = sfc.getAPIA();
    FedoraAPIM sourceRepoAPIM = sfc.getAPIM();

    // Target repository
    String[] thp = splitHostPort(args[5]);
    String targetProtocol = args[9];
    FedoraClient tfc = new FedoraClient(buildBaseURL(targetProtocol, thp), args[6], args[7]);
    FedoraAPIA targetRepoAPIA = tfc.getAPIA();
    FedoraAPIM targetRepoAPIM = tfc.getAPIM();

    // First, determine the default export format of the source repo.
    // For backward compatibility with pre-2.0 repositories,
    // assume the "metslikefedora1" format.
    RepositoryInfo repoinfo = sourceRepoAPIA.describeRepository();
    System.out.println("Ingest: exporting from a source repo version " + repoinfo.getRepositoryVersion());
    String sourceExportFormat = null;
    StringTokenizer stoken = new StringTokenizer(repoinfo.getRepositoryVersion(), ".");
    if (Integer.parseInt(stoken.nextToken()) < 2) {
        sourceExportFormat = "metslikefedora1";
        System.out.println("Ingest: source repos is using 'metslikefedora1' as export.");
    } else {
        sourceExportFormat = repoinfo.getDefaultExportFormat();
        System.out.println("Ingest: source repos default export format is " + sourceExportFormat);
    }

    // A colon marks the argument as a PID; otherwise it is a set of type codes.
    if (args[4].indexOf(":") != -1) {
        // single object
        String successfulPID = Ingest.oneFromRepository(sourceRepoAPIA,
                                                        sourceRepoAPIM,
                                                        sourceExportFormat,
                                                        args[4],
                                                        targetRepoAPIA,
                                                        targetRepoAPIM,
                                                        logMessage);
        if (successfulPID == null) {
            System.out.println("ERROR: ingest from repo failed for PID=" + args[4]);
        } else {
            System.out.println("Ingested PID: " + successfulPID);
        }
    } else {
        // multi-object
        IngestCounter counter = new IngestCounter();
        String logRootName = "ingest-from-repository";
        File logFile = IngestLogger.newLogFile(logRootName);
        PrintStream log = new PrintStream(new FileOutputStream(logFile), true, "UTF-8");
        try {
            IngestLogger.openLog(log, logRootName);
            Ingest.multiFromRepository(sourceProtocol,
                                       sourceHost,
                                       Integer.parseInt(sourcePort),
                                       sourceRepoAPIA,
                                       sourceRepoAPIM,
                                       sourceExportFormat,
                                       args[4],
                                       targetRepoAPIA,
                                       targetRepoAPIM,
                                       logMessage, log, counter);
            IngestLogger.closeLog(log, logRootName);
        } finally {
            // Release the log file even if the ingest aborted part-way;
            // closing an already closed PrintStream has no effect.
            log.close();
        }
        summarize(counter, logFile);
    }
}

/**
 * Splits a HOST:PORT argument, rejecting input without both parts.
 */
private static String[] splitHostPort(String hostPort) {
    String[] parts = hostPort.split(":");
    if (parts.length < 2) {
        throw new IllegalArgumentException(
                "Expected HOST:PORT but got '" + hostPort + "'");
    }
    return parts;
}

/**
 * Builds the repository base URL from a protocol and a split HOST:PORT pair.
 */
private static String buildBaseURL(String protocol, String[] hostPort) {
    // FIXME: Get around hardcoding the path in the baseURL
    return protocol + "://" + hostPort[0] + ":" + Integer.parseInt(hostPort[1]) + "/fedora";
}
}