All downloads are free. Search and download functionality uses the official Maven repository.

org.duracloud.retrieval.RetrievalTool Maven / Gradle / Ivy

/*
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 *     http://duracloud.org/license/
 */
package org.duracloud.retrieval;

import org.duracloud.client.ContentStore;
import org.duracloud.common.error.DuraCloudRuntimeException;
import org.duracloud.common.util.ApplicationConfig;
import org.duracloud.error.ContentStoreException;
import org.duracloud.retrieval.config.RetrievalToolConfig;
import org.duracloud.retrieval.mgmt.CSVFileOutputWriter;
import org.duracloud.retrieval.mgmt.OutputWriter;
import org.duracloud.retrieval.mgmt.RetrievalManager;
import org.duracloud.retrieval.mgmt.SpaceListManager;
import org.duracloud.retrieval.mgmt.StatusManager;
import org.duracloud.retrieval.source.DuraStoreSpecifiedRetrievalSource;
import org.duracloud.retrieval.source.DuraStoreStitchingRetrievalSource;
import org.duracloud.retrieval.source.RetrievalSource;
import org.duracloud.retrieval.util.StoreClientUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

/**
 * Starting point for the Retrieval Tool. The purpose of this tool is to
 * retrieve content files or content listings from DuraCloud. When the Retrieval
 * Tool is started and the '-l' or '--list-only' option is not specified, it
 * will connect to DuraCloud and proceed to copy files from the provided list
 * of spaces to the local file system under the content directory. Any files
 * which already exist locally will be compared (via checksum) with the file in
 * DuraCloud. If the files are different the local file will either be renamed
 * or overwritten, depending on whether the overwrite flag is set.
 * If the '-l' or '--list-only' option is specified then the Retrieval Tool will
 * create a text file, in the content directory, for each space provided and
 * list each content ID in the space, one content ID per line.
 *
 * Once all files have been transferred to the local system the Retrieval Tool
 * will exit. This tool (currently) provides no ongoing synchronization between
 * DuraCloud and the local system.
 *
 * @author: Bill Branan
 * Date: Oct 12, 2010
 */
public class RetrievalTool {

    private static final String RETRIEVALTOOL_PROPERTIES =
        "retrievaltool.properties";

    /** Pause between completion checks while a retrieval is running. */
    private static final long RETRIEVAL_POLL_MILLIS = 10000;

    /** Completion checks between status reports: 60 x 10 seconds = 10 minutes. */
    private static final int POLLS_PER_STATUS_REPORT = 60;

    /** Pause between completion checks while space listings are produced. */
    private static final long LIST_POLL_MILLIS = 1000;

    private static final Logger logger =
        LoggerFactory.getLogger(RetrievalTool.class);

    private RetrievalToolConfig retConfig;
    private ExecutorService executor;
    private OutputWriter outWriter;
    private RetrievalManager retManager;
    private RetrievalSource retSource;
    private String version;

    /**
     * Creates the tool and reads its version from the "version" entry of the
     * retrievaltool.properties resource.
     */
    public RetrievalTool() {
        Properties props =
            ApplicationConfig.getPropsFromResource(RETRIEVALTOOL_PROPERTIES);
        this.version = props.getProperty("version");
    }

    /**
     * Sets the configuration of the retrieval tool. The tool's version is
     * written into the supplied configuration.
     *
     * @param retConfig to use for running the Retrieval Tool
     */
    protected void setRetrievalConfig(RetrievalToolConfig retConfig) {
        this.retConfig = retConfig;
        this.retConfig.setVersion(version);
    }

    /**
     * Builds the retrieval source, the CSV output writer and the retrieval
     * manager from the current configuration, then hands the manager to the
     * executor.
     *
     * @param contentStore store the content is retrieved from
     */
    private void startRetrievalManager(ContentStore contentStore) {
        retSource = getRetrievalSource(contentStore);
        outWriter = new CSVFileOutputWriter(retConfig.getWorkDir());
        boolean createSpaceDir = isCreateSpaceDir();
        boolean applyTimestamps = retConfig.isApplyTimestamps();
        retManager = new RetrievalManager(retSource,
                                          retConfig.getContentDir(),
                                          retConfig.getWorkDir(),
                                          retConfig.isOverwrite(),
                                          retConfig.getNumThreads(),
                                          outWriter,
                                          createSpaceDir,
                                          applyTimestamps);

        executor.execute(retManager);
    }

    /**
     * Returns the retrieval source, creating it on first use. When a list
     * file is configured, the source is limited to the content IDs found in
     * that file (one per line); otherwise the source covers the configured
     * spaces (or all spaces).
     *
     * @param contentStore store the content is retrieved from
     * @return the retrieval source for this run
     * @throws DuraCloudRuntimeException if the list file does not exist or
     *         cannot be read
     */
    private RetrievalSource getRetrievalSource(ContentStore contentStore) {
        if (retSource != null) {
            return retSource;
        }

        if (retConfig.getListFile() == null) {
            retSource = new DuraStoreStitchingRetrievalSource(contentStore,
                            retConfig.getSpaces(),
                            retConfig.isAllSpaces());
            return retSource;
        }

        try {
            List<String> specifiedIds = readSpecifiedContentIds();
            retSource = new DuraStoreSpecifiedRetrievalSource(
                contentStore,
                retConfig.getSpaces(),  // this list should only contain 1 space ID, length 1
                specifiedIds.iterator());
        } catch (FileNotFoundException fnfe) {
            String error = "Error: file of content IDs specified using '-f' option does not exist.\n" +
                           "Error Message: " + fnfe.getMessage();
            throw new DuraCloudRuntimeException(error, fnfe);
        } catch (IOException ioe) {
            String error = "Error: problem reading file of content IDs specified using '-f' option.\n" +
                    "Error Message: " + ioe.getMessage();
            throw new DuraCloudRuntimeException(error, ioe);
        }
        return retSource;
    }

    /**
     * Reads every line of the configured list file as one content ID.
     *
     * @return the lines of the list file, in file order
     * @throws FileNotFoundException if the list file cannot be opened
     * @throws IOException if reading or closing the list file fails
     */
    private List<String> readSpecifiedContentIds() throws IOException {
        List<String> specifiedIds = new ArrayList<String>();
        BufferedReader reader =
            new BufferedReader(new FileReader(retConfig.getListFile()));
        try {
            String line;
            while ((line = reader.readLine()) != null) {
                specifiedIds.add(line);
            }
        } finally {
            // Always release the file handle, including when a read fails.
            reader.close();
        }
        return specifiedIds;
    }

    /**
     * Decides whether a directory per space is requested from the retrieval
     * manager: true when all spaces are retrieved or when more than one
     * space is configured.
     *
     * @return true if a space directory should be created
     */
    private boolean isCreateSpaceDir() {
        return retConfig.isAllSpaces() || retConfig.getSpaces().size() > 1;
    }

    /**
     * Blocks until the retrieval manager reports completion, printing the
     * current status every 10 minutes, then closes the output writer, shuts
     * down the executor and prints the final status.
     */
    private void waitForExit() {
        StatusManager statusManager = StatusManager.getInstance();
        statusManager.setVersion(version);

        int polls = 0;
        while (!retManager.isComplete()) {
            if (polls >= POLLS_PER_STATUS_REPORT) { // Print status every 10 minutes
                System.out.println(statusManager.getPrintableStatus());
                polls = 0;
            }
            // Count every pass, including the one that printed, so the
            // report interval is exactly POLLS_PER_STATUS_REPORT polls.
            polls++;
            sleep(RETRIEVAL_POLL_MILLIS);
        }

        logger.info("Shutting down the Retrieval Tool");

        outWriter.close();
        executor.shutdown();
        System.out.println("Retrieval Tool processing complete, final status:");
        System.out.println(statusManager.getPrintableStatus());
    }

    /**
     * Pauses the calling thread for the given time; an interrupt simply ends
     * the pause early.
     *
     * @param millis how long to pause, in milliseconds
     */
    private void sleep(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException ignored) {
            // Deliberately not re-asserting the interrupt flag: the callers
            // are polling loops, and with the flag set every following
            // Thread.sleep would return immediately, turning them into
            // busy-wait loops until the work completes.
        }
    }

    /**
     * Runs the list-only mode: resolves the spaces to list (all spaces in the
     * store, or the configured ones), hands a space list manager to the
     * executor, waits for it to complete and shuts the executor down.
     *
     * @param contentStore store whose spaces are listed
     * @throws DuraCloudRuntimeException if the list of spaces cannot be
     *         obtained from the store
     */
    private void startSpaceListManager(ContentStore contentStore) {
        List<String> spaces;
        if (retConfig.isAllSpaces()) {
            try {
                spaces = contentStore.getSpaces();
            } catch (ContentStoreException e) {
                String errorMsg = "Unable to get spaces list due to error: " +
                                  e.getMessage();
                throw new DuraCloudRuntimeException(errorMsg, e);
            }
        } else {
            spaces = retConfig.getSpaces();
        }
        SpaceListManager spaceListManager =
            new SpaceListManager(contentStore,
                                 retConfig.getContentDir(),
                                 spaces,
                                 retConfig.isOverwrite(),
                                 retConfig.getNumThreads());
        executor.execute(spaceListManager);
        while (!spaceListManager.isComplete()) {
            sleep(LIST_POLL_MILLIS);
        }
        executor.shutdown();
    }

    /**
     * Runs the tool with the configuration previously supplied through
     * {@link #setRetrievalConfig(RetrievalToolConfig)}: connects to the
     * content store, then either writes space listings (list-only mode) or
     * retrieves content and waits for the retrieval to finish.
     */
    public void runRetrievalTool() {
        logger.info("Starting Retrieval Tool version {}", version);
        logger.info("Running Retrieval Tool with configuration: {}",
                    retConfig.getPrintableConfig());
        System.out.print("\nStarting up the Retrieval Tool ...");
        System.out.println(retConfig.getPrintableConfig());

        StoreClientUtil clientUtil = new StoreClientUtil();
        ContentStore contentStore =
            clientUtil.createContentStore(retConfig.getHost(),
                                          retConfig.getPort(),
                                          retConfig.getContext(),
                                          retConfig.getUsername(),
                                          retConfig.getPassword(),
                                          retConfig.getStoreId());

        // A single worker thread runs whichever manager is started below.
        executor = Executors.newFixedThreadPool(1);
        if (retConfig.isListOnly()) {
            startSpaceListManager(contentStore);
        } else {
            startRetrievalManager(contentStore);
            System.out.println("... Startup Complete");
            System.out.println("The Retrieval Tool will exit when processing " +
                           "is complete. Status will be printed every " +
                           "10 minutes.\n");
            waitForExit();
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy