All downloads are free. The search and download features use the official Maven repository.

org.duracloud.mill.ltp.bit.LoopingBitIntegrityTaskProducer Maven / Gradle / Ivy

/*
 * The contents of this file are subject to the license and copyright
 * detailed in the LICENSE and NOTICE files at the root of the source
 * tree and available online at
 *
 *     http://duracloud.org/license/
 */
package org.duracloud.mill.ltp.bit;

import java.text.MessageFormat;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

import org.duracloud.common.error.DuraCloudRuntimeException;
import org.duracloud.common.queue.TaskQueue;
import org.duracloud.common.queue.task.Task;
import org.duracloud.common.retry.Retriable;
import org.duracloud.common.retry.Retrier;
import org.duracloud.mill.bit.BitIntegrityCheckReportTask;
import org.duracloud.mill.bit.BitIntegrityCheckTask;
import org.duracloud.mill.common.storageprovider.StorageProviderFactory;
import org.duracloud.mill.credentials.AccountCredentials;
import org.duracloud.mill.credentials.CredentialsRepo;
import org.duracloud.mill.credentials.CredentialsRepoException;
import org.duracloud.mill.credentials.StorageProviderCredentials;
import org.duracloud.mill.db.model.BitIntegrityReport;
import org.duracloud.mill.db.repo.JpaBitIntegrityReportRepo;
import org.duracloud.mill.ltp.Frequency;
import org.duracloud.mill.ltp.LoopingTaskProducer;
import org.duracloud.mill.ltp.PathFilterManager;
import org.duracloud.mill.ltp.RunStats;
import org.duracloud.mill.ltp.StateManager;
import org.duracloud.mill.notification.NotificationManager;
import org.duracloud.reportdata.bitintegrity.BitIntegrityReportResult;
import org.duracloud.storage.provider.StorageProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * @author Daniel Bernstein
 * Date: Apr 28, 2014
 */
public class LoopingBitIntegrityTaskProducer extends LoopingTaskProducer {
    private static Logger log = LoggerFactory.getLogger(LoopingBitIntegrityTaskProducer.class);
    private PathFilterManager exclusionManager;
    private int waitTimeInMsBeforeQueueSizeCheck = 10000;
    private TaskQueue bitReportTaskQueue;
    private JpaBitIntegrityReportRepo bitReportRepo;
    private int waitBetweenRetriesMs = 5000;

    public LoopingBitIntegrityTaskProducer(CredentialsRepo credentialsRepo,
                                           JpaBitIntegrityReportRepo bitReportRepo,
                                           StorageProviderFactory storageProviderFactory,
                                           TaskQueue bitTaskQueue,
                                           TaskQueue bitReportTaskQueue,
                                           StateManager state,
                                           int maxTaskQueueSize,
                                           Frequency frequency,
                                           NotificationManager notificationManager,
                                           PathFilterManager exclusionManager,
                                           LoopingBitTaskProducerConfigurationManager config) {
        super(credentialsRepo,
              storageProviderFactory,
              bitTaskQueue,
              state,
              maxTaskQueueSize,
              frequency,
              null,
              notificationManager,
              config);
        this.exclusionManager = exclusionManager;
        this.bitReportTaskQueue = bitReportTaskQueue;
        this.bitReportRepo = bitReportRepo;
    }

    /* (non-Javadoc)
     * @see org.duracloud.mill.ltp.LoopingTaskProducer#loadMorselQueueFromSource(java.util.Queue)
     */
    @Override
    protected void loadMorselQueueFromSource(Queue morselQueue) {
        //generate set of morsels based on duplication policy
        try {
            for (String account : getAccountsList()) {
                String accountPath = "/" + account;

                log.debug("loading {}", account);

                if (exclusionManager.isExcluded(accountPath)) {
                    continue;
                }

                AccountCredentials accountCreds = getCredentialsRepo().getAccountCredentials(account);
                for (StorageProviderCredentials cred : accountCreds.getProviderCredentials()) {
                    if (!cred.isConfigured()) {
                        log.warn("Storage provider {} is not configured on account {} - skipping...",
                                 cred.getProviderId(), accountCreds.getAccount());
                        continue;
                    }
                    String storeId = cred.getProviderId();
                    String storePath = accountPath + "/" + storeId;
                    if (exclusionManager.isExcluded(storePath)) {
                        continue;
                    }

                    StorageProvider store = getStorageProvider(cred);

                    Iterator spaces = store.getSpaces();
                    while (spaces.hasNext()) {
                        String spaceId = spaces.next();
                        String spacePath = storePath + "/" + spaceId;
                        if (!exclusionManager.isExcluded(spacePath)) {

                            //check if most recent
                            BitIntegrityReport report = bitReportRepo
                                .findFirstByAccountAndStoreIdAndSpaceIdOrderByCompletionDateDesc(account,
                                                                                                 storeId,
                                                                                                 spaceId);
                            if (report != null) {
                                //skip if last report was a success that completed less than 60 days ago
                                long oneDayInMs = 24 * 60 * 60 * 1000;
                                if (report.getCompletionDate()
                                          .after(new Date(System.currentTimeMillis() - (60 * oneDayInMs)))
                                    && report.getResult().equals(BitIntegrityReportResult.SUCCESS)) {
                                    continue;
                                }
                            }

                            morselQueue.add(new BitIntegrityMorsel(account,
                                                                   cred.getProviderId(),
                                                                   cred.getProviderType().name(),
                                                                   spaceId));
                        }
                    }

                    log.info("loaded {} into morsel queue.", account);

                }
            }
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            throw new DuraCloudRuntimeException(e);
        }
    }

    /**
     * @return
     * @throws CredentialsRepoException
     */
    private List getAccountsList() throws CredentialsRepoException {
        return getCredentialsRepo().getActiveAccounts();
    }

    /* (non-Javadoc)
     * @see org.duracloud.mill.ltp.LoopingTaskProducer#nibble(org.duracloud.mill.ltp.Morsel)
     */
    @Override
    protected void nibble(Queue queue) {
        BitIntegrityMorsel morsel = queue.peek();
        String storeId = morsel.getStoreId();
        String account = morsel.getAccount();
        StorageProvider store;

        try {
            store = getStorageProvider(account, storeId);
        } catch (Exception ex) {
            if (morsel.getMarker() != null) {
                throw new DuraCloudRuntimeException(
                    "Failed to get storage provider for " + morsel + ". Morsel has already been nibbled. " +
                    "Likely cause:  a storage provider was removed in the middle of processing the morsel. " +
                    "Further investigation and clean up recommended before restarting the run." +
                    "In most cases you should be able to remove the state file and restart the run.", ex);
            } else {
                //remove morsel.
                queue.poll();
                String message =
                    MessageFormat.format("Failed to get storage provider for {0}. Likely cause: A storage " +
                                         "provider was removed after the bit integrity run was started. Since no " +
                                         "tasks have been added yet for this morsel, we will simply skip it. " +
                                         "No further action required.", morsel);
                log.warn(message, morsel);
                sendEmail("Failed to get storage provider for " + morsel, message);
                return;
            }
        }

        int maxTaskQueueSize = getMaxTaskQueueSize();
        int taskQueueSize = getTaskQueue().size();

        while (taskQueueSize < maxTaskQueueSize) {

            if (taskQueueSize >= maxTaskQueueSize) {
                log.info("Task queue size ({}) has reached or exceeded max size ({}).",
                         taskQueueSize, maxTaskQueueSize);
            } else {

                if (addTasks(morsel, store, 1000)) {
                    log.info("All bit integrity tasks that could be created were created for account={}, storeId={}, " +
                             "spaceId={}. getTaskQueue().size = {}",
                             morsel.getAccount(), storeId, morsel.getSpaceId(), getTaskQueue().size());
                    log.info("{} completely nibbled.", morsel);

                    // check if queue is empty after waiting a few moments: It is possible that AWS will not have
                    // registered a new task that was added in the previous step (or even is pending). I observed
                    // this problem while debugging this code.
                    // If I put the breakpoint on the if statement and I ran the task producer against a space with a
                    // single content item then size would be reported as 0.  However if I put the breakpoint a line
                    // above on "long size = ..." then the size variable was evaluating to 1. At 5 seconds, I was
                    // still seeing the inconsistency.  At 10 seconds the matter seems to be resolved.
                    // Makes me a little nervous.  --dbernstein
                    log.debug("delay before checking the queue size in ms: {}", waitTimeInMsBeforeQueueSizeCheck);
                    sleep(waitTimeInMsBeforeQueueSizeCheck);
                    long size = getTaskQueue().sizeIncludingInvisibleAndDelayed();
                    if (size == 0) {
                        addReportTaskProcessorTask(queue.poll());
                    } else {
                        log.info("{} (queue) is not empty: {} items remain to be processed before " +
                                 "creating report generation task.", getTaskQueue().getName(), size);
                    }

                    break;

                } else {
                    log.info("morsel nibbled down: {}", morsel);
                }
            }

            taskQueueSize = getTaskQueue().size();
        }
    }

    /**
     * @param ms
     */
    private void sleep(int ms) {
        try {
            Thread.sleep(ms);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    /**
     *
     */
    private void addReportTaskProcessorTask(BitIntegrityMorsel morsel) {
        BitIntegrityCheckReportTask task = new BitIntegrityCheckReportTask();
        task.setAccount(morsel.getAccount());
        task.setStoreId(morsel.getStoreId());
        task.setSpaceId(morsel.getSpaceId());
        Task t = task.writeTask();
        this.bitReportTaskQueue.put(t);
        log.info("added report task {} to {}", t, this.bitReportTaskQueue);
    }

    /**
     * @param morsel
     * @param store
     * @param biteSize
     * @return
     */
    private boolean addTasks(BitIntegrityMorsel morsel,
                             final StorageProvider store,
                             final int biteSize) {
        final String account = morsel.getAccount();
        final String storeId = morsel.getStoreId();
        final String spaceId = morsel.getSpaceId();
        final String marker = morsel.getMarker();

        //load in next maxContentIdsToAdd or however many remain
        List contentIds = null;

        try {

            contentIds = (List) new Retrier(3, waitBetweenRetriesMs, 2).execute(new Retriable() {
                /* (non-Javadoc)
                 * @see org.duracloud.common.retry.Retriable#retry()
                 */
                @Override
                public Object retry() throws Exception {
                    return store.getSpaceContentsChunked(spaceId, null, biteSize, marker);
                }
            });

            int added = addToTaskQueue(account, storeId, spaceId, contentIds);

            ((BitIntegrityRunStats) getStats(account)).add(added);
            //if no tasks were added, it means that all contentIds in this morsel
            //have been touched in this run.
            if (added == 0) {
                return true;
            } else {
                String newMarker = contentIds.get(contentIds.size() - 1);
                morsel.setMarker(newMarker);
                return false;
            }

        } catch (Exception ex) {
            String message = MessageFormat.format("Bit integrity producer failure on  " +
                                                  "subdomain={0}, spaceId={1}, storeId={2} due to: {3}",
                                                  account, spaceId, storeId, ex.getMessage());
            log.error(message, ex);

            sendEmail(message, ex);
            return true;
        }
    }

    /**
     * @param account
     * @param storeId
     * @param contentIds
     * @return
     */
    private int addToTaskQueue(String account,
                               String storeId,
                               String spaceId,
                               List contentIds) {
        Set tasks = new HashSet<>();
        int addedCount = 0;

        for (String contentId : contentIds) {
            BitIntegrityCheckTask bitIntegrityTask = new BitIntegrityCheckTask();
            bitIntegrityTask.setAccount(account);
            bitIntegrityTask.setContentId(contentId);
            bitIntegrityTask.setSpaceId(spaceId);
            bitIntegrityTask.setStoreId(storeId);
            Task task = bitIntegrityTask.writeTask();
            tasks.add(task);
            addedCount++;
        }

        getTaskQueue().put(tasks);
        return addedCount;
    }

    /* (non-Javadoc)
     * @see org.duracloud.mill.ltp.LoopingTaskProducer#logIncrementalStatsBySubdomain(java.lang.String, org.duracloud
     * .mill.ltp.RunStats)
     */
    @Override
    protected void logIncrementalStatsByAccount(String account, RunStats stats) {
        log.info("Session stats by account (incremental): account={} tasksAdded={}",
                 account, ((BitIntegrityRunStats) stats).getAdded());

    }

    /* (non-Javadoc)
     * @see org.duracloud.mill.ltp.LoopingTaskProducer#logCumulativeSessionStats()
     */
    @Override
    protected void logCumulativeSessionStats(Map runstats, RunStats cumulativeTotals) {
        log.info("session stats (global cumulative): domains={} tasksAdded={}",
                 runstats.keySet().size(), ((BitIntegrityRunStats) cumulativeTotals).getAdded());
    }

    /* (non-Javadoc)
     * @see org.duracloud.mill.ltp.LoopingTaskProducer#logGlobalncrementalStats(org.duracloud.mill.ltp.RunStats)
     */
    @Override
    protected void logGlobalncrementalStats(RunStats incrementalTotals) {
        log.info("Session stats (global incremental): tasksAdded={}",
                 ((BitIntegrityRunStats) incrementalTotals).getAdded());
    }

    /* (non-Javadoc)
     * @see org.duracloud.mill.ltp.LoopingTaskProducer#createRunStats()
     */
    @Override
    protected RunStats createRunStats() {
        return new BitIntegrityRunStats();
    }

    public void setWaitTimeInMsBeforeQueueSizeCheck(int ms) {
        this.waitTimeInMsBeforeQueueSizeCheck = ms;
    }

    /* (non-Javadoc)
     * @see org.duracloud.mill.ltp.LoopingTaskProducer#getLoopingProducerTypePrefix()
     */
    @Override
    protected String getLoopingProducerTypePrefix() {
        return "bit";
    }

    /**
     * Modify the wait between retries
     *
     * @param waitBetweenRetriesMs
     */
    public void setWaitBetweenRetriesMs(int waitBetweenRetriesMs) {
        this.waitBetweenRetriesMs = waitBetweenRetriesMs;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy