All downloads are free. Search and download functionality uses the official Maven repository.

org.archive.crawler.restlet.models.CrawlJobModel Maven / Gradle / Ivy

The newest version!
package org.archive.crawler.restlet.models;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.filefilter.IOFileFilter;
import org.archive.checkpointing.Checkpoint;
import org.archive.crawler.framework.CrawlController.State;
import org.archive.crawler.framework.CrawlJob;
import org.archive.crawler.reporting.Report;
import org.archive.spring.ConfigPath;
import org.archive.util.ArchiveUtils;
import org.archive.util.FileUtils;

@SuppressWarnings("serial")
public class CrawlJobModel extends LinkedHashMap implements Serializable{

    private CrawlJob crawlJob;
    public static final IOFileFilter EDIT_FILTER = FileUtils
            .getRegexFileFilter(".*\\.((c?xml)|(txt))$");
    
    public CrawlJobModel(CrawlJob crawlJob, String urlBaseRef){
        super();
        this.crawlJob=crawlJob;
        this.put("shortName",crawlJob.getShortName());
        if (crawlJob.getCrawlController() != null) {
            this.put("crawlControllerState", crawlJob.getCrawlController().getState());
            if (crawlJob.getCrawlController().getState() == State.FINISHED) {
                this.put("crawlExitStatus", crawlJob.getCrawlController().getCrawlExitStatus());
            }
        }

        this.put("statusDescription", crawlJob.getJobStatusDescription());
        Set actions = new LinkedHashSet();
        this.put("availableActions",actions);
    
        this.put("launchCount", crawlJob.getLaunchCount());
        this.put("lastLaunch",crawlJob.getLastLaunch());
        this.put("isProfile", crawlJob.isProfile());

        File primaryConfig = FileUtils.tryToCanonicalize(crawlJob.getPrimaryConfig());
        this.put("primaryConfig", primaryConfig.getAbsolutePath());
        this.put("primaryConfigUrl", urlBaseRef + "jobdir/" + primaryConfig.getName());
        this.put("url",urlBaseRef+"job/"+crawlJob.getShortName());

        this.put("jobLogTail", generateJobLogTail());
        this.put("uriTotalsReport", crawlJob.uriTotalsReportData());
        
        
        Map sizeTotalsReportData = crawlJob.sizeTotalsReportData();
        if (sizeTotalsReportData == null) {
            sizeTotalsReportData = new LinkedHashMap();
        }
        if (!sizeTotalsReportData.containsKey("dupByHash")) {
            sizeTotalsReportData.put("dupByHash", 0L);
        }
        if (!sizeTotalsReportData.containsKey("dupByHashCount")) {
            sizeTotalsReportData.put("dupByHashCount", 0L);
        }
        if (!sizeTotalsReportData.containsKey("novel")) {
            sizeTotalsReportData.put("novel", 0L);
        }
        if (!sizeTotalsReportData.containsKey("novelCount")) {
            sizeTotalsReportData.put("novelCount", 0L);
        }
        if (!sizeTotalsReportData.containsKey("notModified")) {
            sizeTotalsReportData.put("notModified", 0L);
        }
        if (!sizeTotalsReportData.containsKey("notModifiedCount")) {
            sizeTotalsReportData.put("notModifiedCount", 0L);
        }
        if (!sizeTotalsReportData.containsKey("total")) {
            sizeTotalsReportData.put("total", 0L);
        }
        if (!sizeTotalsReportData.containsKey("totalCount")) {
            sizeTotalsReportData.put("totalCount", 0L);
        }
        this.put("sizeTotalsReport", sizeTotalsReportData);
        
        this.put("rateReport", crawlJob.rateReportData());
        this.put("loadReport", crawlJob.loadReportData());
        this.put("elapsedReport", crawlJob.elapsedReportData()); 
        this.put("threadReport", crawlJob.threadReportData()); 
        this.put("frontierReport", crawlJob.frontierReportData());
        this.put("crawlLogTail", generateCrawlLogTail());
        this.put("configFiles",generateConfigReferencedPaths(urlBaseRef));


        this.put("isLaunchInfoPartial", crawlJob.isLaunchInfoPartial());
        this.put("isRunning", crawlJob.isRunning());
        this.put("isLaunchable",crawlJob.isLaunchable());
        this.put("hasApplicationContext",crawlJob.hasApplicationContext());
        this.put("alertCount", crawlJob.getAlertCount());        

        
        if (!crawlJob.hasApplicationContext())
            actions.add("build");

        if (!crawlJob.isProfile() && crawlJob.isLaunchable())
            actions.add("launch");
        if (crawlJob.isPausable())
            actions.add("pause");
        if (crawlJob.isUnpausable())
            actions.add("unpause");

        if (crawlJob.getCheckpointService() != null && crawlJob.isRunning())
            actions.add("checkpoint");
        if (crawlJob.isRunning())
            actions.add("terminate");
        if (crawlJob.hasApplicationContext())
            actions.add("teardown");

        if (crawlJob.getCheckpointService() != null) {
            Checkpoint recoveryCheckpoint = crawlJob.getCheckpointService().getRecoveryCheckpoint();
            if (recoveryCheckpoint != null)
                this.put("checkpointName", recoveryCheckpoint.getName());
        }
        
        List checkpointFiles = new ArrayList();
        if (crawlJob.getCheckpointService() != null) {
            if (crawlJob.isLaunchable() && crawlJob.getCheckpointService().hasAvailableCheckpoints()) {
                for (File f : crawlJob.getCheckpointService().findAvailableCheckpointDirectories()) {
                    checkpointFiles.add(f.getName()); 
                }
            }
        }
        this.put("checkpointFiles",checkpointFiles);
        if (crawlJob.hasApplicationContext())
            this.put("alertLogFilePath",crawlJob.getCrawlController().getLoggerModule().getAlertsLogPath().getFile().getAbsolutePath());
        if(crawlJob.isRunning() || (crawlJob.hasApplicationContext() && !crawlJob.isLaunchable()))
            this.put("crawlLogFilePath",crawlJob.getCrawlController().getLoggerModule().getCrawlLogPath().getFile().getAbsolutePath());
        this.put("reports", generateReports());
    }
    public String formatBytes(Long bytes){
        return ArchiveUtils.formatBytesForDisplay(bytes);
    }
    public String doubleToString(double number, int digits){
        return ArchiveUtils.doubleToString(number, digits);
    }
    public String getLastLaunchTime(){
        long ago = System.currentTimeMillis()
                - crawlJob.getLastLaunch().getMillis();
        return ArchiveUtils.formatMillisecondsToConventional(ago, 2);
    }
    /*
     * Alternative access to the file object, full name stored in base data map.
     */
    public File getConfigurationFilePath(){
        return crawlJob.getPrimaryConfig();
    }
    
    public List generateJobLogTail(){
        List jobLog = new ArrayList();
        if (crawlJob.getJobLog().exists()) {
            try {
                FileUtils.pagedLines(crawlJob.getJobLog(), -1, -5, jobLog);
                Collections.reverse(jobLog);
            } catch (IOException ioe) {
                return null;
            }
        }
        return jobLog;
    }
    public List generateCrawlLogTail() {
        List logLines = new LinkedList();
        if ((crawlJob.isRunning() || (crawlJob.hasApplicationContext() && !crawlJob.isLaunchable()))
                && crawlJob.getCrawlController().getLoggerModule()
                        .getCrawlLogPath().getFile().exists()) {
            try {
                FileUtils.pagedLines(crawlJob.getCrawlController()
                        .getLoggerModule().getCrawlLogPath().getFile(), -1,
                        -10, logLines);
                Collections.reverse(logLines);
            } catch (IOException ioe) {
                return null;
            }
        }
        return logLines;
    }
    public List> generateReports(){
        List> reports = new ArrayList>();
        if(crawlJob.hasApplicationContext()){
            for (Report report : crawlJob.getCrawlController().getStatisticsTracker().getReports()) {
                if (report.getShouldReportDuringCrawl()) {
                    Map reportMap = new LinkedHashMap();
                    String className = report.getClass().getSimpleName();
                    reportMap.put("className", className);
                    reportMap.put("shortName",className.substring(0,className.length() - "Report".length()));
                    reports.add(reportMap);
                }
            }
        }
        return reports;
    }
    private List> generateConfigReferencedPaths(String baseRef){
        List> referencedPaths = new ArrayList>();
        for (String key : crawlJob.getConfigPaths().keySet()) {
            ConfigPath cp = crawlJob.getConfigPaths().get(key);
            Map configMap = new LinkedHashMap();
            configMap.put("key", key);
            configMap.put("name", cp.getName());
            configMap.put("path",FileUtils.tryToCanonicalize(cp.getFile()).getAbsolutePath());
            configMap.put("url",baseRef+"engine/anypath/"+configMap.get("path"));
            configMap.put("editable", EDIT_FILTER.accept(cp.getFile()));
            referencedPaths.add(configMap);
        }
        return referencedPaths;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy