org.archive.crawler.restlet.models.CrawlJobModel Maven / Gradle / Ivy
The newest version!
package org.archive.crawler.restlet.models;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.io.filefilter.IOFileFilter;
import org.archive.checkpointing.Checkpoint;
import org.archive.crawler.framework.CrawlController.State;
import org.archive.crawler.framework.CrawlJob;
import org.archive.crawler.reporting.Report;
import org.archive.spring.ConfigPath;
import org.archive.util.ArchiveUtils;
import org.archive.util.FileUtils;
@SuppressWarnings("serial")
public class CrawlJobModel extends LinkedHashMap implements Serializable{
/** The crawl job this model was built from; the accessor and log-tail methods below read from it again. */
private CrawlJob crawlJob;
/**
 * File filter built from a regular expression that matches strings ending in
 * {@code .xml}, {@code .cxml} or {@code .txt}.
 * NOTE(review): the name suggests it selects the files offered for editing;
 * confirm against the usages of this constant.
 */
public static final IOFileFilter EDIT_FILTER = FileUtils
.getRegexFileFilter(".*\\.((c?xml)|(txt))$");
/**
 * Builds the model by copying the current state of a crawl job into this map.
 *
 * <p>Entries are inserted in a fixed order (this is a {@link LinkedHashMap}),
 * so the statement order below determines the iteration order of the model.
 *
 * @param crawlJob   the job to describe; it is also retained for the accessor
 *                   and log-tail methods of this class
 * @param urlBaseRef prefix to which {@code "jobdir/<config file name>"} and
 *                   {@code "job/<short name>"} are appended to form the
 *                   {@code primaryConfigUrl} and {@code url} entries; it is
 *                   also handed to {@code generateConfigReferencedPaths}
 */
public CrawlJobModel(CrawlJob crawlJob, String urlBaseRef) {
    super();
    this.crawlJob = crawlJob;

    put("shortName", crawlJob.getShortName());
    if (crawlJob.getCrawlController() != null) {
        put("crawlControllerState", crawlJob.getCrawlController().getState());
        // The exit status is only published once the controller reports FINISHED.
        if (State.FINISHED == crawlJob.getCrawlController().getState()) {
            put("crawlExitStatus", crawlJob.getCrawlController().getCrawlExitStatus());
        }
    }
    put("statusDescription", crawlJob.getJobStatusDescription());

    // Registered now so the entry keeps this position in the map; the set
    // itself is filled in further down, once the job flags have been read.
    Set<String> actions = new LinkedHashSet<String>();
    put("availableActions", actions);

    put("launchCount", crawlJob.getLaunchCount());
    put("lastLaunch", crawlJob.getLastLaunch());
    put("isProfile", crawlJob.isProfile());

    File canonicalConfig = FileUtils.tryToCanonicalize(crawlJob.getPrimaryConfig());
    put("primaryConfig", canonicalConfig.getAbsolutePath());
    put("primaryConfigUrl", urlBaseRef + "jobdir/" + canonicalConfig.getName());
    put("url", urlBaseRef + "job/" + crawlJob.getShortName());

    put("jobLogTail", generateJobLogTail());
    put("uriTotalsReport", crawlJob.uriTotalsReportData());

    // Guarantee that every size-total key is present, defaulting missing ones to zero.
    Map sizeTotals = crawlJob.sizeTotalsReportData();
    if (sizeTotals == null) {
        sizeTotals = new LinkedHashMap();
    }
    String[] requiredSizeKeys = {
        "dupByHash", "dupByHashCount",
        "novel", "novelCount",
        "notModified", "notModifiedCount",
        "total", "totalCount"
    };
    for (String key : requiredSizeKeys) {
        if (!sizeTotals.containsKey(key)) {
            sizeTotals.put(key, 0L);
        }
    }
    put("sizeTotalsReport", sizeTotals);

    put("rateReport", crawlJob.rateReportData());
    put("loadReport", crawlJob.loadReportData());
    put("elapsedReport", crawlJob.elapsedReportData());
    put("threadReport", crawlJob.threadReportData());
    put("frontierReport", crawlJob.frontierReportData());

    put("crawlLogTail", generateCrawlLogTail());
    put("configFiles", generateConfigReferencedPaths(urlBaseRef));

    put("isLaunchInfoPartial", crawlJob.isLaunchInfoPartial());
    put("isRunning", crawlJob.isRunning());
    put("isLaunchable", crawlJob.isLaunchable());
    put("hasApplicationContext", crawlJob.hasApplicationContext());
    put("alertCount", crawlJob.getAlertCount());

    // Work out which actions apply to the job in its current state.
    if (!crawlJob.hasApplicationContext()) {
        actions.add("build");
    }
    if (!crawlJob.isProfile() && crawlJob.isLaunchable()) {
        actions.add("launch");
    }
    if (crawlJob.isPausable()) {
        actions.add("pause");
    }
    if (crawlJob.isUnpausable()) {
        actions.add("unpause");
    }
    if (crawlJob.getCheckpointService() != null && crawlJob.isRunning()) {
        actions.add("checkpoint");
    }
    if (crawlJob.isRunning()) {
        actions.add("terminate");
    }
    if (crawlJob.hasApplicationContext()) {
        actions.add("teardown");
    }

    // Name of the recovery checkpoint, when a checkpoint service has one.
    if (crawlJob.getCheckpointService() != null) {
        Checkpoint recovery = crawlJob.getCheckpointService().getRecoveryCheckpoint();
        if (recovery != null) {
            put("checkpointName", recovery.getName());
        }
    }

    // Directory names of the available checkpoints; only collected while the job is launchable.
    List<String> checkpointNames = new ArrayList<String>();
    if (crawlJob.getCheckpointService() != null
            && crawlJob.isLaunchable()
            && crawlJob.getCheckpointService().hasAvailableCheckpoints()) {
        for (File dir : crawlJob.getCheckpointService().findAvailableCheckpointDirectories()) {
            checkpointNames.add(dir.getName());
        }
    }
    put("checkpointFiles", checkpointNames);

    if (crawlJob.hasApplicationContext()) {
        put("alertLogFilePath",
                crawlJob.getCrawlController()
                        .getLoggerModule()
                        .getAlertsLogPath()
                        .getFile()
                        .getAbsolutePath());
    }
    // Same condition that generateCrawlLogTail() uses before reading the crawl log.
    if (crawlJob.isRunning()
            || (crawlJob.hasApplicationContext() && !crawlJob.isLaunchable())) {
        put("crawlLogFilePath",
                crawlJob.getCrawlController()
                        .getLoggerModule()
                        .getCrawlLogPath()
                        .getFile()
                        .getAbsolutePath());
    }

    put("reports", generateReports());
}
/**
 * Renders a byte count as display text by delegating to
 * {@code ArchiveUtils.formatBytesForDisplay}.
 *
 * @param bytes the byte count to render
 * @return the text produced by {@code ArchiveUtils.formatBytesForDisplay}
 */
public String formatBytes(Long bytes) {
    final String display = ArchiveUtils.formatBytesForDisplay(bytes);
    return display;
}
/**
 * Converts a double to text by delegating to
 * {@code ArchiveUtils.doubleToString}.
 *
 * @param number the value to convert
 * @param digits passed through unchanged as the second argument;
 *               presumably the number of fractional digits — confirm
 *               against {@code ArchiveUtils.doubleToString}
 * @return the text produced by {@code ArchiveUtils.doubleToString}
 */
public String doubleToString(double number, int digits) {
    final String rendered = ArchiveUtils.doubleToString(number, digits);
    return rendered;
}
/**
 * Describes how long ago the job was last launched.
 *
 * <p>The elapsed time is the difference between the current system clock and
 * the millisecond value of {@code crawlJob.getLastLaunch()}, formatted by
 * {@code ArchiveUtils.formatMillisecondsToConventional} with a second
 * argument of {@code 2}.
 *
 * <p>NOTE(review): this dereferences {@code getLastLaunch()} without a null
 * check; confirm that callers only use it for jobs that have a last launch.
 *
 * @return the formatted elapsed time since the last launch
 */
public String getLastLaunchTime() {
    final long now = System.currentTimeMillis();
    final long launchedAt = crawlJob.getLastLaunch().getMillis();
    return ArchiveUtils.formatMillisecondsToConventional(now - launchedAt, 2);
}
/**
 * Alternative access to the primary configuration as a {@link File} object.
 * The constructor stores the absolute path of the canonicalized file in the
 * map under {@code "primaryConfig"}; this method returns the file exactly as
 * the crawl job reports it.
 *
 * @return the value of {@code crawlJob.getPrimaryConfig()}
 */
public File getConfigurationFilePath() {
    final File primaryConfig = crawlJob.getPrimaryConfig();
    return primaryConfig;
}
/**
 * Collects lines from the end of the job log.
 *
 * <p>The lines are read with {@code FileUtils.pagedLines(jobLog, -1, -5, ...)}
 * and then reversed. NOTE(review): the negative arguments presumably select
 * the last five lines; confirm against {@code FileUtils.pagedLines}.
 *
 * @return the collected lines in reversed order; an empty list when the job
 *         log file does not exist; {@code null} when reading it fails with an
 *         {@link IOException}
 */
public List generateJobLogTail() {
    List tail = new ArrayList();
    if (!crawlJob.getJobLog().exists()) {
        // No job log yet: report an empty tail.
        return tail;
    }
    try {
        FileUtils.pagedLines(crawlJob.getJobLog(), -1, -5, tail);
        Collections.reverse(tail);
    } catch (IOException ioe) {
        // Read failure is signalled to the caller as null rather than rethrown.
        return null;
    }
    return tail;
}
/**
 * Collects lines from the end of the crawl log.
 *
 * <p>The log is only consulted when the job is running, or when it has an
 * application context but is not launchable, and the crawl log file exists.
 * The lines are read with {@code FileUtils.pagedLines(crawlLog, -1, -10, ...)}
 * and then reversed. NOTE(review): the negative arguments presumably select
 * the last ten lines; confirm against {@code FileUtils.pagedLines}.
 *
 * @return the collected lines in reversed order; an empty list when the log
 *         is not consulted; {@code null} when reading it fails with an
 *         {@link IOException}
 */
public List generateCrawlLogTail() {
    List tail = new LinkedList();

    boolean logExpected = crawlJob.isRunning()
            || (crawlJob.hasApplicationContext() && !crawlJob.isLaunchable());
    // The controller is only dereferenced once logExpected is known to be true.
    if (!logExpected
            || !crawlJob.getCrawlController()
                    .getLoggerModule()
                    .getCrawlLogPath()
                    .getFile()
                    .exists()) {
        return tail;
    }

    try {
        FileUtils.pagedLines(
                crawlJob.getCrawlController().getLoggerModule().getCrawlLogPath().getFile(),
                -1,
                -10,
                tail);
        Collections.reverse(tail);
    } catch (IOException ioe) {
        // Read failure is signalled to the caller as null rather than rethrown.
        return null;
    }
    return tail;
}
public List