All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.archive.crawler.monitor.DiskSpaceMonitor Maven / Gradle / Ivy

The newest version!
package org.archive.crawler.monitor;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.archive.crawler.event.StatSnapshotEvent;
import org.archive.crawler.framework.CrawlController;
import org.archive.spring.ConfigPath;
import org.archive.spring.ConfigPathConfigurer;
import org.archive.util.ArchiveUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationEvent;
import org.springframework.context.ApplicationListener;

/**
 * Monitors the available space on the paths configured. If the available space
 * drops below a specified threshold a crawl pause is requested.
 * 

* Monitoring is done via the java.io.File.getUsableSpace() method. * This method will sometimes fail on network attached storage, returning 0 * bytes available even if that is not actually the case. *

* Paths that do not resolve to actual filesystem folders or files will not be * evaluated (i.e. if java.io.File.exists() returns false * no further processing is carried out on that File). *

* Paths are checked available space whenever a {@link StatSnapshotEvent} occurs. * * @author Kristinn Sigurðsson */ public class DiskSpaceMonitor implements ApplicationListener { private static final Logger logger = Logger.getLogger(DiskSpaceMonitor.class.getName()); protected List monitorPaths = new ArrayList(); protected long pauseThresholdMiB = 8192; protected CrawlController controller; protected ConfigPathConfigurer configPathConfigurer; protected boolean monitorConfigPaths = true; /** * @param monitorPaths List of filesystem paths that should be monitored for available space. */ public void setMonitorPaths(List monitorPaths) { this.monitorPaths = monitorPaths; } public List getMonitorPaths() { return this.monitorPaths; } /** * Set the minimum amount of space that must be available on all monitored paths. * If the amount falls below this pause threshold on any path the crawl will be paused. * * @param pauseThresholdMiB The desired pause threshold value. * Specified in megabytes (MiB). */ public void setPauseThresholdMiB(long pauseThresholdMiB) { this.pauseThresholdMiB = pauseThresholdMiB; } public long getPauseThresholdMiB() { return this.pauseThresholdMiB; } /** * If enabled, all the paths returned by {@link ConfigPathConfigurer#getAllConfigPaths()} * will be monitored in addition to any paths explicitly specified via * {@link #setMonitorPaths(List)}. *

* true by default. *

* Note: This is not guaranteed to contain all paths that Heritrix writes to. * It is the responsibility of modules that write to disk to register their activity * with the {@link ConfigPathConfigurer} and some may not do so. * * @param monitorConfigPaths If config paths should be monitored for usable space. */ public void setMonitorConfigPaths(boolean monitorConfigPaths){ this.monitorConfigPaths = monitorConfigPaths; } public boolean getMonitorConfigPaths(){ return this.monitorConfigPaths; } /** Autowire access to CrawlController **/ @Autowired public void setCrawlController(CrawlController controller) { this.controller = controller; } public CrawlController getCrawlController() { return this.controller; } /** Autowire access to ConfigPathConfigurer **/ @Autowired public void setConfigPathConfigurer(ConfigPathConfigurer configPathConfigurer) { this.configPathConfigurer = configPathConfigurer; } public ConfigPathConfigurer getConfigPathConfigurer() { return this.configPathConfigurer; } /** * Checks available space on {@link StatSnapshotEvent}s. */ @Override public void onApplicationEvent(ApplicationEvent event) { if (event instanceof StatSnapshotEvent) { // Check available space every time the statistics tracker // updates its sample, by default every 20 sec. for (String path : getMonitorPaths()) { checkAvailableSpace(new File(path)); } if (monitorConfigPaths) { for(ConfigPath path : configPathConfigurer.getAllConfigPaths().values()) { checkAvailableSpace(path.getFile()); } } } } /** * Probe via File.getUsableSpace to see if monitored paths have fallen below * the pause threshold. If so, request a crawl pause. * * @param path The filesystem path to check for usable space */ protected void checkAvailableSpace(File path) { if (!path.exists()) { // Paths that can not be resolved will not report accurate // available space. Log and ignore. logger.fine("Ignoring non-existent path " + path.getAbsolutePath()); return; } long availBytes = path.getUsableSpace(); long thresholdBytes = getPauseThresholdMiB() * 1024 * 1024; if (availBytes < thresholdBytes && controller.isActive()) { // Enact pause controller.requestCrawlPause(); // Log issue String errorMsg = "Low Disk Pause - %d bytes (%s) available on %s, " + "this is below the minimum threshold of %d bytes (%s)"; logger.log(Level.SEVERE, String.format(errorMsg, availBytes, ArchiveUtils.formatBytesForDisplay(availBytes), path.getAbsolutePath(), thresholdBytes, ArchiveUtils.formatBytesForDisplay(thresholdBytes))); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy