// org.archive.crawler.framework.ToePool (Maven / Gradle / Ivy)
/*
* This file is part of the Heritrix web crawler (crawler.archive.org).
*
* Licensed to the Internet Archive (IA) by one or more individual
* contributors.
*
* The IA licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.archive.crawler.framework;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.lang.StringUtils;
import org.archive.crawler.reporting.AlertThreadGroup;
import org.archive.util.ArchiveUtils;
import org.archive.util.Histotable;
import org.archive.util.Reporter;
/**
* A collection of ToeThreads. The class manages the ToeThreads currently
* running. It offers methods for increasing and decreasing their
* number, keeping track of their state and (not necessarily safely)
* killing hung threads.
*
* @author Gordon Mohr
* @author Kristinn Sigurdsson
*
* @see org.archive.crawler.framework.ToeThread
*/
public class ToePool extends ThreadGroup implements Reporter {
/**
 * Priority assigned to every newly started toe thread: run worker thread
 * slightly lower than usual.
 */
public static int DEFAULT_TOE_PRIORITY = Thread.NORM_PRIORITY - 1;
/**
 * Controller of the current crawl. Set by the constructor and reset to
 * null by cleanup().
 */
protected CrawlController controller;
/** Serial number handed to the next ToeThread created; counts up from 1. */
protected int nextSerialNumber = 1;
/**
 * Number of ToeThreads that are spared when surplus threads are retired.
 * NOTE(review): the assignment of this field is not visible in this part
 * of the file; presumably the pool-resizing method sets it. Confirm.
 */
protected int targetSize = 0;
/**
 * Creates an empty pool: a daemon thread group named "ToeThreads" nested
 * under the given alert thread group. No ToeThreads are started here.
 *
 * @param atg The thread group this pool is created under.
 * @param c A reference to the CrawlController for the current crawl.
 */
public ToePool(AlertThreadGroup atg, CrawlController c) {
    super(atg, "ToeThreads");
    setDaemon(true);
    controller = c;
}
/**
 * Interrupts every thread currently listed for this pool and then drops
 * the pool's reference to the CrawlController.
 */
public void cleanup() {
    // force all Toes waiting on queues, etc to proceed
    Thread[] toes = getToes();
    for (int slot = 0; slot < toes.length; slot++) {
        Thread toe = toes[slot];
        if (toe == null) {
            continue; // nothing in this slot to interrupt
        }
        toe.interrupt();
    }
    // see HER-2036
    controller = null;
}
/**
 * Counts the ToeThreads in the pool that report themselves as active.
 * Entries that are not ToeThreads are ignored.
 *
 * @return The number of ToeThreads that are not available (Approximation).
 */
public int getActiveToeCount() {
    int active = 0;
    for (Thread candidate : getToes()) {
        if (!(candidate instanceof ToeThread)) {
            continue;
        }
        if (((ToeThread) candidate).isActive()) {
            active++;
        }
    }
    return active;
}
/**
* @return The number of ToeThreads. This may include killed ToeThreads
* that were not replaced.
*/
public int getToeCount() {
Thread[] toes = getToes();
int count = 0;
for (int i = 0; i 0) {
// must create threads
for(int i = 1; i <= difference; i++) {
startNewThread();
}
} else {
// must retire extra threads
int retainedToes = targetSize;
Thread[] toes = this.getToes();
for (int i = 0; i < toes.length ; i++) {
if(!(toes[i] instanceof ToeThread)) {
continue;
}
retainedToes--;
if (retainedToes>=0) {
continue; // this toe is spared
}
// otherwise:
ToeThread tt = (ToeThread)toes[i];
tt.retire();
}
}
}
/**
 * Kills the ToeThread carrying the given serial number and, if requested,
 * starts a fresh thread afterwards.
 *
 * WARNING: This operation should be used with great care. It may
 * destabilize the crawler.
 *
 * NOTE(review): when replace is true a new thread is started even if no
 * ToeThread matched threadNumber; confirm that this is intended.
 *
 * @param threadNumber Serial number of the thread to kill
 * @param replace If true then a new thread will be created to take the
 * killed threads place. Otherwise the total number of threads
 * will decrease by one.
 */
public void killThread(int threadNumber, boolean replace) {
    for (Thread candidate : getToes()) {
        if (candidate instanceof ToeThread) {
            ToeThread toe = (ToeThread) candidate;
            if (toe.getSerialNumber() == threadNumber) {
                toe.kill();
            }
        }
    }
    if (!replace) {
        return;
    }
    // Create a new toe thread to take its place. Replace toe
    startNewThread();
}
/**
 * Creates and starts one more ToeThread in this pool, giving it the next
 * serial number and the default toe priority.
 */
private synchronized void startNewThread() {
    int serial = nextSerialNumber;
    nextSerialNumber = serial + 1;
    ToeThread toe = new ToeThread(this, serial);
    toe.setPriority(DEFAULT_TOE_PRIORITY);
    toe.start();
}
/**
 * Gives access to the crawl controller currently held by this pool.
 *
 * @return Instance of CrawlController. Note that cleanup() resets the
 * held reference to null.
 */
public CrawlController getController() {
    return this.controller;
}
//
// Reporter implementation
//
/**
 * Writes the full toe thread report: a dated header line, the name of the
 * job being crawled, the total and active thread counts, and then the
 * individual report of every ToeThread in the pool.
 *
 * @param writer Where the report is written.
 */
@Override
public void reportTo(PrintWriter writer) {
    writer.print("Toe threads report - "
            + ArchiveUtils.get12DigitDate() + "\n");
    writer.print(" Job being crawled: "
            + this.controller.getMetadata().getJobName() + "\n");
    int total = getToeCount();
    int active = getActiveToeCount();
    writer.print(" Number of toe threads in pool: " + total + " ("
            + active + " active)\n\n");
    Thread[] toes = this.getToes();
    synchronized (toes) {
        for (Thread candidate : toes) {
            // instanceof is false for null, so no separate null check is needed
            if (candidate instanceof ToeThread) {
                ((ToeThread) candidate).reportTo(writer);
            }
        }
    }
}
/**
 * Writes a condensed report: one summary line with the total and active
 * thread counts, a single legend line taken from the first ToeThread
 * encountered, and then one short line per ToeThread.
 *
 * @param writer Where the report is written.
 */
public void compactReportTo(PrintWriter writer) {
    int total = getToeCount();
    int active = getActiveToeCount();
    writer.print(total + " threads (" + active + " active)\n");
    Thread[] toes = this.getToes();
    boolean legendPending = true;
    // TODO: sort by activity: those with curi the longest at front
    synchronized (toes) {
        for (Thread candidate : toes) {
            if (!(candidate instanceof ToeThread)) {
                continue;
            }
            ToeThread tt = (ToeThread) candidate;
            if (legendPending) {
                // the legend is emitted once, ahead of the first line
                writer.println(tt.shortReportLegend());
                legendPending = false;
            }
            tt.shortReportLineTo(writer);
        }
    }
}
@Override
public Map shortReportMap() {
Histotable