All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.archive.crawler.framework.ToePool Maven / Gradle / Ivy

The newest version!
/*
 *  This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 *  Licensed to the Internet Archive (IA) by one or more individual 
 *  contributors. 
 *
 *  The IA licenses this file to You under the Apache License, Version 2.0
 *  (the "License"); you may not use this file except in compliance with
 *  the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.archive.crawler.framework;

import java.io.PrintWriter;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.commons.lang.StringUtils;
import org.archive.crawler.reporting.AlertThreadGroup;
import org.archive.util.ArchiveUtils;
import org.archive.util.Histotable;
import org.archive.util.Reporter;

/**
 * A collection of ToeThreads. The class manages the ToeThreads currently
 * running. It offers methods for increasing and decreasing their 
 * number, keeping track of their state and (not necessarily safely)
 * killing hung threads.
 *
 * @author Gordon Mohr
 * @author Kristinn Sigurdsson
 *
 * @see org.archive.crawler.framework.ToeThread
 */
public class ToePool extends ThreadGroup implements Reporter {
    /** run worker thread slightly lower than usual */
    public static int DEFAULT_TOE_PRIORITY = Thread.NORM_PRIORITY - 1;
    
    protected CrawlController controller;
    protected int nextSerialNumber = 1;
    protected int targetSize = 0; 

    /**
     * Constructor. Creates a pool of ToeThreads. 
     *
     * @param c A reference to the CrawlController for the current crawl.
     */
    public ToePool(AlertThreadGroup atg, CrawlController c) {
        super(atg, "ToeThreads");        
        this.controller = c;
        setDaemon(true);
    }
    
    public void cleanup() {
    	// force all Toes waiting on queues, etc to proceed
        Thread[] toes = getToes();
        for(Thread toe : toes) {
            if(toe!=null) {
                toe.interrupt();
            }
        }
        
        // see HER-2036
        this.controller = null;
    }

    /**
     * @return The number of ToeThreads that are not available (Approximation).
     */
    public int getActiveToeCount() {
        Thread[] toes = getToes();
        int count = 0;
        for (int i = 0; i < toes.length; i++) {
            if((toes[i] instanceof ToeThread) &&
                    ((ToeThread)toes[i]).isActive()) {
                count++;
            }
        }
        return count; 
    }

    /**
     * @return The number of ToeThreads. This may include killed ToeThreads
     *         that were not replaced.
     */
    public int getToeCount() {
        Thread[] toes = getToes();
        int count = 0;
        for (int i = 0; i 0) {
            // must create threads
            for(int i = 1; i <= difference; i++) {
                startNewThread();
            }
        } else {
            // must retire extra threads
            int retainedToes = targetSize; 
            Thread[] toes = this.getToes();
            for (int i = 0; i < toes.length ; i++) {
                if(!(toes[i] instanceof ToeThread)) {
                    continue;
                }
                retainedToes--;
                if (retainedToes>=0) {
                    continue; // this toe is spared
                }
                // otherwise:
                ToeThread tt = (ToeThread)toes[i];
                tt.retire();
            }
        }
    }

    /**
     * Kills specified thread. Killed thread can be optionally replaced with a
     * new thread.
     *
     * 

WARNING: This operation should be used with great care. It may * destabilize the crawler. * * @param threadNumber Thread to kill * @param replace If true then a new thread will be created to take the * killed threads place. Otherwise the total number of threads * will decrease by one. */ public void killThread(int threadNumber, boolean replace){ Thread[] toes = getToes(); for (int i = 0; i< toes.length; i++) { if(! (toes[i] instanceof ToeThread)) { continue; } ToeThread toe = (ToeThread) toes[i]; if(toe.getSerialNumber()==threadNumber) { toe.kill(); } } if(replace){ // Create a new toe thread to take its place. Replace toe startNewThread(); } } private synchronized void startNewThread() { ToeThread newThread = new ToeThread(this, nextSerialNumber++); newThread.setPriority(DEFAULT_TOE_PRIORITY); newThread.start(); } /** * @return Instance of CrawlController. */ public CrawlController getController() { return controller; } // // Reporter implementation // @Override public void reportTo(PrintWriter writer) { writer.print("Toe threads report - " + ArchiveUtils.get12DigitDate() + "\n"); writer.print(" Job being crawled: " + this.controller.getMetadata().getJobName() + "\n"); writer.print(" Number of toe threads in pool: " + getToeCount() + " (" + getActiveToeCount() + " active)\n\n"); Thread[] toes = this.getToes(); synchronized (toes) { for (int i = 0; i < toes.length; i++) { if (!(toes[i] instanceof ToeThread)) { continue; } ToeThread tt = (ToeThread) toes[i]; if (tt != null) { tt.reportTo(writer); } } } } public void compactReportTo(PrintWriter writer) { writer.print(getToeCount() + " threads (" + getActiveToeCount() + " active)\n"); Thread[] toes = this.getToes(); boolean legendWritten = false; // TODO: sort by activity: those with curi the longest at front synchronized (toes) { for (int i = 0; i < toes.length; i++) { if (!(toes[i] instanceof ToeThread)) { continue; } ToeThread tt = (ToeThread) toes[i]; if (tt != null) { if(!legendWritten) { writer.println(tt.shortReportLegend()); legendWritten = true; } tt.shortReportLineTo(writer); } } } } @Override public Map shortReportMap() { Histotable steps = new Histotable(); Histotable processors = new Histotable(); Thread[] toes = getToes(); for (int i = 0; i < toes.length; i++) { if(!(toes[i] instanceof ToeThread)) { continue; } ToeThread tt = (ToeThread)toes[i]; if(tt!=null) { steps.tally(tt.getStep().toString()); String currentProcessorName = tt.getCurrentProcessorName(); if (StringUtils.isEmpty(currentProcessorName)) { currentProcessorName = "noActiveProcessor"; } processors.tally(currentProcessorName); } } Map data = new LinkedHashMap(); data.put("toeCount", getToeCount()); LinkedList unwound = new LinkedList(); for (Entry step: steps.getSortedByCounts()) { unwound.add(step.getValue() + " " + step.getKey()); } data.put("steps", unwound); unwound = new LinkedList(); for (Entry proc: processors.getSortedByCounts()) { unwound.add(proc.getValue() + " " + proc.getKey()); } data.put("processors", unwound); return data; } @SuppressWarnings("unchecked") @Override public void shortReportLineTo(PrintWriter w) { Map map = shortReportMap(); w.print(map.get("toeCount")); w.print(" threads: "); LinkedList sortedSteps = (LinkedList)map.get("steps"); { Iterator iter = sortedSteps.iterator(); if (!iter.hasNext()) { return; } w.print(iter.next()); if (iter.hasNext()) { w.print(", "); w.print(iter.next()); if (iter.hasNext()) { w.print(", etc..."); } } w.print("; "); } LinkedList sortedProcesses = (LinkedList)map.get("processors"); { Iterator iter = sortedProcesses.iterator(); if (iter.hasNext()) { w.print(iter.next()); while (iter.hasNext()) { w.print(", "); w.print(iter.next()); } } } } /* (non-Javadoc) * @see org.archive.util.Reporter#singleLineLegend() */ @Override public String shortReportLegend() { return "total: mostCommonStateTotal secondMostCommonStateTotal"; } public void waitForAll() { while (true) try { if (isAllAlive(getToes())) { return; } Thread.sleep(1000); } catch (InterruptedException e) { throw new IllegalStateException(e); } } private static boolean isAllAlive(Thread[] threads) { for (Thread t: threads) { if ((t != null) && (!t.isAlive())) { return false; } } return true; } }