All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.manifoldcf.crawler.system.SetPriorityThread Maven / Gradle / Ivy

/* $Id: SetPriorityThread.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.system;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.crawler.system.Logging;
import java.util.*;
import java.lang.reflect.*;

/** This class describes a thread whose job it is to continuously reset document priorities, based on recent
* queuing activity.  The goal is to evenly distribute queued documents across jobs and within classes of documents
* inside jobs.
* The way it works is for the thread to do some number of pending documents at a time.  The documents are fetched in such
* a way as to get the ones that have been least recently assessed preferentially.  The assessment process involves
* getting hold of a connector for the job that owns the document, and calculating the priority based on the recent
* history as maintained in the queueTracker object.
*/
public class SetPriorityThread extends Thread
{
  public static final String _rcsid = "@(#)$Id: SetPriorityThread.java 988245 2010-08-23 18:39:35Z kwright $";

  // Local data
  /** This is the number of documents per cycle */
  protected final int cycleCount;
  /** The blocking documents object */
  protected final BlockingDocuments blockingDocuments;
  /** Process ID */
  protected final String processID;

  /** Constructor.
  */
  public SetPriorityThread(int workerThreadCount, BlockingDocuments blockingDocuments, String processID)
    throws ManifoldCFException
  {
    super();
    this.blockingDocuments = blockingDocuments;
    this.processID = processID;
    cycleCount = workerThreadCount * 10;
    setName("Set priority thread");
    setDaemon(true);
    // This thread's priority is highest so that stuffer thread does not go wanting
    setPriority(MAX_PRIORITY);
  }

  public void run()
  {
    try
    {
      // Create a thread context object.
      IThreadContext threadContext = ThreadContextFactory.make();
      IRepositoryConnectionManager mgr = RepositoryConnectionManagerFactory.make(threadContext);
      IJobManager jobManager = JobManagerFactory.make(threadContext);
      
      Logging.threads.debug("Set priority thread coming up");

      // Job description map (local) - designed to improve performance.
      // Cleared and reloaded on every batch of documents.
      HashMap jobDescriptionMap = new HashMap();

      // Repository connection map (local) - designed to improve performance.
      // Cleared and reloaded on every batch of documents.
      HashMap connectionMap = new HashMap();

      // Loop
      while (true)
      {
        // Do another try/catch around everything in the loop
        try
        {
          if (Thread.currentThread().isInterrupted())
            throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);

          Logging.threads.debug("Set priority thread woke up");

          // We're going to go through all eligible documents.  We will pick them up in
          // chunks however.
          long currentTime = System.currentTimeMillis();

          // Note well:
          // The priority that is given to a document is based on what priorities have been handed out in the past.  It is based
          // on the documents that have been queued for action, what job they belong to, the throttling in effect for the job,
          // and what the connection says that the document's throttling bins are.
          // Periodically, however, QueueTracker deliberately destroys the history.  My thinking here is that we need to "start over"
          // periodically to guarantee that we respond appropriately to changes in the environment - specifically, starting and
          // stopping jobs, changing throttling parameters, etc.

          // Clear the job description map and connection map
          jobDescriptionMap.clear();
          connectionMap.clear();

          // Do up to cycleCount documents in a "block".  Beyond this number we reset everything and loop back around.
          // This allows everything to restart, and higher priority documents can be found again.
          int processedCount = 0;
          while (true)
          {
            if (Thread.currentThread().isInterrupted())
              throw new ManifoldCFException("Interrupted",ManifoldCFException.INTERRUPTED);

            if (processedCount >= cycleCount)
            {
              Logging.threads.debug("Done reprioritizing because exceeded cycle count");
              break;
            }

            // Cycle through the current list of stuffer-identified documents until we come to the end.  Reprioritize these
            // first.  NOTE: These documents will already have document priorities.
            DocumentDescription desc = blockingDocuments.getBlockingDocument();
            if (desc != null)
            {
              ManifoldCF.writeDocumentPriorities(threadContext,
                new DocumentDescription[]{desc},connectionMap,jobDescriptionMap);
              processedCount++;
              continue;
            }
	    
            // Grab a list of document identifiers to set priority on.
            // We may well wind up calculating priority for documents that wind up having their
            // state changed before we can write back, but this is okay because update is only
            // going to be permitted for rows that still have the right state.
            DocumentDescription[] descs = jobManager.getNextNotYetProcessedReprioritizationDocuments(processID,1000);
            if (descs.length > 0)
            {
              ManifoldCF.writeDocumentPriorities(threadContext,
                descs,connectionMap,jobDescriptionMap);
              processedCount += descs.length;
              continue;
            }

            Logging.threads.debug("Done reprioritizing because no more documents to reprioritize");
            ManifoldCF.sleep(5000L);
            break;

          }

        }
        catch (ManifoldCFException e)
        {
          if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
            break;

          if (e.getErrorCode() == ManifoldCFException.DATABASE_CONNECTION_ERROR)
          {
            Logging.threads.error("Set priority thread aborting and restarting due to database connection reset: "+e.getMessage(),e);
            try
            {
              // Give the database a chance to catch up/wake up
              ManifoldCF.sleep(10000L);
            }
            catch (InterruptedException se)
            {
              break;
            }
            continue;
          }

          // Log it, but keep the thread alive
          Logging.threads.error("Exception tossed: "+e.getMessage(),e);

          if (e.getErrorCode() == ManifoldCFException.SETUP_ERROR)
          {
            ManifoldCF.systemExit(1);
          }

        }
        catch (InterruptedException e)
        {
          // We're supposed to quit
          break;
        }
        catch (OutOfMemoryError e)
        {
          System.err.println("agents process ran out of memory - shutting down");
          e.printStackTrace(System.err);
          ManifoldCF.systemExit(-200);
        }
        catch (Throwable e)
        {
          // A more severe error - but stay alive
          Logging.threads.fatal("Error tossed: "+e.getMessage(),e);
        }
      }
    }
    catch (Throwable e)
    {
      // Severe error on initialization
      System.err.println("agents process could not start - shutting down");
      Logging.threads.fatal("SetPriorityThread initialization error tossed: "+e.getMessage(),e);
      ManifoldCF.systemExit(-300);
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy