All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.manifoldcf.crawler.system.SeedingThread Maven / Gradle / Ivy

/* $Id: SeedingThread.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.system;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import org.apache.manifoldcf.crawler.system.Logging;
import java.util.*;
import java.lang.reflect.*;

/** This class represents the background seeding thread.  Its job is to add
* seeded documents from the connector periodically, during adaptive crawls
* (which continue until stopped).  The actual use case is for creating a
* connector that handles RSS feeds, including keeping them current and
* handling deletions.
*/
public class SeedingThread extends Thread
{
  public static final String _rcsid = "@(#)$Id: SeedingThread.java 988245 2010-08-23 18:39:35Z kwright $";

  // Local data
  /** Seeding reset manager */
  protected final SeedingResetManager resetManager;
  /** Process ID */
  protected final String processID;

  /** The number of documents that are added to the queue per transaction */
  protected final static int MAX_COUNT = 100;

  /** Constructor.
  */
  public SeedingThread(SeedingResetManager resetManager, String processID)
    throws ManifoldCFException
  {
    super();
    setName("Seeding thread");
    setDaemon(true);
    this.resetManager = resetManager;
    this.processID = processID;
  }

  public void run()
  {
    resetManager.registerMe();

    try
    {
      // Create a thread context object.
      IThreadContext threadContext = ThreadContextFactory.make();
      IJobManager jobManager = JobManagerFactory.make(threadContext);
      IRepositoryConnectionManager connectionMgr = RepositoryConnectionManagerFactory.make(threadContext);
      IReprioritizationTracker rt = ReprioritizationTrackerFactory.make(threadContext);

      IRepositoryConnectorPool repositoryConnectorPool = RepositoryConnectorPoolFactory.make(threadContext);
      
      String[] identifiers = new String[MAX_COUNT];
      // Loop
      while (true)
      {
        // Do another try/catch around everything in the loop
        try
        {
          // Before we begin, conditionally reset
          resetManager.waitForReset(threadContext);

          long currentTime = System.currentTimeMillis();

          // Accumulate the wait before doing the next check.
          // We start with 10 seconds, which is the maximum.  If there's a service request
          // that's faster than that, we'll adjust the time downward.
          long waitTime = 60000L;

          Logging.threads.debug("Seeding thread woke up");

          // Grab active, adaptive jobs (and set their state to xxxSEEDING as a side effect)
          JobSeedingRecord[] seedJobs = jobManager.getJobsReadyForSeeding(processID,currentTime);

          // Process these jobs, and do the seeding.  The seeding is based on what came back
          // in the job start record for sync time.  If there's an interruption, we just go on
          // to the next job, since the whole thing will retry anyhow.
          try
          {

            if (seedJobs.length == 0)
            {
              Logging.threads.debug("Seeding thread found nothing to do");
              ManifoldCF.sleep(waitTime);
              continue;
            }

            if (Logging.threads.isDebugEnabled())
              Logging.threads.debug("Seeding thread: Found "+Integer.toString(seedJobs.length)+" jobs to seed");

            // Loop through jobs
            int i = 0;
            while (i < seedJobs.length)
            {
              JobSeedingRecord jsr = seedJobs[i++];
              Long jobID = jsr.getJobID();
              try
              {
                String lastSeedingVersion = jsr.getSeedingVersionString();
                IJobDescription jobDescription = jobManager.load(jobID,true);
                int jobType = jobDescription.getType();

                int hopcountMethod = jobDescription.getHopcountMode();

                IRepositoryConnection connection = connectionMgr.load(jobDescription.getConnectionName());
                IRepositoryConnector connector = repositoryConnectorPool.grab(connection);
                // Null will come back if the connector instance could not be obtained, so just skip in that case.
                if (connector == null)
                  continue;

                String newSeedingVersion = null;
                try
                {
                  
                  // Get the number of link types.
                  String[] legalLinkTypes = connector.getRelationshipTypes();

                  int model = connector.getConnectorModel();

                  try
                  {

                    SeedingActivity activity = new SeedingActivity(connection.getName(),connectionMgr,
                      jobManager,rt,
                      connection,connector,jobID,legalLinkTypes,false,hopcountMethod,processID);

                    if (Logging.threads.isDebugEnabled())
                      Logging.threads.debug("Seeding thread: Getting seeds for job "+jobID.toString());

                    newSeedingVersion = connector.addSeedDocuments(activity,jobDescription.getSpecification(),lastSeedingVersion,currentTime,jobType);

                    activity.doneSeeding(model==connector.MODEL_PARTIAL);

                    if (Logging.threads.isDebugEnabled())
                      Logging.threads.debug("Seeding thread: Done processing seeds from job "+jobID.toString());


                  }
                  catch (ServiceInterruption e)
                  {
                    if (!e.jobInactiveAbort())
                    {
                      Logging.jobs.warn("Seeding service interruption reported for job "+
                        jobID+" connection '"+connection.getName()+"': "+
                        e.getMessage(),e);
                    }

                    // If either we are going to be requeuing beyond the fail time, OR
                    // the number of retries available has hit 0, THEN we treat this
                    // as either an "ignore" or a hard error.
                    if (!e.jobInactiveAbort() && (jsr.getFailTime() != -1L && jsr.getFailTime() < e.getRetryTime() ||
                      jsr.getFailRetryCount() == 0))
                    {
                      // Treat this as a hard failure.
                      if (e.isAbortOnFail())
                      {
                        // Note the error in the job, and transition to inactive state
                        String message = e.jobInactiveAbort()?"":"Repeated service interruptions during seeding"+((e.getCause()!=null)?": "+e.getCause().getMessage():"");
                        if (jobManager.errorAbort(jobID,message) && message.length() > 0)
                          Logging.jobs.error(message,e.getCause());
                        jsr.noteStarted();
                      }
                      else
                      {
                        // Not sure this can happen -- but just transition silently to active state
                        jobManager.noteJobSeeded(jobID,newSeedingVersion);
                        jsr.noteStarted();
                      }
                    }
                    else
                    {
                      // Reset the job to the READYFORSTARTUP state, updating the failtime and failcount fields
                      jobManager.retrySeeding(jsr,e.getFailTime(),e.getFailRetryCount());
                      jsr.noteStarted();
                    }
                    // Go on to the next job
                    continue;
                  }
                }
                finally
                {
                  repositoryConnectorPool.release(connection,connector);
                }


                if (Logging.threads.isDebugEnabled())
                  Logging.threads.debug("Seeding thread: Successfully reseeded job "+jobID.toString());

                // Note that this job has been seeded!
                jobManager.noteJobSeeded(jobID,newSeedingVersion);
                jsr.noteStarted();

              }
              catch (ManifoldCFException e)
              {
                if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
                  throw new InterruptedException();
                if (e.getErrorCode() == ManifoldCFException.DATABASE_CONNECTION_ERROR)
                  throw e;
                if (jobManager.errorAbort(jobID,e.getMessage()))
                  Logging.threads.error("Exception tossed: "+e.getMessage(),e);
                jsr.noteStarted();
              }
            }
          }
          finally
          {
            // Clean up all jobs that did not seed
            ManifoldCFException exception = null;
            int i = 0;
            while (i < seedJobs.length)
            {
              JobSeedingRecord jsr = seedJobs[i++];
              if (!jsr.wasStarted())
              {
                if (Logging.threads.isDebugEnabled())
                  Logging.threads.debug("Seeding thread: aborting reseed for "+jsr.getJobID().toString());

                // Clean up from failed seed.
                try
                {
                  jobManager.resetSeedJob(jsr.getJobID());
                }
                catch (ManifoldCFException e)
                {
                  if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
                    throw e;
                  exception = e;
                }
              }
            }
            if (exception != null)
              throw exception;
          }

          // Sleep for the retry interval.
          ManifoldCF.sleep(waitTime);
        }
        catch (ManifoldCFException e)
        {
          if (e.getErrorCode() == ManifoldCFException.INTERRUPTED)
            break;

          if (e.getErrorCode() == ManifoldCFException.DATABASE_CONNECTION_ERROR)
          {
            resetManager.noteEvent();

            Logging.threads.error("Seeding thread aborting and restarting due to database connection reset: "+e.getMessage(),e);
            try
            {
              // Give the database a chance to catch up/wake up
              ManifoldCF.sleep(10000L);
            }
            catch (InterruptedException se)
            {
              break;
            }
            continue;
          }

          // Log it, but keep the thread alive
          Logging.threads.error("Exception tossed: "+e.getMessage(),e);

          if (e.getErrorCode() == ManifoldCFException.SETUP_ERROR)
          {
            // Shut the whole system down!
            ManifoldCF.systemExit(1);
          }

        }
        catch (InterruptedException e)
        {
          // We're supposed to quit
          break;
        }
        catch (OutOfMemoryError e)
        {
          System.err.println("agents process ran out of memory - shutting down");
          e.printStackTrace(System.err);
          ManifoldCF.systemExit(-200);
        }
        catch (Throwable e)
        {
          // A more severe error - but stay alive
          Logging.threads.fatal("Error tossed: "+e.getMessage(),e);
        }
      }

    }
    catch (Throwable e)
    {
      // Severe error on initialization
      System.err.println("agents process could not start - shutting down");
      Logging.threads.fatal("SeedingThread initialization error tossed: "+e.getMessage(),e);
      ManifoldCF.systemExit(-300);
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy