// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// org.apache.manifoldcf.crawler.jobs.JobDescription Maven / Gradle / Ivy

/* $Id: JobDescription.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.jobs;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import java.util.*;

/** This is a paper object describing a job.
* Each job in the lcf framework has:
* - an identifier;
* - a description;
* - a repository connection;
* - one of a number of scheduling options: starting every n hours/days/weeks/months, on specific dates, or "continuous" (which basically
*   establishes a priority queue based on modification frequency);
* - "seeds" (or starting points), which are the places that scanning begins.
* Also remember that since incremental deletion must occur on a job-by-job basis, the scanning data also records the job that
* performed the scan, so that each job can rescan previous ingested data, and delete documents that have been removed.
*/
public class JobDescription implements IJobDescription
{
  public static final String _rcsid = "@(#)$Id: JobDescription.java 988245 2010-08-23 18:39:35Z kwright $";

  // Data

  /** True until the object is explicitly marked as not new via setIsNew(). */
  protected boolean isNew = true;
  /** Job identifier; null until set. */
  protected Long id = null;
  /** Job description text; null until set. */
  protected String description = null;
  /** Name of the connection this job uses; null until set. */
  protected String connectionName = null;
  /** Ordered pipeline stages.  Each stage refers to its prerequisite by index into this list. */
  protected final List<PipelineStage> pipelineStages = new ArrayList<PipelineStage>();
  /** Ordered notifications. */
  protected final List<Notification> notifications = new ArrayList<Notification>();
  protected int type = TYPE_CONTINUOUS;
  protected int startMethod = START_WINDOWBEGIN;
  protected int priority = 5;

  // Absolute job-triggering times
  protected ScheduleList scheduleList = new ScheduleList();

  // Throttle
  protected Float rate = null;

  // Default interval for continuous crawling
  protected Long interval = Long.valueOf(1000L*3600L*24L);            // 1 day is the default

  // Maximum interval for continuous crawling
  protected Long maxInterval = null;

  // Document expiration time for this job, in milliseconds
  protected Long expiration = null;                       // Never is the default

  // Default reseed interval for continuous crawling
  protected Long reseedInterval = Long.valueOf(60L * 60L * 1000L);    // 1 hour is the default

  // Document specification
  protected Specification documentSpecification = new Specification();

  // Hop count filters, keyed by link type; the value is the maximum hop count.
  protected HashMap<String,Long> hopCountFilters = new HashMap<String,Long>();

  // Hopcount mode
  protected int hopcountMode = HOPCOUNT_ACCURATE;

  // Read-only mode
  protected boolean readOnly = false;


  /** Guard used by every mutator: refuses modification once the object is read-only.
  *@throws IllegalStateException if this object has been made read-only.
  */
  private void checkWritable()
  {
    if (readOnly)
      throw new IllegalStateException("Attempt to change read-only object");
  }

  /** Duplicate method, with optional "readonly" flag.
  *@param readOnly is true if the returned object should be read-only.
  *@return this same object when both it and the requested copy are read-only; otherwise a new
  * JobDescription carrying copies of the specifications, pipeline stages, notifications, schedule
  * and hop count filters.
  */
  public JobDescription duplicate(boolean readOnly)
  {
    // A read-only object cannot change, so it can stand in for its own read-only copy.
    if (readOnly && this.readOnly)
      return this;
    // Make a new copy; we'll label it as readonly or not based on the input flag
    JobDescription rval = new JobDescription();
    rval.id = id;
    rval.isNew = isNew;
    rval.connectionName = connectionName;
    rval.description = description;
    rval.type = type;
    rval.interval = interval;
    rval.maxInterval = maxInterval;
    rval.expiration = expiration;
    rval.reseedInterval = reseedInterval;
    rval.rate = rate;
    rval.priority = priority;
    rval.startMethod = startMethod;
    rval.hopcountMode = hopcountMode;
    // Specifications can be modified directly by callers, so each copy has to know whether it is read-only.
    rval.documentSpecification = documentSpecification.duplicate(readOnly);
    for (PipelineStage pipelineStage : pipelineStages)
    {
      rval.pipelineStages.add(new PipelineStage(pipelineStage.getPrerequisiteStage(),
        pipelineStage.getIsOutput(),
        pipelineStage.getConnectionName(),
        pipelineStage.getDescription(),
        pipelineStage.getSpecification().duplicate(readOnly)));
    }
    for (Notification notification : notifications)
    {
      rval.notifications.add(new Notification(notification.getConnectionName(),
        notification.getDescription(),
        notification.getSpecification().duplicate(readOnly)));
    }
    // No direct modification of the schedule list is possible, so a plain duplicate suffices
    rval.scheduleList = scheduleList.duplicate();
    rval.hopCountFilters.putAll(hopCountFilters);
    rval.readOnly = readOnly;
    return rval;
  }

  /** Make the description "read only".  This must be done after the object has been completely specified.
  * Once an object is read-only, it cannot be made writable without duplication.
  * The document specification and the specifications of all pipeline stages and notifications
  * are made read-only as well.
  */
  public void makeReadOnly()
  {
    if (readOnly)
      return;
    readOnly = true;
    for (PipelineStage pipelineStage : pipelineStages)
    {
      pipelineStage.getSpecification().makeReadOnly();
    }
    for (Notification notification : notifications)
    {
      notification.getSpecification().makeReadOnly();
    }
    documentSpecification.makeReadOnly();
  }

  /** Set isnew.
  *@param isNew is true if the object is new.
  */
  public void setIsNew(boolean isNew)
  {
    checkWritable();
    this.isNew = isNew;
  }

  /** Get isnew.
  *@return true if the object is new.
  */
  @Override
  public boolean getIsNew()
  {
    return isNew;
  }

  /** Set the id.
  *@param id is the id.
  */
  public void setID(Long id)
  {
    checkWritable();
    this.id = id;
  }

  /** Get the id.
  *@return the id.
  */
  @Override
  public Long getID()
  {
    return id;
  }

  /** Set the description.
  *@param description is the description.
  */
  @Override
  public void setDescription(String description)
  {
    checkWritable();
    this.description = description;
  }

  /** Get the description.
  *@return the description
  */
  @Override
  public String getDescription()
  {
    return description;
  }

  /** Set the connection name.
  *@param connectionName is the connection name.
  */
  @Override
  public void setConnectionName(String connectionName)
  {
    checkWritable();
    this.connectionName = connectionName;
  }

  /** Get the connection name.
  *@return the connection name.
  */
  @Override
  public String getConnectionName()
  {
    return connectionName;
  }

  /** Clear pipeline connections */
  @Override
  public void clearPipeline()
  {
    checkWritable();
    pipelineStages.clear();
  }

  /** Add a pipeline connection.
  *@param prerequisiteStage is the prerequisite stage number for this connection, or -1 if there is none.
  *@param isOutput is true if the pipeline stage is an output connection.
  *@param pipelineStageConnectionName is the name of the pipeline connection to add.
  *@param pipelineStageDescription is a description of the pipeline stage being added.
  *@return the empty output specification for this pipeline stage.
  */
  @Override
  public Specification addPipelineStage(int prerequisiteStage, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
  {
    checkWritable();
    PipelineStage ps = new PipelineStage(prerequisiteStage,isOutput,pipelineStageConnectionName,pipelineStageDescription);
    pipelineStages.add(ps);
    return ps.getSpecification();
  }

  /** Get a count of pipeline stages */
  @Override
  public int countPipelineStages()
  {
    return pipelineStages.size();
  }

  /** Insert a new pipeline stage.
  *@param index is the index to insert pipeline stage before; a stage must already exist at this index.
  *@param isOutput is true if the pipeline stage being inserted is an output connection.
  *@param pipelineStageConnectionName is the connection name.
  *@param pipelineStageDescription is the description.
  *@return the newly-created output specification.
  */
  @Override
  public Specification insertPipelineStage(int index, boolean isOutput, String pipelineStageConnectionName, String pipelineStageDescription)
  {
    checkWritable();
    // What we do here depends on the kind of stage we're inserting.
    // Both kinds take the current stage's prerequisite as their own.  But what happens to the current stage will
    // differ as to whether its reference changes or not.
    PipelineStage currentStage = pipelineStages.get(index);
    PipelineStage ps = new PipelineStage(currentStage.getPrerequisiteStage(),isOutput,pipelineStageConnectionName,pipelineStageDescription);
    pipelineStages.add(index,ps);
    // The displaced stage now lives at index+1
    currentStage.adjustReplacedStage(index,isOutput);
    // Adjust stage back-references for everything after the displaced stage
    for (int stage = index + 2; stage < pipelineStages.size(); stage++)
    {
      pipelineStages.get(stage).adjustForInsert(index);
    }
    return ps.getSpecification();
  }

  /** Get the prerequisite stage number for a pipeline stage.
  *@param index is the index of the pipeline stage to get.
  *@return the preceding stage number for that stage, or -1 if there is none.
  */
  @Override
  public int getPipelineStagePrerequisite(int index)
  {
    return pipelineStages.get(index).getPrerequisiteStage();
  }

  /** Check if a pipeline stage is an output connection.
  *@param index is the index of the pipeline stage to check.
  *@return true if it is an output connection.
  */
  @Override
  public boolean getPipelineStageIsOutputConnection(int index)
  {
    return pipelineStages.get(index).getIsOutput();
  }

  /** Get a specific pipeline connection name.
  *@param index is the index of the pipeline stage whose connection name to get.
  *@return the name of the connection.
  */
  @Override
  public String getPipelineStageConnectionName(int index)
  {
    return pipelineStages.get(index).getConnectionName();
  }

  /** Get a specific pipeline stage description.
  *@param index is the index of the pipeline stage whose description to get.
  *@return the description of the pipeline stage.
  */
  @Override
  public String getPipelineStageDescription(int index)
  {
    return pipelineStages.get(index).getDescription();
  }

  /** Get a specific pipeline stage specification.
  *@param index is the index of the pipeline stage whose specification is needed.
  *@return the specification for the connection.
  */
  @Override
  public Specification getPipelineStageSpecification(int index)
  {
    return pipelineStages.get(index).getSpecification();
  }

  /** Delete a pipeline stage.
  * Later stages that referred to the deleted stage are re-pointed at the deleted stage's own
  * prerequisite, and references to stages beyond it are shifted down by one.
  *@param index is the index of the pipeline stage to delete.
  */
  @Override
  public void deletePipelineStage(int index)
  {
    checkWritable();
    PipelineStage ps = pipelineStages.remove(index);
    for (int stage = index; stage < pipelineStages.size(); stage++)
    {
      pipelineStages.get(stage).adjustForDelete(index,ps.getPrerequisiteStage());
    }
  }

  /** Clear notification connections.
  */
  @Override
  public void clearNotifications()
  {
    checkWritable();
    notifications.clear();
  }

  /** Add a notification.
  *@param notificationConnectionName is the name of the notification connection to add.
  *@param notificationDescription is a description of the notification being added.
  *@return the empty specification for this notification.
  */
  @Override
  public Specification addNotification(String notificationConnectionName, String notificationDescription)
  {
    checkWritable();
    Notification ps = new Notification(notificationConnectionName,notificationDescription);
    notifications.add(ps);
    return ps.getSpecification();
  }

  /** Get a count of notifications.
  *@return the current number of notifications.
  */
  @Override
  public int countNotifications()
  {
    return notifications.size();
  }

  /** Get a specific notification connection name.
  *@param index is the index of the notification whose connection name to get.
  *@return the name of the connection.
  */
  @Override
  public String getNotificationConnectionName(int index)
  {
    return notifications.get(index).getConnectionName();
  }

  /** Get a specific notification description.
  *@param index is the index of the notification whose description to get.
  *@return the description of the notification.
  */
  @Override
  public String getNotificationDescription(int index)
  {
    return notifications.get(index).getDescription();
  }

  /** Get a specific notification specification.
  *@param index is the index of the notification whose specification is needed.
  *@return the specification for the connection.
  */
  @Override
  public Specification getNotificationSpecification(int index)
  {
    return notifications.get(index).getSpecification();
  }

  /** Delete a notification.
  *@param index is the index of the notification to delete.
  */
  @Override
  public void deleteNotification(int index)
  {
    checkWritable();
    notifications.remove(index);
  }

  /** Insert a new notification.
  *@param index is the index to insert the notification before.
  *@param notificationConnectionName is the connection name.
  *@param notificationDescription is the description.
  *@return the newly-created specification.
  */
  @Override
  public Specification insertNotification(int index, String notificationConnectionName, String notificationDescription)
  {
    checkWritable();
    // Notifications carry no back-references to one another, so a plain list insert is all that is needed.
    Notification ps = new Notification(notificationConnectionName,notificationDescription);
    notifications.add(index,ps);
    return ps.getSpecification();
  }

  /** Set the job type.
  *@param type is the type (as an integer).
  */
  @Override
  public void setType(int type)
  {
    checkWritable();
    this.type = type;
  }

  /** Get the job type.
  *@return the type (as an integer).
  */
  @Override
  public int getType()
  {
    return type;
  }

  /** Set the job's start method.
  *@param startMethod is the start description.
  */
  @Override
  public void setStartMethod(int startMethod)
  {
    checkWritable();
    this.startMethod = startMethod;
  }

  /** Get the job's start method.
  *@return the start method.
  */
  @Override
  public int getStartMethod()
  {
    return startMethod;
  }


  // For day-specific jobs.  These occur at a given time that matches the specifications.
  // The specifications set certain criteria (specific hours, days of the week, etc.)

  /** Clear all the scheduling records.
  */
  @Override
  public void clearScheduleRecords()
  {
    checkWritable();
    scheduleList.clear();
  }

  /** Add a record.
  *@param record is the record to add.
  */
  @Override
  public void addScheduleRecord(ScheduleRecord record)
  {
    checkWritable();
    scheduleList.addRecord(record);
  }

  /** Get the number of schedule records.
  *@return the count.
  */
  @Override
  public int getScheduleRecordCount()
  {
    return scheduleList.getRecordCount();
  }

  /** Get a specified schedule record.
  *@param index is the record number.
  *@return the record.
  */
  @Override
  public ScheduleRecord getScheduleRecord(int index)
  {
    return scheduleList.getRecord(index);
  }

  /** Delete a specified schedule record.
  *@param index is the record number.
  */
  @Override
  public void deleteScheduleRecord(int index)
  {
    checkWritable();
    scheduleList.deleteRecord(index);
  }


  // For continuous jobs
  // This is the rescheduling interval to use when no calculated interval is known

  /** Set the rescheduling interval, in milliseconds.
  *@param interval is the default interval, or null for infinite.
  */
  @Override
  public void setInterval(Long interval)
  {
    checkWritable();
    this.interval = interval;
  }

  /** Get the rescheduling interval, in milliseconds.
  *@return the default interval, or null for infinite.
  */
  @Override
  public Long getInterval()
  {
    return interval;
  }

  /** Set the maximum rescheduling interval, in milliseconds, or null if forever.
  *@param interval is the maximum interval.
  */
  @Override
  public void setMaxInterval(Long interval)
  {
    checkWritable();
    this.maxInterval = interval;
  }

  /** Get the maximum rescheduling interval, in milliseconds.
  *@return the max interval, or null if forever.
  */
  @Override
  public Long getMaxInterval()
  {
    return maxInterval;
  }

  /** Set the expiration time, in milliseconds.
  *@param time is the maximum expiration time of a document, in milliseconds, or null if none.
  */
  @Override
  public void setExpiration(Long time)
  {
    checkWritable();
    expiration = time;
  }

  /** Get the expiration time, in milliseconds.
  *@return the maximum expiration time of a document, or null if none.
  */
  @Override
  public Long getExpiration()
  {
    return expiration;
  }

  /** Set the reseeding interval, in milliseconds.
  *@param interval is the interval, or null for infinite.
  */
  @Override
  public void setReseedInterval(Long interval)
  {
    checkWritable();
    this.reseedInterval = interval;
  }

  /** Get the reseeding interval, in milliseconds.
  *@return the interval, or null if infinite.
  */
  @Override
  public Long getReseedInterval()
  {
    return reseedInterval;
  }

  /** Get the document specification.
  *@return the document specification object.
  */
  @Override
  public Specification getSpecification()
  {
    return documentSpecification;
  }


  /** Set the job priority.  This is a simple integer between 1 and 10, where
  * 1 is the highest priority.
  *@param priority is the priority.
  */
  @Override
  public void setPriority(int priority)
  {
    checkWritable();
    this.priority = priority;
  }

  /** Get the job priority.
  *@return the priority (a number between 1 and 10).
  */
  @Override
  public int getPriority()
  {
    return priority;
  }

  // Hopcount filters

  /** Get the set of hopcount filters the job has defined.
  *@return a copy of the set as a map, keyed by Strings and containing Longs.  Changes to the returned
  * map do not affect this job description.
  */
  @Override
  public Map getHopCountFilters()
  {
    return new HashMap<String,Long>(hopCountFilters);
  }

  /** Clear the set of hopcount filters for the job.
  */
  @Override
  public void clearHopCountFilters()
  {
    checkWritable();
    hopCountFilters.clear();
  }


  /** Add a hopcount filter to the job.
  *@param linkType is the type of link the filter applies to.
  *@param maxHops is the maximum hop count.  Use null to remove a filter.
  */
  @Override
  public void addHopCountFilter(String linkType, Long maxHops)
  {
    checkWritable();
    // A null maximum means "no filter for this link type", so drop the entry rather than
    // storing a null value that callers of getHopCountFilters() would have to special-case.
    if (maxHops == null)
      hopCountFilters.remove(linkType);
    else
      hopCountFilters.put(linkType,maxHops);
  }

  /** Get the hopcount mode.
  *@return the hopcount mode (as an integer).
  */
  @Override
  public int getHopcountMode()
  {
    return hopcountMode;
  }

  /** Set the hopcount mode.
  *@param mode is the hopcount mode (as an integer).
  */
  @Override
  public void setHopcountMode(int mode)
  {
    checkWritable();
    hopcountMode = mode;
  }

  /** A single notification: a connection name, a description, and a specification.
  */
  protected static class Notification
  {
    protected final String connectionName;
    protected final String description;
    protected final Specification specification;

    /** Create a notification with a new, empty specification.
    *@param connectionName is the notification connection name.
    *@param description is the notification description.
    */
    public Notification(String connectionName, String description)
    {
      this(connectionName,description,new Specification());
    }

    /** Create a notification around an existing specification.
    *@param connectionName is the notification connection name.
    *@param description is the notification description.
    *@param spec is the specification to use; it is stored as-is, not copied.
    */
    public Notification(String connectionName, String description, Specification spec)
    {
      this.connectionName = connectionName;
      this.description = description;
      this.specification = spec;
    }

    /** Get the specification.
    *@return the specification.
    */
    public Specification getSpecification()
    {
      return specification;
    }

    /** Get the connection name.
    *@return the connection name.
    */
    public String getConnectionName()
    {
      return connectionName;
    }

    /** Get the description.
    *@return the description.
    */
    public String getDescription()
    {
      return description;
    }

  }

  /** A single pipeline stage.  The prerequisite is held as an index into the owning job's
  * stage list (-1 if none), so it must be adjusted whenever stages are inserted or deleted.
  */
  protected static class PipelineStage
  {
    protected int prerequisiteStage;
    protected final boolean isOutput;
    protected final String connectionName;
    protected final String description;
    protected final Specification specification;

    /** Create a pipeline stage with a new, empty specification.
    *@param prerequisiteStage is the prerequisite stage number, or -1 if there is none.
    *@param isOutput is true if this stage is an output connection.
    *@param connectionName is the connection name.
    *@param description is the stage description.
    */
    public PipelineStage(int prerequisiteStage, boolean isOutput, String connectionName, String description)
    {
      this(prerequisiteStage,isOutput,connectionName,description,new Specification());
    }

    /** Create a pipeline stage around an existing specification.
    *@param prerequisiteStage is the prerequisite stage number, or -1 if there is none.
    *@param isOutput is true if this stage is an output connection.
    *@param connectionName is the connection name.
    *@param description is the stage description.
    *@param spec is the specification to use; it is stored as-is, not copied.
    */
    public PipelineStage(int prerequisiteStage, boolean isOutput, String connectionName, String description, Specification spec)
    {
      this.prerequisiteStage = prerequisiteStage;
      this.isOutput = isOutput;
      this.connectionName = connectionName;
      this.description = description;
      this.specification = spec;
    }

    /** Adjust the stage that was displaced by an insert at the given index.
    *@param index is the index at which the new stage was inserted.
    *@param isOutput is true if the inserted stage is an output connection.  If it is not, this stage
    * takes the inserted stage as its prerequisite; if it is, this stage's prerequisite is only
    * shifted in the same way as for any other later stage.
    */
    public void adjustReplacedStage(int index, boolean isOutput)
    {
      if (!isOutput)
        prerequisiteStage = index;
      else
        adjustForInsert(index);
    }

    /** Adjust this stage's prerequisite reference after a stage was inserted.
    *@param index is the index at which the new stage was inserted; a prerequisite at or beyond
    * that index is moved up by one.
    */
    public void adjustForInsert(int index)
    {
      if (prerequisiteStage >= index)
      {
        prerequisiteStage++;
      }
    }

    /** Adjust this stage's prerequisite reference after a stage was deleted.
    *@param index is the index of the stage that was deleted.
    *@param prerequisite is the prerequisite of the deleted stage; a stage that referred to the
    * deleted stage inherits it.  A prerequisite beyond the deleted index is moved down by one.
    */
    public void adjustForDelete(int index, int prerequisite)
    {
      if (prerequisiteStage > index)
        prerequisiteStage--;
      else if (prerequisiteStage == index)
        prerequisiteStage = prerequisite;
    }

    /** Get the specification.
    *@return the specification.
    */
    public Specification getSpecification()
    {
      return specification;
    }

    /** Get the prerequisite stage number.
    *@return the prerequisite stage number, or -1 if there is none.
    */
    public int getPrerequisiteStage()
    {
      return prerequisiteStage;
    }

    /** Check whether this stage is an output connection.
    *@return true if it is an output connection.
    */
    public boolean getIsOutput()
    {
      return isOutput;
    }

    /** Get the connection name.
    *@return the connection name.
    */
    public String getConnectionName()
    {
      return connectionName;
    }

    /** Get the description.
    *@return the description.
    */
    public String getDescription()
    {
      return description;
    }
  }

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy