All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.manifoldcf.crawler.system.QueuedDocumentSet Maven / Gradle / Ivy

/* $Id: QueuedDocumentSet.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.system;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.interfaces.*;
import java.util.*;

/** This class is what's actually queued.  It represents an array or set of QueuedDocument objects, all of which
* will be processed by a single worker thread in bulk.  The list handed to the constructor is copied into an array
* and every field is final, so the membership of the set is fixed at construction time.
*/
public class QueuedDocumentSet
{
  public static final String _rcsid = "@(#)$Id: QueuedDocumentSet.java 988245 2010-08-23 18:39:35Z kwright $";

  /** Switch for the per-document assignment rating debug message.  It is off, which matches the
  * previous hard-coded "false" guard; change to true to have calculateAssignmentRating() log each rating
  * whenever scheduling debug logging is enabled. */
  private static final boolean LOG_ASSIGNMENT_RATINGS = false;

  /** This is the array of QueuedDocument objects. */
  protected final QueuedDocument[] documents;
  /** The job description that applies to this document set.  There is no guarantee that
  * this won't change before we get around to processing the document; therefore any
  * job-based metadata changes will also need to go through the queue mechanism. */
  protected final IJobDescription jobDescription;
  /** The connection description that applies to this document set. */
  protected final IRepositoryConnection connection;

  /** Constructor.
  *@param documents is the list of QueuedDocument objects accumulated for a single connection.  Its contents
  * are copied into a new array, so later changes to the list do not affect this set.
  *@param jobDescription is the job description that applies to this document set.
  *@param connection is the repository connection that applies to this document set.
  */
  public QueuedDocumentSet(List documents, IJobDescription jobDescription, IRepositoryConnection connection)
  {
    // The parameter is a raw List (kept for caller compatibility), so toArray() is statically typed
    // as Object[] and needs the cast; the runtime array is a QueuedDocument[] already.
    this.documents = (QueuedDocument[])documents.toArray(new QueuedDocument[0]);
    this.jobDescription = jobDescription;
    this.connection = connection;
  }

  /** Get the number of documents.
  *@return the number.
  */
  public int getCount()
  {
    return documents.length;
  }

  /** Get the nth document.
  *@param index is the document number, starting at zero.
  *@return the document.
  */
  public QueuedDocument getDocument(int index)
  {
    return documents[index];
  }

  /** Log that we are beginning the processing of a set of documents, and tell the queue tracker
  * about each document's bins.
  *@param queueTracker is the tracker whose beginProcessing() method is called once per document.
  */
  public void beginProcessing(QueueTracker queueTracker)
  {
    for (QueuedDocument d : documents)
    {
      if (Logging.scheduling.isDebugEnabled())
      {
        String bins = describeBins(d);
        Logging.scheduling.debug("Handing document '"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins ["+bins+"] to worker thread");
      }

      queueTracker.beginProcessing(d.getBinNames());
    }
  }

  /** Log that we are done processing a set of documents, and tell the queue tracker
  * about each document's bins.
  *@param queueTracker is the tracker whose endProcessing() method is called once per document.
  */
  public void endProcessing(QueueTracker queueTracker)
  {
    for (QueuedDocument d : documents)
    {
      if (Logging.scheduling.isDebugEnabled())
      {
        String bins = describeBins(d);
        Logging.scheduling.debug("Worker thread done document '"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins ["+bins+"]");
      }

      queueTracker.endProcessing(d.getBinNames());
    }
  }

  /** Calculate a rating for this set.  The rating is the arithmetic mean of the per-document ratings
  * returned by the calculator.  For a set with no documents the division is 0.0/0, so the result is NaN.
  *@param overlapCalculator is the calculator object.
  *@return the rating.
  */
  public double calculateAssignmentRating(QueueTracker overlapCalculator)
  {
    // This rating is the average across all documents in the set.
    double ratingAccumulator = 0.0;
    for (QueuedDocument d : documents)
    {
      double rating = overlapCalculator.calculateAssignmentRating(d.getBinNames(),connection);
      if (LOG_ASSIGNMENT_RATINGS && Logging.scheduling.isDebugEnabled())
      {
        String bins = describeBins(d);
        Logging.scheduling.debug("Document '"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins ["+bins+"] given assignment rating "+Double.toString(rating));
      }

      ratingAccumulator += rating;
    }

    return ratingAccumulator / (double)documents.length;
  }

  /** Get the job description.
  *@return the job description.
  */
  public IJobDescription getJobDescription()
  {
    return jobDescription;
  }

  /** Get the connection.
  *@return the connection.
  */
  public IRepositoryConnection getConnection()
  {
    return connection;
  }

  /** Render a document's bin names for use in the scheduling debug messages.
  *@param document is the document whose bin names are rendered.
  *@return every bin name followed by a single space, in the order getBinNames() yields them;
  * the empty string if there are no bin names.
  */
  private static String describeBins(QueuedDocument document)
  {
    StringBuilder sb = new StringBuilder();
    for (String binName : document.getBinNames())
    {
      sb.append(binName).append(" ");
    }
    return sb.toString();
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy