All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.manifoldcf.crawler.interfaces.PerformanceStatistics Maven / Gradle / Ivy

/* $Id: PerformanceStatistics.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.interfaces;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import org.apache.manifoldcf.crawler.system.Logging;
import java.util.*;

/** An instance of this class keeps a running, weighted average of how quickly each connection processes documents.
* This information is used to limit queuing per connection to something reasonable given the characteristics of the connection.
* Both public methods synchronize on the instance, and all access to the per-connection queues goes through them.
*/
public class PerformanceStatistics
{
  public static final String _rcsid = "@(#)$Id: PerformanceStatistics.java 988245 2010-08-23 18:39:35Z kwright $";

  /** Number of milliseconds in a minute; rates are expressed in documents per minute while times are recorded in milliseconds. */
  private static final double MILLISECONDS_PER_MINUTE = 60000.0;

  /** Smallest elapsed time (in milliseconds) that is used as a divisor when computing a rate. */
  private static final long MINIMUM_ELAPSED_TIME = 1L;

  /** This is the fetch rate that will be returned in the complete absence of any other information.  This represents a 'wild guess' of a sort,
  * used only at the very start of a job, and designed to not hopelessly overload the queue with stuff from one connection only. */
  protected static double DEFAULT_FETCH_RATE = 900.0;
  /** Per-document fetch time, in milliseconds, used to pre-fill a new averaging queue.  The cast truncates, so with the default
  * rate of 900 this is 66ms, which corresponds to a rate slightly above DEFAULT_FETCH_RATE. */
  protected static long DEFAULT_FETCH_TIME = (long)(((double)60000.0)/DEFAULT_FETCH_RATE);

  /** These are the weighting coefficients for the average, most recent record first.  They should all add up to 1.0.
  * The length of this array also determines how many records each averaging queue keeps. */
  protected static double[] weights = new double[]{0.5,0.25,0.125,0.0625,0.0625};

  /** This hash is keyed by the connection name, and has elements of type AveragingQueue */
  protected HashMap<String,AveragingQueue> connectionHash = new HashMap<String,AveragingQueue>();

  /** Constructor */
  public PerformanceStatistics()
  {
  }

  /** Note the successful completion of a set of documents using a single connection, and record the statistics for them.
  * A queue is created for the connection the first time it is seen.
  *@param connectionName is the name of the connection the documents were processed with.
  *@param documentSetSize is the number of documents in the completed set.
  *@param elapsedTime is the time, in milliseconds, it took to process the whole set.
  */
  public synchronized void noteDocumentsCompleted(String connectionName, int documentSetSize, long elapsedTime)
  {
    AveragingQueue queue = connectionHash.get(connectionName);
    if (queue == null)
    {
      queue = new AveragingQueue();
      connectionHash.put(connectionName,queue);
    }
    queue.addRecord(documentSetSize,elapsedTime);
  }

  /** Obtain current average document fetch rate (in documents per minute per connection).
  *@param connectionName is the name of the connection to report on.
  *@return the weighted average rate for the connection, or DEFAULT_FETCH_RATE if nothing has been recorded for it yet.
  */
  public synchronized double calculateConnectionFetchRate(String connectionName)
  {
    AveragingQueue queue = connectionHash.get(connectionName);
    if (queue == null)
    {
      // If there's no averaging queue, return a value that is consistent with wide-open performance
      return DEFAULT_FETCH_RATE;
    }
    return queue.calculateFetchRate();
  }

  /** This class keeps track of some depth of fetch history for an individual connection, and is used to calculate a
  * weighted average fetches-per-minute rate. */
  protected static class AveragingQueue
  {
    /** The internal structure of the averaging queue is a circular buffer, which gets initialized to the default value */
    protected AveragingRecord[] records;

    /** This is the current start pointer: the slot the next record will be written to, which is also the oldest slot. */
    protected int startIndex;

    /** Constructor.  Every slot starts out as a single document fetched in DEFAULT_FETCH_TIME milliseconds. */
    public AveragingQueue()
    {
      records = new AveragingRecord[weights.length];
      for (int i = 0; i < records.length; i++)
      {
        records[i] = new AveragingRecord(1,DEFAULT_FETCH_TIME);
      }
      startIndex = 0;
    }

    /** Add a record, overwriting the oldest one in the buffer.
    *@param setSize is the number of documents in the set.
    *@param elapsedTime is the time, in milliseconds, the set took to process.
    */
    public void addRecord(int setSize, long elapsedTime)
    {
      records[startIndex] = new AveragingRecord(setSize,elapsedTime);
      startIndex++;
      // Wrap around at the end of the circular buffer
      if (startIndex == records.length)
        startIndex = 0;
    }

    /** Calculate running-average fetch rate.
    *@return the sum, over the retained history, of each record's rate multiplied by its weight, in documents per minute.
    */
    public double calculateFetchRate()
    {
      // Walk backwards from the most recently written slot (the one just before startIndex), so that
      // weights[0] applies to the newest record, weights[1] to the one before it, and so on.
      double weightedRate = 0.0;
      int slot = startIndex;
      for (int i = 0; i < weights.length; i++)
      {
        if (slot == 0)
          slot = records.length;
        slot--;
        weightedRate += weights[i] * records[slot].calculateRate();
      }
      return weightedRate;
    }
  }

  /** This class contains the data for a single document set against the given connection */
  protected static class AveragingRecord
  {
    /** Number of documents in the set */
    protected int documentCount;
    /** Time, in milliseconds, the set took to process */
    protected long elapsedTime;

    /** Constructor.
    *@param documentCount is the number of documents in the set.
    *@param elapsedTime is the time, in milliseconds, the set took to process.
    */
    public AveragingRecord(int documentCount, long elapsedTime)
    {
      this.documentCount = documentCount;
      this.elapsedTime = elapsedTime;
    }

    /** Calculate the fetch rate this record represents.
    *@return the rate in documents per minute; always finite and never NaN.
    */
    public double calculateRate()
    {
      // A set that completes in under a millisecond is recorded with an elapsed time of 0.  Dividing by that
      // would yield Infinity (or NaN for an empty set), and a negative elapsed time would yield a negative rate;
      // any of those would corrupt the weighted average for as long as the record stays in the queue.
      // Treat anything shorter than one millisecond as one millisecond instead.
      long effectiveTime = Math.max(elapsedTime,MINIMUM_ELAPSED_TIME);
      return MILLISECONDS_PER_MINUTE * ((double)documentCount)/((double)effectiveTime);
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy