All downloads are free. The search and download features use the official Maven repository.

org.apache.manifoldcf.crawler.interfaces.IFingerprintActivity Maven / Gradle / Ivy

/* $Id: IFingerprintActivity.java 988245 2010-08-23 18:39:35Z kwright $ */

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.manifoldcf.crawler.interfaces;

import org.apache.manifoldcf.core.interfaces.*;
import org.apache.manifoldcf.agents.interfaces.*;
import java.io.*;
import java.util.*;

/** Activity contract covering document fingerprinting and mime type acceptance.
* Each method answers whether a document, judged by one of its properties, is indexable.
*/
public interface IFingerprintActivity
{
  // Interface fields are implicitly public, static and final.
  String _rcsid = "@(#)$Id: IFingerprintActivity.java 988245 2010-08-23 18:39:35Z kwright $";

  /** Decide whether a document carrying the given date is indexable.  Participating repository connectors
  * call this to pre-filter unusable documents before they are passed to the output connector.
  *@param date is the document's date; null is permitted.
  *@return true when a document with that date is indexable by this connector.
  */
  boolean checkDateIndexable(Date date) throws ManifoldCFException, ServiceInterruption;

  /** Decide whether a mime type is indexable.  Participating repository connectors call this to
  * pre-filter unusable documents before they are passed to the output connector.
  *@param mimeType is the document's mime type.
  *@return true when that mime type is indexable by this connector.
  */
  boolean checkMimeTypeIndexable(String mimeType) throws ManifoldCFException, ServiceInterruption;

  /** Decide whether a document is indexable by the currently specified output connector.
  *@param localFile is the local copy of the file to examine.
  *@return true when the document is indexable.
  */
  boolean checkDocumentIndexable(File localFile) throws ManifoldCFException, ServiceInterruption;

  /** Decide whether a document of the given length is indexable by the currently specified output connector.
  *@param length is the length of the document.
  *@return true when the document is indexable.
  */
  boolean checkLengthIndexable(long length) throws ManifoldCFException, ServiceInterruption;

  /** Decide ahead of time whether a document's URL is indexable by this connector.  Participating
  * repository connectors call this to help filter out documents that are not worth indexing.
  *@param url is the document's URL.
  *@return true when the file is indexable.
  */
  boolean checkURLIndexable(String url) throws ManifoldCFException, ServiceInterruption;

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy