gov.sandia.cognition.text.document.extractor.DocumentExtractor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
/*
* File: DocumentExtractor.java
* Authors: Justin Basilico
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright January 19, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government. Export
* of this program may require a license from the United States Government.
* See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.text.document.extractor;
import gov.sandia.cognition.text.document.Document;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URLConnection;
/**
* Interface for extracting documents from files.
*
* @author Justin Basilico
* @since 3.0
*/
public interface DocumentExtractor
{
/**
* Determines if the given file can be extracted by this extractor.
*
* @param file
* The file to extract.
* @return
* True if this extractor can extract the file and false otherwise.
* @throws java.io.IOException
* If there is an IO error.
*/
public boolean canExtract(
final File file)
throws IOException;
/**
* Determines if the given file can be extracted by this extractor.
*
* @param uri
* The URI of the file to extract.
* @return
* True if this extractor can extract the file and false otherwise.
* @throws java.io.IOException
* If there is an IO error.
*/
public boolean canExtract(
final URI uri)
throws IOException;
/**
* Determines if the given file can be extracted by this extractor.
*
* @param connection
* The connection to the file to extract.
* @return
* True if this extractor can extract the file and false otherwise.
* @throws java.io.IOException
* If there is an IO error.
*/
public boolean canExtract(
final URLConnection connection)
throws IOException;
/**
* Attempts to extract all of the documents from the given file.
*
* @param file
* The file to extract.
* @return
* The list of documents extracted from the given file.
* @throws DocumentExtractionException
* If there is an error extracting data from the file.
* @throws java.io.IOException
* If there is an IO error.
*/
public Iterable extends Document> extractAll(
final File file)
throws DocumentExtractionException, IOException;
/**
* Attempts to extract all of the documents from the given file.
*
* @param uri
* The URI of the file to extract.
* @return
* The list of documents extracted from the given file.
* @throws DocumentExtractionException
* If there is an error extracting data from the file.
* @throws java.io.IOException
* If there is an IO error.
*/
public Iterable extends Document> extractAll(
final URI uri)
throws DocumentExtractionException, IOException;
/**
* Attempts to extract all of the documents from the given file.
*
* @param connection
* The connection to the file to extract.
* @return
* The list of documents extracted from the given file.
* @throws DocumentExtractionException
* If there is an error extracting data from the file.
* @throws java.io.IOException
* If there is an IO error.
*/
public Iterable extends Document> extractAll(
final URLConnection connection)
throws DocumentExtractionException, IOException;
}