// org.opencms.search.documents.I_CmsDocumentFactory Maven / Gradle / Ivy
// Show all versions of opencms-test Show documentation
/*
* This library is part of OpenCms -
* the Open Source Content Management System
*
* Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* For further information about Alkacon Software GmbH & Co. KG, please see the
* company website: http://www.alkacon.com
*
* For further information about OpenCms, please see the
* project website: http://www.opencms.org
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.opencms.search.documents;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsResource;
import org.opencms.main.CmsException;
import org.opencms.search.CmsSearchIndex;
import org.opencms.search.I_CmsSearchDocument;
import java.util.List;
/**
 * Used to create index Lucene Documents for OpenCms resources,
 * controls the text extraction algorithm used for a specific OpenCms resource type / MIME type combination.
 *
 * <p>The configuration of the search index is defined in {@code opencms-search.xml}.
 * There you can associate a combination of OpenCms resource types and MIME types to an instance
 * of this factory. This rather complex configuration is required because only the combination of
 * OpenCms resource type and MIME type can decide what to use for search indexing.
 * For example, if the OpenCms resource type is {@code plain},
 * the extraction algorithm for MIME types {@code .html} and {@code .txt} must be different.
 * On the other hand, the MIME type {@code .html} in OpenCms can be almost any resource type,
 * like {@code xmlpage}, {@code xmlcontent} or even {@code jsp}.
 *
 * @since 6.0.0
 */
public interface I_CmsDocumentFactory extends I_CmsSearchExtractor {

    /**
     * Creates the Lucene Document for the given VFS resource and the given search index.
     *
     * <p>This triggers the indexing process for the given VFS resource according to the configuration
     * of the provided index.
     *
     * <p>The provided index resource contains the basic contents to index.
     * The provided search index contains the configuration what to index, such as the locale and
     * possible special field mappings.
     *
     * @param cms the OpenCms user context used to access the OpenCms VFS
     * @param resource the search index resource to create the Lucene document from
     * @param index the search index to create the Document for
     *
     * @return the Search Document for the given index resource and the given search index
     *
     * @throws CmsException if something goes wrong
     *
     * @see org.opencms.search.fields.CmsSearchFieldConfiguration#createDocument(CmsObject, CmsResource, CmsSearchIndex, org.opencms.search.extractors.I_CmsExtractionResult)
     */
    I_CmsSearchDocument createDocument(CmsObject cms, CmsResource resource, CmsSearchIndex index) throws CmsException;

    /**
     * Returns the disk based cache used to store the raw extraction results.
     *
     * <p>In case {@code null} is returned, then result caching is not supported for this factory.
     *
     * @return the disk based cache used to store the raw extraction results,
     *         or {@code null} if result caching is not supported
     */
    CmsExtractionResultCache getCache();

    /**
     * Returns the list of accepted keys for the resource types that can be indexed using this document factory.
     *
     * <p>The result List contains String objects.
     * This String is later matched against {@link A_CmsVfsDocument#getDocumentKey(String, String)} to find
     * the corresponding {@link I_CmsDocumentFactory} for a resource to index.
     *
     * <p>The list of accepted resource types may contain a catch-all entry "*";
     * in this case, a list for all possible resource types is returned,
     * calculated by a logic depending on the document handler class.
     *
     * @param resourceTypes list of accepted resource types
     * @param mimeTypes list of accepted mime types
     *
     * @return the list of accepted keys for the resource types that can be indexed using this document factory (String objects)
     *
     * @throws CmsException if something goes wrong
     */
    List<String> getDocumentKeys(List<String> resourceTypes, List<String> mimeTypes) throws CmsException;

    /**
     * Returns the name of this document type factory.
     *
     * @return the name of this document type factory
     */
    String getName();

    /**
     * Returns {@code true} if this document factory is locale dependent.
     *
     * @return {@code true} if this document factory is locale dependent
     */
    boolean isLocaleDependend();

    /**
     * Returns {@code true} if result caching is supported for this factory.
     *
     * @return {@code true} if result caching is supported for this factory
     */
    boolean isUsingCache();

    /**
     * Sets the disk based cache used to store the raw extraction results.
     *
     * <p>This should only be used for factories where {@link #isUsingCache()} returns {@code true}.
     *
     * @param cache the disk based cache used to store the raw extraction results
     */
    void setCache(CmsExtractionResultCache cache);
}