All downloads are free. Search and download functionality uses the official Maven repository.

org.opencms.search.documents.A_CmsVfsDocument Maven / Gradle / Ivy

Go to download

OpenCms is an enterprise-ready, easy to use website content management system based on Java and XML technology. Offering a complete set of features, OpenCms helps content managers worldwide to create and maintain beautiful websites fast and efficiently.

There is a newer version: 18.0
Show newest version
/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search.documents;

import org.opencms.file.CmsFile;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsResource;
import org.opencms.file.types.I_CmsResourceType;
import org.opencms.main.CmsException;
import org.opencms.main.CmsLog;
import org.opencms.main.OpenCms;
import org.opencms.search.CmsSearchIndex;
import org.opencms.search.I_CmsSearchDocument;
import org.opencms.search.extractors.CmsExtractionResult;
import org.opencms.search.extractors.I_CmsExtractionResult;
import org.opencms.search.fields.CmsSearchField;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.logging.Log;

/**
 * Base document factory class for a VFS {@link org.opencms.file.CmsResource},
 * just requires a specialized implementation of
 * {@link I_CmsDocumentFactory#extractContent(CmsObject, CmsResource, CmsSearchIndex)}
 * for text extraction from the binary document content.

* * @since 6.0.0 */ public abstract class A_CmsVfsDocument implements I_CmsDocumentFactory { /** The log object for this class. */ private static final Log LOG = CmsLog.getLog(A_CmsVfsDocument.class); /** Name of the document type. */ protected String m_name; /** The cache used for storing extracted documents. */ private CmsExtractionResultCache m_cache; /** * Creates a new instance of this lucene document factory.

* * @param name name of the documenttype */ public A_CmsVfsDocument(String name) { m_name = name; } /** * Creates a document factory lookup key for the given resource type name / MIME type configuration.

* * If the given mimeType is null, this indicates that the key should * match all VFS resource of the given resource type regardless of the MIME type.

* * @param type the resource type name to use * @param mimeType the MIME type to use * * @return a document factory lookup key for the given resource id / MIME type configuration */ public static String getDocumentKey(String type, String mimeType) { StringBuffer result = new StringBuffer(16); result.append(I_CmsSearchDocument.VFS_DOCUMENT_KEY_PREFIX); result.append('_'); result.append(type); if (mimeType != null) { result.append(':'); result.append(mimeType); } return result.toString(); } /** * Generates a new lucene document instance from contents of the given resource for the provided index.

* * @see org.opencms.search.documents.I_CmsDocumentFactory#createDocument(CmsObject, CmsResource, CmsSearchIndex) */ public I_CmsSearchDocument createDocument(CmsObject cms, CmsResource resource, CmsSearchIndex index) throws CmsException { // extract the content from the resource I_CmsExtractionResult content = null; if (index.isExtractingContent()) { // do full text content extraction only if required // check if caching is enabled for this document type CmsExtractionResultCache cache = getCache(); String cacheName = null; if ((cache != null) && (resource.getSiblingCount() > 1)) { // hard drive based caching only makes sense for resources that have siblings, // because the index will also store the content as a blob cacheName = cache.getCacheName( resource, isLocaleDependend() ? index.getLocaleForResource(cms, resource, null) : null, getName()); content = cache.getCacheObject(cacheName); } if (content == null) { // extraction result has not been found in the cache // compare "date of last modification of content" from Lucene index and OpenCms VFS // if this is identical, then the data from the Lucene index can be re-used I_CmsSearchDocument oldDoc = index.getDocument(CmsSearchField.FIELD_PATH, resource.getRootPath()); // first check if the document is already in the index if ((oldDoc != null) && (oldDoc.getFieldValueAsDate(CmsSearchField.FIELD_DATE_CONTENT) != null)) { long contentDateIndex = oldDoc.getFieldValueAsDate(CmsSearchField.FIELD_DATE_CONTENT).getTime(); // now compare the date with the date stored in the resource if (contentDateIndex == resource.getDateContent()) { // extract stored content blob from index content = CmsExtractionResult.fromBytes(oldDoc.getContentBlob()); } } } if (content == null) { // extraction result has not been attached to the resource try { content = extractContent(cms, resource, index); if (LOG.isDebugEnabled()) { LOG.debug("Extracting content for '" + resource.getRootPath() + "' successful."); } if ((cache != null) && 
(resource.getSiblingCount() > 1)) { // save extracted content to the cache cache.saveCacheObject(cacheName, content); } } catch (CmsIndexNoContentException e) { // there was no content found for the resource LOG.info( Messages.get().getBundle().key(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()) + " " + e.getMessage()); } catch (Throwable e) { // text extraction failed for document - continue indexing meta information only LOG.error( Messages.get().getBundle().key(Messages.ERR_TEXT_EXTRACTION_1, resource.getRootPath()), e); } } } // create the Lucene document according to the index field configuration return index.getFieldConfiguration().createDocument(cms, resource, index, content); } /** * @see org.opencms.search.documents.I_CmsDocumentFactory#getCache() */ public CmsExtractionResultCache getCache() { return m_cache; } /** * @see org.opencms.search.documents.I_CmsDocumentFactory#getDocumentKeys(java.util.List, java.util.List) */ public List getDocumentKeys(List resourceTypes, List mimeTypes) throws CmsException { List keys = new ArrayList(); if (resourceTypes.contains("*")) { List allTypes = new ArrayList(); for (Iterator i = OpenCms.getResourceManager().getResourceTypes().iterator(); i.hasNext();) { I_CmsResourceType resourceType = i.next(); allTypes.add(resourceType.getTypeName()); } resourceTypes = allTypes; } try { for (Iterator i = resourceTypes.iterator(); i.hasNext();) { String typeName = i.next(); for (Iterator j = mimeTypes.iterator(); j.hasNext();) { keys.add(getDocumentKey(typeName, j.next())); } if (mimeTypes.isEmpty()) { keys.add(getDocumentKey(typeName, null)); } } } catch (Exception exc) { throw new CmsException(Messages.get().container(Messages.ERR_CREATE_DOC_KEY_0), exc); } return keys; } /** * @see org.opencms.search.documents.I_CmsDocumentFactory#getName() */ public String getName() { return m_name; } /** * @see org.opencms.search.documents.I_CmsDocumentFactory#setCache(org.opencms.search.documents.CmsExtractionResultCache) */ public 
void setCache(CmsExtractionResultCache cache) { m_cache = cache; } /** * Logs content extraction for the specified resource and index.

* * @param resource the resource to log content extraction for * @param index the search index to log content extraction for */ protected void logContentExtraction(CmsResource resource, CmsSearchIndex index) { if (LOG.isDebugEnabled()) { LOG.debug( Messages.get().getBundle().key( Messages.LOG_EXTRACT_CONTENT_2, resource.getRootPath(), index.getName())); } } /** * Upgrades the given resource to a {@link CmsFile} with content.

* * @param cms the current users OpenCms context * @param resource the resource to upgrade * * @return the given resource upgraded to a {@link CmsFile} with content * * @throws CmsException if the resource could not be read * @throws CmsIndexNoContentException if the resource has no content */ protected CmsFile readFile(CmsObject cms, CmsResource resource) throws CmsException, CmsIndexNoContentException { CmsFile file = cms.readFile(resource); if (file.getLength() <= 0) { throw new CmsIndexNoContentException( Messages.get().container(Messages.ERR_NO_CONTENT_1, resource.getRootPath())); } return file; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy