All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencms.search.CmsSearchManager Maven / Gradle / Ivy

/*
 * This library is part of OpenCms -
 * the Open Source Content Management System
 *
 * Copyright (c) Alkacon Software GmbH & Co. KG (http://www.alkacon.com)
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * For further information about Alkacon Software GmbH & Co. KG, please see the
 * company website: http://www.alkacon.com
 *
 * For further information about OpenCms, please see the
 * project website: http://www.opencms.org
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

package org.opencms.search;

import org.opencms.configuration.CmsConfigurationException;
import org.opencms.db.CmsDriverManager;
import org.opencms.db.CmsPublishedResource;
import org.opencms.db.CmsResourceState;
import org.opencms.file.CmsObject;
import org.opencms.file.CmsProject;
import org.opencms.file.CmsResource;
import org.opencms.file.CmsResourceFilter;
import org.opencms.file.types.CmsResourceTypeXmlContainerPage;
import org.opencms.file.types.CmsResourceTypeXmlContent;
import org.opencms.i18n.CmsMessageContainer;
import org.opencms.jsp.CmsJspTagContainer;
import org.opencms.loader.CmsLoaderException;
import org.opencms.main.CmsEvent;
import org.opencms.main.CmsException;
import org.opencms.main.CmsIllegalArgumentException;
import org.opencms.main.CmsIllegalStateException;
import org.opencms.main.CmsLog;
import org.opencms.main.I_CmsEventListener;
import org.opencms.main.OpenCms;
import org.opencms.main.OpenCmsSolrHandler;
import org.opencms.relations.CmsRelation;
import org.opencms.relations.CmsRelationFilter;
import org.opencms.report.CmsLogReport;
import org.opencms.report.I_CmsReport;
import org.opencms.scheduler.I_CmsScheduledJob;
import org.opencms.search.documents.A_CmsVfsDocument;
import org.opencms.search.documents.CmsExtractionResultCache;
import org.opencms.search.documents.I_CmsDocumentFactory;
import org.opencms.search.documents.I_CmsTermHighlighter;
import org.opencms.search.fields.CmsLuceneField;
import org.opencms.search.fields.CmsLuceneFieldConfiguration;
import org.opencms.search.fields.CmsSearchField;
import org.opencms.search.fields.CmsSearchFieldConfiguration;
import org.opencms.search.fields.CmsSearchFieldMapping;
import org.opencms.search.solr.CmsSolrConfiguration;
import org.opencms.search.solr.CmsSolrFieldConfiguration;
import org.opencms.search.solr.CmsSolrIndex;
import org.opencms.search.solr.CmsSolrIndexWriter;
import org.opencms.search.solr.spellchecking.CmsSolrSpellchecker;
import org.opencms.security.CmsRole;
import org.opencms.security.CmsRoleViolationException;
import org.opencms.util.A_CmsModeStringEnumeration;
import org.opencms.util.CmsStringUtil;
import org.opencms.util.CmsUUID;
import org.opencms.util.CmsWaitHandle;

import java.io.File;
import java.io.IOException;
import java.nio.file.FileSystems;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.logging.Log;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;

/**
 * Implements the general management and configuration of the search and
 * indexing facilities in OpenCms.

* * @since 6.0.0 */ public class CmsSearchManager implements I_CmsScheduledJob, I_CmsEventListener { /** * Enumeration class for force unlock types.

*/
    public static final class CmsSearchForceUnlockMode extends A_CmsModeStringEnumeration {

        /** Force unlock type "always". */
        public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode("always");

        /** Force unlock type "never". */
        public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode("never");

        /** Force unlock type "only full". */
        public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode("onlyfull");

        /** Serializable version id. */
        private static final long serialVersionUID = 74746076708908673L;

        /**
         * Creates a new force unlock type with the given name.
         *
         * @param mode the mode id to use
         */
        protected CmsSearchForceUnlockMode(String mode) {
            super(mode);
        }

        /**
         * Returns the lock type for the given type value.
         *
         * Every value that is neither the name of {@link #ALWAYS} nor of {@link #NEVER},
         * including <code>null</code>, is mapped to {@link #ONLYFULL}.
         *
         * @param type the type value to get the lock type for (may be <code>null</code>)
         *
         * @return the lock type for the given type value
         */
        public static CmsSearchForceUnlockMode valueOf(String type) {
            if (type == null) {
                // treat a missing value like any other unknown value instead of failing with a NullPointerException
                return ONLYFULL;
            }
            if (type.equals(ALWAYS.toString())) {
                return ALWAYS;
            }
            if (type.equals(NEVER.toString())) {
                return NEVER;
            }
            return ONLYFULL;
        }
    }

    /**
     * Handles offline index generation.

*/ protected class CmsSearchOfflineHandler implements I_CmsEventListener { /** Indicates if the event handlers for the offline search have been already registered. */ private boolean m_isEventRegistered; /** The list of resources to index. */ private List m_resourcesToIndex; /** * Initializes the offline index handler.

*/ protected CmsSearchOfflineHandler() { m_resourcesToIndex = new ArrayList(); } /** * Implements the event listener of this class.

* * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) */ @SuppressWarnings("unchecked") public void cmsEvent(CmsEvent event) { switch (event.getType()) { case I_CmsEventListener.EVENT_PROPERTY_MODIFIED: case I_CmsEventListener.EVENT_RESOURCE_CREATED: case I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED: case I_CmsEventListener.EVENT_RESOURCE_MODIFIED: Object change = event.getData().get(I_CmsEventListener.KEY_CHANGE); if ((change != null) && change.equals(new Integer(CmsDriverManager.NOTHING_CHANGED))) { // skip lock & unlock return; } // skip indexing if flag is set in event Object skip = event.getData().get(I_CmsEventListener.KEY_SKIPINDEX); if (skip != null) { return; } // a resource has been modified - offline indexes require (re)indexing List resources = Collections.singletonList( (CmsResource)event.getData().get(I_CmsEventListener.KEY_RESOURCE)); reIndexResources(resources); break; case I_CmsEventListener.EVENT_RESOURCE_DELETED: List eventResources = (List)event.getData().get( I_CmsEventListener.KEY_RESOURCES); List resourcesToDelete = new ArrayList(eventResources); for (CmsResource res : resourcesToDelete) { if (res.getState().isNew()) { // if the resource is new and a delete action was performed // --> set the state of the resource to deleted res.setState(CmsResourceState.STATE_DELETED); } } reIndexResources(resourcesToDelete); break; case I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED: case I_CmsEventListener.EVENT_RESOURCE_MOVED: case I_CmsEventListener.EVENT_RESOURCE_COPIED: case I_CmsEventListener.EVENT_RESOURCES_MODIFIED: // a list of resources has been modified - offline indexes require (re)indexing reIndexResources((List)event.getData().get(I_CmsEventListener.KEY_RESOURCES)); break; default: // no operation } } /** * Adds a list of {@link CmsPublishedResource} objects to be indexed.

* * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to be indexed */ protected synchronized void addResourcesToIndex(List resourcesToIndex) { m_resourcesToIndex.addAll(resourcesToIndex); } /** * Returns the list of {@link CmsPublishedResource} objects to index.

* * @return the resources to index */ protected List getResourcesToIndex() { List result; synchronized (this) { result = m_resourcesToIndex; m_resourcesToIndex = new ArrayList(); } try { CmsObject cms = m_adminCms; CmsProject offline = getOfflineIndexProject(); if (offline != null) { // switch to the offline project if available cms = OpenCms.initCmsObject(m_adminCms); cms.getRequestContext().setCurrentProject(offline); } findRelatedContainerPages(cms, result); } catch (CmsException e) { LOG.error(e.getLocalizedMessage(), e); } return result; } /** * Initializes this offline search handler, registering the event handlers if required.

*/ protected void initialize() { if (m_offlineIndexes.size() > 0) { // there is at least one offline index configured if ((m_offlineIndexThread == null) || !m_offlineIndexThread.isAlive()) { // create the offline indexing thread m_offlineIndexThread = new CmsSearchOfflineIndexThread(this); // start the offline index thread m_offlineIndexThread.start(); } } else { if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { // no offline indexes but thread still running, stop the thread m_offlineIndexThread.shutDown(); m_offlineIndexThread = null; } } // do this only in case there are offline indexes configured if (!m_isEventRegistered && (m_offlineIndexes.size() > 0)) { m_isEventRegistered = true; // register this object as event listener OpenCms.addCmsEventListener( this, new int[] { I_CmsEventListener.EVENT_PROPERTY_MODIFIED, I_CmsEventListener.EVENT_RESOURCE_CREATED, I_CmsEventListener.EVENT_RESOURCE_AND_PROPERTIES_MODIFIED, I_CmsEventListener.EVENT_RESOURCE_MODIFIED, I_CmsEventListener.EVENT_RESOURCES_AND_PROPERTIES_MODIFIED, I_CmsEventListener.EVENT_RESOURCE_MOVED, I_CmsEventListener.EVENT_RESOURCE_DELETED, I_CmsEventListener.EVENT_RESOURCE_COPIED, I_CmsEventListener.EVENT_RESOURCES_MODIFIED}); } } /** * Updates all offline indexes for the given list of {@link CmsResource} objects.

* * @param resources a list of {@link CmsResource} objects to update in the offline indexes */ protected synchronized void reIndexResources(List resources) { List resourcesToIndex = new ArrayList(resources.size()); for (CmsResource res : resources) { CmsPublishedResource pubRes = new CmsPublishedResource(res); resourcesToIndex.add(pubRes); } if (resourcesToIndex.size() > 0) { // add the resources found to the offline index thread addResourcesToIndex(resourcesToIndex); } } } /** * The offline indexer thread runs periodically and indexes all resources added by the event handler.

*/ protected class CmsSearchOfflineIndexThread extends Thread { /** The event handler that triggers this thread. */ CmsSearchOfflineHandler m_handler; /** Indicates if this thread is still alive. */ boolean m_isAlive; /** Indicates that an index update thread is currently running. */ private boolean m_isUpdating; /** If true a manual update (after file upload) was triggered. */ private boolean m_updateTriggered; /** The wait handle used for signalling when the worker thread has finished. */ private CmsWaitHandle m_waitHandle = new CmsWaitHandle(); /** * Constructor.

* * @param handler the offline index event handler */ protected CmsSearchOfflineIndexThread(CmsSearchOfflineHandler handler) { super("OpenCms: Offline Search Indexer"); m_handler = handler; } /** * Gets the wait handle used for signalling when the worker thread has finished. * * @return the wait handle **/ public CmsWaitHandle getWaitHandle() { return m_waitHandle; } /** * @see java.lang.Thread#interrupt() */ @Override public void interrupt() { super.interrupt(); m_updateTriggered = true; } /** * @see java.lang.Thread#run() */ @Override public void run() { // create a log report for the output I_CmsReport report = new CmsLogReport(m_adminCms.getRequestContext().getLocale(), CmsSearchManager.class); long offlineUpdateFrequency = getOfflineUpdateFrequency(); m_updateTriggered = false; try { while (m_isAlive) { if (!m_updateTriggered) { try { sleep(offlineUpdateFrequency); } catch (InterruptedException e) { // continue the thread after interruption if (!m_isAlive) { // the thread has been shut down while sleeping continue; } if (offlineUpdateFrequency != getOfflineUpdateFrequency()) { // offline update frequency change - clear interrupt status offlineUpdateFrequency = getOfflineUpdateFrequency(); } LOG.info(e.getLocalizedMessage(), e); } } if (m_isAlive) { // set update trigger to false since we do the update now m_updateTriggered = false; // get list of resource to update List resourcesToIndex = getResourcesToIndex(); if (resourcesToIndex.size() > 0) { // only start indexing if there is at least one resource startOfflineUpdateThread(report, resourcesToIndex); } else { getWaitHandle().release(); } // this is just called to clear the interrupt status of the thread interrupted(); } } } finally { // make sure that live status is reset in case of Exceptions m_isAlive = false; } } /** * @see java.lang.Thread#start() */ @Override public synchronized void start() { m_isAlive = true; super.start(); } /** * Obtains the list of resource to update in the offline index, * then 
optimizes the list by removing duplicate entries.

* * @return the list of resource to update in the offline index */ protected List getResourcesToIndex() { List resourcesToIndex = m_handler.getResourcesToIndex(); List result = new ArrayList(resourcesToIndex.size()); // Reverse to always keep the last list entries Collections.reverse(resourcesToIndex); for (CmsPublishedResource pubRes : resourcesToIndex) { boolean addResource = true; for (CmsPublishedResource resRes : result) { if (pubRes.equals(resRes) && (pubRes.getState() == resRes.getState()) && (pubRes.getMovedState() == resRes.getMovedState()) && pubRes.getRootPath().equals(resRes.getRootPath())) { // resource already in the update list addResource = false; break; } } if (addResource) { result.add(pubRes); } } Collections.reverse(result); return changeStateOfMoveOriginsToDeleted(result); } /** * Shuts down this offline index thread.

*/ protected void shutDown() { m_isAlive = false; interrupt(); if (m_isUpdating) { long waitTime = getOfflineUpdateFrequency() / 2; int waitSteps = 0; do { try { // wait half the time of the offline index frequency for the thread to finish Thread.sleep(waitTime); } catch (InterruptedException e) { // continue LOG.info(e.getLocalizedMessage(), e); } waitSteps++; // wait 5 times then stop waiting } while ((waitSteps < 5) && m_isUpdating); } } /** * Updates the offline search indexes for the given list of resources.

* * @param report the report to write the index information to * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index */ protected void startOfflineUpdateThread(I_CmsReport report, List resourcesToIndex) { CmsSearchOfflineIndexWorkThread thread = new CmsSearchOfflineIndexWorkThread(report, resourcesToIndex); long startTime = System.currentTimeMillis(); long waitTime = getOfflineUpdateFrequency() / 2; if (LOG.isDebugEnabled()) { LOG.debug( Messages.get().getBundle().key( Messages.LOG_OI_UPDATE_START_1, Integer.valueOf(resourcesToIndex.size()))); } m_isUpdating = true; thread.start(); do { try { // wait half the time of the offline index frequency for the thread to finish thread.join(waitTime); } catch (InterruptedException e) { // continue LOG.info(e.getLocalizedMessage(), e); } if (thread.isAlive()) { LOG.warn( Messages.get().getBundle().key( Messages.LOG_OI_UPDATE_LONG_2, Integer.valueOf(resourcesToIndex.size()), Long.valueOf(System.currentTimeMillis() - startTime))); } } while (thread.isAlive()); m_isUpdating = false; if (LOG.isDebugEnabled()) { LOG.debug( Messages.get().getBundle().key( Messages.LOG_OI_UPDATE_FINISH_2, Integer.valueOf(resourcesToIndex.size()), Long.valueOf(System.currentTimeMillis() - startTime))); } } /** * Helper method which changes the states of resources which are to be indexed but have the wrong path to 'deleted'. 
* This is needed to deal with moved resources, since the documents with the old paths must be removed from the index, * * @param resourcesToIndex the resources to index * * @return the resources to index, but resource states are set to 'deleted' for resources with outdated paths */ private List changeStateOfMoveOriginsToDeleted( List resourcesToIndex) { Map lastValidPaths = new HashMap(); for (CmsPublishedResource resource : resourcesToIndex) { if (resource.getState().isDeleted()) { // we don't want the last path to be from a deleted resource continue; } lastValidPaths.put(resource.getStructureId(), resource.getRootPath()); } List result = new ArrayList(); for (CmsPublishedResource resource : resourcesToIndex) { if (resource.getState().isDeleted()) { result.add(resource); continue; } String lastValidPath = lastValidPaths.get(resource.getStructureId()); if (resource.getRootPath().equals(lastValidPath) || resource.getStructureId().isNullUUID()) { result.add(resource); } else { result.add( new CmsPublishedResource( resource.getStructureId(), resource.getResourceId(), resource.getPublishTag(), resource.getRootPath(), resource.getType(), resource.isFolder(), CmsResource.STATE_DELETED, // make sure index entry with outdated path is deleted resource.getSiblingCount())); } } return result; } } /** * An offline index worker Thread runs each time for every offline index update action.

* * This was decoupled from the main {@link CmsSearchOfflineIndexThread} in order to avoid * problems if a single operation "hangs" the Thread.

*/
    protected class CmsSearchOfflineIndexWorkThread extends Thread {

        /** The report to write the index information to. */
        I_CmsReport m_report;

        /** The list of {@link CmsPublishedResource} objects to index. */
        List<CmsPublishedResource> m_resourcesToIndex;

        /**
         * Updates the offline search indexes for the given list of resources.
         *
         * @param report the report to write the index information to
         * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index
         */
        protected CmsSearchOfflineIndexWorkThread(I_CmsReport report, List<CmsPublishedResource> resourcesToIndex) {
            super("OpenCms: Offline Search Index Worker");
            m_report = report;
            m_resourcesToIndex = resourcesToIndex;
        }

        /**
         * Runs the offline index update, then releases the wait handle of the offline index thread, if there is one.
         *
         * @see java.lang.Thread#run()
         */
        @Override
        public void run() {
            updateIndexOffline(m_report, m_resourcesToIndex);
            // read the field once: it may be reset to null by another thread between check and use
            CmsSearchOfflineIndexThread indexThread = m_offlineIndexThread;
            if (indexThread != null) {
                indexThread.getWaitHandle().release();
            }
        }
    }

    /** This needs to be a fair lock to preserve order of threads accessing the search manager. */
    private static final ReentrantLock SEARCH_MANAGER_LOCK = new ReentrantLock(true);

    /** The default value used for generating search result excerpts (1024 chars). */
    public static final int DEFAULT_EXCERPT_LENGTH = 1024;

    /** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
    public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;

    /** Default for the maximum number of modifications before a commit in the search index is triggered (500). */
    public static final int DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT = 500;

    /** The default update frequency for offline indexes (15000 msec = 15 sec). */
    public static final int DEFAULT_OFFLINE_UPDATE_FREQNENCY = 15000;

    /** The default maximal wait time for re-indexing after editing a content (30000 msec = 30 sec). */
    public static final int DEFAULT_MAX_INDEX_WAITTIME = 30000;

    /** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
    public static final int DEFAULT_TIMEOUT = 60000;

    /** Scheduler parameter: Update only a specified list of indexes. */
    public static final String JOB_PARAM_INDEXLIST = "indexList";

    /** Scheduler parameter: Write the output of the update to the logfile. */
    public static final String JOB_PARAM_WRITELOG = "writeLog";

    /** Prefix for Lucene default analyzers package (<code>org.apache.lucene.analysis.core.</code>). */
    public static final String LUCENE_ANALYZER = "org.apache.lucene.analysis.core.";

    /** The log object for this class. */
    protected static final Log LOG = CmsLog.getLog(CmsSearchManager.class);

    /** The administrator OpenCms user context to access OpenCms VFS resources. */
    protected CmsObject m_adminCms;

    /** The list of indexes that are configured for offline index mode. */
    protected List<CmsSearchIndex> m_offlineIndexes;

    /** The thread used of offline indexing. */
    protected CmsSearchOfflineIndexThread m_offlineIndexThread;

    /** Configured analyzers for languages using &lt;analyzer&gt;, keyed by locale. */
    private HashMap<Locale, CmsSearchAnalyzer> m_analyzers;

    /** Stores the offline update frequency while indexing is paused. */
    private long m_configuredOfflineIndexingFrequency;

    /** The Solr core container. */
    private CoreContainer m_coreContainer;

    /** A list of document factory configurations. */
    private List<CmsSearchDocumentType> m_documentTypeConfigs;

    /** A map of document factories keyed by their matching Cms resource types and/or mimetypes. */
    private Map<String, I_CmsDocumentFactory> m_documentTypes;

    /** The max age for extraction results to remain in the cache. */
    private float m_extractionCacheMaxAge;

    /** The cache for the extraction results. */
    private CmsExtractionResultCache m_extractionResultCache;

    /** Contains the available field configurations, keyed by their name. */
    private Map<String, CmsSearchFieldConfiguration> m_fieldConfigurations;

    /** The force unlock type. */
    private CmsSearchForceUnlockMode m_forceUnlockMode;

    /** The class used to highlight the search terms in the excerpt of a search result. */
    private I_CmsTermHighlighter m_highlighter;

    /** A list of search indexes. */
    private List<CmsSearchIndex> m_indexes;

    /** Seconds to wait for an index lock. */
    private int m_indexLockMaxWaitSeconds = 10;

    /** Configured index sources, keyed by their name. */
    private Map<String, CmsSearchIndexSource> m_indexSources;

    /** The max. char. length of the excerpt in the search result. */
    private int m_maxExcerptLength;

    /** The maximum number of modifications before a commit in the search index is triggered. */
    private int m_maxModificationsBeforeCommit;

    /** The offline index search handler. */
    private CmsSearchOfflineHandler m_offlineHandler;

    /** The update frequency of the offline indexer in milliseconds. */
    private long m_offlineUpdateFrequency;

    /** The maximal time to wait for re-indexing after a content is edited (in milliseconds). */
    private long m_maxIndexWaitTime;

    /** Path to index files below WEB-INF/. */
    private String m_path;

    /** The Solr configuration. */
    private CmsSolrConfiguration m_solrConfig;

    /** Timeout for abandoning indexing thread. */
    private long m_timeout;

    /**
     * Default constructor when called as cron job.

*/ public CmsSearchManager() { m_documentTypes = new HashMap(); m_documentTypeConfigs = new ArrayList(); m_analyzers = new HashMap(); m_indexes = new ArrayList(); m_indexSources = new TreeMap(); m_offlineHandler = new CmsSearchOfflineHandler(); m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE; m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH; m_offlineUpdateFrequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY; m_maxIndexWaitTime = DEFAULT_MAX_INDEX_WAITTIME; m_maxModificationsBeforeCommit = DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT; m_fieldConfigurations = new HashMap(); // make sure we have a "standard" field configuration addFieldConfiguration(CmsLuceneFieldConfiguration.DEFAULT_STANDARD); if (CmsLog.INIT.isInfoEnabled()) { CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0)); } } /** * Returns an analyzer for the given class name.

* * @param className the class name of the analyzer * * @return the appropriate lucene analyzer * * @throws Exception if something goes wrong */ public static Analyzer getAnalyzer(String className) throws Exception { Analyzer analyzer = null; Class analyzerClass; try { analyzerClass = Class.forName(className); } catch (ClassNotFoundException e) { // allow Lucene standard classes to be written in a short form analyzerClass = Class.forName(LUCENE_ANALYZER + className); } // since Lucene 3.0 most analyzers need a "version" parameter and don't support an empty constructor if (StandardAnalyzer.class.equals(analyzerClass)) { // the Lucene standard analyzer is used - but without any stopwords. // TODO: Is it a good idea to remove the default english stopwords used by default? analyzer = new StandardAnalyzer(new CharArraySet(0, false)); } else { analyzer = (Analyzer)analyzerClass.newInstance(); } return analyzer; } /** * Returns the Solr index configured with the parameters name. * The parameters must contain a key/value pair with an existing * Solr index, otherwise null is returned.

* * @param cms the current context * @param params the parameter map * * @return the best matching Solr index */ public static final CmsSolrIndex getIndexSolr(CmsObject cms, Map params) { String indexName = null; CmsSolrIndex index = null; // try to get the index name from the parameters: 'core' or 'index' if (params != null) { indexName = params.get(OpenCmsSolrHandler.PARAM_CORE) != null ? params.get(OpenCmsSolrHandler.PARAM_CORE)[0] : (params.get(OpenCmsSolrHandler.PARAM_INDEX) != null ? params.get(OpenCmsSolrHandler.PARAM_INDEX)[0] : null); } if (indexName == null) { // if no parameter is specified try to use the default online/offline indexes by context indexName = cms.getRequestContext().getCurrentProject().isOnlineProject() ? CmsSolrIndex.DEFAULT_INDEX_NAME_ONLINE : CmsSolrIndex.DEFAULT_INDEX_NAME_OFFLINE; } // try to get the index index = indexName != null ? OpenCms.getSearchManager().getIndexSolr(indexName) : null; if (index == null) { // if there is exactly one index, a missing core / index parameter doesn't matter, since there is no choice. List solrs = OpenCms.getSearchManager().getAllSolrIndexes(); if ((solrs != null) && !solrs.isEmpty() && (solrs.size() == 1)) { index = solrs.get(0); } } return index; } /** * Returns true if the index for the given name is a Lucene index, false otherwise.

*
     * Every index that is not a {@link CmsSolrIndex} counts as a Lucene index here;
     * this includes the case that the lookup by name yields <code>null</code>.
     *
     * @param indexName the name of the index to check
     *
     * @return true if the index for the given name is a Lucene index
     */
    public static boolean isLuceneIndex(String indexName) {
        CmsSearchIndex index = OpenCms.getSearchManager().getIndex(indexName);
        return !(index instanceof CmsSolrIndex);
    }

    /**
     * Adds an analyzer.

*
     * The analyzer is stored under its locale.
     *
     * @param analyzer an analyzer
     */
    public void addAnalyzer(CmsSearchAnalyzer analyzer) {
        m_analyzers.put(analyzer.getLocale(), analyzer);

        if (!CmsLog.INIT.isInfoEnabled()) {
            return;
        }
        String message = Messages.get().getBundle().key(
            Messages.INIT_ADD_ANALYZER_2,
            analyzer.getLocale(),
            analyzer.getClassName());
        CmsLog.INIT.info(message);
    }

    /**
     * Adds a document type.

*
     * The configuration is appended to the list of document type configurations.
     *
     * @param documentType a document type
     */
    public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
        m_documentTypeConfigs.add(documentType);

        if (!CmsLog.INIT.isInfoEnabled()) {
            return;
        }
        String message = Messages.get().getBundle().key(
            Messages.INIT_SEARCH_DOC_TYPES_2,
            documentType.getName(),
            documentType.getClassName());
        CmsLog.INIT.info(message);
    }

    /**
     * Adds a search field configuration to the search manager.

*
     * The configuration is stored under its name; a configuration without fields is reported on debug level.
     *
     * @param fieldConfiguration the search field configuration to add
     */
    public void addFieldConfiguration(CmsSearchFieldConfiguration fieldConfiguration) {
        m_fieldConfigurations.put(fieldConfiguration.getName(), fieldConfiguration);

        boolean hasNoFields = fieldConfiguration.getFields().isEmpty();
        if (hasNoFields) {
            String message = Messages.get().getBundle().key(
                Messages.LOG_FIELD_CONFIGURATION_IS_EMPTY_1,
                fieldConfiguration.getName());
            LOG.debug(message);
        }
    }

    /**
     * Adds a search index to the configuration.

* * @param searchIndex the search index to add */ public void addSearchIndex(CmsSearchIndex searchIndex) { if ((searchIndex.getSources() == null) || (searchIndex.getPath() == null)) { if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) { try { searchIndex.initialize(); } catch (CmsException e) { // should never happen LOG.error(e.getMessage(), e); } } } // name: not null or emtpy and unique String name = searchIndex.getName(); if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) { throw new CmsIllegalArgumentException( Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0)); } if (m_indexSources.keySet().contains(name)) { throw new CmsIllegalArgumentException( Messages.get().container(Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1, name)); } m_indexes.add(searchIndex); if (m_adminCms != null) { initOfflineIndexes(); } if (CmsLog.INIT.isInfoEnabled()) { CmsLog.INIT.info( Messages.get().getBundle().key( Messages.INIT_ADD_SEARCH_INDEX_2, searchIndex.getName(), searchIndex.getProject())); } } /** * Adds a search index source configuration.

*
     * The source is stored under its name.
     *
     * @param searchIndexSource a search index source configuration
     */
    public void addSearchIndexSource(CmsSearchIndexSource searchIndexSource) {
        m_indexSources.put(searchIndexSource.getName(), searchIndexSource);

        if (!CmsLog.INIT.isInfoEnabled()) {
            return;
        }
        String message = Messages.get().getBundle().key(
            Messages.INIT_SEARCH_INDEX_SOURCE_2,
            searchIndexSource.getName(),
            searchIndexSource.getIndexerClassName());
        CmsLog.INIT.info(message);
    }

    /**
     * Implements the event listener of this class.

* * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent) */ public void cmsEvent(CmsEvent event) { switch (event.getType()) { case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES: List indexNames = null; if ((event.getData() != null) && CmsStringUtil.isNotEmptyOrWhitespaceOnly( (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES))) { indexNames = CmsStringUtil.splitAsList( (String)event.getData().get(I_CmsEventListener.KEY_INDEX_NAMES), ",", true); } try { if (LOG.isDebugEnabled()) { LOG.debug( Messages.get().getBundle().key( Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1, indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), new Exception()); } if (indexNames == null) { rebuildAllIndexes(getEventReport(event)); } else { rebuildIndexes(indexNames, getEventReport(event)); } } catch (CmsException e) { if (LOG.isErrorEnabled()) { LOG.error( Messages.get().getBundle().key( Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1, indexNames == null ? "" : CmsStringUtil.collectionAsString(indexNames, ",")), e); } } break; case I_CmsEventListener.EVENT_CLEAR_CACHES: if (LOG.isDebugEnabled()) { LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_CLEAR_CACHES_0), new Exception()); } break; case I_CmsEventListener.EVENT_PUBLISH_PROJECT: // event data contains a list of the published resources CmsUUID publishHistoryId = new CmsUUID((String)event.getData().get(I_CmsEventListener.KEY_PUBLISHID)); if (LOG.isDebugEnabled()) { LOG.debug(Messages.get().getBundle().key(Messages.LOG_EVENT_PUBLISH_PROJECT_1, publishHistoryId)); } updateAllIndexes(m_adminCms, publishHistoryId, getEventReport(event)); if (LOG.isDebugEnabled()) { LOG.debug( Messages.get().getBundle().key( Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1, publishHistoryId)); } break; default: // no operation } } /** * Returns all Solr index.

*
     * Index names for which no Solr index is found are skipped.
     *
     * @return all Solr indexes, never <code>null</code>
     */
    public List<CmsSolrIndex> getAllSolrIndexes() {
        List<CmsSolrIndex> result = new ArrayList<CmsSolrIndex>();
        for (String indexName : getIndexNames()) {
            CmsSolrIndex index = getIndexSolr(indexName);
            if (index != null) {
                result.add(index);
            }
        }
        return result;
    }

    /**
     * Returns an analyzer for the given language.

* * The analyzer is selected according to the analyzer configuration.

*
     * @param locale the locale to get the analyzer for
     * @return the appropriate lucene analyzer
     *
     * @throws CmsSearchException if no analyzer is configured for the locale or the analyzer cannot be loaded
     */
    public Analyzer getAnalyzer(Locale locale) throws CmsSearchException {
        CmsSearchAnalyzer analyzerConf = m_analyzers.get(locale);
        if (analyzerConf == null) {
            throw new CmsSearchException(Messages.get().container(Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
        }

        // resolve the class name up front so that it is available for the error message
        String className = analyzerConf.getClassName();
        try {
            return getAnalyzer(className);
        } catch (Exception e) {
            throw new CmsSearchException(Messages.get().container(Messages.ERR_LOAD_ANALYZER_1, className), e);
        }
    }

    /**
     * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.

*
* Keys of the map are {@link Locale} objects, values are {@link CmsSearchAnalyzer} objects.
*
* @return a read-only view of the analyzer map
*/
public Map getAnalyzers() {

    return Collections.unmodifiableMap(m_analyzers);
}

/**
* Returns the search analyzer for the given locale.

*
* @param locale the locale to look up
*
* @return the analyzer configuration stored for the locale, as returned by the analyzer map
*/
public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {

    return m_analyzers.get(locale);
}

/**
* Returns the name of the directory below WEB-INF/ where the search indexes are stored.

*
* @return the directory name, below WEB-INF/, that holds the search indexes
*/
public String getDirectory() {

    return m_path;
}

/**
* Returns the configured Solr home directory null if not set.

*
* @return the Solr home directory, or null if no Solr configuration is present
*/
public String getDirectorySolr() {

    if (m_solrConfig == null) {
        return null;
    }
    return m_solrConfig.getHome();
}

/**
* Returns a lucene document factory for given resource.

* * The type of the document factory is selected by the type of the resource * and the MIME type of the resource content, according to the configuration in opencms-search.xml.

* * @param resource a cms resource * @return a lucene document factory or null */ public I_CmsDocumentFactory getDocumentFactory(CmsResource resource) { // first get the MIME type of the resource String mimeType = OpenCms.getResourceManager().getMimeType(resource.getRootPath(), null, "unknown"); String resourceType = null; try { resourceType = OpenCms.getResourceManager().getResourceType(resource.getTypeId()).getTypeName(); } catch (CmsLoaderException e) { // ignore, unknown resource type, resource can not be indexed LOG.info(e.getLocalizedMessage(), e); } return getDocumentFactory(resourceType, mimeType); } /** * Returns a lucene document factory for given resource type and MIME type.

* * The type of the document factory is selected according to the configuration * in opencms-search.xml.

* * @param resourceType the resource type name * @param mimeType the MIME type * * @return a lucene document factory or null in case no matching factory was found */ public I_CmsDocumentFactory getDocumentFactory(String resourceType, String mimeType) { I_CmsDocumentFactory result = null; if (resourceType != null) { // create the factory lookup key for the document String documentTypeKey = A_CmsVfsDocument.getDocumentKey(resourceType, mimeType); // check if a setting is available for this specific MIME type result = m_documentTypes.get(documentTypeKey); if (result == null) { // no setting is available, try to use a generic setting without MIME type result = m_documentTypes.get(A_CmsVfsDocument.getDocumentKey(resourceType, null)); // please note: the result may still be null } } return result; } /** * Returns a document type config.

* * @param name the name of the document type config * @return the document type config. */ public CmsSearchDocumentType getDocumentTypeConfig(String name) { // this is really used only for the search manager GUI, // so performance is not an issue and no lookup map is generated for (int i = 0; i < m_documentTypeConfigs.size(); i++) { CmsSearchDocumentType type = m_documentTypeConfigs.get(i); if (type.getName().equals(name)) { return type; } } return null; } /** * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.

*
* @return a read-only view of the list of document type configs
*/
public List getDocumentTypeConfigs() {

    return Collections.unmodifiableList(m_documentTypeConfigs);
}

/**
* Returns the maximum age a text extraction result is kept in the cache (in hours).

*
* @return the maximum age, in hours, of a cached text extraction result
*/
public float getExtractionCacheMaxAge() {

    return m_extractionCacheMaxAge;
}

/**
* Returns the search field configuration with the given name.

* * In case no configuration is available with the given name, null is returned.

*
* @param name the name of the wanted search field configuration
*
* @return the search field configuration registered under the given name
*/
public CmsSearchFieldConfiguration getFieldConfiguration(String name) {

    return m_fieldConfigurations.get(name);
}

/**
* Returns the unmodifieable List of configured {@link CmsSearchFieldConfiguration} entries.

*
* @return a sorted, unmodifiable List of the configured {@link CmsSearchFieldConfiguration} entries
*/
public List getFieldConfigurations() {

    // sort a copy, the configuration map itself is left untouched
    List<CmsSearchFieldConfiguration> sorted = new ArrayList<CmsSearchFieldConfiguration>(
        m_fieldConfigurations.values());
    Collections.sort(sorted);
    return Collections.unmodifiableList(sorted);
}

/**
* Returns the Lucene search field configurations only.

*
* @return a sorted, unmodifiable list of the Lucene search field configurations
*/
public List getFieldConfigurationsLucene() {

    List<CmsLuceneFieldConfiguration> luceneConfigs = new ArrayList<CmsLuceneFieldConfiguration>();
    for (CmsSearchFieldConfiguration candidate : m_fieldConfigurations.values()) {
        if (!(candidate instanceof CmsLuceneFieldConfiguration)) {
            continue;
        }
        luceneConfigs.add((CmsLuceneFieldConfiguration)candidate);
    }
    Collections.sort(luceneConfigs);
    return Collections.unmodifiableList(luceneConfigs);
}

/**
* Returns the Solr search field configurations only.

*
* @return a sorted, unmodifiable list of the Solr search field configurations
*/
public List getFieldConfigurationsSolr() {

    List<CmsSolrFieldConfiguration> solrConfigs = new ArrayList<CmsSolrFieldConfiguration>();
    for (CmsSearchFieldConfiguration candidate : m_fieldConfigurations.values()) {
        if (!(candidate instanceof CmsSolrFieldConfiguration)) {
            continue;
        }
        solrConfigs.add((CmsSolrFieldConfiguration)candidate);
    }
    Collections.sort(solrConfigs);
    return Collections.unmodifiableList(solrConfigs);
}

/**
* Returns the force unlock mode during indexing.

*
* @return the currently set force unlock mode
*/
public CmsSearchForceUnlockMode getForceunlock() {

    return m_forceUnlockMode;
}

/**
* Returns the highlighter.

*
* @return the currently set term highlighter
*/
public I_CmsTermHighlighter getHighlighter() {

    return m_highlighter;
}

/**
* Returns the Lucene search index configured with the given name.

* The index must exist, otherwise null is returned.
*
* @param indexName the name of the requested search index (compared ignoring case)
*
* @return the first configured search index with the given name, or null
*/
public CmsSearchIndex getIndex(String indexName) {

    for (CmsSearchIndex candidate : m_indexes) {
        String candidateName = candidate.getName();
        if (indexName.equalsIgnoreCase(candidateName)) {
            return candidate;
        }
    }
    return null;
}

/**
* Returns the seconds to wait for an index lock during an update operation.

*
* @return the number of seconds to wait for an index lock during an update operation
*/
public int getIndexLockMaxWaitSeconds() {

    return m_indexLockMaxWaitSeconds;
}

/**
* Returns the names of all configured indexes.

*
* @return a new list with the name of every configured index, in configuration order
*/
public List getIndexNames() {

    List<String> names = new ArrayList<String>();
    for (CmsSearchIndex index : m_indexes) {
        names.add(index.getName());
    }
    return names;
}

/**
* Returns the Solr index configured with the given name.

* The index must exist, otherwise null is returned.
*
* @param indexName the name of the requested Solr index
* @return the index with the given name if it is a Solr index, otherwise null
*/
public CmsSolrIndex getIndexSolr(String indexName) {

    CmsSearchIndex candidate = getIndex(indexName);
    return (candidate instanceof CmsSolrIndex) ? (CmsSolrIndex)candidate : null;
}

/**
* Returns a search index source for a specified source name.

*
* @param sourceName the name of the index source
* @return the search index source registered under the given name
*/
public CmsSearchIndexSource getIndexSource(String sourceName) {

    return m_indexSources.get(sourceName);
}

/**
* Returns the max. excerpt length.

*
* @return the maximum excerpt length
*/
public int getMaxExcerptLength() {

    return m_maxExcerptLength;
}

/**
* Returns the maximal time to wait for re-indexing after a content is edited (in milliseconds).

*
* @return the maximal wait time, in milliseconds, for re-indexing after a content is edited
*/
public long getMaxIndexWaitTime() {

    return m_maxIndexWaitTime;
}

/**
* Returns the maximum number of modifications before a commit in the search index is triggered.

*
* @return the number of modifications after which a commit in the search index is triggered
*/
public int getMaxModificationsBeforeCommit() {

    return m_maxModificationsBeforeCommit;
}

/**
* Returns the update frequency of the offline indexer in milliseconds.

*
* @return the offline indexer update frequency, in milliseconds
*/
public long getOfflineUpdateFrequency() {

    return m_offlineUpdateFrequency;
}

/**
* Returns an unmodifiable list of all configured {@link CmsSearchIndex} instances.

*
* @return a read-only view of the list of all configured {@link CmsSearchIndex} instances
*/
public List getSearchIndexes() {

    return Collections.unmodifiableList(m_indexes);
}

/**
* Returns an unmodifiable list of all configured {@link CmsSearchIndex} instances.

*
* @return a read-only view of the list of all configured {@link CmsSearchIndex} instances
*/
public List getSearchIndexesAll() {

    return Collections.unmodifiableList(m_indexes);
}

/**
* Returns an unmodifiable list of all configured {@link CmsSearchIndex} instances.

*
* Only those configured indexes that are {@link CmsSolrIndex} instances are included.
*
* @return an unmodifiable list of the configured {@link CmsSolrIndex} instances
*/
public List getSearchIndexesSolr() {

    List<CmsSolrIndex> solrIndexes = new ArrayList<CmsSolrIndex>();
    for (CmsSearchIndex candidate : m_indexes) {
        if (!(candidate instanceof CmsSolrIndex)) {
            continue;
        }
        solrIndexes.add((CmsSolrIndex)candidate);
    }
    return Collections.unmodifiableList(solrIndexes);
}

/**
* Returns an unmodifiable view (read-only) of the SearchIndexSources Map.

*
* @return a read-only view of the map of search index sources
*/
public Map getSearchIndexSources() {

    return Collections.unmodifiableMap(m_indexSources);
}

/**
* Return singleton instance of the OpenCms spellchecker.

* * @param cms the cms object. * * @return instance of CmsSolrSpellchecker. */ public CmsSolrSpellchecker getSolrDictionary(CmsObject cms) { // get the core container that contains one core for each configured index if (m_coreContainer == null) { m_coreContainer = createCoreContainer(); } SolrCore spellcheckCore = m_coreContainer.getCore(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE); if (spellcheckCore == null) { LOG.error( Messages.get().getBundle().key( Messages.ERR_SPELLCHECK_CORE_NOT_AVAILABLE_1, CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE)); return null; } else { return CmsSolrSpellchecker.getInstance(m_coreContainer, spellcheckCore); } } /** * Returns the Solr configuration.

*
* @return the currently set Solr configuration
*/
public CmsSolrConfiguration getSolrServerConfiguration() {

    return m_solrConfig;
}

/**
* Returns the timeout to abandon threads indexing a resource.

*
* @return the timeout after which threads indexing a resource are abandoned
*/
public long getTimeout() {

    return m_timeout;
}

/**
* Initializes the search manager.

* * @param cms the cms object * * @throws CmsRoleViolationException in case the given opencms object does not have {@link CmsRole#WORKPLACE_MANAGER} permissions */ public void initialize(CmsObject cms) throws CmsRoleViolationException { OpenCms.getRoleManager().checkRole(cms, CmsRole.WORKPLACE_MANAGER); try { // store the Admin cms to index Cms resources m_adminCms = OpenCms.initCmsObject(cms); } catch (CmsException e) { // this should never happen LOG.error(e.getLocalizedMessage(), e); } // make sure the site root is the root site m_adminCms.getRequestContext().setSiteRoot("/"); // create the extraction result cache m_extractionResultCache = new CmsExtractionResultCache( OpenCms.getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(getDirectory()), "/extractCache"); initializeIndexes(); initOfflineIndexes(); // register this object as event listener OpenCms.addCmsEventListener( this, new int[] { I_CmsEventListener.EVENT_CLEAR_CACHES, I_CmsEventListener.EVENT_PUBLISH_PROJECT, I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES}); } /** * Initializes all configured document types and search indexes.

*
* This method needs to be called after a change in the index configuration has been made.
*/
public void initializeIndexes() {

    // document types first, then the search indexes
    initAvailableDocumentTypes();
    initSearchIndexes();
}

/**
* Initialize the offline index handler, require after an offline index has been added.

*/ public void initOfflineIndexes() { // check which indexes are configured as offline indexes List offlineIndexes = new ArrayList(); Iterator i = m_indexes.iterator(); while (i.hasNext()) { CmsSearchIndex index = i.next(); if (CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { // this is an offline index offlineIndexes.add(index); } } m_offlineIndexes = offlineIndexes; m_offlineHandler.initialize(); } /** * Returns if the offline indexing is paused.

*
* @return true if the offline update frequency is set to Long.MAX_VALUE, the value used for "paused"
*/
public boolean isOfflineIndexingPaused() {

    return Long.MAX_VALUE == m_offlineUpdateFrequency;
}

/**
* Updates the indexes from as a scheduled job.

* * @param cms the OpenCms user context to use when reading resources from the VFS * @param parameters the parameters for the scheduled job * * @throws Exception if something goes wrong * * @return the String to write in the scheduler log * * @see org.opencms.scheduler.I_CmsScheduledJob#launch(CmsObject, Map) */ public String launch(CmsObject cms, Map parameters) throws Exception { CmsSearchManager manager = OpenCms.getSearchManager(); I_CmsReport report = null; boolean writeLog = Boolean.valueOf(parameters.get(JOB_PARAM_WRITELOG)).booleanValue(); if (writeLog) { report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); } List updateList = null; String indexList = parameters.get(JOB_PARAM_INDEXLIST); if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) { // index list has been provided as job parameter updateList = new ArrayList(); String[] indexNames = CmsStringUtil.splitAsArray(indexList, '|'); for (int i = 0; i < indexNames.length; i++) { // check if the index actually exists if (manager.getIndex(indexNames[i]) != null) { updateList.add(indexNames[i]); } else { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexNames[i])); } } } } long startTime = System.currentTimeMillis(); if (updateList == null) { // all indexes need to be updated manager.rebuildAllIndexes(report); } else { // rebuild only the selected indexes manager.rebuildIndexes(updateList, report); } long runTime = System.currentTimeMillis() - startTime; String finishMessage = Messages.get().getBundle().key( Messages.LOG_REBUILD_INDEXES_FINISHED_1, CmsStringUtil.formatRuntime(runTime)); if (LOG.isInfoEnabled()) { LOG.info(finishMessage); } return finishMessage; } /** * Pauses the offline indexing.

* May take some time, because the indexes are updated first.

*/
public void pauseOfflineIndexing() {

    if (m_offlineUpdateFrequency == Long.MAX_VALUE) {
        // already paused
        return;
    }
    // remember the configured frequency so that it can be restored on resume
    m_configuredOfflineIndexingFrequency = m_offlineUpdateFrequency;
    m_offlineUpdateFrequency = Long.MAX_VALUE;
    updateOfflineIndexes(0);
}

/**
* Rebuilds (if required creates) all configured indexes.

* * @param report the report object to write messages (or null) * * @throws CmsException if something goes wrong */ public void rebuildAllIndexes(I_CmsReport report) throws CmsException { try { SEARCH_MANAGER_LOCK.lock(); CmsMessageContainer container = null; for (int i = 0, n = m_indexes.size(); i < n; i++) { // iterate all configured search indexes CmsSearchIndex searchIndex = m_indexes.get(i); try { // update the index updateIndex(searchIndex, report, null); } catch (CmsException e) { container = new CmsMessageContainer( Messages.get(), Messages.ERR_INDEX_REBUILD_ALL_1, new Object[] {searchIndex.getName()}); LOG.error( Messages.get().getBundle().key(Messages.ERR_INDEX_REBUILD_ALL_1, searchIndex.getName()), e); } } // clean up the extraction result cache cleanExtractionCache(); if (container != null) { // throw stored exception throw new CmsSearchException(container); } } finally { SEARCH_MANAGER_LOCK.unlock(); } } /** * Rebuilds (if required creates) the index with the given name.

* * @param indexName the name of the index to rebuild * @param report the report object to write messages (or null) * * @throws CmsException if something goes wrong */ public void rebuildIndex(String indexName, I_CmsReport report) throws CmsException { try { SEARCH_MANAGER_LOCK.lock(); // get the search index by name CmsSearchIndex index = getIndex(indexName); // update the index updateIndex(index, report, null); // clean up the extraction result cache cleanExtractionCache(); } finally { SEARCH_MANAGER_LOCK.unlock(); } } /** * Rebuilds (if required creates) the List of indexes with the given name.

* * @param indexNames the names (String) of the index to rebuild * @param report the report object to write messages (or null) * * @throws CmsException if something goes wrong */ public void rebuildIndexes(List indexNames, I_CmsReport report) throws CmsException { try { SEARCH_MANAGER_LOCK.lock(); Iterator i = indexNames.iterator(); while (i.hasNext()) { String indexName = i.next(); // get the search index by name CmsSearchIndex index = getIndex(indexName); if (index != null) { // update the index updateIndex(index, report, null); } else { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); } } } // clean up the extraction result cache cleanExtractionCache(); } finally { SEARCH_MANAGER_LOCK.unlock(); } } /** * Registers a new Solr core for the given index.

* * @param index the index to register a new Solr core for * * @throws CmsConfigurationException if no Solr server is configured */ public void registerSolrIndex(CmsSolrIndex index) throws CmsConfigurationException { if ((m_solrConfig == null) || !m_solrConfig.isEnabled()) { // No solr server configured throw new CmsConfigurationException(Messages.get().container(Messages.ERR_SOLR_NOT_ENABLED_0)); } if (m_solrConfig.getServerUrl() != null) { // HTTP Server configured // TODO Implement multi core support for HTTP server // @see http://lucidworks.lucidimagination.com/display/solr/Configuring+solr.xml index.setSolrServer(new HttpSolrClient(m_solrConfig.getServerUrl())); } // get the core container that contains one core for each configured index if (m_coreContainer == null) { m_coreContainer = createCoreContainer(); } // create a new core if no core exists for the given index if (!m_coreContainer.getCoreNames().contains(index.getCoreName())) { // Being sure the core container is not 'null', // we can create a core for this index if not already existent File dataDir = new File(index.getPath()); if (!dataDir.exists()) { dataDir.mkdirs(); if (CmsLog.INIT.isInfoEnabled()) { CmsLog.INIT.info( Messages.get().getBundle().key( Messages.INIT_SOLR_INDEX_DIR_CREATED_2, index.getName(), index.getPath())); } } File instanceDir = new File( m_solrConfig.getHome() + FileSystems.getDefault().getSeparator() + index.getName()); if (!instanceDir.exists()) { instanceDir.mkdirs(); if (CmsLog.INIT.isInfoEnabled()) { CmsLog.INIT.info( Messages.get().getBundle().key( Messages.INIT_SOLR_INDEX_DIR_CREATED_2, index.getName(), index.getPath())); } } // create the core // TODO: suboptimal - forces always the same schema SolrCore core = null; try { // creation includes registration. 
// TODO: this was the old code: core = m_coreContainer.create(descriptor, false); Map properties = new HashMap(3); properties.put(CoreDescriptor.CORE_DATADIR, dataDir.getAbsolutePath()); properties.put(CoreDescriptor.CORE_CONFIGSET, "default"); core = m_coreContainer.create(index.getCoreName(), instanceDir.toPath(), properties); } catch (NullPointerException e) { if (core != null) { core.close(); } throw new CmsConfigurationException( Messages.get().container( Messages.ERR_SOLR_SERVER_NOT_CREATED_3, index.getName() + " (" + index.getCoreName() + ")", index.getPath(), m_solrConfig.getSolrConfigFile().getAbsolutePath()), e); } } if (index.isNoSolrServerSet()) { index.setSolrServer(new EmbeddedSolrServer(m_coreContainer, index.getCoreName())); } if (CmsLog.INIT.isInfoEnabled()) { CmsLog.INIT.info( Messages.get().getBundle().key( Messages.INIT_SOLR_SERVER_CREATED_1, index.getName() + " (" + index.getCoreName() + ")")); } } /** * Removes this field configuration from the OpenCms configuration (if it is not used any more).

* * @param fieldConfiguration the field configuration to remove from the configuration * * @return true if remove was successful, false if preconditions for removal are ok but the given * field configuration was unknown to the manager. * * @throws CmsIllegalStateException if the given field configuration is still used by at least one * {@link CmsSearchIndex}. * */ public boolean removeSearchFieldConfiguration(CmsSearchFieldConfiguration fieldConfiguration) throws CmsIllegalStateException { // never remove the standard field configuration if (fieldConfiguration.getName().equals(CmsSearchFieldConfiguration.STR_STANDARD)) { throw new CmsIllegalStateException( Messages.get().container( Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1, fieldConfiguration.getName())); } // validation if removal will be granted Iterator itIndexes = m_indexes.iterator(); CmsSearchIndex idx; // the list for collecting indexes that use the given field configuration List referrers = new ArrayList(); CmsSearchFieldConfiguration refFieldConfig; while (itIndexes.hasNext()) { idx = itIndexes.next(); refFieldConfig = idx.getFieldConfiguration(); if (refFieldConfig.equals(fieldConfiguration)) { referrers.add(idx); } } if (referrers.size() > 0) { throw new CmsIllegalStateException( Messages.get().container( Messages.ERR_INDEX_CONFIGURATION_DELETE_2, fieldConfiguration.getName(), referrers.toString())); } // remove operation (no exception) return m_fieldConfigurations.remove(fieldConfiguration.getName()) != null; } /** * Removes a search field from the field configuration.

* * @param fieldConfiguration the field configuration * @param field field to remove from the field configuration * * @return true if remove was successful, false if preconditions for removal are ok but the given * field was unknown. * * @throws CmsIllegalStateException if the given field is the last field inside the given field configuration. */ public boolean removeSearchFieldConfigurationField( CmsSearchFieldConfiguration fieldConfiguration, CmsSearchField field) throws CmsIllegalStateException { if (fieldConfiguration.getFields().size() < 2) { throw new CmsIllegalStateException( Messages.get().container( Messages.ERR_CONFIGURATION_FIELD_DELETE_2, field.getName(), fieldConfiguration.getName())); } else { if (LOG.isInfoEnabled()) { LOG.info( Messages.get().getBundle().key( Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2, field.getName(), fieldConfiguration.getName())); } return fieldConfiguration.getFields().remove(field); } } /** * Removes a search field mapping from the given field.

* * @param field the field * @param mapping mapping to remove from the field * * @return true if remove was successful, false if preconditions for removal are ok but the given * mapping was unknown. * * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field. */ public boolean removeSearchFieldMapping(CmsLuceneField field, CmsSearchFieldMapping mapping) throws CmsIllegalStateException { if (field.getMappings().size() < 2) { throw new CmsIllegalStateException( Messages.get().container( Messages.ERR_FIELD_MAPPING_DELETE_2, mapping.getType().toString(), field.getName())); } else { if (LOG.isInfoEnabled()) { LOG.info( Messages.get().getBundle().key( Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2, mapping.toString(), field.getName())); } return field.getMappings().remove(mapping); } } /** * Removes a search index from the configuration.

* * @param searchIndex the search index to remove */ public void removeSearchIndex(CmsSearchIndex searchIndex) { // shut down index to remove potential config files of Solr indexes searchIndex.shutDown(); if (searchIndex instanceof CmsSolrIndex) { CmsSolrIndex solrIndex = (CmsSolrIndex)searchIndex; m_coreContainer.unload(solrIndex.getCoreName(), true, true, true); } m_indexes.remove(searchIndex); initOfflineIndexes(); if (LOG.isInfoEnabled()) { LOG.info( Messages.get().getBundle().key( Messages.LOG_REMOVE_SEARCH_INDEX_2, searchIndex.getName(), searchIndex.getProject())); } } /** * Removes all indexes included in the given list (which must contain the name of an index to remove).

* * @param indexNames the names of the index to remove */ public void removeSearchIndexes(List indexNames) { Iterator i = indexNames.iterator(); while (i.hasNext()) { String indexName = i.next(); // get the search index by name CmsSearchIndex index = getIndex(indexName); if (index != null) { // remove the index removeSearchIndex(index); } else { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_NO_INDEX_WITH_NAME_1, indexName)); } } } } /** * Removes this indexsource from the OpenCms configuration (if it is not used any more).

* * @param indexsource the indexsource to remove from the configuration * * @return true if remove was successful, false if preconditions for removal are ok but the given * searchindex was unknown to the manager. * * @throws CmsIllegalStateException if the given indexsource is still used by at least one * {@link CmsSearchIndex}. * */ public boolean removeSearchIndexSource(CmsSearchIndexSource indexsource) throws CmsIllegalStateException { // validation if removal will be granted Iterator itIndexes = m_indexes.iterator(); CmsSearchIndex idx; // the list for collecting indexes that use the given index source List referrers = new ArrayList(); // the current list of referred index sources of the iterated index List refsources; while (itIndexes.hasNext()) { idx = itIndexes.next(); refsources = idx.getSources(); if (refsources != null) { if (refsources.contains(indexsource)) { referrers.add(idx); } } } if (referrers.size() > 0) { throw new CmsIllegalStateException( Messages.get().container( Messages.ERR_INDEX_SOURCE_DELETE_2, indexsource.getName(), referrers.toString())); } // remove operation (no exception) return m_indexSources.remove(indexsource.getName()) != null; } /** * Resumes offline indexing if it was paused.

*/
public void resumeOfflineIndexing() {

    if (m_offlineUpdateFrequency != Long.MAX_VALUE) {
        // not paused, nothing to resume
        return;
    }
    // restore the frequency remembered when pausing, or use the default if none is stored
    long frequency = DEFAULT_OFFLINE_UPDATE_FREQNENCY;
    if (m_configuredOfflineIndexingFrequency > 0) {
        frequency = m_configuredOfflineIndexingFrequency;
    }
    setOfflineUpdateFrequency(frequency);
}

/**
* Sets the name of the directory below WEB-INF/ where the search indexes are stored.

*
* @param value the directory name, below WEB-INF/, that holds the search indexes
*/
public void setDirectory(String value) {

    m_path = value;
}

/**
* Sets the maximum age a text extraction result is kept in the cache (in hours).

*
* @param extractionCacheMaxAge the maximum age, in hours, to set for text extraction results
*/
public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {

    m_extractionCacheMaxAge = extractionCacheMaxAge;
}

/**
* Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.

*
* A value that is null or can not be parsed as a float is logged as an error
* and replaced by the default maximum age.
*
* @param extractionCacheMaxAge the maximum age for a text extraction result to set
*/
public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {

    NumberFormatException failure = null;
    // Float.parseFloat throws a NullPointerException (not a NumberFormatException)
    // for null, so null is treated as an invalid value here
    if (extractionCacheMaxAge != null) {
        try {
            setExtractionCacheMaxAge(Float.parseFloat(extractionCacheMaxAge));
            return;
        } catch (NumberFormatException e) {
            failure = e;
        }
    }
    LOG.error(
        Messages.get().getBundle().key(
            Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
            extractionCacheMaxAge,
            Float.valueOf(DEFAULT_EXTRACTION_CACHE_MAX_AGE)),
        failure);
    setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
}

/**
* Sets the unlock mode during indexing.

*
* @param value the name of the force unlock mode, resolved with CmsSearchForceUnlockMode.valueOf
*/
public void setForceunlock(String value) {

    m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
}

/**
* Sets the highlighter.

* * A highlighter is a class implementing org.opencms.search.documents.I_TermHighlighter.

* * @param highlighter the package/class name of the highlighter */ public void setHighlighter(String highlighter) { try { m_highlighter = (I_CmsTermHighlighter)Class.forName(highlighter).newInstance(); } catch (Exception e) { m_highlighter = null; LOG.error(e.getLocalizedMessage(), e); } } /** * Sets the seconds to wait for an index lock during an update operation.

*
* @param value the number of seconds to wait for an index lock during an update operation
*/
public void setIndexLockMaxWaitSeconds(int value) {

    m_indexLockMaxWaitSeconds = value;
}

/**
* Sets the max. excerpt length.

*
* @param maxExcerptLength the maximum excerpt length to set
*/
public void setMaxExcerptLength(int maxExcerptLength) {

    m_maxExcerptLength = maxExcerptLength;
}

/**
* Sets the max. excerpt length as a String.

*
* If setting the parsed value fails, the error is logged and the default excerpt length is used.
*
* @param maxExcerptLength the max. excerpt length to set
*/
public void setMaxExcerptLength(String maxExcerptLength) {

    try {
        int parsed = Integer.parseInt(maxExcerptLength);
        setMaxExcerptLength(parsed);
    } catch (Exception e) {
        String message = Messages.get().getBundle().key(
            Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
            maxExcerptLength,
            Integer.valueOf(DEFAULT_EXCERPT_LENGTH));
        LOG.error(message, e);
        setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
    }
}

/**
* Sets the maximal wait time for offline index updates after edit operations.

*
* @param maxIndexWaitTime the maximal wait time, in milliseconds, to set
*/
public void setMaxIndexWaitTime(long maxIndexWaitTime) {

    m_maxIndexWaitTime = maxIndexWaitTime;
}

/**
* Sets the maximal wait time for offline index updates after edit operations.

*
* If setting the parsed value fails, the error is logged and the default wait time is used.
*
* @param maxIndexWaitTime the maximal wait time to set in milliseconds
*/
public void setMaxIndexWaitTime(String maxIndexWaitTime) {

    try {
        long parsed = Long.parseLong(maxIndexWaitTime);
        setMaxIndexWaitTime(parsed);
    } catch (Exception e) {
        String message = Messages.get().getBundle().key(
            Messages.LOG_PARSE_MAX_INDEX_WAITTIME_FAILED_2,
            maxIndexWaitTime,
            Long.valueOf(DEFAULT_MAX_INDEX_WAITTIME));
        LOG.error(message, e);
        setMaxIndexWaitTime(DEFAULT_MAX_INDEX_WAITTIME);
    }
}

/**
* Sets the maximum number of modifications before a commit in the search index is triggered.

*
* @param maxModificationsBeforeCommit the number of modifications after which a commit is triggered
*/
public void setMaxModificationsBeforeCommit(int maxModificationsBeforeCommit) {

    m_maxModificationsBeforeCommit = maxModificationsBeforeCommit;
}

/**
* Sets the maximum number of modifications before a commit in the search index is triggered as a string.

*
* If setting the parsed value fails, the error is logged and the default value is used.
*
* @param value the maximum number of modifications to set
*/
public void setMaxModificationsBeforeCommit(String value) {

    try {
        int parsed = Integer.parseInt(value);
        setMaxModificationsBeforeCommit(parsed);
    } catch (Exception e) {
        String message = Messages.get().getBundle().key(
            Messages.LOG_PARSE_MAXCOMMIT_FAILED_2,
            value,
            Integer.valueOf(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT));
        LOG.error(message, e);
        setMaxModificationsBeforeCommit(DEFAULT_MAX_MODIFICATIONS_BEFORE_COMMIT);
    }
}

/**
* Sets the update frequency of the offline indexer in milliseconds.

*
* After storing the value, the offline index update is triggered without waiting.
*
* @param offlineUpdateFrequency the update frequency in milliseconds to set
*/
public void setOfflineUpdateFrequency(long offlineUpdateFrequency) {

    m_offlineUpdateFrequency = offlineUpdateFrequency;
    // a wait time of 0 means the call does not wait for the update
    updateOfflineIndexes(0);
}

/**
* Sets the update frequency of the offline indexer in milliseconds.

*
* If setting the parsed value fails, the error is logged and the default frequency is used.
*
* @param offlineUpdateFrequency the update frequency in milliseconds to set
*/
public void setOfflineUpdateFrequency(String offlineUpdateFrequency) {

    try {
        long parsed = Long.parseLong(offlineUpdateFrequency);
        setOfflineUpdateFrequency(parsed);
    } catch (Exception e) {
        String message = Messages.get().getBundle().key(
            Messages.LOG_PARSE_OFFLINE_UPDATE_FAILED_2,
            offlineUpdateFrequency,
            Long.valueOf(DEFAULT_OFFLINE_UPDATE_FREQNENCY));
        LOG.error(message, e);
        setOfflineUpdateFrequency(DEFAULT_OFFLINE_UPDATE_FREQNENCY);
    }
}

/**
* Sets the Solr configuration.

*
* @param config the Solr configuration to use
*/
public void setSolrServerConfiguration(CmsSolrConfiguration config) {

    m_solrConfig = config;
}

/**
* Sets the timeout to abandon threads indexing a resource.

*
* @param value the timeout, in milliseconds
*/
public void setTimeout(long value) {

    m_timeout = value;
}

/**
* Sets the timeout to abandon threads indexing a resource as a String.

*
* If setting the parsed value fails, the error is logged and the default timeout is used.
*
* @param value the timeout in milliseconds
*/
public void setTimeout(String value) {

    try {
        long parsed = Long.parseLong(value);
        setTimeout(parsed);
    } catch (Exception e) {
        String message = Messages.get().getBundle().key(
            Messages.LOG_PARSE_TIMEOUT_FAILED_2,
            value,
            Long.valueOf(DEFAULT_TIMEOUT));
        LOG.error(message, e);
        setTimeout(DEFAULT_TIMEOUT);
    }
}

/**
* Shuts down the search manager.

* * This will cause all search indices to be shut down.

*
     * Before the indexes are shut down, the offline index thread is stopped and the
     * offline event handler is unregistered (if present). Finally the list of indexes
     * is cleared and the Solr core container is shut down.
     */
    public void shutDown() {

        if (m_offlineIndexThread != null) {
            m_offlineIndexThread.shutDown();
        }

        if (m_offlineHandler != null) {
            OpenCms.removeCmsEventListener(m_offlineHandler);
        }

        for (CmsSearchIndex index : m_indexes) {
            index.shutDown();
        }
        m_indexes.clear();

        shutDownSolrContainer();

        if (CmsLog.INIT.isInfoEnabled()) {
            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SHUTDOWN_MANAGER_0));
        }
    }

    /**
     * Updates all offline indexes.

* * Can be used to force an index update when it's not convenient to wait until the * offline update interval has elapsed.

*
     * Since the offline indexes still need some time to update the new resources,
     * the method waits for at most the configurable maxIndexWaitTime
     * to ensure that updating is finished.
     *
     * @see #updateOfflineIndexes(long)
     */
    public void updateOfflineIndexes() {

        // delegate with the configured maximum wait time
        long maxWaitTime = getMaxIndexWaitTime();
        updateOfflineIndexes(maxWaitTime);
    }

    /**
     * Updates all offline indexes.

* * Can be used to force an index update when it's not convenient to wait until the * offline update interval has elapsed.

* * Since the offline index will still need some time to update the new resources even if it runs directly, * a wait time of 2500 or so should be given in order to make sure the index finished updating. * * @param waitTime milliseconds to wait after the offline update index was notified of the changes */ public void updateOfflineIndexes(long waitTime) { if ((m_offlineIndexThread != null) && m_offlineIndexThread.isAlive()) { // notify existing thread of update frequency change if (LOG.isDebugEnabled()) { LOG.debug(Messages.get().getBundle().key(Messages.LOG_OI_UPDATE_INTERRUPT_0)); } m_offlineIndexThread.interrupt(); if (waitTime > 0) { m_offlineIndexThread.getWaitHandle().enter(waitTime); } } } /** * Cleans up the extraction result cache.

*/ protected void cleanExtractionCache() { // clean up the extraction result cache m_extractionResultCache.cleanCache(m_extractionCacheMaxAge); } /** * Collects the related containerpages to the resources that have been published.

* * @param adminCms an OpenCms user context with Admin permissions * @param updateResources the resources to be re-indexed * * @return the updated list of resource to re-index */ protected List findRelatedContainerPages( CmsObject adminCms, List updateResources) { Set elementGroups = new HashSet(); Set containerPages = new HashSet(); int containerPageTypeId = -1; try { containerPageTypeId = CmsResourceTypeXmlContainerPage.getContainerPageTypeId(); } catch (CmsLoaderException e) { // will happen during setup, when container page type is not available yet LOG.info(e.getLocalizedMessage(), e); } if (containerPageTypeId != -1) { for (CmsPublishedResource pubRes : updateResources) { try { if (OpenCms.getResourceManager().getResourceType( pubRes.getType()) instanceof CmsResourceTypeXmlContent) { CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); filter.filterStrong(); List relations = adminCms.readRelations(filter); for (CmsRelation relation : relations) { CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { containerPages.add(res); if (CmsJspTagContainer.isDetailContainersPage(adminCms, adminCms.getSitePath(res))) { addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); } } else if (OpenCms.getResourceManager().getResourceType( res.getTypeId()).getTypeName().equals( CmsResourceTypeXmlContainerPage.GROUP_CONTAINER_TYPE_NAME)) { elementGroups.add(res); } } } if (containerPageTypeId == pubRes.getType()) { addDetailContent( adminCms, containerPages, adminCms.getRequestContext().removeSiteRoot(pubRes.getRootPath())); } } catch (CmsException e) { LOG.error(e.getLocalizedMessage(), e); } } for (CmsResource pubRes : elementGroups) { try { CmsRelationFilter filter = CmsRelationFilter.relationsToStructureId(pubRes.getStructureId()); filter.filterStrong(); List relations = adminCms.readRelations(filter); for (CmsRelation relation : relations) { 
CmsResource res = relation.getSource(adminCms, CmsResourceFilter.ALL); if (CmsResourceTypeXmlContainerPage.isContainerPage(res)) { containerPages.add(res); if (CmsJspTagContainer.isDetailContainersPage(adminCms, adminCms.getSitePath(res))) { addDetailContent(adminCms, containerPages, adminCms.getSitePath(res)); } } } } catch (CmsException e) { LOG.error(e.getLocalizedMessage(), e); } } // add all found container pages as published resource objects to the list for (CmsResource page : containerPages) { CmsPublishedResource pubCont = new CmsPublishedResource(page); if (!updateResources.contains(pubCont)) { // ensure container page is added only once updateResources.add(pubCont); } } } return updateResources; } /** * Returns the set of names of all configured document types.

*
     * The names are taken from the document factories registered in the document type map.
     * A factory that is registered for several keys contributes its name once per key.
     *
     * @return the list of names of all configured document types
     */
    protected List<String> getDocumentTypes() {

        List<String> names = new ArrayList<>();
        for (I_CmsDocumentFactory factory : m_documentTypes.values()) {
            names.add(factory.getName());
        }
        return names;
    }

    /**
     * Returns the offline project used for offline indexing.

* * @return the offline project if available */ protected CmsProject getOfflineIndexProject() { CmsProject result = null; for (CmsSearchIndex index : m_offlineIndexes) { try { result = m_adminCms.readProject(index.getProject()); if (!result.isOnlineProject()) { break; } } catch (Exception e) { // may be a missconfigured index, ignore LOG.error(e.getLocalizedMessage(), e); } } return result; } /** * Returns a new thread manager for the indexing threads.

*
     * The manager is created with the currently configured timeout and the
     * maximum number of modifications before a commit.
     *
     * @return a new thread manager for the indexing threads
     */
    protected CmsIndexingThreadManager getThreadManager() {

        CmsIndexingThreadManager manager = new CmsIndexingThreadManager(m_timeout, m_maxModificationsBeforeCommit);
        return manager;
    }

    /**
     * Initializes the available Cms resource types to be indexed.

* * A map stores document factories keyed by a string representing * a colon separated list of Cms resource types and/or mimetypes.

* * The keys of this map are used to trigger a document factory to convert * a Cms resource into a Lucene index document.

* * A document factory is a class implementing the interface * {@link org.opencms.search.documents.I_CmsDocumentFactory}.

*/ protected void initAvailableDocumentTypes() { CmsSearchDocumentType documenttype = null; String className = null; String name = null; I_CmsDocumentFactory documentFactory = null; List resourceTypes = null; List mimeTypes = null; Class c = null; m_documentTypes = new HashMap(); for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) { documenttype = m_documentTypeConfigs.get(i); name = documenttype.getName(); try { className = documenttype.getClassName(); resourceTypes = documenttype.getResourceTypes(); mimeTypes = documenttype.getMimeTypes(); if (name == null) { throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_NAME_0)); } if (className == null) { throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_CLASS_DEF_0)); } if (resourceTypes.size() == 0) { throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0)); } try { c = Class.forName(className); documentFactory = (I_CmsDocumentFactory)c.getConstructor(new Class[] {String.class}).newInstance( new Object[] {name}); } catch (ClassNotFoundException exc) { throw new CmsIndexException( Messages.get().container(Messages.ERR_DOCCLASS_NOT_FOUND_1, className), exc); } catch (Exception exc) { throw new CmsIndexException(Messages.get().container(Messages.ERR_DOCCLASS_INIT_1, className), exc); } if (documentFactory.isUsingCache()) { // init cache if used by the factory documentFactory.setCache(m_extractionResultCache); } for (Iterator key = documentFactory.getDocumentKeys( resourceTypes, mimeTypes).iterator(); key.hasNext();) { m_documentTypes.put(key.next(), documentFactory); } } catch (CmsException e) { if (LOG.isWarnEnabled()) { LOG.warn(Messages.get().getBundle().key(Messages.LOG_DOCTYPE_CONFIG_FAILED_1, name), e); } } } } /** * Initializes the configured search indexes.

* * This initializes also the list of Cms resources types * to be indexed by an index source.

*/ protected void initSearchIndexes() { CmsSearchIndex index = null; for (int i = 0, n = m_indexes.size(); i < n; i++) { index = m_indexes.get(i); // reset disabled flag index.setEnabled(true); // check if the index has been configured correctly if (index.checkConfiguration(m_adminCms)) { // the index is configured correctly try { index.initialize(); } catch (Exception e) { if (CmsLog.INIT.isWarnEnabled()) { // in this case the index will be disabled CmsLog.INIT.warn(Messages.get().getBundle().key(Messages.INIT_SEARCH_INIT_FAILED_1, index), e); } } } // output a log message if the index was successfully configured or not if (CmsLog.INIT.isInfoEnabled()) { if (index.isEnabled()) { CmsLog.INIT.info( Messages.get().getBundle().key(Messages.INIT_INDEX_CONFIGURED_2, index, index.getProject())); } else { CmsLog.INIT.warn( Messages.get().getBundle().key( Messages.INIT_INDEX_NOT_CONFIGURED_2, index, index.getProject())); } } } } /** * Incrementally updates all indexes that have their rebuild mode set to "auto" * after resources have been published.

* * @param adminCms an OpenCms user context with Admin permissions * @param publishHistoryId the history ID of the published project * @param report the report to write the output to */ protected void updateAllIndexes(CmsObject adminCms, CmsUUID publishHistoryId, I_CmsReport report) { int oldPriority = Thread.currentThread().getPriority(); try { SEARCH_MANAGER_LOCK.lock(); Thread.currentThread().setPriority(Thread.MIN_PRIORITY); List publishedResources; try { // read the list of all published resources publishedResources = adminCms.readPublishedResources(publishHistoryId); } catch (CmsException e) { LOG.error( Messages.get().getBundle().key(Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1, publishHistoryId), e); return; } Set bothNewAndDeleted = getIdsOfPublishResourcesWhichAreBothNewAndDeleted(publishedResources); // When published resources with both states 'new' and 'deleted' exist in the same publish job history, the resource has been moved List updateResources = new ArrayList(); for (CmsPublishedResource res : publishedResources) { if (res.isFolder() || res.getState().isUnchanged()) { // folders and unchanged resources don't need to be indexed after publish continue; } if (res.getState().isDeleted() || res.getState().isNew() || res.getState().isChanged()) { if (updateResources.contains(res)) { // resource may have been added as a sibling of another resource // in this case we make sure to use the value from the publish list because of the "deleted" flag boolean hasMoved = bothNewAndDeleted.contains(res.getStructureId()) || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_DESTINATION) || (res.getMovedState() == CmsPublishedResource.STATE_MOVED_SOURCE); // check it this is a moved resource with source / target info, in this case we need both entries if (!hasMoved) { // if the resource was moved, we must contain both entries updateResources.remove(res); } // "equals()" implementation of published resource checks for id, // so the removed value may have a 
different "deleted" or "modified" status value updateResources.add(res); } else { // resource not yet contained in the list updateResources.add(res); // check for the siblings (not for deleted resources, these are already gone) if (!res.getState().isDeleted() && (res.getSiblingCount() > 1)) { // this resource has siblings try { // read siblings from the online project List siblings = adminCms.readSiblings( res.getRootPath(), CmsResourceFilter.ALL); Iterator itSib = siblings.iterator(); while (itSib.hasNext()) { // check all siblings CmsResource sibling = itSib.next(); CmsPublishedResource sib = new CmsPublishedResource(sibling); if (!updateResources.contains(sib)) { // ensure sibling is added only once updateResources.add(sib); } } } catch (CmsException e) { // ignore, just use the original resource if (LOG.isWarnEnabled()) { LOG.warn( Messages.get().getBundle().key( Messages.LOG_UNABLE_TO_READ_SIBLINGS_1, res.getRootPath()), e); } } } } } } findRelatedContainerPages(adminCms, updateResources); if (!updateResources.isEmpty()) { // sort the resource to update Collections.sort(updateResources); // only update the indexes if the list of remaining published resources is not empty Iterator i = m_indexes.iterator(); while (i.hasNext()) { CmsSearchIndex index = i.next(); if (CmsSearchIndex.REBUILD_MODE_AUTO.equals(index.getRebuildMode())) { // only update indexes which have the rebuild mode set to "auto" try { updateIndex(index, report, updateResources); } catch (CmsException e) { LOG.error( Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); } } } } // clean up the extraction result cache cleanExtractionCache(); } finally { SEARCH_MANAGER_LOCK.unlock(); Thread.currentThread().setPriority(oldPriority); } } /** * Updates (if required creates) the index with the given name.

* * If the optional List of {@link CmsPublishedResource} instances is provided, the index will be * incrementally updated for these resources only. If this List is null or empty, * the index will be fully rebuilt.

* * @param index the index to update or rebuild * @param report the report to write output messages to * @param resourcesToIndex an (optional) list of {@link CmsPublishedResource} objects to update in the index * * @throws CmsException if something goes wrong */ protected void updateIndex(CmsSearchIndex index, I_CmsReport report, List resourcesToIndex) throws CmsException { try { SEARCH_MANAGER_LOCK.lock(); // copy the stored admin context for the indexing CmsObject cms = OpenCms.initCmsObject(m_adminCms); // make sure a report is available if (report == null) { report = new CmsLogReport(cms.getRequestContext().getLocale(), CmsSearchManager.class); } // check if the index has been configured correctly if (!index.checkConfiguration(cms)) { // the index is disabled return; } // set site root and project for this index cms.getRequestContext().setSiteRoot("/"); // switch to the index project cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) { // rebuild the complete index // create a new thread manager for the indexing threads CmsIndexingThreadManager threadManager = getThreadManager(); boolean isOfflineIndex = false; if (CmsSearchIndex.REBUILD_MODE_OFFLINE.equals(index.getRebuildMode())) { // disable offline indexing while the complete index is rebuild isOfflineIndex = true; index.setRebuildMode(CmsSearchIndex.REBUILD_MODE_MANUAL); // re-initialize the offline indexes, this will disable this offline index initOfflineIndexes(); } I_CmsIndexWriter writer = null; try { // create a backup of the existing index String backup = index.createIndexBackup(); if (backup != null) { index.indexSearcherOpen(backup); } // create a new index writer writer = index.getIndexWriter(report, true); if (writer instanceof CmsSolrIndexWriter) { try { ((CmsSolrIndexWriter)writer).deleteAllDocuments(); } catch (IOException e) { LOG.error(e.getMessage(), e); } } // output start information on the report 
report.println( Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1, index.getName()), I_CmsReport.FORMAT_HEADLINE); // iterate all configured index sources of this index Iterator sources = index.getSources().iterator(); while (sources.hasNext()) { // get the next index source CmsSearchIndexSource source = sources.next(); // create the indexer I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); // new index creation, use all resources from the index source indexer.rebuildIndex(writer, threadManager, source); // wait for indexing threads to finish while (threadManager.isRunning()) { try { Thread.sleep(500); } catch (InterruptedException e) { // just continue with the loop after interruption LOG.info(e.getLocalizedMessage(), e); } } // commit and optimize the index after each index source has been finished try { writer.commit(); } catch (IOException e) { if (LOG.isWarnEnabled()) { LOG.warn( Messages.get().getBundle().key( Messages.LOG_IO_INDEX_WRITER_COMMIT_2, index.getName(), index.getPath()), e); } } try { writer.optimize(); } catch (IOException e) { if (LOG.isWarnEnabled()) { LOG.warn( Messages.get().getBundle().key( Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_2, index.getName(), index.getPath()), e); } } } if (backup != null) { // remove the backup after the files have been re-indexed index.indexSearcherClose(); index.removeIndexBackup(backup); } // output finish information on the report report.println( Messages.get().container(Messages.RPT_SEARCH_INDEXING_REBUILD_END_1, index.getName()), I_CmsReport.FORMAT_HEADLINE); } finally { if (writer != null) { try { writer.close(); } catch (IOException e) { if (LOG.isWarnEnabled()) { LOG.warn( Messages.get().getBundle().key( Messages.LOG_IO_INDEX_WRITER_CLOSE_2, index.getPath(), index.getName()), e); } } } if (isOfflineIndex) { // reset the mode of the offline index index.setRebuildMode(CmsSearchIndex.REBUILD_MODE_OFFLINE); // re-initialize the offline indexes, this will re-enable this 
index initOfflineIndexes(); } // index has changed - initialize the index searcher instance index.indexSearcherOpen(index.getPath()); } // show information about indexing runtime threadManager.reportStatistics(report); } else { updateIndexIncremental(cms, index, report, resourcesToIndex); } } finally { SEARCH_MANAGER_LOCK.unlock(); } } /** * Incrementally updates the given index.

* * @param cms the OpenCms user context to use for accessing the VFS * @param index the index to update * @param report the report to write output messages to * @param resourcesToIndex a list of {@link CmsPublishedResource} objects to update in the index * * @throws CmsException if something goes wrong */ protected void updateIndexIncremental( CmsObject cms, CmsSearchIndex index, I_CmsReport report, List resourcesToIndex) throws CmsException { try { SEARCH_MANAGER_LOCK.lock(); // update the existing index List updateCollections = new ArrayList(); boolean hasResourcesToDelete = false; boolean hasResourcesToUpdate = false; // iterate all configured index sources of this index Iterator sources = index.getSources().iterator(); while (sources.hasNext()) { // get the next index source CmsSearchIndexSource source = sources.next(); // create the indexer I_CmsIndexer indexer = source.getIndexer().newInstance(cms, report, index); // collect the resources to update CmsSearchIndexUpdateData updateData = indexer.getUpdateData(source, resourcesToIndex); if (!updateData.isEmpty()) { // add the update collection to the internal pipeline updateCollections.add(updateData); hasResourcesToDelete = hasResourcesToDelete | updateData.hasResourcesToDelete(); hasResourcesToUpdate = hasResourcesToUpdate | updateData.hasResourceToUpdate(); } } // only start index modification if required if (hasResourcesToDelete || hasResourcesToUpdate) { // output start information on the report report.println( Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1, index.getName()), I_CmsReport.FORMAT_HEADLINE); I_CmsIndexWriter writer = null; try { // obtain an index writer that updates the current index writer = index.getIndexWriter(report, false); if (hasResourcesToDelete) { // delete the resource from the index Iterator i = updateCollections.iterator(); while (i.hasNext()) { CmsSearchIndexUpdateData updateCollection = i.next(); if (updateCollection.hasResourcesToDelete()) { 
updateCollection.getIndexer().deleteResources( writer, updateCollection.getResourcesToDelete()); } } } if (hasResourcesToUpdate) { // create a new thread manager CmsIndexingThreadManager threadManager = getThreadManager(); Iterator i = updateCollections.iterator(); while (i.hasNext()) { CmsSearchIndexUpdateData updateCollection = i.next(); if (updateCollection.hasResourceToUpdate()) { updateCollection.getIndexer().updateResources( writer, threadManager, updateCollection.getResourcesToUpdate()); } } // wait for indexing threads to finish while (threadManager.isRunning()) { try { Thread.sleep(500); } catch (InterruptedException e) { // just continue with the loop after interruption LOG.info(e.getLocalizedMessage(), e); } } } } finally { // close the index writer if (writer != null) { try { writer.commit(); } catch (IOException e) { LOG.error( Messages.get().getBundle().key( Messages.LOG_IO_INDEX_WRITER_COMMIT_2, index.getName(), index.getPath()), e); } } // index has changed - initialize the index searcher instance index.indexSearcherUpdate(); } // output finish information on the report report.println( Messages.get().container(Messages.RPT_SEARCH_INDEXING_UPDATE_END_1, index.getName()), I_CmsReport.FORMAT_HEADLINE); } } finally { SEARCH_MANAGER_LOCK.unlock(); } } /** * Updates the offline search indexes for the given list of resources.

* * @param report the report to write the index information to * @param resourcesToIndex the list of {@link CmsPublishedResource} objects to index */ protected void updateIndexOffline(I_CmsReport report, List resourcesToIndex) { CmsObject cms = m_adminCms; try { // copy the administration context for the indexing cms = OpenCms.initCmsObject(m_adminCms); // set site root and project for this index cms.getRequestContext().setSiteRoot("/"); } catch (CmsException e) { LOG.error(e.getLocalizedMessage(), e); } Iterator j = m_offlineIndexes.iterator(); while (j.hasNext()) { CmsSearchIndex index = j.next(); if (index.getSources() != null) { try { // switch to the index project cms.getRequestContext().setCurrentProject(cms.readProject(index.getProject())); updateIndexIncremental(cms, index, report, resourcesToIndex); } catch (CmsException e) { LOG.error(Messages.get().getBundle().key(Messages.LOG_UPDATE_INDEX_FAILED_1, index.getName()), e); } } } } /** * Checks if the given containerpage is used as a detail containers and adds the related detail content to the resource set.

*
     * Nothing is added if the page is not a detail containers page. A failure to read the
     * detail content is logged as a warning and otherwise ignored.
     *
     * @param adminCms the cms context
     * @param containerPages the containerpages
     * @param containerPage the container page site path
     */
    private void addDetailContent(CmsObject adminCms, Set<CmsResource> containerPages, String containerPage) {

        if (!CmsJspTagContainer.isDetailContainersPage(adminCms, containerPage)) {
            return;
        }
        try {
            CmsResource detailRes = adminCms.readResource(
                CmsJspTagContainer.getDetailContentPath(containerPage),
                CmsResourceFilter.IGNORE_EXPIRATION);
            containerPages.add(detailRes);
        } catch (Throwable e) {
            // best effort: a missing detail content must not break the collection of related pages
            if (LOG.isWarnEnabled()) {
                LOG.warn(e.getLocalizedMessage(), e);
            }
        }
    }

    /**
     * Creates the Solr core container.

* * @return the created core container */ private CoreContainer createCoreContainer() { CoreContainer container = null; try { // get the core container // still no core container: create it container = CoreContainer.createAndLoad( Paths.get(m_solrConfig.getHome()), m_solrConfig.getSolrFile().toPath()); if (CmsLog.INIT.isInfoEnabled()) { CmsLog.INIT.info( Messages.get().getBundle().key( Messages.INIT_SOLR_CORE_CONTAINER_CREATED_2, m_solrConfig.getHome(), m_solrConfig.getSolrFile().getName())); } } catch (Exception e) { LOG.error( Messages.get().getBundle().key( Messages.ERR_SOLR_CORE_CONTAINER_NOT_CREATED_1, m_solrConfig.getSolrFile().getAbsolutePath()), e); } return container; } /** * Returns the report in the given event data, if null * a new log report is used.

* * @param event the event to get the report for * * @return the report */ private I_CmsReport getEventReport(CmsEvent event) { I_CmsReport report = null; if (event.getData() != null) { report = (I_CmsReport)event.getData().get(I_CmsEventListener.KEY_REPORT); } if (report == null) { report = new CmsLogReport(Locale.ENGLISH, getClass()); } return report; } /** * Gets all structure ids for which published resources of both states 'new' and 'deleted' exist in the given list.

* * @param publishedResources a list of published resources * * @return the set of structure ids that satisfy the condition above */ private Set getIdsOfPublishResourcesWhichAreBothNewAndDeleted( List publishedResources) { Set result = new HashSet(); Set deletedSet = new HashSet(); for (CmsPublishedResource pubRes : publishedResources) { if (pubRes.getState().isNew()) { result.add(pubRes.getStructureId()); } if (pubRes.getState().isDeleted()) { deletedSet.add(pubRes.getStructureId()); } } result.retainAll(deletedSet); return result; } /** * Shuts down the Solr core container.

*
     * All cores except the spellchecker core are unloaded before the container is shut down.
     * Nothing is done if no core container exists.
     */
    private void shutDownSolrContainer() {

        if (m_coreContainer == null) {
            return;
        }

        for (SolrCore core : m_coreContainer.getCores()) {
            // do not unload spellcheck core because otherwise the core.properties file is removed
            // even when calling m_coreContainer.unload(core.getName(), false, false, false);
            boolean isSpellcheckCore = core.getName().equals(CmsSolrSpellchecker.SPELLCHECKER_INDEX_CORE);
            if (!isSpellcheckCore) {
                m_coreContainer.unload(core.getName(), false, false, true);
            }
        }
        m_coreContainer.shutdown();

        if (CmsLog.INIT.isInfoEnabled()) {
            CmsLog.INIT.info(Messages.get().getBundle().key(Messages.INIT_SOLR_SHUTDOWN_SUCCESS_0));
        }
        m_coreContainer = null;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy