
// org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProviderService Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import javax.annotation.Nonnull;
import javax.management.NotCompliantMBeanException;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.FilenameUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.ReferencePolicyOption;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
import org.apache.jackrabbit.oak.cache.CacheStats;
import org.apache.jackrabbit.oak.commons.PropertiesUtil;
import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator;
import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
import org.apache.jackrabbit.oak.spi.commit.BackgroundObserver;
import org.apache.jackrabbit.oak.plugins.index.lucene.score.ScorerProviderFactory;
import org.apache.jackrabbit.oak.spi.commit.BackgroundObserverMBean;
import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.util.InfoStream;
import org.osgi.framework.BundleContext;
import org.osgi.framework.ServiceRegistration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.commons.io.FileUtils.ONE_MB;
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
@SuppressWarnings("UnusedDeclaration")
@Component(metatype = true, label = "Apache Jackrabbit Oak LuceneIndexProvider")
public class LuceneIndexProviderService {
/** Name of the property, looked up through the {@link BundleContext}, that holds the repository home directory. */
public static final String REPOSITORY_HOME = "repository.home";

/** Index provider built in {@code activate}; stays {@code null} when the component is disabled. */
private LuceneIndexProvider indexProvider;

/** OSGi service registrations created by this component; unregistered in {@code deactivate}. */
private final List<ServiceRegistration> regs = Lists.newArrayList();

/** Whiteboard (MBean) registrations created by this component; unregistered in {@code deactivate}. */
private final List<Registration> oakRegs = Lists.newArrayList();

private final Logger log = LoggerFactory.getLogger(getClass());

/**
 * Optional aggregator. It is bound and unbound dynamically and handed to the
 * index provider by {@code initialize()}.
 */
@Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY,
        policyOption = ReferencePolicyOption.GREEDY,
        policy = ReferencePolicy.DYNAMIC
)
private NodeAggregator nodeAggregator;
// ---------------------------------------------------------------------
// OSGi configuration properties. Each String constant is the key looked
// up in the config map; the matching *_DEFAULT constant (where present)
// is the fallback used when the key is absent.
// ---------------------------------------------------------------------

// Default for PROP_DISABLED.
private static final boolean PROP_DISABLED_DEFAULT = false;
// Read in activate(): when true, activate() logs a message and returns
// without initializing anything.
@Property(
boolValue = PROP_DISABLED_DEFAULT,
label = "Disable this component",
description = "If true, this component is disabled."
)
private static final String PROP_DISABLED = "disabled";
// Read in initializeLogging(): when true, LoggingInfoStream is installed as
// Lucene's default InfoStream.
@Property(
boolValue = false,
label = "Enable Debug Logging",
description = "Enables debug logging in Lucene. After enabling this actual logging can be " +
"controlled via changing log level for category 'oak.lucene' to debug")
private static final String PROP_DEBUG = "debug";
// Read in createTracker(): when true, the IndexTracker is created with an IndexCopier.
@Property(
boolValue = true,
label = "Enable CopyOnRead",
description = "Enable copying of Lucene index to local file system to improve query performance"
)
private static final String PROP_COPY_ON_READ = "enableCopyOnReadSupport";
// Read in initializeIndexCopier(): directory handed to the IndexCopier. When
// unset, "<repository.home>/index" is used instead.
@Property(
label = "Local index storage path",
description = "Local file system path where Lucene indexes would be copied when CopyOnRead is enabled. " +
"If not specified then indexes would be stored under 'index' dir under Repository Home"
)
private static final String PROP_LOCAL_INDEX_DIR = "localIndexDir";
// Default for PROP_COPY_ON_WRITE.
private static final boolean PROP_COPY_ON_WRITE_DEFAULT = true;
// Read in registerIndexEditor(): when true, the editor provider is created
// with the IndexCopier, otherwise with a null copier.
@Property(
boolValue = PROP_COPY_ON_WRITE_DEFAULT,
label = "Enable CopyOnWrite",
description = "Enable copying of Lucene index to local file system to improve index writer performance"
)
private static final String PROP_COPY_ON_WRITE = "enableCopyOnWriteSupport";
// Read in registerObserver(): when true, the index provider is wrapped in a
// BackgroundObserver before being registered as an Observer.
@Property(
boolValue = true,
label = "Open index asynchronously",
description = "Enable opening of indexes in asynchronous mode"
)
private static final String PROP_ASYNC_INDEX_OPEN = "enableOpenIndexAsync";
// Default for PROP_THREAD_POOL_SIZE.
private static final int PROP_THREAD_POOL_SIZE_DEFAULT = 5;
// Read in activate() and stored in the threadPoolSize field.
@Property(
intValue = PROP_THREAD_POOL_SIZE_DEFAULT,
label = "Thread pool size",
description = "Thread pool size used to perform various asynchronous task in Oak Lucene"
)
private static final String PROP_THREAD_POOL_SIZE = "threadPoolSize";
// Default for PROP_PREFETCH_INDEX_FILES.
private static final boolean PROP_PREFETCH_INDEX_FILES_DEFAULT = true;
// Read in initializeIndexCopier() and passed to the IndexCopier constructor.
@Property(
boolValue = PROP_PREFETCH_INDEX_FILES_DEFAULT,
label = "Prefetch Index Files",
description = "Prefetch the index files when CopyOnRead is enabled. When enabled all new Lucene" +
" index files would be copied locally before the index is made available to QueryEngine"
)
private static final String PROP_PREFETCH_INDEX_FILES = "prefetchIndexFiles";
// Default for PROP_EXTRACTED_TEXT_CACHE_SIZE (in MB).
private static final int PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT = 20;
// Read in initializeExtractedTextCache(); multiplied by ONE_MB before being
// passed to the ExtractedTextCache constructor.
@Property(
intValue = PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT,
label = "Extracted text cache size (MB)",
description = "Cache size in MB for caching extracted text for some time. When set to 0 then " +
"cache would be disabled"
)
private static final String PROP_EXTRACTED_TEXT_CACHE_SIZE = "extractedTextCacheSizeInMB";
// Default for PROP_EXTRACTED_TEXT_CACHE_EXPIRY (in seconds).
private static final int PROP_EXTRACTED_TEXT_CACHE_EXPIRY_DEFAULT = 300;
// Read in initializeExtractedTextCache() and passed to the ExtractedTextCache constructor.
@Property(
intValue = PROP_EXTRACTED_TEXT_CACHE_EXPIRY_DEFAULT,
label = "Extracted text cache expiry (secs)",
description = "Time in seconds for which the extracted text would be cached in memory"
)
private static final String PROP_EXTRACTED_TEXT_CACHE_EXPIRY = "extractedTextCacheExpiryInSecs";
// Default for PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE.
private static final boolean PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE_DEFAULT = false;
// Read in initializeExtractedTextCache() and passed to the ExtractedTextCache constructor.
@Property(
boolValue = PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE_DEFAULT,
label = "Always use pre-extracted text cache",
description = "By default pre extracted text cache would only be used for reindex case. If this setting " +
"is enabled then it would also be used in normal incremental indexing"
)
private static final String PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE = "alwaysUsePreExtractedCache";
// Default for PROP_BOOLEAN_CLAUSE_LIMIT.
private static final int PROP_BOOLEAN_CLAUSE_LIMIT_DEFAULT = 1024;
// Read in configureBooleanClauseLimit() and applied through
// BooleanQuery.setMaxClauseCount when it differs from the current value.
@Property(
intValue = PROP_BOOLEAN_CLAUSE_LIMIT_DEFAULT,
label = "Boolean Clause Limit",
description = "Limit for number of boolean clauses generated for handling of OR query"
)
private static final String PROP_BOOLEAN_CLAUSE_LIMIT = "booleanClauseLimit";
// Whiteboard wrapping the BundleContext; created in activate() and used for
// every registerMBean call in this class.
private Whiteboard whiteboard;
// Set by registerObserver() only when asynchronous index opening is enabled;
// closed in deactivate().
private BackgroundObserver backgroundObserver;
// Injected reference; passed to the LuceneIndexProvider constructor in activate().
@Reference
ScorerProviderFactory scorerFactory;
// Injected reference; passed to both LuceneIndexProvider and LuceneIndexEditorProvider.
@Reference
private IndexAugmentorFactory augmentorFactory;
// Optional provider that may be bound/unbound at any time; forwarded to the
// extracted text cache by registerExtractedTextProvider().
@Reference(policy = ReferencePolicy.DYNAMIC,
cardinality = ReferenceCardinality.OPTIONAL_UNARY,
policyOption = ReferencePolicyOption.GREEDY
)
private volatile PreExtractedTextProvider extractedTextProvider;
// Created at most once by initializeIndexCopier(); shared by CopyOnRead and CopyOnWrite.
private IndexCopier indexCopier;
// Directory given to the IndexCopier; assigned in initializeIndexCopier().
private File indexDir;
// Created on first use by getExecutorService(); shut down in deactivate().
private ExecutorService executorService;
// Value of PROP_THREAD_POOL_SIZE as read in activate().
private int threadPoolSize;
// Created in initializeExtractedTextCache(); handed to the index editor provider.
private ExtractedTextCache extractedTextCache;
/**
 * Activates the component: unless disabled by configuration, it sets up the
 * Lucene index provider, the observer, the index editor provider and the
 * related MBeans, and registers them with the OSGi framework / whiteboard.
 *
 * @param bundleContext context used for service registration and property lookup
 * @param config component configuration keyed by the {@code PROP_*} constants
 * @throws NotCompliantMBeanException declared for MBean set-up performed during activation
 * @throws IOException if setting up the index copier fails
 */
@Activate
private void activate(BundleContext bundleContext, Map config)
        throws NotCompliantMBeanException, IOException {
    if (PropertiesUtil.toBoolean(config.get(PROP_DISABLED), PROP_DISABLED_DEFAULT)) {
        log.info("Component disabled by configuration");
        return;
    }

    // Global Lucene settings first.
    configureBooleanClauseLimit(config);
    initializeFactoryClassLoaders(getClass().getClassLoader());

    // The whiteboard and pool size must be in place before any helper
    // registers an MBean or asks for the executor.
    whiteboard = new OsgiWhiteboard(bundleContext);
    threadPoolSize = PropertiesUtil.toInteger(config.get(PROP_THREAD_POOL_SIZE), PROP_THREAD_POOL_SIZE_DEFAULT);
    initializeExtractedTextCache(bundleContext, config);

    IndexTracker tracker = createTracker(bundleContext, config);
    indexProvider = new LuceneIndexProvider(tracker, scorerFactory, augmentorFactory);
    initializeLogging(config);
    initialize();

    regs.add(bundleContext.registerService(QueryIndexProvider.class.getName(), indexProvider, null));
    registerObserver(bundleContext, config);
    registerIndexEditor(bundleContext, config);

    LuceneIndexMBeanImpl indexStats = new LuceneIndexMBeanImpl(indexProvider.getTracker());
    oakRegs.add(registerMBean(whiteboard,
            LuceneIndexMBean.class,
            indexStats,
            LuceneIndexMBean.TYPE,
            "Lucene Index statistics"));
}
/**
 * Deactivates the component: unregisters all services and MBeans, closes the
 * background observer, the index provider and the index copier, then shuts
 * down the executor and resets Lucene's default {@link InfoStream}.
 *
 * <p>The executor shutdown and the InfoStream reset run in {@code finally}
 * blocks so that a failure while closing one of the earlier resources does
 * not leave the thread pool running or the logging stream installed.
 *
 * @throws InterruptedException if interrupted while waiting for the executor to terminate
 * @throws IOException if closing the index copier (or provider) fails
 */
@Deactivate
private void deactivate() throws InterruptedException, IOException {
    try {
        for (ServiceRegistration reg : regs) {
            reg.unregister();
        }
        for (Registration reg : oakRegs) {
            reg.unregister();
        }

        if (backgroundObserver != null) {
            backgroundObserver.close();
        }

        if (indexProvider != null) {
            indexProvider.close();
            indexProvider = null;
        }

        //Close the copier first i.e. before executorService
        if (indexCopier != null) {
            indexCopier.close();
        }
    } finally {
        try {
            if (executorService != null) {
                executorService.shutdown();
                executorService.awaitTermination(1, TimeUnit.MINUTES);
            }
        } finally {
            InfoStream.setDefault(InfoStream.NO_OUTPUT);
        }
    }
}
/**
 * Returns the index copier created during activation.
 *
 * @return the copier, or {@code null} if none has been initialized
 */
IndexCopier getIndexCopier() {
    return this.indexCopier;
}
/**
 * Returns the extracted text cache created during activation.
 *
 * @return the cache, or {@code null} if it has not been initialized
 */
ExtractedTextCache getExtractedTextCache() {
    return this.extractedTextCache;
}
/**
 * Pushes the currently bound {@code NodeAggregator} (possibly {@code null})
 * to the index provider. Does nothing while no index provider exists, which
 * is the case before activation and after deactivation.
 */
private void initialize() {
    if (indexProvider != null) {
        if (nodeAggregator != null) {
            log.debug("Using NodeAggregator {}", nodeAggregator.getClass());
        }
        indexProvider.setAggregator(nodeAggregator);
    }
}
/**
 * Installs {@code LoggingInfoStream} as Lucene's default {@link InfoStream}
 * when the debug property is enabled; otherwise leaves the default untouched.
 *
 * @param config component configuration
 */
private void initializeLogging(Map config) {
    if (!PropertiesUtil.toBoolean(config.get(PROP_DEBUG), false)) {
        return;
    }
    InfoStream.setDefault(LoggingInfoStream.INSTANCE);
    log.info("Registered LoggingInfoStream with Lucene. Lucene logs can be enabled " +
            "now via category [{}]", LoggingInfoStream.PREFIX);
}
/**
 * Creates the {@code LuceneIndexEditorProvider}, registers it as an
 * {@code IndexEditorProvider} service and exposes its text extraction
 * statistics as an MBean. With CopyOnWrite enabled the provider is backed by
 * the shared index copier; otherwise it is created with a {@code null} copier.
 *
 * @param bundleContext context used for service registration
 * @param config component configuration
 * @throws IOException if the index copier cannot be initialized
 */
private void registerIndexEditor(BundleContext bundleContext, Map config) throws IOException {
    final boolean copyOnWrite =
            PropertiesUtil.toBoolean(config.get(PROP_COPY_ON_WRITE), PROP_COPY_ON_WRITE_DEFAULT);

    final LuceneIndexEditorProvider provider;
    if (!copyOnWrite) {
        provider = new LuceneIndexEditorProvider(null, extractedTextCache, augmentorFactory);
    } else {
        initializeIndexCopier(bundleContext, config);
        provider = new LuceneIndexEditorProvider(indexCopier, extractedTextCache, augmentorFactory);
        log.info("Enabling CopyOnWrite support. Index files would be copied under {}", indexDir.getAbsolutePath());
    }

    regs.add(bundleContext.registerService(IndexEditorProvider.class.getName(), provider, null));
    oakRegs.add(registerMBean(whiteboard,
            TextExtractionStatsMBean.class,
            provider.getExtractedTextCache().getStatsMBean(),
            TextExtractionStatsMBean.TYPE,
            "TextExtraction statistics"));
}
/**
 * Builds the {@code IndexTracker} for the index provider. With CopyOnRead
 * enabled (the default) the tracker is backed by the shared index copier.
 *
 * @param bundleContext context used to resolve the repository home
 * @param config component configuration
 * @return a new tracker, with or without copier support
 * @throws IOException if the index copier cannot be initialized
 */
private IndexTracker createTracker(BundleContext bundleContext, Map config) throws IOException {
    final boolean copyOnRead = PropertiesUtil.toBoolean(config.get(PROP_COPY_ON_READ), true);
    if (!copyOnRead) {
        return new IndexTracker();
    }
    initializeIndexCopier(bundleContext, config);
    log.info("Enabling CopyOnRead support. Index files would be copied under {}", indexDir.getAbsolutePath());
    return new IndexTracker(indexCopier);
}
/**
 * Lazily creates the shared {@code IndexCopier} and registers its statistics
 * MBean. Subsequent calls are no-ops once a copier exists.
 *
 * <p>The local index directory is taken from {@code PROP_LOCAL_INDEX_DIR};
 * when that is unset or empty it falls back to the {@code index} directory
 * under the repository home. An empty configured path is normalised to
 * {@code null}, so that a missing repository home is reported by the
 * null-check instead of silently producing {@code new File("")}.
 *
 * @param bundleContext context used to look up {@link #REPOSITORY_HOME}
 * @param config component configuration
 * @throws IOException if the copier cannot be created
 * @throws NullPointerException if neither the index directory nor the repository home is defined
 */
private void initializeIndexCopier(BundleContext bundleContext, Map config) throws IOException {
    if (indexCopier != null) {
        return;
    }

    String indexDirPath = Strings.emptyToNull(
            PropertiesUtil.toString(config.get(PROP_LOCAL_INDEX_DIR), null));
    boolean prefetchEnabled = PropertiesUtil.toBoolean(config.get(PROP_PREFETCH_INDEX_FILES),
            PROP_PREFETCH_INDEX_FILES_DEFAULT);

    if (indexDirPath == null) {
        String repoHome = bundleContext.getProperty(REPOSITORY_HOME);
        if (repoHome != null) {
            indexDirPath = FilenameUtils.concat(repoHome, "index");
        }
    }

    checkNotNull(indexDirPath, "Index directory cannot be determined as neither index " +
            "directory path [%s] nor repository home [%s] defined", PROP_LOCAL_INDEX_DIR, REPOSITORY_HOME);

    if (prefetchEnabled) {
        log.info("Prefetching of index files enabled. Index would be opened after copying all new files locally");
    }

    indexDir = new File(indexDirPath);
    indexCopier = new IndexCopier(getExecutorService(), indexDir, prefetchEnabled);

    oakRegs.add(registerMBean(whiteboard,
            CopyOnReadStatsMBean.class,
            indexCopier,
            CopyOnReadStatsMBean.TYPE,
            "IndexCopier support statistics"));
}
/**
 * Returns the shared executor, creating it on first use.
 *
 * @return the executor used for asynchronous work in this component
 */
private ExecutorService getExecutorService() {
    if (executorService != null) {
        return executorService;
    }
    executorService = createExecutor();
    return executorService;
}
/**
 * Creates the thread pool used for asynchronous work, sized by the configured
 * {@code threadPoolSize}.
 *
 * <p>Core and maximum pool size are both set to the configured size. With an
 * unbounded queue a {@link ThreadPoolExecutor} only starts threads beyond the
 * core size once the queue is full, which never happens, so a core size of 0
 * would effectively yield a single worker. Core threads may time out, so an
 * idle pool still shrinks to zero threads.
 *
 * @return a new executor whose daemon threads run at minimum priority and are
 *         named {@code oak-lucene-N}
 */
private ExecutorService createExecutor() {
    // ThreadPoolExecutor rejects a maximum pool size below 1.
    final int poolSize = Math.max(1, threadPoolSize);
    ThreadPoolExecutor executor = new ThreadPoolExecutor(poolSize, poolSize, 60L, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>(), new ThreadFactory() {
        private final AtomicInteger counter = new AtomicInteger();
        private final Thread.UncaughtExceptionHandler handler = new Thread.UncaughtExceptionHandler() {
            @Override
            public void uncaughtException(Thread t, Throwable e) {
                log.warn("Error occurred in asynchronous processing ", e);
            }
        };

        @Override
        public Thread newThread(@Nonnull Runnable r) {
            Thread thread = new Thread(r, createName());
            thread.setDaemon(true);
            thread.setPriority(Thread.MIN_PRIORITY);
            thread.setUncaughtExceptionHandler(handler);
            return thread;
        }

        private String createName() {
            return "oak-lucene-" + counter.getAndIncrement();
        }
    });
    // Let idle core threads terminate after the 60 second keep-alive.
    executor.allowCoreThreadTimeOut(true);
    return executor;
}
/**
 * Registers the index provider as an {@code Observer} service. With
 * asynchronous index opening enabled (the default) the provider is wrapped in
 * a {@code BackgroundObserver} running on the shared executor, and that
 * observer's MBean is registered as well.
 *
 * @param bundleContext context used for service registration
 * @param config component configuration
 */
private void registerObserver(BundleContext bundleContext, Map config) {
    final Observer observer;
    if (PropertiesUtil.toBoolean(config.get(PROP_ASYNC_INDEX_OPEN), true)) {
        // NOTE(review): the third argument (5) is presumably the observer queue length - confirm against BackgroundObserver
        backgroundObserver = new BackgroundObserver(indexProvider, getExecutorService(), 5);
        oakRegs.add(registerMBean(whiteboard,
                BackgroundObserverMBean.class,
                backgroundObserver.getMBean(),
                BackgroundObserverMBean.TYPE,
                "LuceneIndexConfigObserver queue stats"));
        log.info("Registering the LuceneIndexProvider as a BackgroundObserver");
        observer = backgroundObserver;
    } else {
        observer = indexProvider;
    }
    regs.add(bundleContext.registerService(Observer.class.getName(), observer, null));
}
/**
 * Reloads the Lucene analysis factories with the given class loader while it
 * is temporarily installed as the thread context class loader. Any failure is
 * logged as a warning and otherwise ignored; the original context class
 * loader is always restored.
 *
 * @param classLoader class loader the factories should use for lookup
 */
private void initializeFactoryClassLoaders(ClassLoader classLoader) {
    final Thread current = Thread.currentThread();
    final ClassLoader previous = current.getContextClassLoader();
    try {
        //Access TokenizerFactory etc trigger a static initialization
        //so switch the TCCL so that static initializer picks up the right
        //classloader
        current.setContextClassLoader(classLoader);
        initializeFactoryClassLoaders0(classLoader);
    } catch (Throwable t) {
        log.warn("Error occurred while initializing the Lucene " +
                "Factories", t);
    } finally {
        current.setContextClassLoader(previous);
    }
}
/**
 * Reloads the tokenizer, char filter and token filter factories using the
 * supplied class loader.
 *
 * <p>By default the factories look up SPI classes through the thread context
 * class loader, which does not work in OSGi, hence the explicit reload with
 * the bundle class loader.
 *
 * @param classLoader class loader to reload the factories with
 */
private void initializeFactoryClassLoaders0(ClassLoader classLoader) {
    TokenizerFactory.reloadTokenizers(classLoader);
    CharFilterFactory.reloadCharFilters(classLoader);
    TokenFilterFactory.reloadTokenFilters(classLoader);
}
/**
 * Creates the extracted text cache from the configured size, expiry and
 * "always use" settings, attaches an already bound pre-extracted text
 * provider, and registers the cache statistics MBean when the cache exposes
 * statistics.
 *
 * @param bundleContext not used by this method
 * @param config component configuration
 */
private void initializeExtractedTextCache(BundleContext bundleContext, Map config) {
    final int sizeInMB = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_SIZE),
            PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT);
    final int expiryInSecs = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_EXPIRY),
            PROP_EXTRACTED_TEXT_CACHE_EXPIRY_DEFAULT);
    final boolean alwaysUse = PropertiesUtil.toBoolean(config.get(PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE),
            PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE_DEFAULT);

    extractedTextCache = new ExtractedTextCache(sizeInMB * ONE_MB, expiryInSecs, alwaysUse);
    if (extractedTextProvider != null) {
        registerExtractedTextProvider(extractedTextProvider);
    }

    final CacheStats stats = extractedTextCache.getCacheStats();
    if (stats == null) {
        return;
    }
    oakRegs.add(registerMBean(whiteboard,
            CacheStatsMBean.class, stats,
            CacheStatsMBean.TYPE, stats.getName()));
    log.info("Extracted text caching enabled with maxSize {} MB, expiry time {} secs",
            sizeInMB, expiryInSecs);
}
/**
 * Hands the given provider to the extracted text cache and logs whether a
 * provider is being registered or unregistered. Does nothing while the cache
 * has not been created yet.
 *
 * @param provider provider to install, or {@code null} to remove the current one
 */
private void registerExtractedTextProvider(PreExtractedTextProvider provider) {
    if (extractedTextCache == null) {
        return;
    }
    if (provider == null) {
        log.info("Unregistering PreExtractedTextProvider with extracted text cache");
    } else {
        final String usage;
        if (extractedTextCache.isAlwaysUsePreExtractedCache()) {
            usage = "always";
        } else {
            usage = "only during reindexing phase";
        }
        log.info("Registering PreExtractedTextProvider {} with extracted text cache. " +
                "It would be used {}", provider, usage);
    }
    extractedTextCache.setExtractedTextProvider(provider);
}
/**
 * Applies the configured boolean clause limit to Lucene's
 * {@code BooleanQuery} when it differs from the limit currently in effect.
 *
 * @param config component configuration
 */
private void configureBooleanClauseLimit(Map config) {
    final int limit = PropertiesUtil.toInteger(config.get(PROP_BOOLEAN_CLAUSE_LIMIT),
            PROP_BOOLEAN_CLAUSE_LIMIT_DEFAULT);
    if (limit == BooleanQuery.getMaxClauseCount()) {
        return;
    }
    BooleanQuery.setMaxClauseCount(limit);
    log.info("Changed the Max boolean clause limit to {}", limit);
}
/**
 * Bind callback for the optional {@code NodeAggregator} reference: stores the
 * aggregator and forwards it to the index provider if one exists.
 *
 * @param aggregator the aggregator being bound
 */
protected void bindNodeAggregator(NodeAggregator aggregator) {
    nodeAggregator = aggregator;
    initialize();
}
/**
 * Unbind callback for the optional {@code NodeAggregator} reference.
 *
 * <p>The reference is dynamic and unary, so when a replacement service is
 * available the new aggregator is bound before the old one is unbound. The
 * field is therefore cleared only if the aggregator being unbound is still
 * the current one; otherwise the freshly bound replacement would be dropped.
 *
 * @param aggregator the aggregator being unbound
 */
protected void unbindNodeAggregator(NodeAggregator aggregator) {
    if (this.nodeAggregator == aggregator) {
        this.nodeAggregator = null;
        initialize();
    }
}
/**
 * Bind callback for the optional {@code PreExtractedTextProvider} reference:
 * stores the provider and registers it with the extracted text cache if the
 * cache already exists.
 *
 * @param preExtractedTextProvider the provider being bound
 */
protected void bindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider) {
    extractedTextProvider = preExtractedTextProvider;
    registerExtractedTextProvider(preExtractedTextProvider);
}
/**
 * Unbind callback for the optional {@code PreExtractedTextProvider} reference.
 *
 * <p>The reference is dynamic and unary, so a replacement provider may be
 * bound before the old one is unbound. The provider is removed from this
 * component and from the extracted text cache only if the one being unbound
 * is still the current one; otherwise the replacement would be discarded.
 *
 * @param preExtractedTextProvider the provider being unbound
 */
protected void unbindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider) {
    if (this.extractedTextProvider == preExtractedTextProvider) {
        this.extractedTextProvider = null;
        registerExtractedTextProvider(null);
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy