All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexProviderService Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.jackrabbit.oak.plugins.index.lucene;

import java.io.File;
import java.io.IOException;
import java.util.Dictionary;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.jackrabbit.guava.common.base.Strings;
import org.apache.jackrabbit.guava.common.collect.ImmutableMap;
import org.apache.jackrabbit.guava.common.collect.Lists;
import org.apache.commons.io.FilenameUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.ReferenceCardinality;
import org.apache.felix.scr.annotations.ReferencePolicy;
import org.apache.felix.scr.annotations.ReferencePolicyOption;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
import org.apache.jackrabbit.oak.api.jmx.CheckpointMBean;
import org.apache.jackrabbit.oak.cache.CacheStats;
import org.apache.jackrabbit.oak.commons.PropertiesUtil;
import org.apache.jackrabbit.oak.osgi.OsgiWhiteboard;
import org.apache.jackrabbit.oak.plugins.document.spi.JournalPropertyService;
import org.apache.jackrabbit.oak.plugins.index.AsyncIndexInfoService;
import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
import org.apache.jackrabbit.oak.plugins.index.IndexInfoProvider;
import org.apache.jackrabbit.oak.plugins.index.IndexPathService;
import org.apache.jackrabbit.oak.plugins.index.fulltext.PreExtractedTextProvider;
import org.apache.jackrabbit.oak.plugins.index.importer.IndexImporterProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.directory.ActiveDeletedBlobCollectorFactory;
import org.apache.jackrabbit.oak.plugins.index.lucene.directory.BufferedOakDirectory;
import org.apache.jackrabbit.oak.plugins.index.lucene.directory.LuceneIndexFileSystemStatistics;
import org.apache.jackrabbit.oak.plugins.index.lucene.directory.LuceneIndexImporter;
import org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.DocumentQueue;
import org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.ExternalObserverBuilder;
import org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.LocalIndexObserver;
import org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.LuceneJournalPropertyService;
import org.apache.jackrabbit.oak.plugins.index.lucene.hybrid.NRTIndexFactory;
import org.apache.jackrabbit.oak.plugins.index.lucene.property.PropertyIndexCleaner;
import org.apache.jackrabbit.oak.plugins.index.lucene.reader.DefaultIndexReaderFactory;
import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.TextExtractionStatsMBean;
import org.apache.jackrabbit.oak.spi.blob.GarbageCollectableBlobStore;
import org.apache.jackrabbit.oak.spi.commit.BackgroundObserver;
import org.apache.jackrabbit.oak.spi.commit.BackgroundObserverMBean;
import org.apache.jackrabbit.oak.spi.commit.Observer;
import org.apache.jackrabbit.oak.spi.gc.GCMonitor;
import org.apache.jackrabbit.oak.spi.mount.MountInfoProvider;
import org.apache.jackrabbit.oak.spi.query.QueryIndex;
import org.apache.jackrabbit.oak.spi.query.QueryIndexProvider;
import org.apache.jackrabbit.oak.spi.state.Clusterable;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.apache.jackrabbit.oak.spi.whiteboard.Registration;
import org.apache.jackrabbit.oak.spi.whiteboard.Whiteboard;
import org.apache.jackrabbit.oak.stats.Clock;
import org.apache.jackrabbit.oak.stats.StatisticsProvider;
import org.apache.lucene.analysis.util.CharFilterFactory;
import org.apache.lucene.analysis.util.TokenFilterFactory;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.util.InfoStream;
import org.jetbrains.annotations.NotNull;
import org.osgi.framework.BundleContext;
import org.osgi.framework.ServiceRegistration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
import static java.util.Collections.emptyMap;
import static org.apache.commons.io.FileUtils.ONE_MB;
import static org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.TYPE_LUCENE;
import static org.apache.jackrabbit.oak.spi.blob.osgi.SplitBlobStoreService.ONLY_STANDALONE_TARGET;
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.registerMBean;
import static org.apache.jackrabbit.oak.spi.whiteboard.WhiteboardUtils.scheduleWithFixedDelay;

@SuppressWarnings("UnusedDeclaration")
@Component(metatype = true, label = "Apache Jackrabbit Oak LuceneIndexProvider")
public class LuceneIndexProviderService {
    public static final String REPOSITORY_HOME = "repository.home";

    private LuceneIndexProvider indexProvider;

    private final List regs = Lists.newArrayList();
    private final List oakRegs = Lists.newArrayList();

    private final Logger log = LoggerFactory.getLogger(getClass());

    @Reference(cardinality = ReferenceCardinality.OPTIONAL_UNARY,
            policyOption = ReferencePolicyOption.GREEDY,
            policy = ReferencePolicy.DYNAMIC
    )
    private QueryIndex.NodeAggregator nodeAggregator;

    private static final boolean PROP_DISABLED_DEFAULT = false;

    @Property(
            boolValue = PROP_DISABLED_DEFAULT,
            label = "Disable this component",
            description = "If true, this component is disabled."
    )
    private static final String PROP_DISABLED = "disabled";

    @Property(
            boolValue = false,
            label = "Enable Debug Logging",
            description = "Enables debug logging in Lucene. After enabling this actual logging can be " +
            "controlled via changing log level for category 'oak.lucene' to debug")
    private static final String PROP_DEBUG = "debug";

    @Property(
            boolValue = true,
            label = "Enable CopyOnRead",
            description = "Enable copying of Lucene index to local file system to improve query performance",
            propertyPrivate = true
    )
    private static final String PROP_COPY_ON_READ = "enableCopyOnReadSupport";

    @Property(
            label = "Local index storage path",
            description = "Local file system path where Lucene indexes would be copied when CopyOnRead is enabled. " +
                    "If not specified then indexes would be stored under 'index' dir under Repository Home"
    )
    private static final String PROP_LOCAL_INDEX_DIR = "localIndexDir";


    private static final boolean PROP_COPY_ON_WRITE_DEFAULT = true;
    @Property(
            boolValue = PROP_COPY_ON_WRITE_DEFAULT,
            label = "Enable CopyOnWrite",
            description = "Enable copying of Lucene index to local file system to improve index writer performance",
            propertyPrivate = true
    )
    private static final String PROP_COPY_ON_WRITE = "enableCopyOnWriteSupport";

    @Property(
            boolValue = true,
            label = "Open index asynchronously",
            description = "Enable opening of indexes in asynchronous mode"
    )
    private static final String PROP_ASYNC_INDEX_OPEN = "enableOpenIndexAsync";

    private static final int PROP_THREAD_POOL_SIZE_DEFAULT = 5;
    @Property(
            intValue = PROP_THREAD_POOL_SIZE_DEFAULT,
            label = "Thread pool size",
            description = "Thread pool size used to perform various asynchronous task in Oak Lucene"
    )
    private static final String PROP_THREAD_POOL_SIZE = "threadPoolSize";

    private static final boolean PROP_PREFETCH_INDEX_FILES_DEFAULT = true;
    @Property(
            boolValue = PROP_PREFETCH_INDEX_FILES_DEFAULT,
            label = "Prefetch Index Files",
            description = "Prefetch the index files when CopyOnRead is enabled. When enabled all new Lucene" +
                    " index files would be copied locally before the index is made available to QueryEngine"
    )
    private static final String PROP_PREFETCH_INDEX_FILES = "prefetchIndexFiles";

    private static final int PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT = 20;
    @Property(
            intValue = PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT,
            label = "Extracted text cache size (MB)",
            description = "Cache size in MB for caching extracted text for some time. When set to 0 then " +
                    "cache would be disabled"
    )
    private static final String PROP_EXTRACTED_TEXT_CACHE_SIZE = "extractedTextCacheSizeInMB";

    private static final int PROP_EXTRACTED_TEXT_CACHE_EXPIRY_DEFAULT = 300;
    @Property(
            intValue = PROP_EXTRACTED_TEXT_CACHE_EXPIRY_DEFAULT,
            label = "Extracted text cache expiry (secs)",
            description = "Time in seconds for which the extracted text would be cached in memory"
    )
    private static final String PROP_EXTRACTED_TEXT_CACHE_EXPIRY = "extractedTextCacheExpiryInSecs";

    private static final boolean PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE_DEFAULT = false;
    @Property(
            boolValue = PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE_DEFAULT,
            label = "Always use pre-extracted text cache",
            description = "By default pre extracted text cache would only be used for reindex case. If this setting " +
                    "is enabled then it would also be used in normal incremental indexing"
    )
    private static final String PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE = "alwaysUsePreExtractedCache";

    private static final int PROP_BOOLEAN_CLAUSE_LIMIT_DEFAULT = 1024;
    @Property(
            intValue = PROP_BOOLEAN_CLAUSE_LIMIT_DEFAULT,
            label = "Boolean Clause Limit",
            description = "Limit for number of boolean clauses generated for handling of OR query"
    )
    private static final String PROP_BOOLEAN_CLAUSE_LIMIT = "booleanClauseLimit";

    private static final boolean PROP_HYBRID_INDEXING_DEFAULT = true;
    @Property(
            boolValue = PROP_HYBRID_INDEXING_DEFAULT,
            label = "Hybrid Indexing",
            description = "When enabled Lucene NRT Indexing mode would be enabled"
    )
    private static final String PROP_HYBRID_INDEXING = "enableHybridIndexing";

    private static final int PROP_HYBRID_QUEUE_SIZE_DEFAULT = 10000;
    @Property(
            intValue = PROP_HYBRID_QUEUE_SIZE_DEFAULT,
            label = "Queue size",
            description = "Size of in memory queue used for storing Lucene Documents which need to be " +
                    "added to local index"
    )
    private static final String PROP_HYBRID_QUEUE_SIZE = "hybridQueueSize";

    public static final long PROP_HYBRID_QUEUE_TIMEOUT_DEFAULT = 100;
    @Property(
            longValue = PROP_HYBRID_QUEUE_TIMEOUT_DEFAULT,
            label = "Queue timeout",
            description = "Maximum time to wait for adding entries to the queue used for storing Lucene Documents which need to be " +
                    "added to local index"
    )
    private static final String PROP_HYBRID_QUEUE_TIMEOUT = "hybridQueueTimeout";

    private static final boolean PROP_DISABLE_DEFN_STORAGE_DEFAULT = false;
    @Property(
            boolValue = PROP_DISABLE_DEFN_STORAGE_DEFAULT,
            label = "Disable index definition storage",
            description = "By default index definitions would be stored at time of reindexing to ensure that future " +
                    "modifications to it are not effective untill index is reindex. Set this to true would disable " +
                    "this feature"
    )
    private static final String PROP_DISABLE_STORED_INDEX_DEFINITION = "disableStoredIndexDefinition";

    private static final boolean PROP_DELETED_BLOB_COLLECTION_DEFAULT_ENABLED = true;
    @Property(
            boolValue = PROP_DELETED_BLOB_COLLECTION_DEFAULT_ENABLED,
            label = "Enable actively removing deleted index blobs from blob store",
            description = "Index blobs are explicitly unique and don't require mark-sweep type collection." +
                    "This is used to enable the feature. Cleanup implies purging index blobs marked as deleted " +
                    "earlier during some indexing cycle."
    )
    private static final String PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_ENABLED = "deletedBlobsCollectionEnabled";

    private static final int LUCENE_INDEX_STATS_UPDATE_INTERVAL_DEFAULT = 300;
    @Property(
            intValue = LUCENE_INDEX_STATS_UPDATE_INTERVAL_DEFAULT,
            label = "Lucene index stats update interval (seconds)",
            description = "Delay in seconds after which Lucene stats are updated in async index update cycle."
    )
    private static final String LUCENE_INDEX_STATS_UPDATE_INTERVAL = "luceneIndexStatsUpdateInterval";

    private static final int PROP_INDEX_CLEANER_INTERVAL_DEFAULT = 10*60;
    @Property(
            intValue = PROP_INDEX_CLEANER_INTERVAL_DEFAULT,
            label = "Property Index Cleaner Interval (seconds)",
            description = "Cleaner interval time (in seconds) for synchronous property indexes configured as " +
                    "part of lucene indexes"
    )
    private static final String PROP_INDEX_CLEANER_INTERVAL = "propIndexCleanerIntervalInSecs";

    private static final boolean PROP_ENABLE_SINGLE_BLOB_PER_INDEX_FILE_DEFAULT = true;
    @Property(
            boolValue = PROP_ENABLE_SINGLE_BLOB_PER_INDEX_FILE_DEFAULT,
            label = "With CoW enabled, should index files by written as single blobs",
            description = "Index files can be written as single blobs as chunked into smaller blobs. Enable" +
                    " this to write single blob per index file. This would reduce number of blobs in the data store."
    )
    private static final String PROP_NAME_ENABLE_SINGLE_BLOB_PER_INDEX_FILE = "enableSingleBlobIndexFiles";

    private static final long PROP_INDEX_FILESYSTEM_STATS_INTERVAL_DEFAULT = 300;
    @Property(
            longValue = PROP_INDEX_FILESYSTEM_STATS_INTERVAL_DEFAULT,
            label = "Lucene Index File System Stats Interval (seconds)",
            description = "Interval (in seconds) for calculation of File System metrics for Lucene Index such as Local Index Directory Size"
    )
    private static final String PROP_INDEX_FILESYSTEM_STATS_INTERVAL = "propIndexFSStatsIntervalInSecs";

    private final Clock clock = Clock.SIMPLE;

    private Whiteboard whiteboard;

    private BackgroundObserver backgroundObserver;

    private BackgroundObserver externalIndexObserver;

    @Reference
    private IndexAugmentorFactory augmentorFactory;

    @Reference
    private StatisticsProvider statisticsProvider;

    @Reference(policy = ReferencePolicy.DYNAMIC,
            cardinality = ReferenceCardinality.OPTIONAL_UNARY,
            policyOption = ReferencePolicyOption.GREEDY
    )
    private volatile PreExtractedTextProvider extractedTextProvider;

    @Reference
    private MountInfoProvider mountInfoProvider;

    @Reference
    private NodeStore nodeStore;

    @Reference
    private IndexPathService indexPathService;

    @Reference
    private AsyncIndexInfoService asyncIndexInfoService;

    @Reference(
            cardinality = ReferenceCardinality.OPTIONAL_UNARY,
            policy = ReferencePolicy.STATIC,
            policyOption = ReferencePolicyOption.GREEDY,
            target = ONLY_STANDALONE_TARGET
    )
    private GarbageCollectableBlobStore blobStore;

    @Reference
    private CheckpointMBean checkpointMBean;

    private IndexCopier indexCopier;

    private ActiveDeletedBlobCollectorFactory.ActiveDeletedBlobCollector activeDeletedBlobCollector;

    private File indexDir;

    private ExecutorService executorService;

    private int threadPoolSize;

    private ExtractedTextCache extractedTextCache;

    private boolean hybridIndex;

    private NRTIndexFactory nrtIndexFactory;

    private DocumentQueue documentQueue;

    private LuceneIndexEditorProvider editorProvider;

    private IndexTracker tracker;

    private PropertyIndexCleaner cleaner;
    private AsyncIndexesSizeStatsUpdate asyncIndexesSizeStatsUpdate;

    @Activate
    private void activate(BundleContext bundleContext, Map config) throws IOException {
        asyncIndexesSizeStatsUpdate = new AsyncIndexesSizeStatsUpdateImpl(
                PropertiesUtil.toLong(config.get(LUCENE_INDEX_STATS_UPDATE_INTERVAL),
                        LUCENE_INDEX_STATS_UPDATE_INTERVAL_DEFAULT) * 1000); // convert seconds to millis
        boolean disabled = PropertiesUtil.toBoolean(config.get(PROP_DISABLED), PROP_DISABLED_DEFAULT);
        hybridIndex = PropertiesUtil.toBoolean(config.get(PROP_HYBRID_INDEXING), PROP_DISABLED_DEFAULT);

        if (disabled) {
            log.info("Component disabled by configuration");
            return;
        }

        configureIndexDefinitionStorage(config);
        configureBooleanClauseLimit(config);
        initializeFactoryClassLoaders(getClass().getClassLoader());
        if (System.getProperty(BufferedOakDirectory.ENABLE_WRITING_SINGLE_BLOB_INDEX_FILE_PARAM) == null) {
            BufferedOakDirectory.setEnableWritingSingleBlobIndexFile(PropertiesUtil.toBoolean(
                    config.get(PROP_NAME_ENABLE_SINGLE_BLOB_PER_INDEX_FILE),
                    PROP_ENABLE_SINGLE_BLOB_PER_INDEX_FILE_DEFAULT));
        } else {
            log.info("Not setting config for single blob for an index file as it's set by command line!");
        }

        whiteboard = new OsgiWhiteboard(bundleContext);
        threadPoolSize = PropertiesUtil.toInteger(config.get(PROP_THREAD_POOL_SIZE), PROP_THREAD_POOL_SIZE_DEFAULT);
        initializeIndexDir(bundleContext, config);
        initializeExtractedTextCache(bundleContext, config, statisticsProvider);
        tracker = createTracker(bundleContext, config);
        indexProvider = new LuceneIndexProvider(tracker, augmentorFactory);
        initializeActiveBlobCollector(whiteboard, config);
        initializeLogging(config);
        initialize();

        regs.add(bundleContext.registerService(QueryIndexProvider.class.getName(), indexProvider, null));
        registerObserver(bundleContext, config);
        registerLocalIndexObserver(bundleContext, tracker, config);

        registerIndexInfoProvider(bundleContext);
        registerIndexImporterProvider(bundleContext);
        registerPropertyIndexCleaner(config, bundleContext);

        LuceneIndexMBeanImpl mBean = new LuceneIndexMBeanImpl(tracker, nodeStore, indexPathService, getIndexCheckDir(), cleaner);
        oakRegs.add(registerMBean(whiteboard,
                LuceneIndexMBean.class,
                mBean,
                LuceneIndexMBean.TYPE,
                "Lucene Index statistics"));
        registerGCMonitor(whiteboard, tracker);

        registerIndexEditor(bundleContext, tracker, mBean, config);

        LuceneIndexFileSystemStatistics luceneIndexFSStats = new LuceneIndexFileSystemStatistics(statisticsProvider, indexCopier);
        registerLuceneFileSystemStats(luceneIndexFSStats, PropertiesUtil.toLong(config.get(PROP_INDEX_FILESYSTEM_STATS_INTERVAL),PROP_INDEX_FILESYSTEM_STATS_INTERVAL_DEFAULT));
    }

    private File getIndexCheckDir() {
        return new File(checkNotNull(indexDir), "indexCheckDir");
    }

    @Deactivate
    private void deactivate() throws InterruptedException, IOException {
        for (ServiceRegistration reg : regs) {
            reg.unregister();
        }

        for (Registration reg : oakRegs){
            reg.unregister();
        }

        if (backgroundObserver != null){
            backgroundObserver.close();
        }

        if (externalIndexObserver != null){
            externalIndexObserver.close();
        }

        if (indexProvider != null) {
            indexProvider.close();
            indexProvider = null;
        }

        if (documentQueue != null){
            documentQueue.close();
        }

        if (nrtIndexFactory != null){
            nrtIndexFactory.close();
        }

        //Close the copier first i.e. before executorService
        if (indexCopier != null){
            indexCopier.close();
        }

        if (executorService != null){
            executorService.shutdown();
            executorService.awaitTermination(1, TimeUnit.MINUTES);
        }

        if (extractedTextCache != null) {
            extractedTextCache.close();
        }

        InfoStream.setDefault(InfoStream.NO_OUTPUT);
    }

    void initializeIndexDir(BundleContext bundleContext, Map config) {
        String indexDirPath = PropertiesUtil.toString(config.get(PROP_LOCAL_INDEX_DIR), null);
        if (Strings.isNullOrEmpty(indexDirPath)) {
            String repoHome = bundleContext.getProperty(REPOSITORY_HOME);
            if (repoHome != null){
                indexDirPath = FilenameUtils.concat(repoHome, "index");
            }
        }

        checkNotNull(indexDirPath, "Index directory cannot be determined as neither index " +
                "directory path [%s] nor repository home [%s] defined", PROP_LOCAL_INDEX_DIR, REPOSITORY_HOME);

        indexDir = new File(indexDirPath);
    }

    IndexCopier getIndexCopier() {
        return indexCopier;
    }

    ExtractedTextCache getExtractedTextCache() {
        return extractedTextCache;
    }

    private void initialize(){
        if(indexProvider == null){
            return;
        }

        if(nodeAggregator != null){
            log.debug("Using NodeAggregator {}", nodeAggregator.getClass());
        }

        indexProvider.setAggregator(nodeAggregator);
    }

    private void initializeLogging(Map config) {
        boolean debug = PropertiesUtil.toBoolean(config.get(PROP_DEBUG), false);
        if (debug) {
            InfoStream.setDefault(LoggingInfoStream.INSTANCE);
            log.info("Registered LoggingInfoStream with Lucene. Lucene logs can be enabled " +
                    "now via category [{}]", LoggingInfoStream.PREFIX);
        }
    }

    private void registerIndexEditor(BundleContext bundleContext, IndexTracker tracker, LuceneIndexMBean mBean, Map config) throws IOException {
        boolean enableCopyOnWrite = PropertiesUtil.toBoolean(config.get(PROP_COPY_ON_WRITE), PROP_COPY_ON_WRITE_DEFAULT);
        if (enableCopyOnWrite){
            initializeIndexCopier(bundleContext, config);
            editorProvider = new LuceneIndexEditorProvider(indexCopier, tracker, extractedTextCache,
                    augmentorFactory,  mountInfoProvider, activeDeletedBlobCollector, mBean, statisticsProvider);
            log.info("Enabling CopyOnWrite support. Index files would be copied under {}", indexDir.getAbsolutePath());
        } else {
            editorProvider = new LuceneIndexEditorProvider(null, tracker, extractedTextCache, augmentorFactory,
                    mountInfoProvider, activeDeletedBlobCollector, mBean, statisticsProvider);
        }
        editorProvider.setBlobStore(blobStore);
        editorProvider.withAsyncIndexesSizeStatsUpdate(asyncIndexesSizeStatsUpdate);

        if (hybridIndex){
            editorProvider.setIndexingQueue(checkNotNull(documentQueue));
        }

        Dictionary props = new Hashtable<>();
        props.put("type", TYPE_LUCENE);
        regs.add(bundleContext.registerService(IndexEditorProvider.class.getName(), editorProvider, props));
        oakRegs.add(registerMBean(whiteboard,
                TextExtractionStatsMBean.class,
                editorProvider.getExtractedTextCache().getStatsMBean(),
                TextExtractionStatsMBean.TYPE,
                "TextExtraction statistics"));
    }

    private IndexTracker createTracker(BundleContext bundleContext, Map config) throws IOException {
        boolean enableCopyOnRead = PropertiesUtil.toBoolean(config.get(PROP_COPY_ON_READ), true);
        IndexTracker tracker;
        if (enableCopyOnRead){
            initializeIndexCopier(bundleContext, config);
            log.info("Enabling CopyOnRead support. Index files would be copied under {}", indexDir.getAbsolutePath());
            if (hybridIndex) {
                nrtIndexFactory = new NRTIndexFactory(indexCopier, statisticsProvider);
            }
            tracker = new IndexTracker(new DefaultIndexReaderFactory(mountInfoProvider, indexCopier), nrtIndexFactory);
        } else {
            tracker = new IndexTracker(new DefaultIndexReaderFactory(mountInfoProvider, null));
        }

        tracker.setAsyncIndexInfoService(asyncIndexInfoService);
        return tracker;
    }

    private void initializeIndexCopier(BundleContext bundleContext, Map config) throws IOException {
        if(indexCopier != null){
            return;
        }
        boolean prefetchEnabled = PropertiesUtil.toBoolean(config.get(PROP_PREFETCH_INDEX_FILES),
                PROP_PREFETCH_INDEX_FILES_DEFAULT);

        if (prefetchEnabled){
            log.info("Prefetching of index files enabled. Index would be opened after copying all new files locally");
        }

        indexCopier = new IndexCopier(getExecutorService(), indexDir, prefetchEnabled);

        oakRegs.add(registerMBean(whiteboard,
                CopyOnReadStatsMBean.class,
                indexCopier,
                CopyOnReadStatsMBean.TYPE,
                "IndexCopier support statistics"));

    }

    ExecutorService getExecutorService(){
        if (executorService == null){
            executorService = createExecutor();
        }
        return executorService;
    }

    private ExecutorService createExecutor() {
        ThreadPoolExecutor executor = new ThreadPoolExecutor(5, 5, 60L, TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(), new ThreadFactory() {
            private final AtomicInteger counter = new AtomicInteger();
            private final Thread.UncaughtExceptionHandler handler = (t, e) -> log.warn("Error occurred in asynchronous processing ", e);
            @Override
            public Thread newThread(@NotNull Runnable r) {
                Thread thread = new Thread(r, createName());
                thread.setDaemon(true);
                thread.setPriority(Thread.MIN_PRIORITY);
                thread.setUncaughtExceptionHandler(handler);
                return thread;
            }

            private String createName() {
                return "oak-lucene-" + counter.getAndIncrement();
            }
        });
        executor.setKeepAliveTime(1, TimeUnit.MINUTES);
        executor.allowCoreThreadTimeOut(true);
        return executor;
    }

    private void registerObserver(BundleContext bundleContext, Map config) {
        boolean enableAsyncIndexOpen = PropertiesUtil.toBoolean(config.get(PROP_ASYNC_INDEX_OPEN), true);
        Observer observer = indexProvider;
        if (enableAsyncIndexOpen) {
            backgroundObserver = new BackgroundObserver(indexProvider, getExecutorService(), 5);
            observer = backgroundObserver;
            oakRegs.add(registerMBean(whiteboard,
                    BackgroundObserverMBean.class,
                    backgroundObserver.getMBean(),
                    BackgroundObserverMBean.TYPE,
                    "LuceneIndexConfigObserver queue stats"));
            log.info("Registering the LuceneIndexProvider as a BackgroundObserver");
        }
        regs.add(bundleContext.registerService(Observer.class.getName(), observer, null));
    }

    private void registerLocalIndexObserver(BundleContext bundleContext, IndexTracker tracker, Map config) {
        if (!hybridIndex){
            log.info("Hybrid indexing feature disabled");
            return;
        }

        int queueSize = PropertiesUtil.toInteger(config.get(PROP_HYBRID_QUEUE_SIZE), PROP_HYBRID_QUEUE_SIZE_DEFAULT);
        long queueOfferTimeoutMillis = PropertiesUtil.toLong(config.get(PROP_HYBRID_QUEUE_TIMEOUT), PROP_HYBRID_QUEUE_TIMEOUT_DEFAULT);
        documentQueue = new DocumentQueue(queueSize, queueOfferTimeoutMillis, tracker, getExecutorService(), statisticsProvider);
        LocalIndexObserver localIndexObserver = new LocalIndexObserver(documentQueue, statisticsProvider);
        regs.add(bundleContext.registerService(Observer.class.getName(), localIndexObserver, null));

        int observerQueueSize = 1000;
        int builderMaxSize = 5000;
        regs.add(bundleContext.registerService(JournalPropertyService.class.getName(),
                new LuceneJournalPropertyService(builderMaxSize), null));
        ExternalObserverBuilder builder = new ExternalObserverBuilder(documentQueue, tracker, statisticsProvider,
                getExecutorService(), observerQueueSize);
        log.info("Configured JournalPropertyBuilder with max size {} and backed by BackgroundObserver " +
                "with queue size {}", builderMaxSize, observerQueueSize);

        Observer observer = builder.build();
        externalIndexObserver = builder.getBackgroundObserver();
        regs.add(bundleContext.registerService(Observer.class.getName(), observer, null));
        oakRegs.add(registerMBean(whiteboard,
                BackgroundObserverMBean.class,
                externalIndexObserver.getMBean(),
                BackgroundObserverMBean.TYPE,
                "LuceneExternalIndexObserver queue stats"));
        log.info("Hybrid indexing enabled for configured indexes with queue size of {}", queueSize);
    }

    private void initializeFactoryClassLoaders(ClassLoader classLoader) {
        ClassLoader originalClassLoader = Thread.currentThread()
                .getContextClassLoader();
        try {
            Thread.currentThread().setContextClassLoader(classLoader);
            //Access TokenizerFactory etc trigger a static initialization
            //so switch the TCCL so that static initializer picks up the right
            //classloader
            initializeFactoryClassLoaders0(classLoader);
            initializeClasses();
        } catch (Throwable t) {
            log.warn("Error occurred while initializing the Lucene " +
                    "Factories", t);
        } finally {
            Thread.currentThread().setContextClassLoader(originalClassLoader);
        }
    }

    private void initializeFactoryClassLoaders0(ClassLoader classLoader) {
        //Factories use the Threads context classloader to perform SPI classes
        //lookup by default which would not work in OSGi world. So reload the
        //factories by providing the bundle classloader
        TokenizerFactory.reloadTokenizers(classLoader);
        CharFilterFactory.reloadCharFilters(classLoader);
        TokenFilterFactory.reloadTokenFilters(classLoader);
    }

    private void initializeClasses() {
        // prevent LUCENE-6482
        // (also done in IndexDefinition, just to be save)
        OakCodec ensureLucene46CodecLoaded = new OakCodec();
        // to ensure the JVM doesn't optimize away object creation
        // (probably not really needed; just to be save)
        log.debug("Lucene46Codec is loaded: {}", ensureLucene46CodecLoaded);
    }

    private void initializeExtractedTextCache(BundleContext bundleContext, Map config, StatisticsProvider statisticsProvider) {
        int cacheSizeInMB = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_SIZE),
                PROP_EXTRACTED_TEXT_CACHE_SIZE_DEFAULT);
        int cacheExpiryInSecs = PropertiesUtil.toInteger(config.get(PROP_EXTRACTED_TEXT_CACHE_EXPIRY),
                PROP_EXTRACTED_TEXT_CACHE_EXPIRY_DEFAULT);
        boolean alwaysUsePreExtractedCache = PropertiesUtil.toBoolean(config.get(PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE),
                PROP_PRE_EXTRACTED_TEXT_ALWAYS_USE_DEFAULT);

        extractedTextCache = new ExtractedTextCache(
                cacheSizeInMB * ONE_MB,
                cacheExpiryInSecs,
                alwaysUsePreExtractedCache,
                indexDir, statisticsProvider);
        if (extractedTextProvider != null){
            registerExtractedTextProvider(extractedTextProvider);
        }
        CacheStats stats = extractedTextCache.getCacheStats();
        if (stats != null){
            oakRegs.add(registerMBean(whiteboard,
                    CacheStatsMBean.class, stats,
                    CacheStatsMBean.TYPE, stats.getName()));
            log.info("Extracted text caching enabled with maxSize {} MB, expiry time {} secs",
                    cacheSizeInMB, cacheExpiryInSecs);
        }
    }

    private void registerExtractedTextProvider(PreExtractedTextProvider provider){
        if (extractedTextCache != null){
            if (provider != null){
                String usage = extractedTextCache.isAlwaysUsePreExtractedCache() ?
                        "always" : "only during reindexing phase";
                log.info("Registering PreExtractedTextProvider {} with extracted text cache. " +
                        "It would be used {}",  provider, usage);
            } else {
                log.info("Unregistering PreExtractedTextProvider with extracted text cache");
            }
            extractedTextCache.setExtractedTextProvider(provider);
        }
    }

    private void configureBooleanClauseLimit(Map config) {
        int booleanClauseLimit = PropertiesUtil.toInteger(config.get(PROP_BOOLEAN_CLAUSE_LIMIT),
                PROP_BOOLEAN_CLAUSE_LIMIT_DEFAULT);
        if (booleanClauseLimit != BooleanQuery.getMaxClauseCount()){
            BooleanQuery.setMaxClauseCount(booleanClauseLimit);
            log.info("Changed the Max boolean clause limit to {}", booleanClauseLimit);
        }
    }

    private void configureIndexDefinitionStorage(Map config) {
        boolean disableStorage = PropertiesUtil.toBoolean(config.get(PROP_DISABLE_STORED_INDEX_DEFINITION),
                PROP_DISABLE_DEFN_STORAGE_DEFAULT);
        if (disableStorage){
            log.info("Feature to ensure that index definition matches the index state is disabled. Change in " +
                    "index definition would now affect query plans and might lead to inconsistent results.");
            IndexDefinition.setDisableStoredIndexDefinition(disableStorage);
        }
    }

    private void registerGCMonitor(Whiteboard whiteboard,
                                   final IndexTracker tracker) {
        GCMonitor gcMonitor = new GCMonitor.Empty() {
            @Override
            public void compacted() {
                tracker.refresh();
            }
        };
        oakRegs.add(whiteboard.register(GCMonitor.class, gcMonitor, emptyMap()));
    }

    private void registerIndexInfoProvider(BundleContext bundleContext) {
        IndexInfoProvider infoProvider = new LuceneIndexInfoProvider(nodeStore, asyncIndexInfoService, getIndexCheckDir());
        regs.add(bundleContext.registerService(IndexInfoProvider.class.getName(), infoProvider, null));
    }

    private void registerIndexImporterProvider(BundleContext bundleContext) {
        LuceneIndexImporter importer = new LuceneIndexImporter(blobStore);
        regs.add(bundleContext.registerService(IndexImporterProvider.class.getName(), importer, null));
    }

    private void initializeActiveBlobCollector(Whiteboard whiteboard, Map config) {
        boolean activeDeletionEnabled = PropertiesUtil.toBoolean(
                config.get(PROP_NAME_DELETED_BLOB_COLLECTION_DEFAULT_ENABLED),
                PROP_DELETED_BLOB_COLLECTION_DEFAULT_ENABLED);
        if (activeDeletionEnabled && blobStore!= null) {
            File blobCollectorWorkingDir = new File(indexDir, "deleted-blobs");
            activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.newInstance(blobCollectorWorkingDir,
                    getExecutorService());
            ActiveDeletedBlobCollectorMBean bean =
                    new ActiveDeletedBlobCollectorMBeanImpl(activeDeletedBlobCollector, whiteboard, nodeStore,
                            indexPathService, asyncIndexInfoService, blobStore, getExecutorService());

            oakRegs.add(registerMBean(whiteboard, ActiveDeletedBlobCollectorMBean.class, bean,
                    ActiveDeletedBlobCollectorMBean.TYPE, "Active lucene files collection"));
            log.info("Active blob collector initialized at working dir: {}", blobCollectorWorkingDir);
        } else {
            activeDeletedBlobCollector = ActiveDeletedBlobCollectorFactory.NOOP;
            log.info("Active blob collector set to NOOP. enabled: {} seconds; blobStore: {}",
                    activeDeletionEnabled, blobStore);
        }
    }

    private void registerPropertyIndexCleaner(Map config, BundleContext bundleContext) {
        int cleanerInterval = PropertiesUtil.toInteger(config.get(PROP_INDEX_CLEANER_INTERVAL),
                PROP_INDEX_CLEANER_INTERVAL_DEFAULT);

        if (cleanerInterval <= 0) {
            log.info("Property index cleaner would not be registered");
            return;
        }

        cleaner = new PropertyIndexCleaner(nodeStore, indexPathService, asyncIndexInfoService, statisticsProvider);

        //Proxy check for DocumentNodeStore
        if (nodeStore instanceof Clusterable) {
            cleaner.setRecursiveDelete(true);
            log.info("PropertyIndexCleaner configured to perform recursive delete");
        }
        oakRegs.add(scheduleWithFixedDelay(whiteboard, cleaner,
                ImmutableMap.of("scheduler.name", PropertyIndexCleaner.class.getName()),
                cleanerInterval, true, true));
        log.info("Property index cleaner configured to run every [{}] seconds", cleanerInterval);
    }

    private void registerLuceneFileSystemStats(LuceneIndexFileSystemStatistics luceneIndexFSStats, long delayInSeconds) {
        Map config = ImmutableMap.of(
                "scheduler.name", LuceneIndexFileSystemStatistics.class.getName()
        );
        oakRegs.add(scheduleWithFixedDelay(whiteboard, luceneIndexFSStats, config, delayInSeconds, false, true));
        log.info("Lucene FileSystem Statistics calculator configured to run every [{}] seconds", delayInSeconds);
    }


    protected void bindNodeAggregator(QueryIndex.NodeAggregator aggregator) {
        this.nodeAggregator = aggregator;
        initialize();
    }

    protected void unbindNodeAggregator(QueryIndex.NodeAggregator aggregator) {
        this.nodeAggregator = null;
        initialize();
    }

    protected void bindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider){
        this.extractedTextProvider = preExtractedTextProvider;
        registerExtractedTextProvider(preExtractedTextProvider);
    }

    protected void unbindExtractedTextProvider(PreExtractedTextProvider preExtractedTextProvider){
        this.extractedTextProvider = null;
        registerExtractedTextProvider(null);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy