All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.jackrabbit.oak.index.IndexerSupport Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.jackrabbit.oak.index;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;

import org.apache.commons.io.FileUtils;
import org.apache.felix.inventory.Format;
import java.util.function.Predicate;
import java.util.regex.Pattern;

import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
import org.apache.jackrabbit.oak.plugins.index.importer.AsyncLaneSwitcher;
import org.apache.jackrabbit.oak.plugins.index.importer.IndexDefinitionUpdater;
import org.apache.jackrabbit.oak.plugins.index.importer.IndexerInfo;
import org.apache.jackrabbit.oak.plugins.index.inventory.IndexDefinitionPrinter;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
import org.apache.jackrabbit.oak.spi.commit.CommitInfo;
import org.apache.jackrabbit.oak.spi.commit.EmptyHook;
import org.apache.jackrabbit.oak.spi.filter.PathFilter;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.jackrabbit.guava.common.base.Preconditions.checkNotNull;
import static org.apache.jackrabbit.guava.common.base.Preconditions.checkState;

/**
 * Helper used by the offline indexing code to resolve the checkpointed
 * repository state, manage the local index working directory, and switch the
 * async lane / reindex flag of the index definitions being processed.
 *
 * <p>The index paths, work/output directories and the node store are all
 * obtained from the supplied {@link IndexHelper}.</p>
 */
public class IndexerSupport {
    // Instance-level logger created from getClass(), so subclasses log under their own name.
    private final Logger log = LoggerFactory.getLogger(getClass());
    /**
     * Directory name in output directory under which indexes are
     * stored
     */
    public static final String LOCAL_INDEX_ROOT_DIR = "indexes";
    /**
     * Checkpoint value which indicates that the head state needs to be used.
     * This would be mostly used for testing purposes.
     */
    private static final String HEAD_AS_CHECKPOINT = "head";

    /**
     * Index lane name which is used for indexing
     */
    private static final String REINDEX_LANE = "offline-reindex-async";

    /**
     * Checkpoint metadata as returned by the node store; stays empty until
     * {@link #retrieveNodeStateForCheckpoint()} resolves a real checkpoint.
     */
    private Map<String, String> checkpointInfo = Collections.emptyMap();
    protected final IndexHelper indexHelper;
    /** Lazily created by {@link #getLocalIndexDir()}. */
    private File localIndexDir;
    /** Optional index definitions file applied by {@link #updateIndexDefinitions(NodeBuilder)}. */
    private File indexDefinitions;
    private final String checkpoint;
    private File existingDataDumpDir;

    /**
     * @param indexHelper source of the node store, index paths and directories
     * @param checkpoint checkpoint reference to index against, or {@code "head"}
     *                   to use the current head state
     */
    public IndexerSupport(IndexHelper indexHelper, String checkpoint) {
        this.indexHelper = indexHelper;
        this.checkpoint = checkpoint;
    }

    /**
     * Records an existing data dump directory and returns this instance for chaining.
     *
     * @param existingDataDumpDir directory to remember
     * @return this instance
     */
    public IndexerSupport withExistingDataDumpDir(File existingDataDumpDir) {
        this.existingDataDumpDir = existingDataDumpDir;
        return this;
    }

    /**
     * @return the directory set via {@link #withExistingDataDumpDir(File)},
     *         or {@code null} if none was set
     */
    public File getExistingDataDumpDir() {
        return existingDataDumpDir;
    }

    /**
     * Returns the local index directory ({@value #LOCAL_INDEX_ROOT_DIR} under
     * the helper's work directory), creating it on first access.
     *
     * @return the local index directory
     * @throws IOException if the directory cannot be created
     */
    public File getLocalIndexDir() throws IOException {
        if (localIndexDir == null) {
            localIndexDir = new File(indexHelper.getWorkDir(), LOCAL_INDEX_ROOT_DIR);
            FileUtils.forceMkdir(localIndexDir);
        }
        return localIndexDir;
    }

    /**
     * Moves the local index directory into the helper's output directory.
     *
     * @return the expected location of the index directory inside the output directory
     * @throws IOException if the move fails
     */
    public File copyIndexFilesToOutput() throws IOException {
        File destDir = new File(indexHelper.getOutputDir(), getLocalIndexDir().getName());
        // NOTE(review): this is a move, not a copy; the cached localIndexDir field
        // is not reset here and keeps referring to the pre-move location.
        FileUtils.moveDirectoryToDirectory(getLocalIndexDir(), indexHelper.getOutputDir(), true);
        return destDir;
    }

    /**
     * Saves an {@link IndexerInfo} for the given checkpoint in the local index directory.
     *
     * @param checkpoint checkpoint value to record
     * @throws IOException if the local index directory cannot be created or the info cannot be saved
     */
    public void writeMetaInfo(String checkpoint) throws IOException {
        new IndexerInfo(getLocalIndexDir(), checkpoint).save();
    }

    /**
     * @return the checkpoint value this instance was created with
     */
    public String getCheckpoint() {
        return checkpoint;
    }

    /**
     * Resolves the node state to index against. For the special
     * {@code "head"} checkpoint the current root of the node store is used;
     * otherwise the checkpoint is retrieved from the node store and its
     * checkpoint info is cached for {@link #getCheckpointInfo()}.
     *
     * @return the resolved node state, never {@code null}
     * @throws NullPointerException if the node store cannot retrieve the checkpoint
     */
    public NodeState retrieveNodeStateForCheckpoint() {
        NodeState checkpointedState;
        if (HEAD_AS_CHECKPOINT.equals(checkpoint)) {
            checkpointedState = indexHelper.getNodeStore().getRoot();
            log.warn("Using head state for indexing. Such an index cannot be imported back");
        } else {
            checkpointedState = indexHelper.getNodeStore().retrieve(checkpoint);
            checkNotNull(checkpointedState, "Not able to retrieve revision referred via checkpoint [%s]", checkpoint);
            checkpointInfo = indexHelper.getNodeStore().checkpointInfo(checkpoint);
        }
        return checkpointedState;
    }

    /**
     * Applies the configured index definitions file (if any) to the given root builder.
     * Does nothing when no file was set via {@link #setIndexDefinitions(File)}.
     *
     * @param rootBuilder builder of the repository root to update
     * @throws IOException propagated from the {@link IndexDefinitionUpdater}
     * @throws CommitFailedException propagated from the {@link IndexDefinitionUpdater}
     */
    public void updateIndexDefinitions(NodeBuilder rootBuilder) throws IOException, CommitFailedException {
        if (indexDefinitions != null) {
            new IndexDefinitionUpdater(indexDefinitions).apply(rootBuilder);
        }
    }

    /**
     * Dumps the index definitions of the given store as JSON into the local
     * index directory, under the file name
     * {@link IndexDefinitionUpdater#INDEX_DEFINITIONS_JSON}.
     *
     * @param nodeStore store whose index definitions are printed
     * @throws IOException if the dump cannot be written
     */
    protected void dumpIndexDefinitions(NodeStore nodeStore) throws IOException {
        IndexDefinitionPrinter printer = new IndexDefinitionPrinter(nodeStore, indexHelper.getIndexPathService());
        // Filter passed to the printer: include everything except the listed
        // ":"-prefixed property and node names.
        printer.setFilter("{\"properties\":[\"*\", \"-:childOrder\"],\"nodes\":[\"*\", \"-:index-definition\", \"-:data\", \"-:suggest-data\"]}");
        PrinterDumper dumper = new PrinterDumper(getLocalIndexDir(), IndexDefinitionUpdater.INDEX_DEFINITIONS_JSON,
                false, Format.JSON, printer);
        dumper.dump();
    }

    /**
     * Applies any configured index definition updates, then marks every index
     * path reported by the helper for reindex and switches it to the
     * {@code offline-reindex-async} lane. The change is merged into the given store.
     *
     * @param copyOnWriteStore store to modify
     * @throws CommitFailedException if the merge fails
     * @throws IOException propagated from {@link #updateIndexDefinitions(NodeBuilder)}
     * @throws IllegalStateException if no index definition exists at one of the index paths
     */
    public void switchIndexLanesAndReindexFlag(NodeStore copyOnWriteStore) throws CommitFailedException, IOException {
        NodeState root = copyOnWriteStore.getRoot();
        NodeBuilder builder = root.builder();
        updateIndexDefinitions(builder);

        for (String indexPath : indexHelper.getIndexPaths()) {
            //TODO Do it only for lucene indexes for now
            NodeBuilder idxBuilder = childBuilder(builder, indexPath, false);
            checkState(idxBuilder.exists(), "No index definition found at path [%s]", indexPath);

            idxBuilder.setProperty(IndexConstants.REINDEX_PROPERTY_NAME, true);
            AsyncLaneSwitcher.switchLane(idxBuilder, REINDEX_LANE);
        }

        copyOnWriteStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
        log.info("Switched the async lane for indexes at {} to {} and marked them for reindex", indexHelper.getIndexPaths(), REINDEX_LANE);
    }

    /**
     * Work done once indexing has finished: reverts the lane switch and then
     * dumps the resulting index definitions.
     *
     * @param copyOnWriteStore store that was used for indexing
     * @throws CommitFailedException if reverting the lanes cannot be merged
     * @throws IOException if the index definitions cannot be dumped
     */
    public void postIndexWork(NodeStore copyOnWriteStore) throws CommitFailedException, IOException {
        switchIndexLanesBack(copyOnWriteStore);
        dumpIndexDefinitions(copyOnWriteStore);
    }

    /**
     * Reverts the lane switch for every index path reported by the helper and
     * merges the change into the given store.
     *
     * @param copyOnWriteStore store to modify
     * @throws CommitFailedException if the merge fails
     */
    protected void switchIndexLanesBack(NodeStore copyOnWriteStore) throws CommitFailedException {
        NodeState root = copyOnWriteStore.getRoot();
        NodeBuilder builder = root.builder();

        for (String indexPath : indexHelper.getIndexPaths()) {
            NodeBuilder idxBuilder = childBuilder(builder, indexPath, false);
            AsyncLaneSwitcher.revertSwitch(idxBuilder, indexPath);
        }

        copyOnWriteStore.merge(builder, EmptyHook.INSTANCE, CommitInfo.EMPTY);
        log.info("Switched the async lane for indexes at {} back to there original lanes", indexHelper.getIndexPaths());
    }

    /**
     * @return the checkpoint info cached by {@link #retrieveNodeStateForCheckpoint()},
     *         or an empty map if no checkpoint has been resolved (or head was used)
     */
    public Map<String, String> getCheckpointInfo() {
        return checkpointInfo;
    }

    /**
     * @param indexDefinitions index definitions file to apply in
     *                         {@link #updateIndexDefinitions(NodeBuilder)}; may be {@code null}
     */
    public void setIndexDefinitions(File indexDefinitions) {
        this.indexDefinitions = indexDefinitions;
    }

    /**
     * Walks down from the given builder along the elements of {@code path}.
     *
     * @param nb builder to start from
     * @param path path whose elements are traversed; must not be {@code null}
     * @param createNew if {@code true} each step uses {@code child(name)},
     *                  otherwise {@code getChildNode(name)}
     * @return the builder reached after the last path element
     */
    public static NodeBuilder childBuilder(NodeBuilder nb, String path, boolean createNew) {
        for (String name : PathUtils.elements(checkNotNull(path))) {
            nb = createNew ? nb.child(name) : nb.getChildNode(name);
        }
        return nb;
    }

    /**
     * Builds an {@link IndexDefinition} for every index path reported by the
     * helper, based on the checkpointed state with any configured index
     * definition updates applied on top (in an in-memory store).
     *
     * @return the index definitions, one per index path
     * @throws IOException propagated from {@link #updateIndexDefinitions(NodeBuilder)}
     * @throws CommitFailedException propagated from {@link #updateIndexDefinitions(NodeBuilder)}
     */
    public Set<IndexDefinition> getIndexDefinitions() throws IOException, CommitFailedException {
        NodeState checkpointedState = this.retrieveNodeStateForCheckpoint();
        NodeStore copyOnWriteStore = new MemoryNodeStore(checkpointedState);
        NodeBuilder builder = copyOnWriteStore.getRoot().builder();
        // Root state is captured before the definition updates are applied to the builder.
        NodeState root = builder.getNodeState();
        this.updateIndexDefinitions(builder);
        IndexDefinition.Builder indexDefBuilder = new IndexDefinition.Builder();

        Set<IndexDefinition> indexDefinitions = new HashSet<>();

        for (String indexPath : indexHelper.getIndexPaths()) {
            NodeBuilder idxBuilder = IndexerSupport.childBuilder(builder, indexPath, false);
            IndexDefinition indexDf = indexDefBuilder.defn(idxBuilder.getNodeState()).indexPath(indexPath).root(root).build();
            indexDefinitions.add(indexDf);
        }
        return indexDefinitions;
    }

    /**
     * @param indexDefinitions set of index definitions to inspect
     * @return set of preferred path elements referred from the given set of index definitions.
     */
    public Set<String> getPreferredPathElements(Set<IndexDefinition> indexDefinitions) {
        Set<String> preferredPathElements = new HashSet<>();
        for (IndexDefinition indexDf : indexDefinitions) {
            preferredPathElements.addAll(indexDf.getRelativeNodeNames());
        }
        return preferredPathElements;
    }

    /**
     * @param indexDefinitions set of IndexDefinition to be used to calculate the Path Predicate
     * @param typeToRepositoryPath Function to convert type {@code T} to valid repository path of type {@code String}
     * @param <T> type of the elements tested by the returned predicate
     * @return filter predicate based on the include/exclude path rules of the given set of index definitions.
     */
    public <T> Predicate<T> getFilterPredicate(Set<IndexDefinition> indexDefinitions, Function<T, String> typeToRepositoryPath) {
        // Accept an element when at least one index definition does not exclude its path.
        return t -> indexDefinitions.stream().anyMatch(indexDef -> indexDef.getPathFilter().filter(typeToRepositoryPath.apply(t)) != PathFilter.Result.EXCLUDE);
    }

    /**
     * @param pattern Pattern for a custom excludes regex based on which paths would be filtered out
     * @param typeToRepositoryPath Function to convert type {@code T} to valid repository path of type {@code String}
     * @param <T> type of the elements tested by the returned predicate
     * @return Return a predicate that should test true for all paths that do not match the provided regex pattern.
     */
    public <T> Predicate<T> getFilterPredicateBasedOnCustomRegex(Pattern pattern, Function<T, String> typeToRepositoryPath) {
        // find() is used, so a match anywhere in the path excludes it.
        return t -> !pattern.matcher(typeToRepositoryPath.apply(t)).find();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy