org.apache.jackrabbit.oak.plugins.index.property.strategy.ContentMirrorStoreStrategy Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of oak-core Show documentation
There is a newer version: 1.62.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.oak.plugins.index.property.strategy;

import static com.google.common.collect.Queues.newArrayDeque;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_CONTENT_NODE_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.ENTRY_COUNT_PROPERTY_NAME;
import static org.apache.jackrabbit.oak.plugins.index.IndexConstants.KEY_COUNT_PROPERTY_NAME;

import java.util.Deque;
import java.util.Iterator;
import java.util.Set;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.counter.NodeCounterEditor;
import org.apache.jackrabbit.oak.plugins.index.counter.jmx.NodeCounter;
import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
import org.apache.jackrabbit.oak.query.FilterIterators;
import org.apache.jackrabbit.oak.query.QueryEngineSettings;
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.spi.state.NodeStateUtils;
import org.apache.jackrabbit.oak.util.ApproximateCounter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Iterators;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;

/**
 * An IndexStoreStrategy implementation that saves the nodes under a hierarchy
 * that mirrors the repository tree. 

 * This should minimize the chance that concurrent updates overlap on the same
 * content node.

 * 

 * For example for a node that is under {@code /test/node}, the index
 * structure will be {@code /oak:index/index/test/node}:
 * 
 *  * {@code
 * /
 *   test
 *     node
 *   oak:index
 *     index
 *       test
 *         node
 * }
 * 
 *
 */
public class ContentMirrorStoreStrategy implements IndexStoreStrategy {

    static final Logger LOG = LoggerFactory.getLogger(ContentMirrorStoreStrategy.class);
    
    /**
     * logging a warning every {@code oak.traversing.warn} traversed nodes. Default {@code 10000}
     */
    public static final int TRAVERSING_WARN = Integer.getInteger("oak.traversing.warn", 10000);

    private final String indexName;

    public ContentMirrorStoreStrategy() {
        this(INDEX_CONTENT_NODE_NAME);
    }

    public ContentMirrorStoreStrategy(String indexName) {
        this.indexName = indexName;
    }

    @Override
    public void update(
            NodeBuilder index, String path,
            @Nullable final String indexName,
            @Nullable final NodeBuilder indexMeta,
            Set beforeKeys, Set afterKeys) {
        for (String key : beforeKeys) {
            remove(index, key, path);
        }
        for (String key : afterKeys) {
            insert(index, key, path);
        }
    }

    private void remove(NodeBuilder index, String key, String value) {
        ApproximateCounter.adjustCountSync(index, -1);
        NodeBuilder builder = index.getChildNode(key);
        if (builder.exists()) {
            ApproximateCounter.adjustCountSync(builder, -1);
            // Collect all builders along the given path
            Deque builders = newArrayDeque();
            builders.addFirst(builder);

            // Descend to the correct location in the index tree
            for (String name : PathUtils.elements(value)) {
                builder = builder.getChildNode(name);
                builders.addFirst(builder);
            }

            // Drop the match value,  if present
            if (builder.exists()) {
                builder.removeProperty("match");
            }

            // Prune all index nodes that are no longer needed
            prune(index, builders, key);
        }
    }

    private void insert(NodeBuilder index, String key, String value) {
        ApproximateCounter.adjustCountSync(index, 1);
        // NodeBuilder builder = index.child(key);
        NodeBuilder builder = fetchKeyNode(index, key);
        ApproximateCounter.adjustCountSync(builder, 1);
        for (String name : PathUtils.elements(value)) {
            builder = builder.child(name);
        }
        builder.setProperty("match", true);
    }

    public Iterable query(final Filter filter, final String indexName,
            final NodeState indexMeta, final String indexStorageNodeName,
            final Iterable values) {
        final NodeState index = indexMeta.getChildNode(indexStorageNodeName);
        return new Iterable() {
            @Override
            public Iterator iterator() {
                PathIterator it = new PathIterator(filter, indexName, "");
                if (values == null) {
                    it.setPathContainsValue(true);
                    it.enqueue(getChildNodeEntries(index).iterator());
                } else {
                    for (String p : values) {
                        NodeState property = index.getChildNode(p);
                        if (property.exists()) {
                            // we have an entry for this value, so use it
                            it.enqueue(Iterators.singletonIterator(
                                    new MemoryChildNodeEntry("", property)));
                        }
                    }
                }
                return it;
            }
        };
    }

    @Nonnull
    Iterable getChildNodeEntries(@Nonnull
    final NodeState index) {
        return index.getChildNodeEntries();
    }

    @Override
    public Iterable query(final Filter filter, final String name,
            final NodeState indexMeta, final Iterable values) {
        return query(filter, name, indexMeta, this.indexName, values);
    }

    @Override
    public long count(NodeState root, NodeState indexMeta, Set values, int max) {
        return count(null, root, indexMeta, this.indexName, values, max);
    }

    @Override
    public long count(final Filter filter, NodeState root, NodeState indexMeta, Set values, int max) {
        return count(filter, root, indexMeta, this.indexName, values, max);
    }

    long count(Filter filter, NodeState root, NodeState indexMeta, final String indexStorageNodeName,
            Set values, int max) {
        NodeState index = indexMeta.getChildNode(indexStorageNodeName);
        long count = -1;
        if (values == null) {
            // property is not null
            PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);
            if (ec != null) {
                // negative value implies fall-back to counting
                count = ec.getValue(Type.LONG);
            } else {
                // negative value means that approximation isn't available
                count = ApproximateCounter.getCountSync(index);
            }
            if (count < 0) {
                CountingNodeVisitor v = new CountingNodeVisitor(max);
                v.visit(index);
                count = v.getEstimatedCount();
                if (count >= max) {
                    // "is not null" queries typically read more data
                    count *= 10;
                }
            }
        } else {
            // property = x, or property in (x, y, z)
            int size = values.size();
            if (size == 0) {
                return 0;
            }
            PropertyState ec = indexMeta.getProperty(ENTRY_COUNT_PROPERTY_NAME);       
            if (ec != null) {
                count = ec.getValue(Type.LONG);
                if (count >= 0) {
                    // assume 10*NodeCounterEditor.DEFAULT_RESOLUTION entries per key, so that this index is used
                    // instead of traversal, but not instead of a regular property index
                    long keyCount = count / (10 * NodeCounterEditor.DEFAULT_RESOLUTION);
                    ec = indexMeta.getProperty(KEY_COUNT_PROPERTY_NAME);
                    if (ec != null) {
                        keyCount = ec.getValue(Type.LONG);
                    }
                    // cast to double to avoid overflow 
                    // (entryCount could be Long.MAX_VALUE)
                    // the cost is not multiplied by the size, 
                    // otherwise the traversing index might be used              
                    keyCount = Math.max(1, keyCount);
                    count = (long) ((double) count / keyCount) + size;
                }
            } else {
                // for this index, property "entryCount" is not set
                long approxMax = 0;
                long approxCount = ApproximateCounter.getCountSync(index);
                if (approxCount != -1) {
                    // approximate count is available for the index:
                    // check approximate counts for each value
                    for (String p : values) {
                        NodeState s = index.getChildNode(p);
                        if (s.exists()) {
                            long a = ApproximateCounter.getCountSync(s);
                            if (a != -1) {
                                approxMax += a;
                            } else if (approxMax > 0) {
                                // in absence of approx count for a key we should be conservative
                                approxMax += 10 * NodeCounterEditor.DEFAULT_RESOLUTION;
                            }
                        }
                    }
                    if (approxMax > 0) {
                        count = approxMax;
                    }
                }
            }
            // still, property = x, or property in (x, y, z),
            // and we don't know the count ("entryCount" = -1)
            if (count < 0) {
                count = 0;
                max = Math.max(10, max / size);
                int i = 0;
                for (String p : values) {
                    if (count > max && i > 3) {
                        // the total count is extrapolated from the the number
                        // of values counted so far to the total number of values
                        count = count * size / i;
                        break;
                    }
                    NodeState s = index.getChildNode(p);
                    if (s.exists()) {
                        CountingNodeVisitor v = new CountingNodeVisitor(max);
                        v.visit(s);
                        count += v.getEstimatedCount();
                    }
                    i++;
                }
            }
        }

        String filterRootPath = null;
        if (filter != null &&
                filter.getPathRestriction().equals(Filter.PathRestriction.ALL_CHILDREN)) {
            filterRootPath = filter.getPath();
        }

        if (filterRootPath != null) {
            // scale cost according to path restriction
            long totalNodesCount = NodeCounter.getEstimatedNodeCount(root, "/", true);
            if (totalNodesCount != -1) {
                long filterPathCount = NodeCounter.getEstimatedNodeCount(root, filterRootPath, true);
                if (filterPathCount != -1) {
                    // assume nodes in the index are evenly distributed in the repository (old idea)
                    long countScaledDown = (long) ((double) count / totalNodesCount * filterPathCount);
                    // assume 80% of the indexed nodes are in this subtree
                    long mostNodesFromThisSubtree = (long) (filterPathCount * 0.8);
                    // count can at most be the assumed subtree size
                    count = Math.min(count, mostNodesFromThisSubtree);
                    // this in theory should not have any effect, 
                    // except if the above estimates are incorrect,
                    // so this is just for safety feature
                    count = Math.max(count, countScaledDown);
                }
            }
        }
        return count;
    }

    /**
     * An iterator over paths within an index node.
     */
    static class PathIterator implements Iterator {
        
        private final Filter filter;
        private final String indexName;
        private final Deque> nodeIterators =
                Queues.newArrayDeque();
        private int readCount;
        private int intermediateNodeReadCount;
        private boolean init;
        private boolean closed;
        private String filterPath;
        private String pathPrefix;
        private String parentPath;
        private String currentPath;
        private boolean pathContainsValue;
        
        /**
         * Keep the returned path, to avoid returning duplicate entries.
         */
        private final Set knownPaths = Sets.newHashSet();
        private final QueryEngineSettings settings;

        PathIterator(Filter filter, String indexName, String pathPrefix) {
            this.filter = filter;
            this.pathPrefix = pathPrefix;
            this.indexName = indexName;
            boolean shouldDescendDirectly = filter.getPathRestriction().equals(Filter.PathRestriction.ALL_CHILDREN);
            if (shouldDescendDirectly) {            
                filterPath = filter.getPath();
                if (PathUtils.denotesRoot(filterPath)) {
                    filterPath = "";
                }
            } else {
                filterPath = "";
            }            
            parentPath = "";
            currentPath = "/";
            this.settings = filter.getQueryEngineSettings();
        }

        void enqueue(Iterator it) {
            nodeIterators.addLast(it);
        }

        void setPathContainsValue(boolean pathContainsValue) {
            if (init) {
                throw new IllegalStateException("This iterator is already initialized");
            }
            this.pathContainsValue = pathContainsValue;
        }

        @Override
        public boolean hasNext() {
            if (!closed && !init) {
                fetchNext();
                init = true;
            }
            return !closed;
        }

        private void fetchNext() {
            while (true) {
                fetchNextPossiblyDuplicate();
                if (closed) {
                    return;
                }
                if (pathContainsValue) {
                    String value = PathUtils.elements(currentPath).iterator().next();
                    currentPath = PathUtils.relativize(value, currentPath);
                    // don't return duplicate paths:
                    // Set.add returns true if the entry was new,
                    // so if it returns false, it was already known
                    if (!knownPaths.add(currentPath)) {
                        continue;
                    }
                }
                break;
            }
        }

        private void fetchNextPossiblyDuplicate() {
            while (!nodeIterators.isEmpty()) {
                Iterator iterator = nodeIterators.getLast();
                if (iterator.hasNext()) {
                    ChildNodeEntry entry = iterator.next();

                    NodeState node = entry.getNodeState();

                    String name = entry.getName();
                    if (NodeStateUtils.isHidden(name)) {
                        continue;
                    }
                    currentPath = PathUtils.concat(parentPath, name);

                    if (!"".equals(filterPath)) {
                        String p = currentPath;
                        if (pathContainsValue) {
                            String value = PathUtils.elements(p).iterator().next();
                            p = PathUtils.relativize(value, p);                        
                        }
                        if ("".equals(pathPrefix)) {
                            p = PathUtils.concat("/", p);
                        } else {
                            p = PathUtils.concat(pathPrefix, p);
                        }
                        if (!"".equals(p) && 
                                !p.equals(filterPath) && 
                                !PathUtils.isAncestor(p, filterPath) && 
                                !PathUtils.isAncestor(filterPath, p)) {
                            continue;
                        }
                    }

                    nodeIterators.addLast(node.getChildNodeEntries().iterator());
                    parentPath = currentPath;

                    if (node.getBoolean("match")) {
                        readCount++;
                        if (readCount % TRAVERSING_WARN == 0) {
                            FilterIterators.checkReadLimit(readCount, settings);
                            LOG.warn("Index-Traversed {} nodes ({} index entries) using index {} with filter {}", readCount, intermediateNodeReadCount, indexName, filter);
                        }
                        return;
                    } else {
                        intermediateNodeReadCount++;
                    }

                } else {
                    nodeIterators.removeLast();
                    parentPath = PathUtils.getParentPath(parentPath);
                }
            }
            currentPath = null;
            closed = true;
        }

        @Override
        public String next() {
            if (closed) {
                throw new IllegalStateException("This iterator is closed");
            }
            if (!init) {
                fetchNext();
                init = true;
            }
            String result = PathUtils.concat(pathPrefix, currentPath);
            fetchNext();
            return result;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

    }

    /**
     * A node visitor to recursively traverse a number of nodes.
     */
    interface NodeVisitor {
        void visit(NodeState state);
    }

    /**
     * A node visitor that counts the number of matching nodes up to a given
     * maximum, in order to estimate the number of matches.
     */
    static class CountingNodeVisitor implements NodeVisitor {

        /**
         * The maximum number of matching nodes to count.
         */
        final int maxCount;

        /**
         * The current count of matching nodes.
         */
        int count;

        /**
         * The current depth (number of parent nodes).
         */
        int depth;

        /**
         * The sum of the depth of all matching nodes. This value is used to
         * calculate the average depth.
         */
        long depthTotal;

        CountingNodeVisitor(int maxCount) {
            this.maxCount = maxCount;
        }

        @Override
        public void visit(NodeState state) {
            if (state.hasProperty("match")) {
                count++;
                depthTotal += depth;
            }
            if (count < maxCount) {
                depth++;
                for (ChildNodeEntry entry : state.getChildNodeEntries()) {
                    if (count >= maxCount) {
                        break;
                    }
                    visit(entry.getNodeState());
                }
                depth--;
            }
        }

        /**
         * The number of matches (at most the maximum count).
         * 
         * @return the match count
         */
        int getCount() {
            return count;
        }

        /**
         * The number of estimated matches. This value might be higher than the
         * number of counted matches, if the maximum number of matches has been
         * reached. It is based on the average depth of matches, and the average
         * number of child nodes.
         * 
         * @return the estimated matches
         */
        int getEstimatedCount() {
            if (count < maxCount) {
                return count;
            }
            double averageDepth = (int) (depthTotal / count);
            // the number of estimated matches is higher
            // the higher the average depth of the first hits
            long estimatedNodes = (long) (count * Math.pow(1.1, averageDepth));
            estimatedNodes = Math.min(estimatedNodes, Integer.MAX_VALUE);
            return Math.max(count, (int) estimatedNodes);
        }

    }
    
    /**
     * fetch from the index the key node
     * 
     * @param index
     *            the current index root
     * @param key
     *            the 'key' to fetch from the repo
     * @return the node representing the key
     */
    NodeBuilder fetchKeyNode(@Nonnull NodeBuilder index, 
                             @Nonnull String key) {
        return index.child(key);
    }

    /**
     * Physically prune a list of nodes from the index
     * 
     * @param index
     *            the current index
     * @param builders
     *            list of nodes to prune
     * @param key the key of the index we're processing
     */
    void prune(final NodeBuilder index, final Deque builders, final String key) {
        for (NodeBuilder node : builders) {
            if (node.getBoolean("match") || node.getChildNodeCount(1) > 0) {
                return;
            } else if (node.exists()) {
                node.remove();
            }
        }
    }

    @Override
    public boolean exists(NodeBuilder index, String key) {
        // This is currently not implemented, because there is no test case for it,
        // and because there is currently no need for this method with this class.
        // We would need to traverse the tree and search for an entry "match".
        // See also OAK-2663 for a potential (but untested) implementation.
        throw new UnsupportedOperationException();
   }

    @Override
    public String getIndexNodeName() {
        return indexName;
    }
}