// com.googlecode.concurrenttrees.radixinverted.ConcurrentInvertedRadixTree Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of concurrent-trees Show documentation
// Concurrent Radix Trees and Concurrent Suffix Trees for Java.
// There is a newer version: 2.6.1
/**
 * Copyright 2012-2013 Niall Gallagher
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.googlecode.concurrenttrees.radixinverted;

import com.googlecode.concurrenttrees.common.CharSequences;
import com.googlecode.concurrenttrees.common.KeyValuePair;
import com.googlecode.concurrenttrees.common.LazyIterator;
import com.googlecode.concurrenttrees.radix.ConcurrentRadixTree;
import com.googlecode.concurrenttrees.radix.node.Node;
import com.googlecode.concurrenttrees.radix.node.NodeFactory;
import com.googlecode.concurrenttrees.radix.node.util.PrettyPrintable;

import java.io.Serializable;
import java.util.Collections;
import java.util.Iterator;

/**
 * An implementation of {@link InvertedRadixTree} which supports lock-free concurrent reads, and allows items to be
 * added to and to be removed from the tree atomically by background thread(s), without blocking reads.
 * 
 * This implementation is based on {@link ConcurrentRadixTree}.
 *
 * @author Niall Gallagher
 */
public class ConcurrentInvertedRadixTree implements InvertedRadixTree, PrettyPrintable, Serializable {

    static class ConcurrentInvertedRadixTreeImpl extends ConcurrentRadixTree {

        public ConcurrentInvertedRadixTreeImpl(NodeFactory nodeFactory) {
            super(nodeFactory);
        }

        /**
         * Lazily traverses the tree based on characters in the given input, and returns from the tree the next node
         * and its value where the key associated with the node matches the characters from the input. More than
         * one matching keyword can be found for the same input, if there are keys in the tree which are prefixes of
         * each other.
         * 

         * Example:

         * Given two keywords in the tree: "Ford" and "Ford Focus"

         * Given a document: "I am shopping for a Ford Focus car"

         * Where the given input in this instance is the suffix of the document: "Ford Focus car"

         * ...then this method will return both "Ford" and "Ford Focus".

         * The caller can invoke this method repeatedly for each suffix of the document.

         *
         * @param input A sequence of characters which controls traversal of the tree
         * @return An iterable which will search for the next node in the tree matching the input
         */
        protected Iterable> scanForKeysAtStartOfInput(final CharSequence input) {
            return new Iterable>() {
                @Override
                public Iterator> iterator() {
                    return new LazyIterator>() {

                        Node currentNode = root;
                        int charsMatched = 0;

                        final int documentLength = input.length();

                        @Override
                        protected KeyValuePair computeNext() {
                            while (charsMatched < documentLength) {
                                Node nextNode = currentNode.getOutgoingEdge(input.charAt(charsMatched));
                                if (nextNode == null) {
                                    // Next node is a dead end...
                                    return endOfData();
                                }

                                currentNode = nextNode;
                                CharSequence currentNodeEdgeCharacters = currentNode.getIncomingEdge();
                                final int numCharsInEdge = currentNodeEdgeCharacters.length();
                                if (numCharsInEdge + charsMatched > documentLength) {
                                    // This node can't be a match because it is too long...
                                    return endOfData();
                                }
                                for (int i = 0; i < numCharsInEdge; i++) {
                                    if (currentNodeEdgeCharacters.charAt(i) != input.charAt(charsMatched + i)) {
                                        // Found a difference between a character in the input
                                        // and a character in the edge represented by current node,
                                        // current node is a dead end...
                                        return endOfData();
                                    }
                                }
                                // All characters in the current edge matched, add this number to total chars matched...
                                charsMatched += numCharsInEdge;

                                if (currentNode.getValue() != null) {
                                    // This is an explicit node and all of its chars match input, return a match...
                                    return new KeyValuePairImpl(CharSequences.toString(input.subSequence(0, charsMatched)), currentNode.getValue());
                                } // else the node matches, but is not an explicit node so we should continue scanning...
                            }
                            return endOfData();
                        }
                    };
                }
            };
        }

        /**
         * Traverses the tree based on characters in the given input, and returns the longest key in the tree
         * which is a prefix of the input, and its associated value.
         * 
         * This uses a similar algorithm as {@link #scanForKeysAtStartOfInput(CharSequence)} except it returns
         * the last result that would be returned, however this algorithm locates the last node more efficiently
         * by creating garbage objects during traversal due to not having to return the intermediate results.
         *
         * @param input A sequence of characters which controls traversal of the tree
         * @return The longest key in the tree which is a prefix of the input, and its associated value;
         * or null if no such key is contained in the tree
         */
        protected KeyValuePair scanForLongestKeyAtStartOfInput(final CharSequence input) {
            Node currentNode = root;
            int charsMatched = 0;

            final int documentLength = input.length();

            Node candidateNode = null;
            int candidateCharsMatched = 0;

            outer_loop: while (charsMatched < documentLength) {
                Node nextNode = currentNode.getOutgoingEdge(input.charAt(charsMatched));
                if (nextNode == null) {
                    // Next node is a dead end...
                    break;
                }

                currentNode = nextNode;
                CharSequence currentNodeEdgeCharacters = currentNode.getIncomingEdge();
                final int numCharsInEdge = currentNodeEdgeCharacters.length();
                if (numCharsInEdge + charsMatched > documentLength) {
                    // This node can't be a match because it is too long...
                    break;
                }
                for (int i = 0; i < numCharsInEdge; i++) {
                    if (currentNodeEdgeCharacters.charAt(i) != input.charAt(charsMatched + i)) {
                        // Found a difference between a character in the input
                        // and a character in the edge represented by current node,
                        // current node is a dead end...
                        break outer_loop;
                    }
                }
                // All characters in the current edge matched, add this number to total chars matched...
                charsMatched += numCharsInEdge;

                if (currentNode.getValue() != null) {
                    // This is an explicit node and all of its chars match input, return a match...
                    candidateNode = currentNode;
                    candidateCharsMatched = charsMatched;
                } // else the node matches, but is not an explicit node so we should continue scanning...
            }
            return candidateNode == null ? null : new KeyValuePairImpl(CharSequences.toString(input.subSequence(0, candidateCharsMatched)), candidateNode.getValue());
        }
    }

    private final ConcurrentInvertedRadixTreeImpl radixTree;

    /**
     * Creates a new {@link ConcurrentInvertedRadixTree} which will use the given {@link NodeFactory} to create nodes.
     *
     * @param nodeFactory An object which creates {@link Node} objects on-demand, and which might return node
     * implementations optimized for storing the values supplied to it for the creation of each node
     */
    public ConcurrentInvertedRadixTree(NodeFactory nodeFactory) {
        this.radixTree = new ConcurrentInvertedRadixTreeImpl(nodeFactory);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public O put(CharSequence key, O value) {
        return radixTree.put(key, value);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public O putIfAbsent(CharSequence key, O value) {
        return radixTree.putIfAbsent(key, value);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean remove(CharSequence key) {
        return radixTree.remove(key);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public O getValueForExactKey(CharSequence key) {
        return radixTree.getValueForExactKey(key);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getKeysStartingWith(CharSequence prefix) {
        return radixTree.getKeysStartingWith(prefix);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getValuesForKeysStartingWith(CharSequence prefix) {
        return radixTree.getValuesForKeysStartingWith(prefix);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable> getKeyValuePairsForKeysStartingWith(CharSequence prefix) {
        return radixTree.getKeyValuePairsForKeysStartingWith(prefix);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getClosestKeys(CharSequence candidate) {
        return radixTree.getClosestKeys(candidate);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getValuesForClosestKeys(CharSequence candidate) {
        return radixTree.getValuesForClosestKeys(candidate);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable> getKeyValuePairsForClosestKeys(CharSequence candidate) {
        return radixTree.getKeyValuePairsForClosestKeys(candidate);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getKeysPrefixing(final CharSequence document) {
        return new Iterable() {
            @Override
            public Iterator iterator() {
                return new LazyIterator() {
                    Iterator> matchesForCurrentSuffix = radixTree.scanForKeysAtStartOfInput(document).iterator();

                    @Override
                    protected CharSequence computeNext() {
                        if (matchesForCurrentSuffix.hasNext()) {
                            return matchesForCurrentSuffix.next().getKey();
                        }
                        else {
                            return endOfData();
                        }
                    }
                };
            }
        };
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getValuesForKeysPrefixing(final CharSequence document) {
        return new Iterable() {
            @Override
            public Iterator iterator() {
                return new LazyIterator() {
                    Iterator> matchesForCurrentSuffix = radixTree.scanForKeysAtStartOfInput(document).iterator();

                    @Override
                    protected O computeNext() {
                        if (matchesForCurrentSuffix.hasNext()) {
                            return matchesForCurrentSuffix.next().getValue();
                        }
                        else {
                            return endOfData();
                        }
                    }
                };
            }
        };
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable> getKeyValuePairsForKeysPrefixing(final CharSequence document) {
        return new Iterable>() {
            @Override
            public Iterator> iterator() {
                return new LazyIterator>() {
                    Iterator> matchesForCurrentSuffix = radixTree.scanForKeysAtStartOfInput(document).iterator();

                    @Override
                    protected KeyValuePair computeNext() {
                        if (matchesForCurrentSuffix.hasNext()) {
                            return matchesForCurrentSuffix.next();
                        }
                        else {
                            return endOfData();
                        }
                    }
                };
            }
        };
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public CharSequence getLongestKeyPrefixing(CharSequence document) {
        KeyValuePair match = radixTree.scanForLongestKeyAtStartOfInput(document);
        return match == null ? null : match.getKey();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public O getValueForLongestKeyPrefixing(CharSequence document) {
        KeyValuePair match = radixTree.scanForLongestKeyAtStartOfInput(document);
        return match == null ? null : match.getValue();
    }


    /**
     * {@inheritDoc}
     */
    @Override
    public KeyValuePair getKeyValuePairForLongestKeyPrefixing(CharSequence document) {
        return radixTree.scanForLongestKeyAtStartOfInput(document);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getKeysContainedIn(final CharSequence document) {
        return new Iterable() {
            @Override
            public Iterator iterator() {
                return new LazyIterator() {
                    Iterator documentSuffixes = CharSequences.generateSuffixes(document).iterator();
                    Iterator> matchesForCurrentSuffix = Collections.>emptyList().iterator();

                    @Override
                    protected CharSequence computeNext() {
                        while(!matchesForCurrentSuffix.hasNext()) {
                            if (documentSuffixes.hasNext()) {
                                CharSequence nextSuffix = documentSuffixes.next();
                                matchesForCurrentSuffix = radixTree.scanForKeysAtStartOfInput(nextSuffix).iterator();
                            }
                            else {
                                return endOfData();
                            }
                        }
                        return matchesForCurrentSuffix.next().getKey();
                    }
                };
            }
        };
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable getValuesForKeysContainedIn(final CharSequence document) {
        return new Iterable() {
            @Override
            public Iterator iterator() {
                return new LazyIterator() {
                    Iterator documentSuffixes = CharSequences.generateSuffixes(document).iterator();
                    Iterator> matchesForCurrentSuffix = Collections.>emptyList().iterator();

                    @Override
                    protected O computeNext() {
                        while(!matchesForCurrentSuffix.hasNext()) {
                            if (documentSuffixes.hasNext()) {
                                CharSequence nextSuffix = documentSuffixes.next();
                                matchesForCurrentSuffix = radixTree.scanForKeysAtStartOfInput(nextSuffix).iterator();
                            }
                            else {
                                return endOfData();
                            }
                        }
                        return matchesForCurrentSuffix.next().getValue();
                    }
                };
            }
        };
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable> getKeyValuePairsForKeysContainedIn(final CharSequence document) {
        return new Iterable>() {
            @Override
            public Iterator> iterator() {
                return new LazyIterator>() {
                    Iterator documentSuffixes = CharSequences.generateSuffixes(document).iterator();
                    Iterator> matchesForCurrentSuffix = Collections.>emptyList().iterator();

                    @Override
                    protected KeyValuePair computeNext() {
                        while(!matchesForCurrentSuffix.hasNext()) {
                            if (documentSuffixes.hasNext()) {
                                CharSequence nextSuffix = documentSuffixes.next();
                                matchesForCurrentSuffix = radixTree.scanForKeysAtStartOfInput(nextSuffix).iterator();
                            }
                            else {
                                return endOfData();
                            }
                        }
                        return matchesForCurrentSuffix.next();
                    }
                };
            }
        };
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public int size() {
        return radixTree.size();
    }

    @Override
    public Node getNode() {
        return radixTree.getNode();
    }


}