All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.cassandra.index.sasi.sa.SuffixSA Maven / Gradle / Ivy

There is a newer version: 3.11.12.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.cassandra.index.sasi.sa;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;

import org.apache.cassandra.index.sasi.disk.DynamicTokenTreeBuilder;
import org.apache.cassandra.index.sasi.disk.OnDiskIndexBuilder;
import org.apache.cassandra.index.sasi.disk.TokenTreeBuilder;
import org.apache.cassandra.db.marshal.AbstractType;
import org.apache.cassandra.utils.LongTimSort;
import org.apache.cassandra.utils.Pair;

import com.google.common.base.Charsets;

/**
 * Suffix-array builder for textual terms.
 *
 * Each added term value is decoded as UTF-8 into a {@link CharBuffer}-backed {@link CharTerm};
 * {@link #finish()} then exposes every character suffix of every term, in comparator order,
 * together with the tokens of the term(s) that suffix came from.
 */
public class SuffixSA extends SA<CharBuffer>
{
    public SuffixSA(AbstractType<?> comparator, OnDiskIndexBuilder.Mode mode)
    {
        super(comparator, mode);
    }

    /**
     * Wraps a raw term value into a {@link CharTerm}.
     *
     * The current {@code charCount} is passed as the first constructor argument
     * (presumably the term's starting offset in the global character space; the
     * iterator below relies on {@code getPosition() + length()} being the term's end offset).
     */
    protected Term<CharBuffer> getTerm(ByteBuffer termValue, TokenTreeBuilder tokens)
    {
        // decode a duplicate so the caller's buffer position/limit are not consumed by the decoder
        return new CharTerm(charCount, Charsets.UTF_8.decode(termValue.duplicate()), tokens);
    }

    public TermIterator finish()
    {
        return new SASuffixIterator();
    }

    /**
     * Iterates over all suffixes in sorted order, merging the token trees of
     * consecutive suffixes that compare as equal into a single entry.
     */
    private class SASuffixIterator extends TermIterator
    {
        /** Bit (within the low 32-bit half of an entry) flagging a suffix that is a whole term. */
        private static final int COMPLETE_BIT = 31;
        private static final long COMPLETE_MASK = 1L << COMPLETE_BIT;

        /**
         * One entry per character: term index in the high 32 bits, global char position in the
         * low 31 bits, and {@link #COMPLETE_BIT} set when the suffix starts at the term's first char.
         */
        private final long[] suffixes;

        private int current = 0;
        private IndexedTerm lastProcessedSuffix;
        private TokenTreeBuilder container;

        public SASuffixIterator()
        {
            // each element has term index and char position encoded as two 32-bit integers
            // to avoid binary search per suffix while sorting suffix array.
            suffixes = new long[charCount];

            long termIndex = -1;
            long currentTermEnd = -1; // -1 guarantees the first term is loaded at i == 0
            for (int i = 0; i < charCount; i++)
            {
                // crossing the end of the current term means position i is the first char of the next one,
                // i.e. the suffix starting here is the complete term
                boolean startsTerm = i >= currentTermEnd;
                if (startsTerm)
                {
                    Term<CharBuffer> currentTerm = terms.get((int) ++termIndex);
                    currentTermEnd = currentTerm.getPosition() + currentTerm.length();
                }

                long encoded = (termIndex << 32) | i;
                if (startsTerm)
                    encoded |= COMPLETE_MASK;

                suffixes[i] = encoded;
            }

            LongTimSort.sort(suffixes, (a, b) -> comparator.compare(suffixOf(termOf(a), a), suffixOf(termOf(b), b)));
        }

        /** Returns the global char position stored in the low half of an entry, without the completeness flag. */
        private int clearCompleteBit(long value)
        {
            // the int cast drops the term index (high 32 bits); the mask drops the flag bit
            return (int) (value & ~COMPLETE_MASK);
        }

        /** Looks up the term whose index is stored in the high 32 bits of an entry. */
        private Term<CharBuffer> termOf(long encoded)
        {
            return terms.get((int) (encoded >>> 32));
        }

        /** Extracts the suffix bytes for an entry; the offset is made relative to the start of {@code term}. */
        private ByteBuffer suffixOf(Term<CharBuffer> term, long encoded)
        {
            return term.getSuffix(clearCompleteBit(encoded) - term.getPosition());
        }

        /**
         * Materializes the suffix stored at the given slot of the sorted array.
         *
         * @return the suffix (marked partial unless it spans its whole term) paired with the owning term's tokens
         */
        private Pair<IndexedTerm, TokenTreeBuilder> suffixAt(int position)
        {
            long encoded = suffixes[position];
            Term<CharBuffer> term = termOf(encoded);
            boolean isPartial = (encoded & COMPLETE_MASK) == 0;
            return Pair.create(new IndexedTerm(suffixOf(term, encoded), isPartial), term.getTokens());
        }

        /** Smallest suffix in comparator order; indexes slot 0, so it throws if there are no suffixes. */
        public ByteBuffer minTerm()
        {
            return suffixAt(0).left.getBytes();
        }

        /** Largest suffix in comparator order; indexes the last slot, so it throws if there are no suffixes. */
        public ByteBuffer maxTerm()
        {
            return suffixAt(suffixes.length - 1).left.getBytes();
        }

        protected Pair<IndexedTerm, TokenTreeBuilder> computeNext()
        {
            while (true)
            {
                if (current >= suffixes.length)
                {
                    if (lastProcessedSuffix == null)
                        return endOfData();

                    // input exhausted: flush the group that is still being accumulated
                    Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix();

                    lastProcessedSuffix = null;
                    return result;
                }

                Pair<IndexedTerm, TokenTreeBuilder> suffix = suffixAt(current++);

                if (lastProcessedSuffix == null)
                {
                    // first suffix overall: start the first group
                    lastProcessedSuffix = suffix.left;
                    container = new DynamicTokenTreeBuilder(suffix.right);
                }
                else if (comparator.compare(lastProcessedSuffix.getBytes(), suffix.left.getBytes()) == 0)
                {
                    // same suffix as the running group: merge its tokens and keep accumulating
                    lastProcessedSuffix = suffix.left;
                    container.add(suffix.right);
                }
                else
                {
                    // a different suffix closes the running group; emit it and start a new one
                    Pair<IndexedTerm, TokenTreeBuilder> result = finishSuffix();

                    lastProcessedSuffix = suffix.left;
                    container = new DynamicTokenTreeBuilder(suffix.right);

                    return result;
                }
            }
        }

        /** Pairs the current group's suffix with its finished token container. */
        private Pair<IndexedTerm, TokenTreeBuilder> finishSuffix()
        {
            return Pair.create(lastProcessedSuffix, container.finish());
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy