All downloads are free. Search and download functionality uses the official Maven repository.

org.daisy.braille.utils.pef.SearchIndex Maven / Gradle / Ivy

package org.daisy.braille.utils.pef;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import java.util.regex.Pattern;

/**
 * Provides a search index that associates words, and optionally their
 * prefixes, with objects.
 *
 * <p>Strings are lower-cased and split into words on whitespace and the
 * characters {@code . , : / -} before being indexed or looked up. Lower-casing
 * is locale-independent, so results do not depend on the default locale of
 * the JVM.</p>
 *
 * @param <E> the type of objects stored in the index
 * @author Joel Håkansson
 */
public class SearchIndex<E> {
    private static final Logger logger = Logger.getLogger(SearchIndex.class.getCanonicalName());

    private static final boolean debug = false;
    private static final String REGEX = "[\\s\\.,:/-]";
    // Compiled once; used both for splitting input into words and for stripping separators.
    private static final Pattern SEPARATORS = Pattern.compile(REGEX);

    private final Map<String, Set<E>> index;
    private final int exclude;

    /**
     * Creates a new search index with the default sub-word limit (3).
     */
    public SearchIndex() {
        this(3);
    }

    /**
     * Creates a new search index with the specified sub-word limit.
     *
     * @param subwordLimit the length of the shortest prefix that is added to
     *                     the index for words longer than the limit. For
     *                     example, if the limit is 3, a search for "li" will
     *                     not match entries containing "limit", but a search
     *                     for "lim" will.
     * @throws IllegalArgumentException if subwordLimit is less than 1
     */
    public SearchIndex(int subwordLimit) {
        // Validate before allocating any state.
        if (subwordLimit < 1) {
            throw new IllegalArgumentException("Value must be 1 or greater.");
        }
        this.index = new Hashtable<>();
        this.exclude = subwordLimit;
    }

    /**
     * Associates the words of a string and (for words longer than the sub-word
     * limit) their prefixes with an object.
     *
     * @param val the string
     * @param obj the object
     */
    public void add(String val, E obj) {
        add(val, obj, false);
    }

    /**
     * Associates the words of a string with an object.
     *
     * @param val    the string
     * @param obj    the object
     * @param strict when true, only the complete words of the specified string
     *               will match the object. When false, every prefix that is at
     *               least as long as the sub-word limit will also match the
     *               object, for words longer than that limit.
     */
    public void add(String val, E obj, boolean strict) {
        for (String word : SEPARATORS.split(val.toLowerCase(Locale.ROOT))) {
            if (word.isEmpty()) {
                // Adjacent or leading separators produce empty tokens.
                continue;
            }
            if (strict || word.length() <= exclude) {
                addToIndex(word, obj);
            } else {
                // Index every prefix from the sub-word limit up to the full word.
                for (int end = exclude; end <= word.length(); end++) {
                    addToIndex(word.substring(0, end), obj);
                }
            }
        }
    }

    /**
     * Adds an object to the set stored under the specified key, creating the
     * set if the key has no entry yet.
     *
     * @param indx the index key
     * @param obj  the object
     */
    void addToIndex(String indx, E obj) {
        if (debug) {
            logger.info("Adding index: " + indx);
        }
        Set<E> entries = index.get(indx);
        if (entries == null) {
            entries = new HashSet<>();
            index.put(indx, entries);
        }
        entries.add(obj);
    }

    /**
     * Gets all objects with an entry for the specified string.
     *
     * @param str the search string; it is lower-cased and stripped of
     *            separator characters before the lookup
     * @return the set stored in the index for the string (callers must not
     *         modify it), or a new empty set if there is no entry
     */
    private Set<E> matches(String str) {
        String key = SEPARATORS.matcher(str.toLowerCase(Locale.ROOT)).replaceAll("");
        if (debug) {
            logger.info("Search for: " + key);
        }
        Set<E> found = index.get(key);
        if (found == null) {
            return new HashSet<>();
        }
        return found;
    }

    /**
     * Returns the set of objects that matches all the specified strings.
     *
     * @param strs the strings
     * @return returns a new set of matching objects; empty if no strings
     *         are supplied
     */
    public Set<E> containsAll(Iterable<String> strs) {
        Set<E> result = new HashSet<>();
        boolean first = true;
        for (String s : strs) {
            if (first) {
                // Copy, so that the index's own set is never modified below.
                result.addAll(matches(s));
                first = false;
            } else {
                result.retainAll(matches(s));
            }
        }
        return result;
    }

    /**
     * Splits a string into its non-empty words, using the same separator
     * characters as the index.
     *
     * @param str the string
     * @return the words, in order of appearance
     */
    private List<String> normalizeString(String str) {
        List<String> ret = new ArrayList<>();
        for (String s : SEPARATORS.split(str)) {
            if (!s.isEmpty()) {
                ret.add(s);
            }
        }
        return ret;
    }

    /**
     * Returns the set of objects that matches all the character units found
     * in the input string.
     *
     * @param str the string
     * @return returns a set of matching objects
     */
    public Set<E> containsAll(String str) {
        return containsAll(normalizeString(str));
    }

    /**
     * Returns the set of objects that matches all the specified strings.
     *
     * @param strs the strings
     * @return returns a set of matching objects
     */
    public Set<E> containsAll(String... strs) {
        return containsAll(Arrays.asList(strs));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy