All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.day.cq.commons.predicates.servlets.AbstractSearchServlet Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * ___________________
 * 
 * Copyright 2024 Adobe
 * All Rights Reserved.
 * 
 * NOTICE: All information contained herein is, and remains
 * the property of Adobe and its suppliers, if any. The intellectual
 * and technical concepts contained herein are proprietary to Adobe
 * and its suppliers and are protected by all applicable intellectual
 * property laws, including trade secret and copyright laws.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe.
 **************************************************************************/

package com.day.cq.commons.predicates.servlets;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.StringTokenizer;

/**
 * AbstractSearchServlet is a base class for search servlets.
 */
public abstract class AbstractSearchServlet extends AbstractPredicateServlet {

    /** Query clause */
    public static final String QUERY = "query";

    /** Start index */
    public static final String START = "start";

    /** Result limit */
    public static final String LIMIT = "limit";

    /** tidy param */
    public static final String TIDY = "tidy";

    /**
     * List of unicode blocks that contain characters that act as words.
     */
    public static final List WORD_CHARS;

    /**
     * Split terms at these characters.
     */
    public static final String SPLIT_CHARACTERS = " _-.,";

    static {
        // this list should be kept roughly synchronized with
        // the standard lucene tokenizer!
        List list = new ArrayList<>();
        // Chinese and Japanese
        list.add(Character.UnicodeBlock.HIRAGANA);
        list.add(Character.UnicodeBlock.KATAKANA);
        list.add(Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
        list.add(Character.UnicodeBlock.BOPOMOFO);
        list.add(Character.UnicodeBlock.CJK_COMPATIBILITY);
        list.add(Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
        list.add(Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
        list.add(Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
        list.add(Character.UnicodeBlock.SPECIALS);
        // Korean
        list.add(Character.UnicodeBlock.HANGUL_SYLLABLES);
        list.add(Character.UnicodeBlock.HANGUL_JAMO);
        WORD_CHARS = Collections.unmodifiableList(list);
    }

    /**
     * @param text the text to check.
     * @return true if text is a single word;
     *         false otherwise.
     */
    protected boolean isSingleWord(String text) {
        for (int i = 0; i < text.length(); i++) {
            if (WORD_CHARS.contains(Character.UnicodeBlock.of(text.charAt(i)))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Conditionally appends a wildcard to the query text if the
     * text is not considered a single word. This method also breaks
     * the text into multiple terms as {@link #SPLIT_CHARACTERS}. The wildcard
     * is only added to the last term.
     * 

* See also: {@link #isSingleWord(String)}. * * @param text the query text. * @return the processed query text, possibly with appended '*' wildcard. */ protected String applyWildcard(String text) { // only append * if query string is a single word if (!isSingleWord(text)) { return text; } StringBuilder modified = new StringBuilder(); StringTokenizer t = new StringTokenizer(text, SPLIT_CHARACTERS); String space = ""; while (t.hasMoreTokens()) { modified.append(space); space = " "; modified.append(t.nextToken()); } modified.append("*"); return modified.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy