// Source: com.day.cq.commons.predicates.servlets.AbstractSearchServlet, from the aem-sdk-api artifact (Maven / Gradle / Ivy).
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
* Copyright 2024 Adobe
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe and its suppliers, if any. The intellectual
* and technical concepts contained herein are proprietary to Adobe
* and its suppliers and are protected by all applicable intellectual
* property laws, including trade secret and copyright laws.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe.
**************************************************************************/
package com.day.cq.commons.predicates.servlets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.StringTokenizer;
/**
 * {@code AbstractSearchServlet} is a base class for search servlets.
 *
 * <p>It declares the constants {@link #QUERY}, {@link #START}, {@link #LIMIT}
 * and {@link #TIDY}, and provides helpers that decide whether a trailing
 * {@code '*'} wildcard is appended to a query text.
 */
public abstract class AbstractSearchServlet extends AbstractPredicateServlet {

    /** Query clause */
    public static final String QUERY = "query";

    /** Start index */
    public static final String START = "start";

    /** Result limit */
    public static final String LIMIT = "limit";

    /** tidy param */
    public static final String TIDY = "tidy";

    /**
     * List of unicode blocks that contain characters that act as words.
     * The list is unmodifiable.
     */
    public static final List<Character.UnicodeBlock> WORD_CHARS;

    /**
     * Split terms at these characters.
     */
    public static final String SPLIT_CHARACTERS = " _-.,";

    static {
        // this list should be kept roughly synchronized with
        // the standard lucene tokenizer!
        List<Character.UnicodeBlock> blocks = new ArrayList<>();
        // Chinese and Japanese
        blocks.add(Character.UnicodeBlock.HIRAGANA);
        blocks.add(Character.UnicodeBlock.KATAKANA);
        blocks.add(Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS);
        blocks.add(Character.UnicodeBlock.BOPOMOFO);
        blocks.add(Character.UnicodeBlock.CJK_COMPATIBILITY);
        blocks.add(Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A);
        blocks.add(Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS);
        blocks.add(Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS);
        blocks.add(Character.UnicodeBlock.SPECIALS);
        // Korean
        blocks.add(Character.UnicodeBlock.HANGUL_SYLLABLES);
        blocks.add(Character.UnicodeBlock.HANGUL_JAMO);
        WORD_CHARS = Collections.unmodifiableList(blocks);
    }

    /**
     * Checks whether the text is free of characters that act as words on
     * their own, i.e. characters from one of the {@link #WORD_CHARS} blocks.
     * An empty text is considered a single word.
     *
     * @param text the text to check.
     * @return {@code true} if {@code text} is a single word (none of its
     *         characters belongs to a block listed in {@link #WORD_CHARS});
     *         {@code false} otherwise.
     * @throws NullPointerException if {@code text} is {@code null}.
     */
    protected boolean isSingleWord(String text) {
        for (int i = 0; i < text.length(); i++) {
            // UnicodeBlock.of() may return null for a character outside any
            // defined block; contains(null) is simply false for this list.
            Character.UnicodeBlock block = Character.UnicodeBlock.of(text.charAt(i));
            if (WORD_CHARS.contains(block)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Conditionally appends a wildcard to the query {@code text}: the
     * wildcard is only appended if the text is considered a single word.
     * In that case the text is also broken into terms at the
     * {@link #SPLIT_CHARACTERS}, the terms are re-joined with single spaces,
     * and the wildcard is added after the last term. A text that is not
     * considered a single word is returned unchanged.
     *
     * <p>A text without any term (empty, or consisting solely of split
     * characters) results in {@code "*"}.
     *
     * See also: {@link #isSingleWord(String)}.
     *
     * @param text the query text.
     * @return the processed query text, possibly with appended '*' wildcard.
     * @throws NullPointerException if {@code text} is {@code null}.
     */
    protected String applyWildcard(String text) {
        // only append * if query string is a single word
        if (!isSingleWord(text)) {
            return text;
        }
        StringBuilder modified = new StringBuilder();
        StringTokenizer terms = new StringTokenizer(text, SPLIT_CHARACTERS);
        // empty before the first term, a single blank before every later one
        String separator = "";
        while (terms.hasMoreTokens()) {
            modified.append(separator).append(terms.nextToken());
            separator = " ";
        }
        modified.append("*");
        return modified.toString();
    }
}