// org.pageseeder.flint.lucene.search.Fields
/*
* Copyright 2015 Allette Systems (Australia)
* http://www.allette.com.au
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pageseeder.flint.lucene.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.pageseeder.flint.lucene.util.Beta;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* A set of utility methods for dealing with search fields.
*
* @author Christophe Lauret
* @version 12 August 2010
*/
public final class Fields {

  /**
   * Matches either a double-quoted phrase or a run of non-whitespace characters.
   *
   * <p>Compiled once and reused, since {@link #toValues(String)} may be invoked
   * for every query.
   */
  private static final Pattern VALUE_PATTERN = Pattern.compile("(\"[^\"]+\")|(\\S+)");

  /** Utility class: no instances. */
  private Fields() {
  }

  /**
   * Returns a mapping of fields with a default boost value of 1.0.
   *
   * @param fields the list of fields to create the map.
   *
   * @return the corresponding map with each field name mapped to a boost value of 1.0
   */
  @Beta
  public static Map<String, Float> asBoostMap(List<String> fields) {
    Map<String, Float> map = new LinkedHashMap<>();
    for (String f : fields) {
      map.put(f, 1.0f);
    }
    return map;
  }

  /**
   * Indicates whether the given field name is valid.
   *
   * <p>This method does not check for the existence of the field.
   *
   * @param field the name of the field to check.
   *
   * @return <code>true</code> if the field name is a valid name for the index;
   *         <code>false</code> otherwise.
   */
  @Beta
  public static boolean isValidName(String field) {
    return field != null && field.length() > 0;
  }

  /**
   * Returns a list of valid field names.
   *
   * @param fields the list of field names to filter.
   *
   * @return a new list containing only the valid field names, in their original order.
   */
  @Beta
  public static List<String> filterNames(List<String> fields) {
    List<String> names = new ArrayList<>();
    for (String f : fields) {
      if (isValidName(f)) {
        names.add(f);
      }
    }
    return names;
  }

  /**
   * Returns a list of possible field values from the specified text.
   *
   * <p>You can use this method to extract the list of terms or phrase values
   * to create a query.
   *
   * <p>Spaces are ignored unless they are within double quotation marks.
   *
   * <p>See examples below:
   * <pre>
   * |Big|            =&gt; [Big]
   * |Big bang|       =&gt; [Big, bang]
   * | Big bang |     =&gt; [Big, bang]
   * |The "Big bang"| =&gt; [The, "Big bang"]
   * |The "Big bang|  =&gt; [The, "Big, bang]
   * </pre>
   *
   * <p>Note: this class does not exclude terms which could be considered stop
   * words by the index.
   *
   * @param text The text for which values are needed.
   *
   * @return the corresponding list of values.
   */
  @Beta
  public static List<String> toValues(String text) {
    List<String> values = new ArrayList<>();
    Matcher m = VALUE_PATTERN.matcher(text);
    while (m.find()) {
      values.add(m.group());
    }
    return values;
  }

  /**
   * Returns the string value of the specified field.
   *
   * <p>This method will automatically decompress the value of the field if it
   * is binary.
   *
   * @param f The field
   *
   * @return The value of the field as a string, or <code>null</code> if the field
   *         is <code>null</code> or carries neither a string nor a binary value.
   */
  public static String toString(IndexableField f) {
    if (f == null) return null;
    String value = f.stringValue();
    // Binary (e.g. compressed) fields have no string value: decode the bytes as UTF-8
    if (value == null) {
      BytesRef binary = f.binaryValue();
      if (binary != null) {
        value = binary.utf8ToString();
      }
    }
    return value;
  }

  /**
   * Returns the terms for a field.
   *
   * @param field    The field
   * @param text     The text to analyze
   * @param analyzer The analyzer
   *
   * @return the corresponding list of terms produced by the analyzer.
   */
  public static List<String> toTerms(String field, String text, Analyzer analyzer) {
    List<String> terms = new ArrayList<>();
    // try-with-resources so the token stream is released even if analysis fails
    try (TokenStream stream = analyzer.tokenStream(field, new StringReader(text))) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        terms.add(attribute.toString());
      }
      stream.end();
    } catch (IOException ignored) {
      // Should not occur since we read from a StringReader (no actual I/O)
    }
    return terms;
  }

}