// org.pageseeder.flint.lucene.search.Fields -- from the pso-flint-lucene artifact (Flint framework), newest version.
/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.lucene.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.pageseeder.flint.lucene.util.Beta;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A set of utility methods for dealing with search fields.
 *
 * <p>This class only exposes static methods and cannot be instantiated.
 *
 * @author Christophe Lauret
 * @version 12 August 2010
 */
public final class Fields {

  /**
   * Matches either a double-quoted phrase (quotes included) or a run of non-whitespace characters.
   *
   * <p>Compiled once here rather than on every call to {@link #toValues(String)}.
   */
  private static final Pattern VALUE_PATTERN = Pattern.compile("(\\\"[^\\\"]+\\\")|(\\S+)");

  /** Utility class */
  private Fields() {
  }

  /**
   * Returns a mapping of fields with a default boost value of 1.0.
   *
   * <p>The returned map is a new, mutable map which iterates in the order of the specified list;
   * a field name occurring several times in the list yields a single entry.
   *
   * @param fields the list of fields to create the map.
   * @return the corresponding map with each field value mapped to a boost value of 1.0
   */
  @Beta
  public static Map<String, Float> asBoostMap(List<String> fields) {
    Map<String, Float> map = new LinkedHashMap<>();
    for (String f : fields) {
      map.put(f, 1.0f);
    }
    return map;
  }

  /**
   * Indicates whether the given field name is valid.
   *
   * <p>A name is considered valid when it is neither <code>null</code> nor empty.
   *
   * <p>This method does not check for the existence of the field.
   *
   * @param field the name of the field to check.
   * @return <code>true</code> if the field name is a valid name for the index;
   *         <code>false</code> otherwise.
   */
  @Beta
  public static boolean isValidName(String field) {
    return field != null && !field.isEmpty();
  }

  /**
   * Returns a list of valid field names.
   *
   * <p>The names for which {@link #isValidName(String)} returns <code>false</code> are left out;
   * the other names are returned in a new list, in their original order.
   *
   * @param fields the list of field names to filter.
   * @return a list of valid field names.
   */
  @Beta
  public static List<String> filterNames(List<String> fields) {
    List<String> names = new ArrayList<>();
    for (String f : fields) {
      if (isValidName(f)) {
        names.add(f);
      }
    }
    return names;
  }

  /**
   * Returns a list of possible field values from the specified text.
   *
   * <p>You can use this method to extract the list of terms or phrase values to create a query.
   *
   * <p>Spaces are ignored unless they are within double quotation marks.
   *
   * <p>See examples below:
   * <pre>
   * |Big|             =&gt; [Big]
   * |Big bang|        =&gt; [Big, bang]
   * |   Big   bang |  =&gt; [Big, bang]
   * |The "Big bang"|  =&gt; [The, "Big bang"]
   * |The "Big bang|   =&gt; [The, "Big, bang]
   * </pre>
   *
   * <p>Quoted phrases are returned with their surrounding quotation marks.
   *
   * <p>Note: this method does not exclude terms which could be considered stop words by the index.
   *
   * @param text The text for which values are needed.
   * @return the corresponding list of values.
   */
  @Beta
  public static List<String> toValues(String text) {
    List<String> values = new ArrayList<>();
    Matcher m = VALUE_PATTERN.matcher(text);
    while (m.find()) {
      values.add(m.group());
    }
    return values;
  }

  /**
   * Returns the string value of the specified field.
   *
   * <p>If the field has no string value, its binary value (when there is one) is decoded as
   * UTF-8 and returned instead. No decompression is performed by this method.
   *
   * @param f The field
   * @return The value of the field as a string; <code>null</code> if the field is
   *         <code>null</code> or has neither a string value nor a binary value.
   */
  public static String toString(IndexableField f) {
    if (f == null) return null;
    String value = f.stringValue();
    // No string value: fall back on the binary value, read as UTF-8 text
    if (value == null) {
      BytesRef binary = f.binaryValue();
      if (binary != null) {
        value = binary.utf8ToString();
      }
    }
    return value;
  }

  /**
   * Returns the terms for a field
   *
   * <p>The token stream is always closed, including when the analysis fails part-way, so that
   * it is not left open on the analyzer.
   *
   * @param field    The field
   * @param text     The text to analyze
   * @param analyzer The analyzer
   *
   * @return the corresponding list of terms produced by the analyzer; if an I/O error occurs
   *         during the analysis, the terms collected up to that point.
   */
  public static List<String> toTerms(String field, String text, Analyzer analyzer) {
    List<String> terms = new ArrayList<>();
    // try-with-resources guarantees close() even if reset/incrementToken/end throws
    try (TokenStream stream = analyzer.tokenStream(field, new StringReader(text))) {
      CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class);
      stream.reset();
      while (stream.incrementToken()) {
        terms.add(attribute.toString());
      }
      stream.end();
    } catch (IOException ignored) {
      // Should not occur since we use a StringReader; deliberately best-effort:
      // whatever terms were collected before the failure are returned.
    }
    return terms;
  }

}