All downloads are free. Search and download functionality uses the official Maven repository.

org.pageseeder.flint.lucene.search.Fields Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.lucene.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.BytesRef;
import org.pageseeder.flint.lucene.util.Beta;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A set of utility methods for dealing with search fields.
 *
 * @author Christophe Lauret
 * @version 12 August 2010
 */
public final class Fields {

  /** Utility class */
  private Fields() {
  }

  /**
   * Returns a mapping of fields with a default boost value of 1.0.
   *
   * @param fields the list of fields to create the map.
   * @return the corresponding map with each field value mapped to a boost value of 1.0
   */
  @Beta
  public static Map asBoostMap(List fields) {
    Map map = new LinkedHashMap<>();
    for (String f : fields) {
      map.put(f, 1.0f);
    }
    return map;
  }

  /**
   * Indicates whether the given field name is valid.
   *
   * 

This method does not check for the existence of the field. * * @param field the name of the field to check. * @return true if the field name is a valid name for the index; * false otherwise. */ @Beta public static boolean isValidName(String field) { return field != null && field.length() > 0; } /** * Returns a list of valid field names. * * @param fields the list of fields to create the map. * @return a list of valid field names. */ @Beta public static List filterNames(List fields) { List names = new ArrayList<>(); for (String f : fields) { if (isValidName(f)) { names.add(f); } } return names; } /** * Returns a list of possible field values from the specified text. * *

You can use this method to extract the list of terms or phrase values to create a query. * *

Spaces are ignored unless they are within double quotation marks. * *

See examples below: *

   * |Big|             => [Big]
   * |Big bang|        => [Big, bang]
   * |   Big   bang |  => [Big, bang]
   * |The "Big bang"|  => [The, "Big bang"]
   * |The "Big bang|   => [The, "Big, bang]
   * 
* *

Note: this class does not exclude terms which could be considered stop words by the index. * * @param text The text for which values are needed. * @return the corresponding list of values. */ @Beta public static List toValues(String text) { List values = new ArrayList<>(); Pattern p = Pattern.compile("(\\\"[^\\\"]+\\\")|(\\S+)"); Matcher m = p.matcher(text); while (m.find()) { values.add(m.group()); } return values; } /** * Returns the string value of the specified field. * *

This method will automatically decompress the value of the field if it is binary. * * @param f The field * @return The value of the field as a string. */ public static String toString(IndexableField f) { if (f == null) return null; String value = f.stringValue(); // is it a compressed field? if (value == null) { BytesRef binary = f.binaryValue(); if (binary != null) { value = binary.utf8ToString(); } } return value; } /** * Returns the terms for a field * * @param field The field * @param text The text to analyze * @param analyzer The analyzer * * @return the corresponding list of terms produced by the analyzer. */ public static List toTerms(String field, String text, Analyzer analyzer) { List terms = new ArrayList<>(); try { TokenStream stream = analyzer.tokenStream(field, new StringReader(text)); CharTermAttribute attribute = stream.addAttribute(CharTermAttribute.class); stream.reset(); while (stream.incrementToken()) { terms.add(attribute.toString()); } stream.end(); stream.close(); } catch (IOException ex) { // Should not occur since we use a StringReader } return terms; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy