All downloads are free. Search and download functionality uses the official Maven repository.

org.pageseeder.flint.lucene.search.AutoSuggest Maven / Gradle / Ivy

The newest version!
package org.pageseeder.flint.lucene.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.search.suggest.Lookup.LookupResult;
import org.apache.lucene.search.suggest.analyzing.AnalyzingInfixSuggester;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.pageseeder.flint.Index;
import org.pageseeder.flint.IndexException;
import org.pageseeder.flint.IndexManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.*;
import java.util.Map.Entry;

public class AutoSuggest {

  /** Logger shared by all instances of this class. */
  private static final Logger LOGGER = LoggerFactory.getLogger(AutoSuggest.class);

  /** Underlying Lucene suggester; build and lookup synchronize on this object. */
  private final AnalyzingInfixSuggester suggester;

  /** Names of the fields whose values are serialized into each suggestion payload. */
  private final List<String> _resultFields = new ArrayList<>();

  /** Name of this autosuggest, used in log messages. */
  private final String _name;

  /** Index this autosuggest belongs to; consulted by {@code isCurrent()}. */
  private final Index _index;

  /** If true, suggestions are built from indexed terms instead of stored field values. */
  private final boolean _useTerms;

  /** Names of the fields whose terms or values are fed to the suggester. */
  private final List<String> _searchFields = new ArrayList<>();

  //TODO maybe it should be a list
  /** Name of the field whose values become the suggester contexts (criteria); null if unused. */
  private String _withField = null;

  /** Multiplier per field name, used to compute the weight of each entry. */
  private final Map<String, Float> _weights = new HashMap<>();

  /** Time (milliseconds) of the last build that added entries, or -1 if none happened yet. */
  private long lastBuilt = -1;

  private AutoSuggest(String name, Index index, Directory dir, Analyzer indexAnalyzer, Analyzer searchAnalyzer, boolean useTerms, int minChars) throws IndexException {
    this._name = name;
    this._index = index;
    this._useTerms = useTerms;
    try {
      this.suggester = new AnalyzingInfixSuggester(dir, indexAnalyzer, searchAnalyzer, minChars, true, true, true);
    } catch (IOException ex) {
      LOGGER.error("Failed to build autosuggest {}", this._name, ex);
      throw new IndexException("Failed to build autosuggest", ex);
    }
  }

  /**
   * Returns the names of the fields used to feed the suggester.
   *
   * @return the internal list of search field names (not a copy)
   */
  public List<String> getSearchFields() {
    return this._searchFields;
  }

  /**
   * Registers one more search field; a null name is ignored.
   *
   * @param field the name of the field to add
   */
  public void addSearchField(String field) {
    if (field == null) {
      return;
    }
    _searchFields.add(field);
  }

  /**
   * Registers several search fields at once; a null collection is ignored.
   *
   * @param fields the names of the fields to add
   */
  public void addSearchFields(Collection<String> fields) {
    if (fields != null) this._searchFields.addAll(fields);
  }

  /**
   * Registers one more result field; a null name is ignored.
   *
   * @param field the name of the field to add
   */
  public void addResultField(String field) {
    if (field == null) {
      return;
    }
    _resultFields.add(field);
  }

  /**
   * Registers several result fields at once; a null collection is ignored.
   *
   * @param fields the names of the fields to add
   */
  public void addResultFields(Collection<String> fields) {
    if (fields != null) this._resultFields.addAll(fields);
  }

  /**
   * Sets the field whose values are used as criteria (suggester contexts).
   *
   * @param field the name of the criteria field, null to use none
   *
   * @throws IllegalStateException if a non-null field is set on a terms-based autosuggest
   */
  public void setCriteriaField(String field) {
    boolean criteriaRequested = field != null;
    if (criteriaRequested && _useTerms) {
      throw new IllegalStateException("Illogical to use criteria for words suggestions!");
    }
    _withField = field;
  }

  /**
   * Defines the weight multiplier of a field, replacing any previous value for that field.
   *
   * @param field  the name of the field
   * @param weight the multiplier for that field
   */
  public void setWeight(String field, float weight) {
    Float boxed = Float.valueOf(weight);
    _weights.put(field, boxed);
  }

  /**
   * Defines weights from a comma separated list of <code>field:weight</code> pairs:
   *  level:2,price:10,number:0.5
   *
   * <p>Whitespace around field names and values is ignored, so
   * <code>level:2, price:10</code> defines the fields "level" and "price".
   * Entries that are not of the form <code>field:weight</code> or whose
   * weight is not a number are logged and skipped.
   *
   * @param weights weights as string, ignored if null
   */
  public void setWeights(String weights) {
    if (weights == null) return;
    for (String weight : weights.split(",")) {
      String[] parts = weight.split(":");
      if (parts.length != 2) {
        // blank entries (e.g. "a:1, ,b:2") are not worth reporting
        if (!weight.trim().isEmpty())
          LOGGER.warn("Ignoring malformed autosuggest {} weight: {}", this._name, weight);
        continue;
      }
      // trim so that "a:1, b:2" does not register a field named " b"
      String field = parts[0].trim();
      String value = parts[1].trim();
      try {
        this._weights.put(field, Float.valueOf(value));
      } catch (NumberFormatException ex) {
        LOGGER.error("Ignoring invalid autosuggest {} weight for field {}: not a number! ({})", this._name, field, value);
      }
    }
  }

  /**
   * Returns the time of the last build that added entries.
   *
   * @return the time in milliseconds, or -1 if no such build happened yet
   */
  public long getLastBuilt() {
    return lastBuilt;
  }

  /**
   * Compares the index's last-used time with the time of the last build.
   *
   * @return true if the index was last used strictly before the last build
   */
  public boolean isCurrent() {
    long indexLastUsed = _index.getIndexIO().getLastTimeUsed();
    return indexLastUsed < lastBuilt;
  }

  /**
   * Same as {@link #isCurrent()}; the manager is ignored.
   *
   * @deprecated use {@link #isCurrent()}, the method with no parameters
   * @param mgr the manager not used
   * @return true if current
   */
  @Deprecated
  public boolean isCurrent(IndexManager mgr) {
    return this.isCurrent();
  }

  /**
   * Feeds the suggester from the reader provided and refreshes it.
   *
   * <p>For a terms-based autosuggest every term of every search field is added
   * with a weight of 1 and no payload; otherwise the stored documents of every
   * leaf are loaded and their search field values are added.
   * The suggester is refreshed and the last-built time updated only if at least
   * one entry was added. Failures are logged, not thrown.
   *
   * <p>NOTE(review): entries are only ever added here, nothing visible in this
   * method clears previous ones, so repeated builds may accumulate entries - confirm.
   *
   * @param reader the reader to load terms or documents from
   */
  public void build(IndexReader reader) {
    // can't search while we're building it
    synchronized (this.suggester) {
      try {
        boolean buildit = false;
        if (this._useTerms) {
          for (String field : this._searchFields) {
            org.apache.lucene.index.Terms terms = MultiTerms.getTerms(reader, field);
            // field has no terms in this index
            if (terms == null) continue;
            TermsEnum termsEnum = terms.iterator();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
              this.suggester.add(text, null, 1, null);
              buildit = true;
            }
          }
        } else {
          // only load the stored fields we actually read
          Set<String> fieldsToLoad = new HashSet<>();
          fieldsToLoad.addAll(this._resultFields);
          fieldsToLoad.addAll(this._searchFields);
          fieldsToLoad.addAll(this._weights.keySet());
          if (this._withField != null) fieldsToLoad.add(this._withField);
          for (LeafReaderContext ctxt : reader.leaves()) {
            if (addEntries(ctxt, fieldsToLoad)) {
              buildit = true;
            }
          }
        }
        if (buildit) {
          this.suggester.refresh();
          this.lastBuilt = System.currentTimeMillis();
        }
      } catch (IOException | IllegalStateException ex) {
        LOGGER.error("Failed to build dictionary for autosuggest {}", this._name, ex);
      }
    }
  }

  /**
   * Add entries to the suggester: one entry per value of each search field of
   * each live document found in the context provided.
   *
   * <p>Each entry carries the document's criteria values as contexts, a weight
   * computed from the configured field weights and a payload made of the
   * document's result fields.
   *
   * <p>The leaf reader is not closed by this method: it belongs to the reader
   * the context was obtained from.
   *
   * @param context    where to read the documents from
   * @param fieldsToLoad the fields to add
   *
   * @return true if something was added
   *
   * @throws IOException if reading/adding entries failed
   */
  private boolean addEntries(LeafReaderContext context, Set<String> fieldsToLoad) throws IOException {
    boolean buildit = false;
    // do NOT close this reader, its lifecycle is managed by the owner of the top-level reader
    LeafReader subReader = context.reader();
    // check for leaves
    List<LeafReaderContext> leaves = subReader.leaves();
    if (leaves != null && !leaves.isEmpty()) {
      if (leaves.size() > 1 || leaves.get(0) != context) {
        for (LeafReaderContext ctxt : leaves) {
          if (addEntries(ctxt, fieldsToLoad)) {
            buildit = true;
          }
        }
        return buildit;
      }
    }
    // go through our docs then
    Bits live = subReader.getLiveDocs();
    org.apache.lucene.index.StoredFields storedFields = subReader.storedFields();
    int maxDoc = subReader.maxDoc();
    for (int i = 0; i < maxDoc; i++) {
      // skip deleted documents
      if (live != null && !live.get(i)) continue;
      Document doc = storedFields.document(i, fieldsToLoad);
      // load criteria values
      Set<BytesRef> contexts = null;
      if (this._withField != null) {
        String[] with = doc.getValues(this._withField);
        if (with != null) {
          contexts = new HashSet<>();
          for (String w : with) {
            contexts.add(new BytesRef(w));
          }
        }
      }
      // find doc weight
      float weightF = 0;
      for (Entry<String, Float> aweight : this._weights.entrySet()) {
        String val = doc.get(aweight.getKey());
        try {
          // default value is 1 if missing
          weightF += aweight.getValue() * (val == null ? 1 : Float.parseFloat(val));
        } catch (NumberFormatException ex) {
          LOGGER.error("Failed to compute weight as field {} is not a number! ({})", aweight.getKey(), val);
        }
      }
      // multiply by 100 to turn to long (2 decimal precision), a weight of 0 falls back to 100
      long weight = weightF == 0 ? 100 : (long) (weightF * 100);
      // create payload
      byte[] serialized = serialize(this._resultFields, doc);
      BytesRef payload = serialized == null ? null : new BytesRef(serialized);
      for (String field : this._searchFields) {
        String[] texts = doc.getValues(field);
        if (texts != null) {
          for (String text : texts) {
            try {
              this.suggester.add(new BytesRef(text), contexts, weight, payload);
              // only report an addition when it actually succeeded
              buildit = true;
            } catch (Exception ex) {
              LOGGER.error("Failed to add text for field {} to autosuggest {}", field, this._name, ex);
            }
          }
        } else {
          LOGGER.error("Failed to load values for field {} in autosuggest {}", field, this._name);
        }
      }
    }
    return buildit;
  }

  /**
   * Serialize the fields provided into a payload for a suggester entry.
   *
   * <p>The payload is a Java-serialized map of field name to that field's
   * values in the document.
   *
   * @param fields  list of fields to load from the document
   * @param doc     the document
   *
   * @return the payload as a byte array, null if there are no fields or if serializing failed
   */
  private static byte[] serialize(Collection<String> fields, Document doc) {
    if (fields.isEmpty()) return null;
    // build map
    Map<String, String[]> result = new HashMap<>();
    for (String field : fields) {
      String[] values = doc.getValues(field);
      if (values != null) result.put(field, values);
    }
    // serialize it, the stream is closed (and flushed) even if writing fails
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try (ObjectOutputStream out = new ObjectOutputStream(bos)) {
      out.writeObject(result);
    } catch (IOException ex) {
      // all internal so shouldn't happen
      LOGGER.error("Failed to build payload", ex);
      return null;
    }
    return bos.toByteArray();
  }

  /**
   * Reads back a payload produced by {@code serialize}.
   *
   * <p>This uses Java native deserialization: it must only be given bytes
   * written by this class, never data from an untrusted source.
   *
   * @param bytes the serialized map
   *
   * @return the map of field name to field values
   *
   * @throws IOException if the bytes can't be read or reference an unknown class
   */
  @SuppressWarnings("unchecked")
  private Map<String, String[]> deserialize(byte[] bytes) throws IOException {
    try (ObjectInputStream in = new ObjectInputStream(new ByteArrayInputStream(bytes))) {
      // unchecked: the payload is always the map written by serialize
      return (Map<String, String[]>) in.readObject();
    } catch (ClassNotFoundException ex) {
      throw new IOException("Class not found when deserializing", ex);
    }
  }

  /**
   * Loads at most 10 suggestions for the text provided, with no criteria.
   *
   * @param text the text to find suggestions for
   *
   * @return the list of suggestions, never null
   */
  public List<Suggestion> suggest(String text) {
    return suggest(text, 10);
  }

  /**
   * Loads suggestions for the text provided, with no criteria.
   *
   * @param text the text to find suggestions for
   * @param nb   the number of results asked from the suggester
   *
   * @return the list of suggestions, never null
   */
  public List<Suggestion> suggest(String text, int nb) {
    // the cast selects the Collection overload rather than the String one
    return suggest(text, (Collection<String>) null, nb);
  }

  /**
   * Loads suggestions for the text provided, restricted to a single criteria value.
   *
   * @param text the text to find suggestions for
   * @param with the criteria value, null for no criteria
   * @param nb   the number of results asked from the suggester
   *
   * @return the list of suggestions, never null
   */
  public List<Suggestion> suggest(String text, String with, int nb) {
    Collection<String> criteria = with == null ? null : Collections.singleton(with);
    return suggest(text, criteria, nb);
  }

  /**
   * Loads suggestions for the text provided, optionally restricted to entries
   * matching one of the criteria values.
   *
   * <p>Duplicate suggestions (as defined by {@code Suggestion.equals}) are
   * returned only once, the first occurrence is kept. Lookup and payload
   * failures are logged, not thrown.
   *
   * @param text     the text to find suggestions for
   * @param criteria the criteria values, null or empty for no criteria
   * @param nb       the number of results asked from the suggester
   *
   * @return the list of suggestions, empty if the suggester was never built or the lookup failed
   *
   * @throws IllegalStateException if criteria are used on a terms-based autosuggest
   */
  public List<Suggestion> suggest(String text, Collection<String> criteria, int nb) {
    if (this.lastBuilt == -1) {
      LOGGER.warn("Loading suggestions with empty suggester for autosuggest {}!", this._name);
      return new ArrayList<>();
    }
    Set<BytesRef> contexts = null;
    if (criteria != null && !criteria.isEmpty()) {
      if (this._useTerms)
        throw new IllegalStateException("Illogical to use criteria for words suggestions!");
      contexts = new HashSet<>();
      for (String with : criteria) {
        contexts.add(new BytesRef(with));
      }
    }
    List<LookupResult> results = null;
    // can't search while the suggester is being built
    synchronized (this.suggester) {
      try {
        results = this.suggester.lookup(text, contexts, false, nb);
      } catch (IOException ex) {
        LOGGER.error("Failed to lookup suggestions for autosuggest {}", this._name, ex);
      }
    }
    // insertion-ordered set: removes duplicates while keeping the suggester's order
    Set<Suggestion> unique = new LinkedHashSet<>();
    if (results != null) {
      for (LookupResult result : results) {
        Suggestion suggestion = new Suggestion();
        suggestion.text = result.key.toString();
        suggestion.highlight = result.highlightKey.toString();
        suggestion.weight = result.value;
        BytesRef payload = result.payload;
        if (payload != null) {
          try {
            // a BytesRef is a slice of its backing array: only read [offset, offset + length)
            byte[] bytes = Arrays.copyOfRange(payload.bytes, payload.offset, payload.offset + payload.length);
            suggestion.document = deserialize(bytes);
          } catch (IOException ex) {
            LOGGER.error("Failed to deserialize suggestion payload", ex);
          }
        }
        unique.add(suggestion);
      }
    }
    return new ArrayList<>(unique);
  }

  /**
   * Closes the underlying suggester; a failure to do so is logged, not thrown.
   */
  public void close() {
    try {
      suggester.close();
    } catch (IOException closeFailure) {
      LOGGER.error("Failed to close autosuggest {}", _name, closeFailure);
    }
  }
  // --------------------------------------------------------------------------------------
  // static business
  // --------------------------------------------------------------------------------------

  /**
   * Fluent builder for {@link AutoSuggest} objects.
   *
   * <p>The name, the index and the terms flag are mandatory. When not
   * provided, the directory defaults to an in-memory
   * {@link ByteBuffersDirectory}, both analyzers to a {@link StandardAnalyzer}
   * with no stop words and the minimum number of characters to 2.
   */
  public static class Builder {
    /** Whether to use terms; a Boolean so that a missing value can be detected. */
    private Boolean _terms = null;
    private String _name = null;
    private Index _index = null;
    private Directory _dir = null;
    private Analyzer _indexAnalyzer = null;
    private Analyzer _searchAnalyzer = null;
    //TODO it should be a list
    private String _criteria = null;
    private final Map<String, Float> _weights = new HashMap<>();
    private int _minChars = 2;
    private Collection<String> _searchFields = new ArrayList<>();
    private Collection<String> _resultFields = new ArrayList<>();
    /** @param index the index the autosuggest belongs to (mandatory) */
    public Builder index(Index index) {
      this._index = index;
      return this;
    }
    /** @param analyzer the analyzer used when indexing suggestions */
    public Builder indexAnalyzer(Analyzer analyzer) {
      this._indexAnalyzer = analyzer;
      return this;
    }
    /** @param analyzer the analyzer used when looking up suggestions */
    public Builder searchAnalyzer(Analyzer analyzer) {
      this._searchAnalyzer = analyzer;
      return this;
    }
    /** @param name the name of the autosuggest (mandatory) */
    public Builder name(String name) {
      this._name = name;
      return this;
    }
    /** @param terms whether suggestions are built from indexed terms (mandatory) */
    public Builder useTerms(boolean terms) {
      this._terms = terms;
      return this;
    }
    /** @param dir the directory where the suggester stores its data */
    public Builder directory(Directory dir) {
      this._dir = dir;
      return this;
    }
    /** @param minChars the minimum characters value handed to the suggester */
    public Builder minChars(int minChars) {
      this._minChars = minChars;
      return this;
    }
    /** @param searchFields the search fields, replacing any previously set */
    public Builder searchFields(Collection<String> searchFields) {
      this._searchFields = searchFields;
      return this;
    }
    /** @param weights weights to add to the ones already defined, ignored if null */
    public Builder weights(Map<String, Float> weights) {
      if (weights != null) this._weights.putAll(weights);
      return this;
    }
    /** @param resultFields the result fields, replacing any previously set */
    public Builder resultFields(Collection<String> resultFields) {
      this._resultFields = resultFields;
      return this;
    }
    /** @param criteria the name of the criteria field */
    public Builder criteria(String criteria) {
      this._criteria = criteria;
      return this;
    }
    /**
     * Creates the autosuggest from the values collected so far.
     *
     * @return the new autosuggest
     *
     * @throws IllegalStateException if the terms flag, the name or the index is missing,
     *                               or if a criteria field is combined with terms
     * @throws IndexException        if the underlying suggester could not be created
     */
    public AutoSuggest build() throws IndexException {
      if (this._terms == null) throw new IllegalStateException("missing terms");
      if (this._name  == null) throw new IllegalStateException("missing name");
      if (this._index == null) throw new IllegalStateException("missing index");
      // same check (and message) as setCriteriaField, done before the suggester is
      // created so that no resource is left open when the configuration is invalid
      if (this._terms && this._criteria != null)
        throw new IllegalStateException("Illogical to use criteria for words suggestions!");
      Directory dir = this._dir == null ? new ByteBuffersDirectory() : this._dir;
      Analyzer indexAnalyzer  = this._indexAnalyzer  == null ? new StandardAnalyzer(CharArraySet.EMPTY_SET) : this._indexAnalyzer;
      Analyzer searchAnalyzer = this._searchAnalyzer == null ? new StandardAnalyzer(CharArraySet.EMPTY_SET) : this._searchAnalyzer;
      AutoSuggest as = new AutoSuggest(this._name, this._index, dir, indexAnalyzer, searchAnalyzer, this._terms, this._minChars);
      as.setCriteriaField(this._criteria);
      as.addSearchFields(this._searchFields);
      as.addResultFields(this._resultFields);
      for (Entry<String, Float> w : this._weights.entrySet())
        as.setWeight(w.getKey(), w.getValue());
      return as;
    }
  }

  /**
   * A single suggestion returned by a lookup.
   *
   * <p>Two suggestions are equal when they have the same text, the same
   * highlight and the same document content; the weight is not compared.
   * Document values are arrays and are compared by content.
   */
  public static class Suggestion {
    /** The suggested text. */
    public String text;
    /** The suggested text with highlighting. */
    public String highlight;
    /** The payload: field name to field values, null if there is no payload. */
    public Map<String, String[]> document;
    /** The weight of the suggestion. */
    public long weight;
    @Override
    public boolean equals(Object obj) {
      if (this == obj) return true;
      if (!(obj instanceof Suggestion)) return false;
      Suggestion s = (Suggestion) obj;
      return Objects.equals(this.text, s.text) &&
             Objects.equals(this.highlight, s.highlight) &&
             sameDocument(this.document, s.document);
    }
    @Override
    public int hashCode() {
      return Objects.hashCode(this.text) * 3 +
             Objects.hashCode(this.highlight) * 11 +
             (this.document != null ? 17 * documentHash(this.document) : 0);
    }
    @Override
    public String toString() {
      return this.text + (weight != 100 ? "("+weight+")" : "");
    }
    /** Compares two documents by content: Map.equals would compare the array values by identity. */
    private static boolean sameDocument(Map<String, String[]> a, Map<String, String[]> b) {
      if (a == b) return true;
      if (a == null || b == null || a.size() != b.size()) return false;
      for (Map.Entry<String, String[]> e : a.entrySet()) {
        if (!b.containsKey(e.getKey())) return false;
        if (!Arrays.equals(e.getValue(), b.get(e.getKey()))) return false;
      }
      return true;
    }
    /** Content-based, order-independent hash of a document, consistent with sameDocument. */
    private static int documentHash(Map<String, String[]> doc) {
      int hash = 0;
      for (Map.Entry<String, String[]> e : doc.entrySet()) {
        hash += Objects.hashCode(e.getKey()) ^ Arrays.hashCode(e.getValue());
      }
      return hash;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy