org.apache.solr.spelling.PossibilityIterator Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr (module: core)
There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.spelling;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.PriorityQueue;
import java.util.Set;

/**
 * Given a list of possible Spelling Corrections for multiple mis-spelled words in a query, This
 * iterator returns Possible Correction combinations ordered by reasonable probability that such a
 * combination will return actual hits if re-queried. This implementation simply ranks the Possible
 * Combinations by the sum of their component ranks.
 */
public class PossibilityIterator implements Iterator {
  private List> possibilityList = new ArrayList<>();
  private Iterator rankedPossibilityIterator = null;
  private int correctionIndex[];
  private boolean done = false;
  private Iterator> nextOnes = null;
  private int nextOnesRank = 0;
  private int nextOnesIndex = 0;
  private boolean suggestionsMayOverlap = false;

  @SuppressWarnings("unused")
  private PossibilityIterator() {
    throw new AssertionError("You shan't go here.");
  }

  /**
   * We assume here that the passed-in inner LinkedHashMaps are already sorted in order of "Best
   * Possible Correction".
   */
  public PossibilityIterator(
      Map> suggestions,
      int maximumRequiredSuggestions,
      int maxEvaluations,
      boolean overlap) {
    this.suggestionsMayOverlap = overlap;
    for (Map.Entry> entry : suggestions.entrySet()) {
      Token token = entry.getKey();
      if (entry.getValue().size() == 0) {
        continue;
      }
      List possibleCorrections = new ArrayList<>();
      for (Map.Entry entry1 : entry.getValue().entrySet()) {
        SpellCheckCorrection correction = new SpellCheckCorrection();
        correction.setOriginal(token);
        correction.setCorrection(entry1.getKey());
        correction.setNumberOfOccurences(entry1.getValue());
        possibleCorrections.add(correction);
      }
      possibilityList.add(possibleCorrections);
    }

    int wrapSize = possibilityList.size();
    if (wrapSize == 0) {
      done = true;
    } else {
      correctionIndex = new int[wrapSize];
      for (int i = 0; i < wrapSize; i++) {
        int suggestSize = possibilityList.get(i).size();
        if (suggestSize == 0) {
          done = true;
          break;
        }
        correctionIndex[i] = 0;
      }
    }
    PriorityQueue rankedPossibilities =
        new PriorityQueue<>(11, new RankComparator());
    Set removeDuplicates = null;
    if (suggestionsMayOverlap) {
      removeDuplicates = new HashSet<>();
    }
    long numEvaluations = 0;
    while (numEvaluations < maxEvaluations && internalHasNext()) {
      RankedSpellPossibility rsp = internalNext();
      numEvaluations++;
      if (rankedPossibilities.size() >= maximumRequiredSuggestions
          && rsp.rank >= rankedPossibilities.peek().rank) {
        continue;
      }
      if (!isSuggestionForReal(rsp)) {
        continue;
      }
      if (removeDuplicates == null) {
        rankedPossibilities.offer(rsp);
      } else {
        // Needs to be in token-offset order so that the match-and-replace
        // option for collations can work.
        rsp.corrections.sort(new StartOffsetComparator());
        if (removeDuplicates.add(rsp)) {
          rankedPossibilities.offer(rsp);
        }
      }
      if (rankedPossibilities.size() > maximumRequiredSuggestions) {
        RankedSpellPossibility removed = rankedPossibilities.poll();
        if (removeDuplicates != null) {
          removeDuplicates.remove(removed);
        }
      }
    }

    RankedSpellPossibility[] rpArr = new RankedSpellPossibility[rankedPossibilities.size()];
    for (int i = rankedPossibilities.size() - 1; i >= 0; i--) {
      rpArr[i] = rankedPossibilities.remove();
    }
    rankedPossibilityIterator = Arrays.asList(rpArr).iterator();
  }

  private boolean isSuggestionForReal(RankedSpellPossibility rsp) {
    for (SpellCheckCorrection corr : rsp.corrections) {
      if (!corr.getOriginalAsString().equals(corr.getCorrection())) {
        return true;
      }
    }
    return false;
  }

  private boolean internalHasNext() {
    if (nextOnes != null && nextOnes.hasNext()) {
      return true;
    }
    if (done) {
      return false;
    }
    internalNextAdvance();
    if (nextOnes != null && nextOnes.hasNext()) {
      return true;
    }
    return false;
  }

  /**
   * This method is converting the independent LinkHashMaps containing various (silo'ed) suggestions
   * for each mis-spelled word into individual "holistic query corrections", aka. "Spell Check
   * Possibility"
   *
   * Rank here is the sum of each selected term's position in its respective LinkedHashMap.
   */
  private RankedSpellPossibility internalNext() {
    if (nextOnes != null && nextOnes.hasNext()) {
      RankedSpellPossibility rsl = new RankedSpellPossibility();
      rsl.corrections = nextOnes.next();
      rsl.rank = nextOnesRank;
      rsl.index = nextOnesIndex++;
      return rsl;
    }
    if (done) {
      throw new NoSuchElementException();
    }
    internalNextAdvance();
    if (nextOnes != null && nextOnes.hasNext()) {
      RankedSpellPossibility rsl = new RankedSpellPossibility();
      rsl.corrections = nextOnes.next();
      rsl.rank = nextOnesRank;
      rsl.index = nextOnesIndex++;
      return rsl;
    }
    throw new NoSuchElementException();
  }

  private void internalNextAdvance() {
    List possibleCorrection = null;
    if (nextOnes != null && nextOnes.hasNext()) {
      possibleCorrection = nextOnes.next();
    } else {
      if (done) {
        throw new NoSuchElementException();
      }
      possibleCorrection = new ArrayList<>();
      List> possibleCorrections = null;
      int rank = 0;
      while (!done && (possibleCorrections == null || possibleCorrections.size() == 0)) {
        rank = 0;
        for (int i = 0; i < correctionIndex.length; i++) {
          List singleWordPossibilities = possibilityList.get(i);
          SpellCheckCorrection singleWordPossibility =
              singleWordPossibilities.get(correctionIndex[i]);
          rank += correctionIndex[i];
          if (i == correctionIndex.length - 1) {
            correctionIndex[i]++;
            if (correctionIndex[i] == singleWordPossibilities.size()) {
              correctionIndex[i] = 0;
              if (correctionIndex.length == 1) {
                done = true;
              }
              for (int ii = i - 1; ii >= 0; ii--) {
                correctionIndex[ii]++;
                if (correctionIndex[ii] >= possibilityList.get(ii).size() && ii > 0) {
                  correctionIndex[ii] = 0;
                } else {
                  break;
                }
              }
            }
          }
          possibleCorrection.add(singleWordPossibility);
        }
        if (correctionIndex[0] == possibilityList.get(0).size()) {
          done = true;
        }
        if (suggestionsMayOverlap) {
          possibleCorrections = separateOverlappingTokens(possibleCorrection);
        } else {
          possibleCorrections = new ArrayList<>(1);
          possibleCorrections.add(possibleCorrection);
        }
      }
      nextOnes = possibleCorrections.iterator();
      nextOnesRank = rank;
      nextOnesIndex = 0;
    }
  }

  private List> separateOverlappingTokens(
      List possibleCorrection) {
    List> ret = null;
    if (possibleCorrection.size() == 1) {
      ret = new ArrayList<>(1);
      ret.add(possibleCorrection);
      return ret;
    }
    ret = new ArrayList<>();
    for (int i = 0; i < possibleCorrection.size(); i++) {
      List c = compatible(possibleCorrection, i);
      ret.add(c);
    }
    return ret;
  }

  private List compatible(List all, int pos) {
    List priorPassCompatibles = null;
    {
      List firstPassCompatibles = new ArrayList<>(all.size());
      SpellCheckCorrection sacred = all.get(pos);
      firstPassCompatibles.add(sacred);
      int index = pos;
      boolean gotOne = false;
      for (int i = 0; i < all.size() - 1; i++) {
        index++;
        if (index == all.size()) {
          index = 0;
        }
        SpellCheckCorrection disposable = all.get(index);
        if (!conflicts(sacred, disposable)) {
          firstPassCompatibles.add(disposable);
          gotOne = true;
        }
      }
      if (!gotOne) {
        return firstPassCompatibles;
      }
      priorPassCompatibles = firstPassCompatibles;
    }

    {
      pos = 1;
      while (true) {
        if (pos == priorPassCompatibles.size() - 1) {
          return priorPassCompatibles;
        }
        List subsequentPassCompatibles =
            new ArrayList<>(priorPassCompatibles.size());
        SpellCheckCorrection sacred = null;
        for (int i = 0; i <= pos; i++) {
          sacred = priorPassCompatibles.get(i);
          subsequentPassCompatibles.add(sacred);
        }
        int index = pos;
        boolean gotOne = false;
        for (int i = 0; i < priorPassCompatibles.size() - 1; i++) {
          index++;
          if (index == priorPassCompatibles.size()) {
            break;
          }
          SpellCheckCorrection disposable = priorPassCompatibles.get(index);
          if (!conflicts(sacred, disposable)) {
            subsequentPassCompatibles.add(disposable);
            gotOne = true;
          }
        }
        if (!gotOne || pos == priorPassCompatibles.size() - 1) {
          return subsequentPassCompatibles;
        }
        priorPassCompatibles = subsequentPassCompatibles;
        pos++;
      }
    }
  }

  private boolean conflicts(SpellCheckCorrection c1, SpellCheckCorrection c2) {
    int s1 = c1.getOriginal().startOffset();
    int e1 = c1.getOriginal().endOffset();
    int s2 = c2.getOriginal().startOffset();
    int e2 = c2.getOriginal().endOffset();
    if (s2 >= s1 && s2 <= e1) {
      return true;
    }
    if (s1 >= s2 && s1 <= e2) {
      return true;
    }
    return false;
  }

  @Override
  public boolean hasNext() {
    return rankedPossibilityIterator.hasNext();
  }

  @Override
  public PossibilityIterator.RankedSpellPossibility next() {
    return rankedPossibilityIterator.next();
  }

  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  public static class RankedSpellPossibility {
    public List corrections;
    public int rank;
    public int index;

    @Override
    // hashCode() and equals() only consider the actual correction, not the rank
    // or index.
    public int hashCode() {
      final int prime = 31;
      int result = 1;
      result = prime * result + ((corrections == null) ? 0 : corrections.hashCode());
      return result;
    }

    @Override
    // hashCode() and equals() only consider the actual correction, not the rank
    // or index.
    public boolean equals(Object obj) {
      if (this == obj) return true;
      if (obj == null) return false;
      if (!(obj instanceof RankedSpellPossibility)) return false;
      RankedSpellPossibility other = (RankedSpellPossibility) obj;
      return Objects.equals(corrections, other.corrections);
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      sb.append("rank=").append(rank).append(" (").append(index).append(")");
      if (corrections != null) {
        for (SpellCheckCorrection corr : corrections) {
          sb.append("     ");
          sb.append(corr.getOriginal())
              .append(">")
              .append(corr.getCorrection())
              .append(" (")
              .append(corr.getNumberOfOccurences())
              .append(")");
        }
      }
      return sb.toString();
    }
  }

  private static class StartOffsetComparator implements Comparator {
    @Override
    public int compare(SpellCheckCorrection o1, SpellCheckCorrection o2) {
      return o1.getOriginal().startOffset() - o2.getOriginal().startOffset();
    }
  }

  private static class RankComparator implements Comparator {
    // Rank poorer suggestions ahead of better ones for use with a PriorityQueue
    @Override
    public int compare(RankedSpellPossibility r1, RankedSpellPossibility r2) {
      int retval = r2.rank - r1.rank;
      if (retval == 0) {
        retval = r2.index - r1.index;
      }
      return retval;
    }
  }
}