All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.rabidgremlin.mutters.opennlp.intent.OpenNLPIntentMatcher Maven / Gradle / Ivy

package com.rabidgremlin.mutters.opennlp.intent;

import java.net.URL;
import java.util.Set;
import java.util.SortedMap;

import com.rabidgremlin.mutters.core.SlotMatcher;
import com.rabidgremlin.mutters.core.Tokenizer;
import com.rabidgremlin.mutters.core.ml.AbstractMachineLearningIntentMatcher;

import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;

/**
 * Intent matcher that uses OpenNLP's document classifier.
 * 
 * 
 * @author rabidgremlin
 *
 */
public class OpenNLPIntentMatcher
    extends AbstractMachineLearningIntentMatcher
{
  /** The document categoriser for the intent matcher. */
  private DoccatModel model;

  /** Default minimum match score. */
  private static final float MIN_MATCH_SCORE = 0.75f;

  /**
   * Constructor. Sets up the matcher to use the specified model (on the classpath) and specified tokenizer. Defaults to
   * min score match of MIN_MATCH_SCORE and no maybe intent matching.
   * 
   * @param intentModel The name of the document categoriser model file to use. This file must be on the classpath.
   * @param tokenizer The tokenizer to use when tokenizing an utterance.
   * @param slotMatcher The slot matcher to use to extract slots from the utterance.
   */
  public OpenNLPIntentMatcher(String intentModel, Tokenizer tokenizer, SlotMatcher slotMatcher)
  {
    this(Thread.currentThread().getContextClassLoader().getResource(intentModel), tokenizer, slotMatcher, MIN_MATCH_SCORE, -1);
  }

  /**
   * Constructor. Sets up the matcher to use the specified model (on the classpath) and specifies the minimum and maybe
   * match scores.
   * 
   * @param intentModel The name of the document categoriser model file to use. This file must be on the classpath.
   * @param minMatchScore The minimum match score for an intent match to be considered good.
   * @param maybeMatchScore The maybe match score. Use -1 to disable maybe matching.
   * @param tokenizer The tokenizer to use when tokenizing an utterance.
   * @param slotMatcher The slot matcher to use to extract slots from the utterance.
   */
  public OpenNLPIntentMatcher(String intentModel, Tokenizer tokenizer, SlotMatcher slotMatcher, float minMatchScore, float maybeMatchScore)
  {
    this(Thread.currentThread().getContextClassLoader().getResource(intentModel), tokenizer, slotMatcher, minMatchScore, maybeMatchScore);
  }

  /**
   * Constructor. Sets up the matcher to use the specified model (via a URL) and specifies the minimum and maybe match
   * score.
   * 
   * @param intentModelUrl A URL pointing at the document categoriser model file to load.
   * @param minMatchScore The minimum match score for an intent match to be considered good.
   * @param maybeMatchScore The maybe match score. Use -1 to disable maybe matching.
   * @param tokenizer The tokenizer to use when tokenizing an utterance.
   * @param slotMatcher The slot matcher to use to extract slots from the utterance.
   */
  public OpenNLPIntentMatcher(URL intentModelUrl, Tokenizer tokenizer, SlotMatcher slotMatcher, float minMatchScore, float maybeMatchScore)
  {
    super(tokenizer, slotMatcher, minMatchScore, maybeMatchScore);

    try
    {
      model = new DoccatModel(intentModelUrl);
    }
    catch (Exception e)
    {
      throw new IllegalArgumentException("Unable to load intent model", e);
    }
  }

  @Override
  protected SortedMap> generateSortedScoreMap(String[] utteranceTokens)
  {
    DocumentCategorizerME intentCategorizer = new DocumentCategorizerME(model);
    return intentCategorizer.sortedScoreMap(utteranceTokens);
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy