All Downloads are FREE. Search and download functionalities are using the official Maven repository.

opennlp.tools.sentiment.SentimentME Maven / Gradle / Ivy

Go to download

Combines Apache OpenNLP and Apache Tika and provides facilities for automatically deriving sentiment from text.

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.sentiment;

import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import opennlp.tools.ml.EventTrainer;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.ml.TrainerFactory.TrainerType;
import opennlp.tools.ml.model.Event;
import opennlp.tools.ml.model.MaxentModel;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.namefind.BioCodec;
import opennlp.tools.namefind.NameContextGenerator;
import opennlp.tools.namefind.TokenNameFinderFactory;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.Sequence;
import opennlp.tools.util.SequenceCodec;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;
import opennlp.tools.util.featuregen.AdditionalContextFeatureGenerator;
import opennlp.tools.util.featuregen.WindowFeatureGenerator;

/**
 * Class for creating a maximum-entropy-based Sentiment Analysis model.
 */
public class SentimentME {

  public static final String OTHER = "other";
  public static final String START = "start";
  public static final String CONTINUE = "cont";
  public static final int DEFAULT_BEAM_SIZE = 3;

  private static String[][] EMPTY = new String[0][0];

  protected SentimentContextGenerator contextGenerator;
  private AdditionalContextFeatureGenerator additionalContextFeatureGenerator = new AdditionalContextFeatureGenerator();
  private Sequence bestSequence;
  protected SequenceClassificationModel model;
  private SequenceValidator sequenceValidator;
  private SentimentFactory factory;
  private MaxentModel maxentModel;
  private SequenceCodec seqCodec = new BioCodec();

  /**
   * Constructor, initialises
   *
   * @param sentModel
   *          sentiment analysis model
   */
  public SentimentME(SentimentModel sentModel) {

    this.model = sentModel.getSentimentModel();
    maxentModel = sentModel.getMaxentModel();

    factory = sentModel.getFactory();

    contextGenerator = factory.createContextGenerator();
  }

  /**
   * Trains a Sentiment Analysis model.
   *
   * @param languageCode
   *          the code for the language of the text, e.g. "en"
   * @param samples
   *          the sentiment samples to be used
   * @param trainParams
   *          parameters for training
   * @param factory
   *          a Sentiment Analysis factory
   * @return a Sentiment Analysis model
   */
  public static SentimentModel train(String languageCode,
      ObjectStream samples, TrainingParameters trainParams,
      SentimentFactory factory) throws IOException {

    Map entries = new HashMap();

    MaxentModel sentimentModel = null;

    SequenceClassificationModel seqModel = null;

    TrainerType trainerType = TrainerFactory
        .getTrainerType(trainParams.getSettings());

    ObjectStream eventStream = new SentimentEventStream(samples,
        factory.createContextGenerator());

    EventTrainer trainer = TrainerFactory
        .getEventTrainer(trainParams.getSettings(), entries);
    sentimentModel = trainer.train(eventStream);

    Map manifestInfoEntries = new HashMap();

    return new SentimentModel(languageCode, sentimentModel, manifestInfoEntries,
        factory);

  }

  /**
   * Makes a sentiment prediction
   *
   * @param sentence
   *          the text to be analysed for its sentiment
   * @return the predicted sentiment
   */
  public String predict(String sentence) {
    String[] tokens = factory.getTokenizer().tokenize(sentence);

    double prob[] = probabilities(tokens);
    String sentiment = getBestSentiment(prob);

    return sentiment;
  }

  /**
   * Returns the best chosen sentiment for the text predicted on
   *
   * @param outcome
   *          the outcome
   * @return the best sentiment
   */
  public String getBestSentiment(double[] outcome) {
    return maxentModel.getBestOutcome(outcome);
  }

  /**
   * Returns the analysis probabilities
   *
   * @param text
   *          the text to categorize
   */
  public double[] probabilities(String text[]) {
    return maxentModel.eval(contextGenerator.getContext(text));
  }

  /**
   * Makes a sentiment prediction by calling the helper method
   *
   * @param tokens
   *          the text to be analysed for its sentiment
   * @return the prediction made by the helper method
   */
  public Span[] predict2(String[] tokens) {
    return predict2(tokens, EMPTY);
  }

  /**
   * Makes a sentiment prediction
   *
   * @param tokens
   *          the text to be analysed for its sentiment
   * @param additionalContext
   *          any required additional context
   * @return the predictions
   */
  public Span[] predict2(String[] tokens, String[][] additionalContext) {

    additionalContextFeatureGenerator.setCurrentContext(additionalContext);

    bestSequence = model.bestSequence(tokens, additionalContext,
        contextGenerator, sequenceValidator);

    List c = bestSequence.getOutcomes();

    Span[] spans = seqCodec.decode(c);
    return spans;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy