// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only 1 $.
// You can buy this project and download/modify it as often as you want.
/*
* Copyright 2017-2022 John Snow Labs
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.johnsnowlabs.nlp.annotators.sda.vivekn
import com.johnsnowlabs.nlp.annotators.common.{TokenizedSentence, TokenizedWithSentence}
import com.johnsnowlabs.nlp.serialization.{MapFeature, SetFeature}
import com.johnsnowlabs.nlp._
import org.apache.spark.ml.param.{DoubleParam, IntParam, LongParam}
import org.apache.spark.ml.util.Identifiable
/** Sentiment analyser inspired by the algorithm by Vivek Narayanan
* [[https://github.com/vivekn/sentiment/]].
*
* The algorithm is based on the paper
* [[https://arxiv.org/abs/1305.6143 "Fast and accurate sentiment classification using an enhanced Naive Bayes model"]].
*
* This is the instantiated model of the
* [[com.johnsnowlabs.nlp.annotators.sda.vivekn.ViveknSentimentApproach ViveknSentimentApproach]].
* For training your own model, please see the documentation of that class.
*
* The analyzer requires sentence boundaries to give a score in context. Tokenization is needed
* to make sure tokens are within bounds. Transitivity requirements are also required.
*
* For extended examples of usage, see the
* [[https://github.com/JohnSnowLabs/spark-nlp/blob/master/examples/python/training/english/vivekn-sentiment/VivekNarayanSentimentApproach.ipynb Examples]]
* and the
* [[https://github.com/JohnSnowLabs/spark-nlp/tree/master/src/test/scala/com/johnsnowlabs/nlp/annotators/sda/vivekn ViveknSentimentTestSpec]].
*
* @see
* [[com.johnsnowlabs.nlp.annotators.sda.pragmatic.SentimentDetector SentimentDetector]] for an
* alternative approach to sentiment detection
* @groupname anno Annotator types
* @groupdesc anno
* Required input and expected output annotator types
* @groupname Ungrouped Members
* @groupname param Parameters
* @groupname setParam Parameter setters
* @groupname getParam Parameter getters
* @groupname Ungrouped Members
* @groupprio param 1
* @groupprio anno 2
* @groupprio Ungrouped 3
* @groupprio setParam 4
* @groupprio getParam 5
* @groupdesc param
* A list of (hyper-)parameter keys this annotator can take. Users can set and get the
* parameter values through setters and getters, respectively.
*/
class ViveknSentimentModel(override val uid: String)
    extends AnnotatorModel[ViveknSentimentModel]
    with HasSimpleAnnotate[ViveknSentimentModel]
    with ViveknSentimentUtils {

  import com.johnsnowlabs.nlp.AnnotatorType._

  /** Output annotator type : SENTIMENT
    *
    * @group anno
    */
  override val outputAnnotatorType: AnnotatorType = SENTIMENT

  /** Input annotator types : TOKEN, DOCUMENT
    *
    * @group anno
    */
  override val inputAnnotatorTypes: Array[AnnotatorType] = Array(TOKEN, DOCUMENT)

  /** Per-word counts for the positive class, stored under the feature name
    * `positive_sentences`
    *
    * @group param
    */
  protected val positive: MapFeature[String, Long] = new MapFeature(this, "positive_sentences")

  /** Per-word counts for the negative class, stored under the feature name
    * `negative_sentences`
    *
    * @group param
    */
  protected val negative: MapFeature[String, Long] = new MapFeature(this, "negative_sentences")

  /** Set of words that are considered as features by [[classify]]
    *
    * @group param
    */
  protected val words: SetFeature[String] = new SetFeature[String](this, "words")

  /** Count of positive words
    *
    * @group param
    */
  val positiveTotals: LongParam =
    new LongParam(this, "positive_totals", "Count of positive words")

  /** Count of negative words
    *
    * @group param
    */
  val negativeTotals: LongParam =
    new LongParam(this, "negative_totals", "Count of negative words")

  /** Proportion of feature content to be considered relevant (Default: `0.5`)
    *
    * @group param
    */
  val importantFeatureRatio: DoubleParam = new DoubleParam(
    this,
    "importantFeatureRatio",
    "Proportion of feature content to be considered relevant (Default: `0.5`)")

  /** Proportion to lookahead in unimportant features (Default: `0.025`)
    *
    * @group param
    */
  val unimportantFeatureStep: DoubleParam = new DoubleParam(
    this,
    "unimportantFeatureStep",
    "Proportion to lookahead in unimportant features (Default: `0.025`)")

  /** Content feature limit, to boost performance in very dirty text (Default: disabled with
    * `-1`)
    *
    * @group param
    */
  val featureLimit: IntParam = new IntParam(
    this,
    "featureLimit",
    "Content feature limit, to boost performance in very dirt text (Default disabled with: -1")

  // Register the defaults that the parameter descriptions above advertise, so that reading a
  // parameter on a freshly constructed model does not fail with a missing-value error.
  setDefault(importantFeatureRatio -> 0.5, unimportantFeatureStep -> 0.025, featureLimit -> -1)

  def this() = this(Identifiable.randomUID("VIVEKN"))

  /** Set Proportion of feature content to be considered relevant (Default: `0.5`)
    *
    * @group setParam
    */
  def setImportantFeatureRatio(v: Double): this.type = set(importantFeatureRatio, v)

  /** Set Proportion to lookahead in unimportant features (Default: `0.025`)
    *
    * @group setParam
    */
  def setUnimportantFeatureStep(v: Double): this.type = set(unimportantFeatureStep, v)

  /** Set Content feature limit, to boost performance in very dirty text (Default: disabled with
    * `-1`)
    *
    * @group setParam
    */
  def setFeatureLimit(v: Int): this.type = set(featureLimit, v)

  /** Get Proportion of feature content to be considered relevant (Default: `0.5`)
    *
    * @group getParam
    */
  def getImportantFeatureRatio: Double = $(importantFeatureRatio)

  /** Get Proportion of feature content to be considered relevant (Default: `0.5`)
    *
    * Kept for backward compatibility; behaves exactly like the parameterless overload.
    *
    * @param v
    *   ignored
    * @group getParam
    */
  def getImportantFeatureRatio(v: Double): Double = $(importantFeatureRatio)

  /** Get Proportion to lookahead in unimportant features (Default: `0.025`)
    *
    * @group getParam
    */
  def getUnimportantFeatureStep: Double = $(unimportantFeatureStep)

  /** Get Proportion to lookahead in unimportant features (Default: `0.025`)
    *
    * Kept for backward compatibility; behaves exactly like the parameterless overload.
    *
    * @param v
    *   ignored
    * @group getParam
    */
  def getUnimportantFeatureStep(v: Double): Double = $(unimportantFeatureStep)

  /** Get Content feature limit, to boost performance in very dirty text (Default: disabled with
    * `-1`)
    *
    * @group getParam
    */
  def getFeatureLimit: Int = $(featureLimit)

  /** Get Content feature limit, to boost performance in very dirty text (Default: disabled with
    * `-1`)
    *
    * Kept for backward compatibility; behaves exactly like the parameterless overload.
    *
    * @param v
    *   ignored
    * @group getParam
    */
  def getFeatureLimit(v: Int): Int = $(featureLimit)

  /** Count of positive words
    *
    * @group getParam
    */
  def getPositive: Map[String, Long] = $$(positive)

  /** Count of negative words
    *
    * @group getParam
    */
  def getNegative: Map[String, Long] = $$(negative)

  /** Set of unique words
    *
    * @group getParam
    */
  def getFeatures: Set[String] = $$(words)

  private[vivekn] def setPositive(value: Map[String, Long]): this.type = set(positive, value)

  private[vivekn] def setNegative(value: Map[String, Long]): this.type = set(negative, value)

  private[vivekn] def setPositiveTotals(value: Long): this.type = set(positiveTotals, value)

  private[vivekn] def setNegativeTotals(value: Long): this.type = set(negativeTotals, value)

  /** Selects the feature words from `value` and stores them in the `words` feature.
    *
    * The first `ceil(value.length * importantFeatureRatio)` entries are always kept. The rest is
    * walked in windows of `ceil(limit * unimportantFeatureStep)` entries, starting right after
    * the kept prefix and stopping at `limit`, where `limit` is `featureLimit`, or `value.length`
    * when `featureLimit` is `-1`.
    *
    * @param value
    *   candidate words; must not be empty
    * @throws IllegalArgumentException
    *   if `value` is empty
    */
  private[vivekn] def setWords(value: Array[String]): this.type = {
    require(
      value.nonEmpty,
      "Word analysis for features cannot be empty. Set prune to false if training is small")

    val start = (value.length * $(importantFeatureRatio)).ceil.toInt
    val afterStart = if ($(featureLimit) == -1) value.length else $(featureLimit)
    val step = (afterStart * $(unimportantFeatureStep)).ceil.toInt

    val importantFeatures: Set[String] = value.take(start).toSet

    // Range rejects a step of 0 (reachable with featureLimit = 0 or unimportantFeatureStep =
    // 0.0). A non-positive step selects nothing beyond the important prefix.
    val lookaheadFeatures: Seq[String] =
      if (step > 0) Range(start, afterStart, step).flatMap(k => value.slice(k, k + step).toSeq)
      else Seq.empty[String]

    set(words, importantFeatures ++ lookaheadFeatures)
  }

  /** Classifies a sentence from the feature words it contains.
    *
    * Positive: 0, Negative: 1, NA: 2
    *
    * @param sentence
    *   tokenized sentence to score
    * @return
    *   the class code and a confidence. The confidence is the share of the winning class in the
    *   summed word counts of both classes; it is `0.0` for NA and when none of the matched
    *   words has a recorded count.
    */
  def classify(sentence: TokenizedSentence): (Short, Double) = {
    val wordFeatures = negateSequence(sentence.tokens).intersect($$(words)).toList

    if (wordFeatures.isEmpty) (2, 0.0)
    else {
      // Resolve the features once instead of once per word.
      val positiveCounts = $$(positive)
      val negativeCounts = $$(negative)

      // Sum of log((count + 1) / (2 * total)) over all feature words of the sentence.
      def logScore(counts: Map[String, Long], total: Long): Double =
        wordFeatures
          .map(word => scala.math.log((counts.getOrElse(word, 0L) + 1.0) / (2.0 * total)))
          .sum

      val positiveScore = logScore(positiveCounts, $(positiveTotals))
      val negativeScore = logScore(negativeCounts, $(negativeTotals))

      val positiveSum = wordFeatures.map(word => positiveCounts.getOrElse(word, 0L)).sum.toDouble
      val negativeSum = wordFeatures.map(word => negativeCounts.getOrElse(word, 0L)).sum.toDouble
      val totalSum = positiveSum + negativeSum

      // Without any recorded count the ratio would be 0 / 0 = NaN; report no confidence.
      def confidenceOf(classSum: Double): Double =
        if (totalSum == 0.0) 0.0 else classSum / totalSum

      if (positiveScore > negativeScore) (0, confidenceOf(positiveSum))
      else (1, confidenceOf(negativeSum))
    }
  }

  /** Renders a confidence as plain decimal text of at most six characters.
    *
    * `Double.toString` switches to scientific notation for small magnitudes, and cutting that
    * text after six characters yields a broken or wildly wrong number. The value is therefore
    * expanded to plain notation before it is truncated. Values that print without an exponent
    * are rendered exactly as before.
    */
  private def formatConfidence(confidence: Double): String =
    if (confidence.isNaN || confidence.isInfinity) confidence.toString.take(6)
    else java.math.BigDecimal.valueOf(confidence).toPlainString.take(6)

  /** Produces one sentiment annotation per sentence that has tokens.
    *
    * Each annotation spans from the smallest token begin to the largest token end of its
    * sentence, carries `positive`, `negative` or `na` as result and the confidence of
    * [[classify]] under the metadata key `confidence`.
    *
    * @param annotations
    *   Annotations that correspond to inputAnnotationCols generated by previous annotators if any
    * @return
    *   any number of annotations processed for every input annotation. Not necessary one to one
    *   relationship
    */
  override def annotate(annotations: Seq[Annotation]): Seq[Annotation] = {
    TokenizedWithSentence
      .unpack(annotations)
      .filter(_.indexedTokens.nonEmpty)
      .map { sentence =>
        val (result, confidence) = classify(sentence)
        val label = if (result == 0) "positive" else if (result == 1) "negative" else "na"
        Annotation(
          outputAnnotatorType,
          sentence.indexedTokens.map(_.begin).min,
          sentence.indexedTokens.map(_.end).max,
          label,
          Map("confidence" -> formatConfidence(confidence)))
      }
  }
}
trait ReadablePretrainedVivekn
    extends ParamsAndFeaturesReadable[ViveknSentimentModel]
    with HasPretrained[ViveknSentimentModel] {

  /** Default pretrained model name: `sentiment_vivekn` */
  override val defaultModelName = Some("sentiment_vivekn")

  // The overrides below only narrow the result type to ViveknSentimentModel and forward to the
  // parent implementation (Java compliant-overrides).

  /** Forwards to the parent `pretrained()`. */
  override def pretrained(): ViveknSentimentModel = {
    super.pretrained()
  }

  /** Forwards to the parent `pretrained(name)`. */
  override def pretrained(name: String): ViveknSentimentModel = {
    super.pretrained(name)
  }

  /** Forwards to the parent `pretrained(name, lang)`. */
  override def pretrained(name: String, lang: String): ViveknSentimentModel = {
    super.pretrained(name, lang)
  }

  /** Forwards to the parent `pretrained(name, lang, remoteLoc)`. */
  override def pretrained(name: String, lang: String, remoteLoc: String): ViveknSentimentModel = {
    super.pretrained(name, lang, remoteLoc)
  }
}
/** This is the companion object of [[ViveknSentimentModel]]. Please refer to that class for the
  * documentation.
  *
  * It defines no members of its own; everything it offers is inherited from
  * [[ReadablePretrainedVivekn]].
  */
object ViveknSentimentModel extends ReadablePretrainedVivekn