Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
Stanford CoreNLP provides a set of natural language analysis tools which can take raw English language text input and give the base forms of words, their parts of speech, whether they are names of companies, people, etc., normalize dates, times, and numeric quantities, mark up the structure of sentences in terms of phrases and word dependencies, and indicate which noun phrases refer to the same entities. It provides the foundational building blocks for higher level text understanding applications.
package edu.stanford.nlp.sentiment;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.SentenceUtils;
import org.ejml.simple.SimpleMatrix;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.neural.rnn.RNNCoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Generics;
/**
* A wrapper class which creates a suitable pipeline for the sentiment
* model and processes raw text.
*
* The main program has the following options:
* -parserModel Which parser model to use, defaults to englishPCFG.ser.gz
* -sentimentModel Which sentiment model to use, defaults to sentiment.ser.gz
* -file Which file to process.
* -fileList A comma separated list of files to process.
* -stdin Read one line at a time from stdin.
* -output Output style. Defaults to printing just the root. Options:
*   pennTrees: Output trees with scores at each binarized node.
*   vectors: Number tree nodes and print out the vectors.
*   probabilities: Output the scores for different labels for each node.
* -filterUnknown Remove unknown trees from the input. Only applies to TREES input, in which case the trees must be binarized with sentiment labels
* -help Print out help
*
* @author John Bauer
*/
public class SentimentPipeline {
/** Redwood logging channels for this class. */
private static Redwood.RedwoodChannels log = Redwood.channels(SentimentPipeline.class);
/**
 * Number format used when printing node vectors and prediction scores:
 * at least one integer digit and exactly four fraction digits.
 * Built with the pattern-only constructor, so the decimal separator follows
 * the JVM's default locale.
 */
private static final NumberFormat NF = new DecimalFormat("0.0000");
/**
 * The output styles this class distinguishes.
 * Presumably these correspond to the values of the {@code -output} option
 * listed in the class documentation, with {@code ROOT} as the default.
 */
enum Output {
  PENNTREES,
  VECTORS,
  ROOT,
  PROBABILITIES
}
/**
 * The kinds of input this class distinguishes: raw text or trees.
 */
enum Input {
  TEXT,
  TREES
}
/** Private so the class cannot be instantiated from outside; it is used through its static methods. */
private SentimentPipeline() {} // static methods
/**
 * Recursively replaces the label value of every non-leaf node with the
 * integer sentiment class predicted for that node, written as a string.
 * Children are relabelled before their parent, and leaves are left untouched.
 * Afterwards the predictions show up directly when the tree is printed
 * with Tree.toString().
 *
 * @param tree root of the (sub)tree to relabel
 * @throws IllegalArgumentException if a non-leaf node has a label that is
 *         not a CoreLabel
 */
static void setSentimentLabels(Tree tree) {
  if (!tree.isLeaf()) {
    // Descend first so every child is relabelled before its parent.
    for (Tree subtree : tree.children()) {
      setSentimentLabels(subtree);
    }

    Label nodeLabel = tree.label();
    if (nodeLabel instanceof CoreLabel) {
      CoreLabel coreLabel = (CoreLabel) nodeLabel;
      coreLabel.setValue(Integer.toString(RNNCoreAnnotations.getPredictedClass(tree)));
    } else {
      throw new IllegalArgumentException("Required a tree with CoreLabels");
    }
  }
}
/**
 * Sets the label value of each non-leaf node to its index, written as a
 * string. Numbering starts at the given node and proceeds in preorder:
 * a node receives its index before any of its children, and children are
 * visited in order. Leaves are skipped and consume no index.
 *
 * @param tree  root of the (sub)tree to number
 * @param index index to assign to {@code tree} if it is not a leaf
 * @return the next unused index after numbering this subtree
 */
static int setIndexLabels(Tree tree, int index) {
  if (!tree.isLeaf()) {
    tree.label().setValue(String.valueOf(index));
    int next = index + 1;
    for (Tree subtree : tree.children()) {
      next = setIndexLabels(subtree, next);
    }
    return next;
  }
  // A leaf takes no index; hand the counter back unchanged.
  return index;
}
/**
 * Prints the node vector of every non-leaf node, one line per node.
 * Each line is the node index followed by a colon, then every element of
 * the vector formatted with {@code NF} and preceded by a space.
 * Nodes are counted the same way as in setIndexLabels: a node is numbered
 * before its children, and leaves are skipped.
 *
 * @param out   stream to print to
 * @param tree  root of the (sub)tree whose vectors are printed
 * @param index index of {@code tree} if it is not a leaf
 * @return the next unused index after this subtree
 */
static int outputTreeVectors(PrintStream out, Tree tree, int index) {
  if (!tree.isLeaf()) {
    out.print(" " + index + ":");
    SimpleMatrix nodeVector = RNNCoreAnnotations.getNodeVector(tree);
    int pos = 0;
    while (pos < nodeVector.getNumElements()) {
      out.print(" " + NF.format(nodeVector.get(pos)));
      pos++;
    }
    out.println();

    int next = index + 1;
    for (Tree subtree : tree.children()) {
      next = outputTreeVectors(out, subtree, next);
    }
    return next;
  }
  // Leaves print nothing and take no index.
  return index;
}
/**
 * Prints the prediction scores of every non-leaf node, one line per node.
 * Each line is the node index followed by a colon, then every element of
 * the predictions matrix formatted with {@code NF} and preceded by a space.
 * Nodes are counted the same way as in setIndexLabels: a node is numbered
 * before its children, and leaves are skipped.
 *
 * @param out   stream to print to
 * @param tree  root of the (sub)tree whose scores are printed
 * @param index index of {@code tree} if it is not a leaf
 * @return the next unused index after this subtree
 */
static int outputTreeScores(PrintStream out, Tree tree, int index) {
  if (!tree.isLeaf()) {
    out.print(" " + index + ":");
    SimpleMatrix scores = RNNCoreAnnotations.getPredictions(tree);
    int pos = 0;
    while (pos < scores.getNumElements()) {
      out.print(" " + NF.format(scores.get(pos)));
      pos++;
    }
    out.println();

    int next = index + 1;
    for (Tree subtree : tree.children()) {
      next = outputTreeScores(out, subtree, next);
    }
    return next;
  }
  // Leaves print nothing and take no index.
  return index;
}
/**
* Outputs a tree using the output style requested
*/
static void outputTree(PrintStream out, CoreMap sentence, List