
com.ibm.watson.developer_cloud.cognitive_client.AggregateData Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-client-java Show documentation
A Java library providing enhanced client support for IBM's Watson Developer Cloud.
The newest version!
/**
*
*/
package com.ibm.watson.developer_cloud.cognitive_client;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
/**
 * For storing aggregate data from text analysis: per-key statistics for
 * concepts, entities, keywords, and taxonomies, plus the list of analyzed
 * documents and the raw analysis-service output.
 *
 * @author ArunIyengar
 */
public class AggregateData implements Serializable {
/**
 * Serial version UID generated by Eclipse; keep stable across releases
 * so previously serialized instances remain readable.
 */
private static final long serialVersionUID = 3476200215120220334L;
/**
 * Aggregate statistics for a particular entity, keyword, concept, taxonomy, etc.
 */
public static class Data implements Serializable {
    /**
     * Serial version UID generated by Eclipse.
     */
    private static final long serialVersionUID = -5649818767370818194L;
    private int count; // typically represents the # of times something is found
    private double relevance; // the relevance of something within a document
    private double score; // can be used for sentiment analysis scores, among other things

    private Data(int countVal, double relevanceVal, double scoreVal) {
        count = countVal;
        relevance = relevanceVal;
        score = scoreVal;
    }

    /**
     * Returns count field
     *
     * @return count field
     */
    public int getCount() {
        return count;
    }

    /**
     * Sets count field
     *
     * @param countVal
     *            value for count field
     */
    void setCount(int countVal) {
        count = countVal;
    }

    /**
     * Returns relevance value
     *
     * @return relevance value
     */
    public double getRelevance() {
        return relevance;
    }

    /**
     * Sets relevance value
     *
     * @param relevanceVal
     *            value for relevance field
     */
    void setRelevance(double relevanceVal) {
        // BUG FIX: this previously assigned relevanceVal to "score",
        // clobbering the score field and leaving relevance unchanged.
        relevance = relevanceVal;
    }

    /**
     * Returns score value
     *
     * @return score value
     */
    public double getScore() {
        return score;
    }

    /**
     * Sets score value
     *
     * @param scoreVal
     *            value for score
     */
    void setScore(double scoreVal) {
        score = scoreVal;
    }

    /**
     * Merges another Data object into this one: score becomes the
     * count-weighted average of both scores, counts are summed, and
     * relevance values are summed.
     *
     * NOTE(review): if both counts are zero the weighted average is
     * 0.0/0 = NaN — confirm callers never merge two zero-count entries.
     */
    private void addData(Data data) {
        // The weighted average must be computed before count is updated.
        score = ((count * score) + (data.count * data.score)) / (count + data.count);
        count += data.count;
        relevance += data.relevance;
    }

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(70);
        sb.append("Count: " + count);
        sb.append(", Relevance: " + relevance);
        sb.append(", Score: " + score + "\n");
        return sb.toString();
    }
}
/**
 * Corresponds to fields of Data objects which users might want to sort, for example
 */
public enum DataType {
COUNT, // sort by Data.count
RELEVANCE, // sort by Data.relevance
SCORE // sort by Data.score
}
/**
 * Corresponds to types of data maintained by AggregateData
 */
public enum Type {
CONCEPT, // concepts identified in analyzed text
DISAMBIGUATEDENTITY, // entities which have been disambiguated
ENTITYAMBIGUOUS, // entities which have not been disambiguated
KEYWORD, // keywords extracted from analyzed text
TAXONOMY // taxonomy/category classifications
}
private String description; // human-readable description of this aggregate data set
private String analysisResults; // used for storing results from a natural language service
// Type parameters restored: they were stripped by HTML extraction
// (raw types made combineHashMaps/getSortedValues uncompilable).
private ArrayList<String> documents = new ArrayList<String>(); // Can be used to store all documents analyzed
private ArrayList<String> rawData = new ArrayList<String>(); // Can be used to store all serialized raw data from analysis services
private HashMap<String, Data> concepts = new HashMap<String, Data>();
private HashMap<String, Data> disambiguatedEntities = new HashMap<String, Data>(); // only contains disambiguated entities
private HashMap<String, Data> entitiesAmbiguous = new HashMap<String, Data>(); // contains nondisambiguated entities
private HashMap<String, Data> keywords = new HashMap<String, Data>();
private HashMap<String, Data> taxonomies = new HashMap<String, Data>();
/**
 * Constructor.
 *
 * @param describe
 * description of the aggregate data set
 *
 */
public AggregateData(String describe) {
description = describe;
}
/**
 * Constructor which also records the first analyzed document and its
 * raw analysis-service output.
 *
 * @param describe
 * description of the aggregate data set
 * @param document
 * The document to which this corresponds
 * @param analysisData
 * data from the text analysis service to which this corresponds
 *
 */
public AggregateData(String describe, String document, String analysisData) {
description = describe;
documents.add(document);
rawData.add(analysisData);
}
/**
 * Returns the stored natural-language-service results.
 *
 * @return analysis results string (null if never set)
 */
String getAnalysisResults() {
return analysisResults;
}
/**
 * Stores results from a natural language analysis service.
 *
 * @param results
 * serialized analysis results
 */
public void setAnalysisResults(String results) {
analysisResults = results;
}
/**
 * Returns description
 *
 * @return description of the aggregate data set
 *
 */
public String getDescription() {
return description;
}
/**
 * Updates description
 *
 * @param describe
 * new value for description
 *
 */
public void setDescription(String describe) {
description = describe;
}
/**
 * Returns concepts
 *
 * @return map from concept name to aggregated Data
 */
public HashMap<String, Data> getConcepts() {
    return concepts;
}

void setConcepts(HashMap<String, Data> newConcepts) {
    concepts = newConcepts;
}

/**
 * Return disambiguated entities
 *
 * @return map from entity name to aggregated Data (disambiguated only)
 */
public HashMap<String, Data> getDisambiguatedEntities() {
    return disambiguatedEntities;
}

void setDisambiguatedEntities(HashMap<String, Data> newEntities) {
    disambiguatedEntities = newEntities;
}

/**
 * Return entities which have not been disambiguated
 *
 * @return map from entity name to aggregated Data (not disambiguated)
 */
public HashMap<String, Data> getEntitiesAmbiguous() {
    return entitiesAmbiguous;
}

void setEntitiesAmbiguous(HashMap<String, Data> newEntities) {
    entitiesAmbiguous = newEntities;
}

/**
 * Return keywords
 *
 * @return map from keyword to aggregated Data
 */
public HashMap<String, Data> getKeywords() {
    return keywords;
}

void setKeywords(HashMap<String, Data> newKeywords) {
    keywords = newKeywords;
}

/**
 * Get taxonomies
 *
 * @return map from taxonomy/category label to aggregated Data
 */
public HashMap<String, Data> getTaxonomies() {
    return taxonomies;
}

void setTaxonomies(HashMap<String, Data> newTaxonomies) {
    taxonomies = newTaxonomies;
}
/**
 * Get list of documents
 *
 * @return list of documents analyzed so far
 */
public ArrayList<String> getDocuments() {
    return documents;
}

/**
 * Add a document to list of documents
 *
 * @param document
 *            document to add
 */
public void addDocument(String document) {
    documents.add(document);
}
/**
 * Return list of raw data
 *
 * @return list of raw (serialized) analysis-service output
 */
public ArrayList<String> getRawData() {
    return rawData;
}

/**
 * Append the current analysisResults string to the list of raw data.
 */
public void addRawData() {
    rawData.add(analysisResults);
}
/**
 * Add data from another AggregateData object to this
 *
 * @param data
 *            AggregateData object whose contents are merged into this one
 * @param addRawData
 *            Indicates whether documents and rawData from "data" should be added to this
 */
public void combineData(AggregateData data, boolean addRawData) {
    // Merge every category map; the maps are independent, so order is irrelevant.
    combineHashMaps(keywords, data.keywords);
    combineHashMaps(concepts, data.concepts);
    combineHashMaps(taxonomies, data.taxonomies);
    combineHashMaps(disambiguatedEntities, data.disambiguatedEntities);
    combineHashMaps(entitiesAmbiguous, data.entitiesAmbiguous);
    if (!addRawData) {
        return;
    }
    documents.addAll(data.documents);
    rawData.addAll(data.rawData);
}
@Override
public String toString() {
    // Chained appends produce exactly the same text as the previous
    // string-concatenation version.
    StringBuilder text = new StringBuilder(1000);
    text.append("Description: ").append(description).append('\n')
        .append("Concepts: ").append(concepts).append('\n')
        .append("Disambiguated Entities: ").append(disambiguatedEntities).append('\n')
        .append("Ambiguous Entities: ").append(entitiesAmbiguous).append('\n')
        .append("Keywords: ").append(keywords).append('\n')
        .append("Categories/Taxonomies: ").append(taxonomies).append('\n')
        .append("Documents Analyzed: ").append(documents).append('\n')
        .append("Raw data from text analysis services: ").append(rawData).append('\n');
    return text.toString();
}
/**
 * Merge the entries of hashMap2 into hashMap1. Keys present only in
 * hashMap2 are inserted; keys present in both have their Data values
 * combined via Data.addData (weighted score, summed count/relevance).
 * Type parameters restored — the raw-typed version could not compile
 * (entry.getKey() yielded Object, not String).
 *
 * @param hashMap1
 *            destination map, updated in place
 * @param hashMap2
 *            source map, not modified
 */
private void combineHashMaps(HashMap<String, Data> hashMap1, HashMap<String, Data> hashMap2) {
    for (Map.Entry<String, Data> entry : hashMap2.entrySet()) {
        String key = entry.getKey();
        Data val2 = entry.getValue();
        Data val1 = hashMap1.get(key);
        if (val1 == null) {
            hashMap1.put(key, val2);
        } else {
            // val1 is already stored in hashMap1; mutating it in place
            // suffices (the previous re-put of the same reference was redundant).
            val1.addData(val2);
        }
    }
}
/**
 * Returns the map holding aggregate data for the given category.
 *
 * @param type
 *            which category of data to select
 * @return the map for that category (taxonomies for TAXONOMY / default)
 */
private HashMap<String, Data> selectHashMap(Type type) {
    switch (type) {
        case CONCEPT:
            return concepts;
        case DISAMBIGUATEDENTITY:
            return disambiguatedEntities;
        case ENTITYAMBIGUOUS:
            return entitiesAmbiguous;
        case KEYWORD:
            return keywords;
        default: // TAXONOMY
            return taxonomies;
    }
}
/**
 * Add new data corresponding to analyzed text
 *
 * @param key
 *            corresponds to an entity, keyword, etc.
 * @param countAdd
 *            number of times it appeared
 * @param relAdd
 *            relevance
 * @param scoreAdd
 *            score (e.g. sentiment analysis score)
 * @param type
 *            category of data (concept, keyword, ...) to update
 */
public void addData(String key, int countAdd, double relAdd, double scoreAdd, Type type) {
    HashMap<String, Data> hashMap = selectHashMap(type);
    Data newData = new Data(countAdd, relAdd, scoreAdd);
    Data prevData = hashMap.get(key);
    if (prevData == null) {
        hashMap.put(key, newData);
    } else {
        // prevData is already stored in the map; merging in place is
        // sufficient (re-putting the same reference was redundant).
        prevData.addData(newData);
    }
}
/**
 * Write this object to a file. The analysisResults string is temporarily
 * nulled out so it is excluded from the serialized bytes, then restored.
 *
 * @param filename
 *            file name
 */
public void writeToFile(String filename) {
    String results = analysisResults;
    analysisResults = null; // exclude from the serialized form
    try {
        byte[] bytes = Serializer.serializeToByteArray(this);
        Util.byteArrayToFile(bytes, filename);
    } finally {
        // BUG FIX: restore analysisResults even if serialization or the
        // file write throws; previously an exception left it null.
        analysisResults = results;
    }
}
/**
 * Read Aggregate data from a file
 *
 * @param filename
 *            file name
 * @return aggregate data read from file
 */
public static AggregateData readFromFile(String filename) {
    // Read the file's bytes and deserialize them in a single expression.
    return Serializer.deserializeFromByteArray(Util.fileToByteArray(filename));
}
/**
* Get sorted values for a feature
*
* @param feature
* feature
* @param parameter
* field to sort values for
* @return ArrayList of sorted key, data pairs
*
*/
public ArrayList> getSortedValues(Type feature, DataType parameter) {
HashMap hashMap = selectHashMap(feature);
Set> dataSet = hashMap.entrySet();
ArrayList> sortedList = new ArrayList>(dataSet);
switch(parameter) {
case COUNT:
Collections.sort(sortedList, new Comparator>() {
public int compare(Map.Entry o1,
Map.Entry o2) {
return ((Integer) o2.getValue().getCount()).compareTo(o1.getValue().getCount());
}});
break;
case RELEVANCE:
Collections.sort(sortedList, new Comparator>() {
public int compare(Map.Entry o1,
Map.Entry o2) {
return ((Double) o2.getValue().getRelevance()).compareTo(o1.getValue().getRelevance());
}});
break;
default:
Collections.sort(sortedList, new Comparator>() {
public int compare(Map.Entry o1,
Map.Entry o2) {
return ((Double) o2.getValue().getScore()).compareTo(o1.getValue().getScore());
}});
break;
}
return sortedList;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy