com.ibm.watson.developer_cloud.cognitive_client.AggregateData

/**
 * 
 */
package com.ibm.watson.developer_cloud.cognitive_client;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;

/**
 * For storing aggregate data from text analysis
 * 
 * @author ArunIyengar
 */
public class AggregateData implements Serializable {

    /**
     * Generated by Eclipse
     */
    private static final long serialVersionUID = 3476200215120220334L;
    
    /**
     * Data for a particular entity, keyword, concept, taxonomy, etc.
     */
    public static class Data implements Serializable {

        /**
         * Generated by Eclipse
         */
        private static final long serialVersionUID = -5649818767370818194L;
        
        private int count;  // typically represents the # of times something is found
        private double relevance;  // the relevance of something within a document
        private double score; // can be used for sentiment analysis scores, among other things
        
        
        private Data(int countVal, double relevanceVal, double scoreVal) {
            count = countVal;
            relevance = relevanceVal;
            score = scoreVal;
        }
        
        /**
         * Returns count field
         * 
         * @return count field
         *      
         */
        public int getCount() {
            return count;
        }
        
        /**
         * Sets count field
         * 
         * @param countVal
         *      value for count field
         *      
         */
        void setCount(int countVal) {
            count = countVal;
        }
                
        /**
         * Returns relevance value
         * 
         * @return relevance value
         * 
         */
        public double getRelevance() {
            return relevance;
        }
        
        /**
         * Sets relevance value
         * 
         * @param relevanceVal
         *      value for relevance field
         *      
         */
        void setRelevance(double relevanceVal) {
            relevance = relevanceVal;
        }
 
        /**
         * Returns score value
         * 
         * @return score value
         * 
         */
        public double getScore() {
            return score;
        }
        
        /**
         * Sets score value
         * 
         * @param scoreVal
         *      value for score
         *      
         */
        void setScore(double scoreVal) {
            score = scoreVal;
        }
        
        private void addData(Data data) {
            // Merge another Data entry into this one: the score becomes a
            // count-weighted average, while counts and relevances are summed.
            score = ((count * score) + (data.count * data.score)) / (count + data.count);
            count += data.count;
            relevance += data.relevance;
        }
        
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder(70);
            sb.append("Count: " + count);
            sb.append(", Relevance: " + relevance);
            sb.append(", Score: " + score + "\n");
            return sb.toString();
            
        }
    }
    

    /**
     * Corresponds to fields of Data objects by which users might want to sort, for example
     */
    public enum DataType {
        COUNT,
        RELEVANCE,
        SCORE
    }

    
    /**
     * Corresponds to types of data maintained by AggregateData
     */
    public enum Type {
        CONCEPT,
        DISAMBIGUATEDENTITY,
        ENTITYAMBIGUOUS,
        KEYWORD,
        TAXONOMY
    }
    
    
    private String description;
    private String analysisResults;  // used for storing results from a natural language service 
    
    private ArrayList<String> documents = new ArrayList<String>(); // Can be used to store all documents analyzed
    private ArrayList<String> rawData = new ArrayList<String>(); // Can be used to store all serialized raw data from analysis services
    
    private HashMap<String, Data> concepts = new HashMap<String, Data>();
    private HashMap<String, Data> disambiguatedEntities = new HashMap<String, Data>(); // only contains disambiguated entities
    private HashMap<String, Data> entitiesAmbiguous = new HashMap<String, Data>(); // contains entities which have not been disambiguated
    private HashMap<String, Data> keywords = new HashMap<String, Data>();
    private HashMap<String, Data> taxonomies = new HashMap<String, Data>();

    
    /**
     * Constructor.
     * 
     * @param describe
     *            description of the aggregate data set
     *            
     */
    public AggregateData(String describe) {
        description = describe;
    }

    /**
     * Constructor.
     * 
     * @param describe
     *            description of the aggregate data set
     * @param document
     *            The document to which this corresponds
     * @param analysisData
     *            data from the text analysis service to which this corresponds 
     *            
     */
    public AggregateData(String describe, String document, String analysisData) {
        description = describe;
        documents.add(document);
        rawData.add(analysisData);
    }
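    
    /*
     * Usage sketch (illustrative only, not part of the original class): building
     * an AggregateData instance for a single document. The variable names and
     * values below are hypothetical placeholders.
     *
     *   AggregateData aggregate = new AggregateData("News articles, week 12");
     *   aggregate.setAnalysisResults(serviceResponseJson); // raw response from a text analysis call
     *   aggregate.addDocument(documentText);
     *   aggregate.addRawData(); // stores the current analysisResults in the rawData list
     */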
    
    /**
     * Returns the most recently stored analysis results
     * 
     * @return analysis results
     *      
     */
    String getAnalysisResults() {
        return analysisResults;
    }
    
    /**
     * Sets analysis results (e.g. the raw response from a text analysis service)
     * 
     * @param results
     *      analysis results
     *      
     */
    public void setAnalysisResults(String results) {
        analysisResults = results;
    }

    /**
     * Returns description
     * 
     * @return
     *      description
     *      
     */
    public String getDescription() {
        return description;
    }
    
    /**
     * Updates description
     * 
     * @param describe
     *      new value for description
     *      
     */
    public void setDescription(String describe) {
        description = describe;
    }
    
    /**
     * Returns concepts
     * 
     * @return
     *      concepts
     *      
     */
    public HashMap<String, Data> getConcepts() {
        return concepts;
    }

    void setConcepts(HashMap<String, Data> newConcepts) {
        concepts = newConcepts;
    }

    /**
     * Return disambiguated entities
     * 
     * @return
     *      disambiguated entities
     *      
     */
    public HashMap<String, Data> getDisambiguatedEntities() {
        return disambiguatedEntities;
    }

    void setDisambiguatedEntities(HashMap<String, Data> newEntities) {
        disambiguatedEntities = newEntities;
    }

    /**
     * Return entities which have not been disambiguated
     * 
     * @return
     *      entities which have not been disambiguated
     *      
     */
    public HashMap<String, Data> getEntitiesAmbiguous() {
        return entitiesAmbiguous;
    }

    void setEntitiesAmbiguous(HashMap<String, Data> newEntities) {
        entitiesAmbiguous = newEntities;
    }

    /**
     * Return keywords
     * 
     * @return
     *      keywords
     *      
     */
    public HashMap<String, Data> getKeywords() {
        return keywords;
    }

    void setKeywords(HashMap<String, Data> newKeywords) {
        keywords = newKeywords;
    }
  
    /**
     * Get taxonomies
     * 
     * @return
     *      taxonomies
     *      
     */
    public HashMap<String, Data> getTaxonomies() {
        return taxonomies;
    }

    void setTaxonomies(HashMap<String, Data> newTaxonomies) {
        taxonomies = newTaxonomies;
    }
    
    /**
     * Get list of documents
     * 
     * @return
     *      list of documents
     *      
     */
    public ArrayList<String> getDocuments() {
        return documents;
    }
    
    /**
     * Add a document to list of documents
     * 
     * @param document
     *      document to add
     *      
     */
    public void addDocument(String document) {
        documents.add(document);
    }
    
    /**
     * Return list of raw data
     * 
     * @return
     *      list of raw data
     *      
     */
    public ArrayList<String> getRawData() {
        return rawData;
    }
    
    /**
     * Add the current analysis results (set via setAnalysisResults) to the list of raw data
     * 
     */
    public void addRawData() {
        rawData.add(analysisResults);
    }
    
    /**
     * Add data from another AggregateData object to this
     * 
     * @param data
     *      AggregateData object to add to this
     * @param addRawData
     *      Indicates whether documents and rawData from "data" should be added to this
     */
    public void combineData(AggregateData data, boolean addRawData) {
        combineHashMaps(concepts, data.concepts);
        combineHashMaps(disambiguatedEntities, data.disambiguatedEntities);
        combineHashMaps(entitiesAmbiguous, data.entitiesAmbiguous);
        combineHashMaps(keywords, data.keywords);
        combineHashMaps(taxonomies, data.taxonomies);
        if (addRawData) {
            documents.addAll(data.documents);
            rawData.addAll(data.rawData);
        }
    }
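
    /*
     * Usage sketch (hypothetical): merging per-document aggregates into a running
     * total. Passing true also carries over the documents and raw data lists.
     *
     *   AggregateData total = new AggregateData("All documents");
     *   total.combineData(perDocumentAggregate, true);
     */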

    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder(1000);
        sb.append("Description: " + description + "\n");
        sb.append("Concepts: " + concepts + "\n");
        sb.append("Disambiguated Entities: " + disambiguatedEntities + "\n");
        sb.append("Ambiguous Entities: " + entitiesAmbiguous + "\n");
        sb.append("Keywords: " + keywords + "\n");
        sb.append("Categories/Taxonomies: " + taxonomies + "\n");
        sb.append("Documents Analyzed: " + documents + "\n");
        sb.append("Raw data from text analysis services: " + rawData + "\n");
        return sb.toString();
    }
    
    private void combineHashMaps(HashMap<String, Data> hashMap1, HashMap<String, Data> hashMap2) {
        for (Map.Entry<String, Data> entry : hashMap2.entrySet()) {
            String key = entry.getKey();
            Data val2 = entry.getValue();
            Data val1 = hashMap1.get(key);
            if (val1 == null) {
                hashMap1.put(key, val2);
            }
            else {
                val1.addData(val2);
                hashMap1.put(key, val1);                
            }
        }
    }
    
    private HashMap<String, Data> selectHashMap(Type type) {
        switch(type) {
            case CONCEPT:
                return concepts;
            case DISAMBIGUATEDENTITY:
                return disambiguatedEntities;
            case ENTITYAMBIGUOUS:
                return entitiesAmbiguous;
            case KEYWORD:
                return keywords;
            default:
                return taxonomies;
        }        
    }
    
    /**
     * Add new data corresponding to analyzed text
     * 
     * @param key
     *      corresponds to an entity, keyword, etc.
     * @param countAdd
     *      number of times it appeared
     * @param relAdd
     *      relevance
     * @param scoreAdd
     *      score (e.g. sentiment analysis score)
     * @param type
     *      type
     *      
     */
    public void addData(String key, int countAdd, double relAdd, double scoreAdd, Type type) {
        HashMap<String, Data> hashMap = selectHashMap(type);
        Data newData = new Data(countAdd, relAdd, scoreAdd);
        Data prevData = hashMap.get(key);
        if (prevData == null) {
            hashMap.put(key, newData);
        }
        else {
            prevData.addData(newData);
            hashMap.put(key, prevData);
        }
    }
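    
    /*
     * Usage sketch (assumed values, not from the original source): recording a
     * keyword found during analysis of one document.
     *
     *   aggregate.addData("climate change", 3, 0.87, 0.42, AggregateData.Type.KEYWORD);
     *   // Calling addData again with the same key and type merges the entries:
     *   // counts and relevances are summed, and scores are combined as a
     *   // count-weighted average (see Data.addData above).
     */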
    
    /**
     * Write this to a file
     * 
     * @param filename
     *      file name
     *      
     */
    public void writeToFile(String filename) {
        String results = analysisResults;
        analysisResults = null;
        byte[] bytes = Serializer.serializeToByteArray(this);
        analysisResults = results;
        Util.byteArrayToFile(bytes, filename);
    }
    
    /**
     * Read Aggregate data from a file
     * 
     * @param filename
     *      file name
     * @return aggregate data read from file
     * 
     */
    public static AggregateData readFromFile(String filename) {
        byte[] bytes = Util.fileToByteArray(filename);
        return Serializer.deserializeFromByteArray(bytes);
    }
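    
    /*
     * Usage sketch (hypothetical file name): persisting an aggregate and reading
     * it back. Serializer and Util are assumed to be helper classes in this
     * package; note that analysisResults is deliberately excluded from what is
     * written to the file.
     *
     *   aggregate.writeToFile("aggregate.ser");
     *   AggregateData restored = AggregateData.readFromFile("aggregate.ser");
     */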
    
    /**
     * Get sorted values for a feature
     * 
     * @param feature
     *      feature (e.g. concepts, keywords) whose values should be sorted
     * @param parameter
     *      field of Data to sort by
     * @return ArrayList of (key, Data) pairs sorted in descending order of the chosen field
     * 
     */
    public ArrayList<Map.Entry<String, Data>> getSortedValues(Type feature, DataType parameter) {
        HashMap<String, Data> hashMap = selectHashMap(feature);
        Set<Map.Entry<String, Data>> dataSet = hashMap.entrySet();
        ArrayList<Map.Entry<String, Data>> sortedList = new ArrayList<Map.Entry<String, Data>>(dataSet);
        switch(parameter) {
        case COUNT:        
            Collections.sort(sortedList, new Comparator<Map.Entry<String, Data>>() {
                public int compare(Map.Entry<String, Data> o1,
                                   Map.Entry<String, Data> o2) {
                    return ((Integer) o2.getValue().getCount()).compareTo(o1.getValue().getCount());
                }});
            break;
        case RELEVANCE:
            Collections.sort(sortedList, new Comparator<Map.Entry<String, Data>>() {
                public int compare(Map.Entry<String, Data> o1,
                                   Map.Entry<String, Data> o2) {
                    return ((Double) o2.getValue().getRelevance()).compareTo(o1.getValue().getRelevance());
                }});
            break;
        default:
            Collections.sort(sortedList, new Comparator<Map.Entry<String, Data>>() {
                public int compare(Map.Entry<String, Data> o1,
                                   Map.Entry<String, Data> o2) {
                    return ((Double) o2.getValue().getScore()).compareTo(o1.getValue().getScore());
                }});
            break;
        }
        return sortedList;
    }
    }
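    
    /*
     * Usage sketch (hypothetical): listing keywords from most to least frequent.
     *
     *   for (Map.Entry<String, Data> entry :
     *           aggregate.getSortedValues(Type.KEYWORD, DataType.COUNT)) {
     *       System.out.println(entry.getKey() + " -> " + entry.getValue().getCount());
     *   }
     */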
    
}



