All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.redlink.sdk.impl.analysis.model.Enhancements Maven / Gradle / Ivy

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.redlink.sdk.impl.analysis.model;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.Collections2;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimap;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
import com.google.common.primitives.Doubles;

/**
 * Analysis Result API. This class eases the management of the RedLink analysis service results, providing
 * an API that hides the complexity of the Enhancement Structure returned by the service
 *
 * @author [email protected]
 * @author [email protected]
 */
public class Enhancements implements Iterable {

    /**
     * Map 
     */
    private Multimap, Enhancement> enhancements;

    /**
     * Map 
     */
    private Map entities;

    /**
     * Annotations' languages (LanguageAnnotation)
     */
    private Collection languages;
    
    private Double documentSentiment;

    Enhancements() {
        this.enhancements = ArrayListMultimap.create();
        this.entities = Maps.newLinkedHashMap();
        this.languages = Sets.newHashSet();
    }

    @Override
    public Iterator iterator() {
        return getEnhancements().iterator();
    }

    /**
     * Returns the {@link Collection} of all {@link Enhancement}s, including Text, Entity and Topic annotations
     *
     * @return
     */
    public Collection getEnhancements() {
        return Collections.unmodifiableCollection(enhancements.values());
    }

    void setEnhancements(Collection enhancements) {
        for (Enhancement e : enhancements) {
            addEnhancement(e);
        }
    }

    void addEnhancement(Enhancement enhancement) {
        enhancements.put(enhancement.getClass(), enhancement);

        if (enhancement instanceof EntityAnnotation) {
            EntityAnnotation ea = (EntityAnnotation) enhancement;
            entities.put(ea.getEntityReference().getUri(),
                    ea.getEntityReference());
        } else if(enhancement instanceof TopicAnnotation){
            TopicAnnotation ta = (TopicAnnotation)enhancement;
            entities.put(ta.getTopicReference().getUri(), ta.getTopicReference());
        }
    }

    /**
     * Returns the {@link Collection} of extracted {@link TextAnnotation}s
     *
     * @return
     */
    @SuppressWarnings("unchecked")
    public Collection getTextAnnotations() {
        Collection result = enhancements
                .get(TextAnnotation.class);
        return result == null ? Collections.emptySet() : 
            Collections.unmodifiableCollection((Collection) result);
    }

    /**
     * Returns the {@link Collection} of extracted {@link EntityAnnotation}s
     *
     * @return
     */
    @SuppressWarnings("unchecked")
    public Collection getEntityAnnotations() {
        Collection result = enhancements.get(EntityAnnotation.class);
        return result == null ? Collections.emptySet() : 
            Collections.unmodifiableCollection((Collection) result);
    }

    /**
     * Returns the {@link Collection} of extracted {@link SentimentAnnotation}s. This
     * allows to process low level sentiment values extracted from sub-sections of the
     * document (e.g. on Sentence level). Use {@link #getDocumentSentiment()} to
     * get the overall sentiment of the document as a whole
     *
     * @return A collection of Sentiment annotations for sub-sections of the document
     * @see #getDocumentSentiment()
     */
    @SuppressWarnings("unchecked")
    public Collection getSentimentAnnotations() {
        Collection result = enhancements.get(SentimentAnnotation.class);
        return result == null ? Collections.emptySet() : 
            Collections.unmodifiableCollection((Collection) result);
    }
    
    /**
     * Returns the {@link Collection} of extracted {@link EntityAnnotation}s
     *
     * @return
     */
    @SuppressWarnings("unchecked")
    public Collection getTopicAnnotations() {
        Collection result = enhancements.get(TopicAnnotation.class);
        return result == null ? Collections.emptySet() : 
            Collections.unmodifiableCollection((Collection) result);
    }

    /**
     * Returns the {@link Collection} of dereferenced {@link Entity}s
     *
     * @return
     */
    public Collection getEntities() {
        return Collections.unmodifiableCollection(entities.values());
    }

    /**
     * Returns a dereferenced entity by its URI
     *
     * @param URI
     * @return
     */
    public Entity getEntity(String URI) {
        return entities.get(URI);
    }

    /**
     * Returns a {@link Collection} of {@link EntityAnnotation}s associated to the {@link TextAnnotation} passed by parameter
     *
     * @param ta {@link TextAnnotation}
     * @return
     */
    public Collection getEntityAnnotations(TextAnnotation ta) {
        Collection eas = getEntityAnnotations();
        Collection result = Sets.newHashSet();

        for (EntityAnnotation ea : eas) {
            if (ea.getRelations() != null && ea.getRelations().contains(ta)) {
                result.add(ea);
            }
        }

        return result;
    }

    /**
     * Returns a {@link Collection} of {@link Entity}s for which associated {@link EntityAnnotation}s has a confidence value
     * greater than or equal to the value passed by parameter
     *
     * @param confidenceValue Threshold confidence value
     * @return
     */
    public Collection getEntitiesByConfidenceValue(final Double confidenceValue) {

        Collection sortedEas = getEntityAnnotationsByConfidenceValue(confidenceValue);

        return Collections2.transform(sortedEas,
                new Function() {
                    @Override
                    public Entity apply(final EntityAnnotation ea) {
                        return ea.getEntityReference();
                    }
                }
        );
    }

    /**
     * Returns a {@link Collection} of {@link TextAnnotation}s which confidences values are greater than or equal
     * to the value passed by parameter
     *
     * @param confidenceValue Threshold confidence value
     * @return
     */
    public Collection getTextAnnotationsByConfidenceValue(final Double confidenceValue) {
        return FluentIterable.from(getTextAnnotations())
                .filter(new Predicate() {
                    @Override
                    public boolean apply(TextAnnotation e) {
                        return e.confidence.doubleValue() >= confidenceValue
                                .doubleValue();
                    }
                }).toList();
    }

    /**
     * Returns a {@link Collection} of {@link EntityAnnotation}s which confidences values are greater than or equal
     * to the value passed by parameter
     *
     * @param confidenceValue Threshold confidence value
     * @return
     */
    public Collection getEntityAnnotationsByConfidenceValue(
            final Double confidenceValue) {
        return FluentIterable.from(getEntityAnnotations())
                .filter(new Predicate() {
                    @Override
                    public boolean apply(EntityAnnotation e) {
                        return e.confidence.doubleValue() >= confidenceValue
                                .doubleValue();
                    }
                }).toList();
    }

    /**
     * Returns a {@link Collection} of {@link EntityAnnotation}s which confidences values are greater than or equal
     * to the value passed by parameter
     *
     * @param minConfidenceValue Minimum threshold confidence value
     * @param maxConfidenceValue Maximum threshold confidence value
     * @return
     */
    public Collection getEntityAnnotationsByConfidenceValue(
            final Double minConfidenceValue, final Double maxConfidenceValue) {
        return FluentIterable.from(getEntityAnnotations())
                .filter(new Predicate() {
                    @Override
                    public boolean apply(EntityAnnotation e) {
                        final double value = e.confidence.doubleValue();
                        return minConfidenceValue.doubleValue() <= value
                                && value <= maxConfidenceValue.doubleValue();
                    }
                }).toList();
    }

    public Multimap getEntityAnnotationsByTextAnnotation() {
        Multimap map = ArrayListMultimap.create();

        Collection eas = getEntityAnnotations();
        for (EntityAnnotation ea : eas) {
            if (ea.relations != null) {
                for (Enhancement e : ea.relations) {
                    if (e instanceof TextAnnotation) {
                        map.put((TextAnnotation) e, ea);
                    }
                }
            }
        }
        return map;
    }

    /**
     * Returns the best {@link EntityAnnotation}s (those with the highest confidence value) for each extracted {@link TextAnnotation}
     *
     * @return best annotations
     */
    public Multimap getBestAnnotations() {

        Ordering o = new Ordering() {
            @Override
            public int compare(EntityAnnotation left, EntityAnnotation right) {
                return Doubles.compare(left.confidence, right.confidence);
            }
        }.reverse();

        Multimap result = ArrayListMultimap.create();
        for (TextAnnotation ta : getTextAnnotations()) {
            List eas = o.sortedCopy(getEntityAnnotations(ta));
            if (!eas.isEmpty()) {
                Collection highest = new HashSet<>();
                Double confidence = eas.get(0).getConfidence();
                for (EntityAnnotation ea : eas) {
                    if (ea.confidence < confidence) {
                        break;
                    } else {
                        highest.add(ea);
                    }
                }
                result.putAll(ta, highest);
            }
        }

        return result;
    }

    /**
     * Returns an {@link EntityAnnotation} by its associated dereferenced {@link Entity} URI
     *
     * @param entityUri
     * @return
     */
    public EntityAnnotation getEntityAnnotation(final String entityUri) {
        return Iterables.find(getEntityAnnotations(),
                new Predicate() {

                    @Override
                    public boolean apply(EntityAnnotation ea) {
                        return ea.getEntityReference().getUri()
                                .equals(entityUri);
                    }

                }
        );
    }

    /**
     * Returns a {@link Collection} of identified languages in the analyzed content
     *
     * @return
     */
    public Collection getLanguages() {
        return languages;
    }

    /**
     * @param languages
     */
    void setLanguages(Collection languages) {
        this.languages = languages;
    }

    /**
     * @param language
     */
    void addLanguage(String language) {
        this.languages.add(language);
    }

    void setDocumentSentiment(Double documentSentiment) {
        this.documentSentiment = documentSentiment;
    }
    /**
     * Getter for the overall sentiment of the Document
     * @return the document sentiment or null if no sentiment component 
     * is configured for the analysis.
     */
    public Double getDocumentSentiment() {
        return documentSentiment;
    }
    
    /**
     * Returns the {@link Collection} of extracted categories (topics) for the analyzed content
     *
     * @return
     */
    @SuppressWarnings("unchecked")
    public Collection getCategories() {
        Collection result = enhancements.get(TopicAnnotation.class);
        return (Collection) result; // Should be safe. Needs to be tested
    }

    /**
     * Returns true if the contents has been categorized with a category identified by the URI passed by parameter
     *
     * @param conceptURI URI of the category concept
     * @return
     */
    public boolean hasCategory(final String conceptURI) {
        return Iterables.any(getCategories(),
                new Predicate() {

                    @Override
                    public boolean apply(TopicAnnotation ta) {
                        return ta.getTopicReference().
                                equals(conceptURI);
                    }

                }
        );
    }
    
    /**
     * Returns the {@link Collection} of extracted {@link EntityAnnotation}s
     *
     * @return
     */
    @SuppressWarnings("unchecked")
    public Collection getKeywordAnnotations() {
        Collection result = enhancements.get(KeywordAnnotation.class);
        return result == null ? Collections.emptySet() : 
            Collections.unmodifiableCollection((Collection) result);
    }

    /**
     * Returns a {@link Collection} of {@link KeywordAnnotation}s which 
     * a 
    *
  • count greater than or equal the required count *
  • metric greater than or equals the required metric *
y * * @param minCount Threshold count value or null for no threshold * @param minMetric Threshold metric value or null for no threshold * @return the {@link KeywordAnnotation}s fulfilling the parsed requirements. */ public Collection getKeywordAnnotationsByCountMetric( Integer minCount, Double minMetric) { final Integer count = minCount == null || minCount < 1 ? null : minCount; final Double metric = minMetric == null || minMetric <= 0 ? null : minMetric; if(count == null && metric == null){ return getKeywordAnnotations(); } else { return FluentIterable.from(getKeywordAnnotations()) .filter(new Predicate() { @Override public boolean apply(KeywordAnnotation e) { boolean apply = true; if(metric != null && (e.getMetric() == null || metric > e.getMetric())){ apply = false; //filter because metric is to small } if(count != null && (count > e.getCount())){ apply = false; //filter because count is to low } return apply; } }).toList(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy