All downloads are free. Search and download functionality uses the official Maven repository.

com.fnklabs.draenei.analytics.search.ClusteringTfAlgorithm Maven / Gradle / Ivy

package com.fnklabs.draenei.analytics.search;

import com.fnklabs.draenei.analytics.TextUtils;
import com.fnklabs.draenei.analytics.morphology.Language;
import org.jetbrains.annotations.NotNull;
import org.slf4j.LoggerFactory;

import java.beans.BeanInfo;
import java.beans.IntrospectionException;
import java.beans.Introspector;
import java.beans.PropertyDescriptor;
import java.io.Serializable;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.util.*;
import java.util.function.BiFunction;
import java.util.stream.Collectors;

/**
 * Content based TF clustering algorithm
 */
class ClusteringTfAlgorithm implements ClusteringAlgorithm {
    @NotNull
    private TextUtils textUtils;

    public ClusteringTfAlgorithm(@NotNull TextUtils textUtils) {
        this.textUtils = textUtils;
    }

    @NotNull
    @Override
    public Set build(@NotNull Object content) {

        Map> facetsMap = buildFacets(content);

        Set facets = new HashSet<>();

        int totalFacets = facetsMap.entrySet()
                                   .stream()
                                   .mapToInt(entry -> entry.getValue().size())
                                   .sum();

        facetsMap.forEach((key, values) -> {
            double rank = TfIdfUtils.calculateTf(values.size(), totalFacets);
            facets.add(new Facet(key, rank, 0));
        });

        return facets;
    }

    @Override
    public Set build(@NotNull Document document) {
        Map> facetsMap = buildFacets(document.getId());


        Set facets = new HashSet<>();

        int totalFacets = facetsMap.entrySet()
                                   .stream()
                                   .mapToInt(entry -> entry.getValue().size())
                                   .sum();

        facetsMap.forEach((key, values) -> {
            double rank = TfIdfUtils.calculateTf(values.size(), totalFacets);
            facets.add(new Facet(key, rank, document.getId()));
        });

        return facets;
    }

    @NotNull
    private Map> buildFacets(@NotNull Object content) {
        Map> facetsMap = new HashMap<>();

        if (!content.getClass().isPrimitive() && !(content instanceof String)) {
            try {
                BeanInfo beanInfo = Introspector.getBeanInfo(content.getClass());

                for (PropertyDescriptor propertyDescriptor : beanInfo.getPropertyDescriptors()) {
                    Method readMethod = propertyDescriptor.getReadMethod();

                    String name = propertyDescriptor.getName();

                    if (name.equals("class")) {
                        continue;
                    }

                    Field field = content.getClass().getDeclaredField(name);

                    boolean annotationPresent = field.isAnnotationPresent(com.fnklabs.draenei.analytics.search.annotation.Facet.class);

                    if (annotationPresent) {

                        Object fieldValue = getFieldValue(content, readMethod);

                        if (fieldValue != null) {
                            List values = transformValue(fieldValue);

                            values.forEach(val -> {
                                FacetType facetType = new FacetType(field.getName(), val.getClass());

                                FacetKey key = new FacetKey(facetType, val);

                                facetsMap.compute(key, new AddFunction());
                            });
                        }
                    }
                }
            } catch (IntrospectionException | NoSuchFieldException e) {
                LoggerFactory.getLogger(getClass()).warn("Can't read value", e);
            }
        } else {
            transformValue(content).forEach(val -> {
                        FacetKey key = new FacetKey(new FacetType("primitive", val.getClass()), val);

                        facetsMap.compute(key, new AddFunction());
                    }
            );
        }
        return facetsMap;
    }

    /**
     * Try to transform field value to simple types
     *
     * @param value
     *
     * @return
     */
    private List transformValue(@NotNull Object value) {
        List values = new ArrayList<>();

        if (value instanceof String) {
            List build = build((String) value);

            values.addAll(build);

        } else if (value instanceof Collection) {
            Collection collection = (Collection) value;
            List collect = collection.stream()
                                                   .flatMap(item -> {
                                                       return transformValue(item).stream();
                                                   })
                                                   .collect(Collectors.toList());

            values.addAll(collect);
        } else {
            values.add((Serializable) value);
        }

        return values;
    }

    private Object getFieldValue(@NotNull Object content, Method field) {
        try {
            return field.invoke(content);
        } catch (InvocationTargetException | IllegalAccessException e) {
            LoggerFactory.getLogger(ClusteringTfAlgorithm.class).warn("Can't get field value", e);
        }

        return null;
    }

    /**
     * Build String facet from text
     *
     * @param text Text content
     *
     * @return Word facets
     */
    private List build(@NotNull String text) {

        List words = textUtils.extractWords(text, Language.RU);

        List wordList = words.stream()
                                     .flatMap(word -> {
                                         return textUtils.getNormalForms(word.toLowerCase(), Language.RU)
                                                         .stream()
                                                         .filter(element -> textUtils.isNormalWord(element, Language.RU));
                                     })
                                     .collect(Collectors.toList());

        return wordList;
    }

    private static class AddFunction implements BiFunction, List> {
        @Override
        public List apply(FacetKey key, List keys) {
            if (keys == null) {
                keys = new ArrayList<>();
            }

            keys.add(key);

            return keys;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy