All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.fbk.twm.utils.LangTopicModel Maven / Gradle / Ivy

The newest version!
package eu.fbk.twm.utils;

import java.util.*;

/**
 * Created with IntelliJ IDEA.
 * User: alessio
 * Date: 23/01/14
 * Time: 17:51
 * To change this template use File | Settings | File Templates.
 */
public class LangTopicModel extends LangModel {

    TopicOntology ontology = null;

    public TopicOntology getOntology() {
        return ontology;
    }

    public void setOntology(TopicOntology ontology) {
        this.ontology = ontology;
    }

    public LangTopicModel() {
        super();
    }

    public LangTopicModel(TopicOntology ontology) {
        super();
        this.ontology = ontology;
    }

    private void searchCat(Set categories, WeightedSet weightedSet, int depth, Set visitedSet) {
        //logger.debug("searching: " + categories + "\t" + depth + "...");

        //if (depth > 5 && WeightedSet.size() > 0) {
        if (categories == null || depth > maxDepth) {
            //logger.debug("{{{" + depth + "}}}");
            //logger.debug("stop1 " + depth);
            return;
        }

        Iterator it = categories.iterator();
        while (it.hasNext()) {
            //String label = topMap.get(categories[i]);
            String normalizedCategory = normalizePageName(it.next());
            String label = catProperties.getProperty(normalizedCategory);
            //logger.debug(i + "\t" + normalizedCategory + "\t" + label);
            //String label = categories[i];

            if (label != null) {
                //logger.debug("<<<" + normalizedCategory + "\t" + label + "\t" + depth + ">>>");
                //WeightedSet.add(categories[i]);

                if (label.length() == 0) {
                    //logger.warn("stop category " + normalizedCategory);
                    logger.trace(tabulator(depth) + "<" + normalizedCategory + ", STOP, " + depth + ">");
                } else {
                    weightedSet.add(label, (double) 1 / depth);
                    logger.trace(tabulator(depth) + "<" + normalizedCategory + ", '" + label + "', " + depth + ">");
                    //weightedSet.add(label, 1.0);
                }

            } else {
                if (!visitedSet.contains(normalizedCategory)) {
                    visitedSet.add(normalizedCategory);
                    try {
                        Set superCategories = catSuperMap.get(normalizedCategory);
                        if (superCategories != null) {
                            //logger.debug(i + "\t" + depth + "\t" + normalizedCategory + ": " + superCategories);
                            logger.trace(
                                    tabulator(depth) + "{" + normalizedCategory + ", " + depth + ", " + superCategories
                                            .size() + ", " + superCategories + "}");
                            searchCat(superCategories, weightedSet, depth + 1, visitedSet);
                        }
                    } catch (Exception e) {
                        logger.error(e);
                    }
                }
            }
        }
    }

    private void searchSimple(Map> s, Properties p, String page, WeightedSet weightedSet) {
        searchSimple(s, p, page, weightedSet, 1);
    }

    private void searchSimple(Map> s, Properties p, String page, WeightedSet weightedSet,
            double weight) {
        Set result = s.get(page);
        HashSet okResults = new HashSet();

        if (result != null) {
            for (String value : result) {
                if (p != null) {
                    if (p.getProperty(value) != null && !p.getProperty(value).equals("")) {
                        String topic = p.getProperty(value);
                        okResults.add(topic);
                        logger.debug("Key: " + value + " - Topic: " + topic);
                    }
                }
            }
        }

        for (String topic : okResults) {
            weightedSet.add(topic, weight / okResults.size());
        }

    }

    public void search(String page, WeightedSet weightedSet) {
        search(page, weightedSet, 1);
    }

    public void search(String page, WeightedSet weightedSet, double weight) {

        logger.debug("PAGE: " + page);

        // Categories
        if (useCategories) {
            logger.debug("Searching categories");
            Set visitedSet = new HashSet();
            Set categories = catMap.get(page);
            logger.trace("searching page " + page + " (" + weight + ")...");
            if (categories == null) {
                logger.trace("This page has no categories");
            } else {
                logger.trace("categories\t" + categories.size() + "\t" + categories);
                searchCat(categories, weightedSet, 1, visitedSet);
                logger.debug(page + "\t" + weightedSet.size() + "\t" + weightedSet.toSortedMap());
            }
        }

        // Portals
        if (usePortals) {
            logger.debug("Searching portals");
            searchSimple(portalMap, portalProperties, page, weightedSet);
        }

        // Navigation templates
        if (useNavs) {
            logger.debug("Searching navigation templates");
            searchSimple(navMap, navProperties, page, weightedSet);
        }

        // Suffix
        if (useSuffixes) {
            logger.debug("Searching suffix");
            ParsedPageTitle title = new ParsedPageTitle(page);
            if (title.hasSuffix()) {
                String suffix = title.getSuffix();
                if (suffixProperties != null) {
                    if (suffixProperties.getProperty(suffix) != null && !suffixProperties.getProperty(suffix)
                            .equals("")) {
                        String topic = suffixProperties.getProperty(suffix);
                        weightedSet.add(topic);
                        logger.debug("Key: " + suffix + " - Topic: " + topic);
                    }
                }
            }
        }

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy