All downloads are free. The search and download functionality uses the official Maven repository.

org.wikibrain.spatial.matcher.GeoResolver Maven / Gradle / Ivy

The newest version!
package org.wikibrain.spatial.matcher;

import com.typesafe.config.Config;
import com.vividsolutions.jts.geom.Geometry;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.lang.LocalId;
import org.wikibrain.core.lang.LocalString;
import org.wikibrain.core.model.LocalPage;
import org.wikibrain.phrases.PhraseAnalyzer;
import org.wikibrain.sr.disambig.Disambiguator;
import org.wikibrain.utils.WpCollectionUtils;

import java.util.*;

/**
 * @author Shilad Sen
 */
public class GeoResolver {
    private final Env env;
    private final Config config;
    private final List titleFields;
    private final Disambiguator disambig;
    private final List contextFields;
    private final Language language;
    private final LocalPageDao pageDao;

    private List scorers;

    public GeoResolver(Env env, Config config) throws ConfigurationException {
        this.env = env;
        this.config = config;
        this.titleFields = config.getStringList("titles");
        this.contextFields = config.getStringList("context");
        this.language = env.getLanguages().getDefaultLanguage();
        this.pageDao = env.getConfigurator().get(LocalPageDao.class);
        this.disambig = env.getConfigurator().get(Disambiguator.class, config.getString("dab"), "language", language.getLangCode());

        if (this.language != Language.EN && this.language != Language.SIMPLE) {
            throw new IllegalArgumentException();
        }

        initScorers();
    }

    private void initScorers() throws ConfigurationException {
        this.scorers = new ArrayList();
        for (Config scorerConfig : config.getConfigList("scorers")) {
            String type = scorerConfig.getString("type");
            if (type.equals("instanceOf")) {
                scorers.add(new InstanceOfMatchScorer(env, scorerConfig));
            } else if (type.equals("wikidataValue")) {
                scorers.add(new WikidataValueScorer(env, scorerConfig));
            } else if (type.equals("contains")) {
                scorers.add(new ContainsPointScorer(env, scorerConfig));
            } else {
                throw new ConfigurationException("Unknown score type: " + type);
            }
        }
    }

    public LinkedHashMap resolve(Map row, Geometry geometry, int n) throws DaoException {

        List titles = new ArrayList();
        for (String field : titleFields) {
            for (String t : row.get(field).split("\\|")) {
                if (!titles.contains(t)) {
                    titles.add(t);
                }
            }
        }

        Set context = new HashSet();
        for (String field : contextFields) {
            if (row.get(field) != null) {
                context.add(new LocalString(language, row.get(field)));
            }
        }

        Map scores = new HashMap();
        for (int i = 0; i < titles.size(); i++) {
            double weight = 1.0 * Math.pow(0.7, i);
            LinkedHashMap result = disambig.disambiguate(new LocalString(language, titles.get(i)), context);
            if (result == null) {
                continue;
            }
            for (LocalId lid : result.keySet()) {
                if (!scores.containsKey(lid)) {
                    scores.put(lid, 0.0);
                }
                scores.put(lid, scores.get(lid) + weight * result.get(lid));
            }
        }

        for (AbstractMatchScorer scorer : scorers) {
            for (LocalId id : scores.keySet()) {
                scores.put(id, scores.get(id) + scorer.getWeight() * scorer.score(id, row, geometry));
            }
        }

        LinkedHashMap result = new LinkedHashMap();
        for (LocalId id : WpCollectionUtils.sortMapKeys(scores, true)) {
            result.put(pageDao.getById(id), scores.get(id));
            if (result.size() >= n) {
                break;
            }
        }

        return result;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy