All downloads are free. Search and download functionality uses the official Maven repository.

org.wikibrain.spatial.cookbook.SimpleToblersLawEvaluator Maven / Gradle / Ivy

The newest version!
package org.wikibrain.spatial.cookbook;

import com.vividsolutions.jts.geom.Geometry;
import com.vividsolutions.jts.geom.Point;
import org.geotools.referencing.GeodeticCalculator;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.conf.Configurator;
import org.wikibrain.core.WikiBrainException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.LocalPageDao;
import org.wikibrain.core.dao.UniversalPageDao;
import org.wikibrain.core.lang.Language;
import org.wikibrain.core.model.Title;
import org.wikibrain.core.model.UniversalPage;
import org.wikibrain.spatial.dao.SpatialDataDao;
import org.wikibrain.sr.SRMetric;
import org.wikibrain.sr.SRResult;
import org.wikibrain.utils.ParallelForEach;
import org.wikibrain.utils.Procedure;
import org.wikibrain.utils.WpIOUtils;

import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Cookbook example that samples random pairs of spatially located concepts and
 * writes, for each pair, the distance between their centroids (in kilometers)
 * together with one semantic-relatedness (SR) score per configured language.
 *
 * <p>As the class name suggests, the resulting tab-separated file is meant for
 * relating spatial distance to relatedness (Tobler's law).
 *
 * <p>Typical use: construct, call {@link #retrieveLocations()}, then
 * {@link #evaluate(File, int)}.
 */
public class SimpleToblersLawEvaluator {

    private static final Logger LOG = LoggerFactory.getLogger(SimpleToblersLawEvaluator.class);

    /** Number of concept pairs sampled when the class is run via {@link #main(String[])}. */
    private static final int NUM_SAMPLES = 1000000;

    // java.util.Random is safe for concurrent use, so one instance is shared by all samples.
    private final Random random = new Random();

    private final SpatialDataDao sdDao;
    private final LocalPageDao lpDao;
    private final UniversalPageDao upDao;

    /** Languages of the environment, in the fixed order used for the SR columns. */
    private final List<Language> langs;

    /** The SR metric used for each language in {@link #langs}. */
    private final Map<Language, SRMetric> metrics;

    /** Concepts that have a geometry and an article in every language; filled by retrieveLocations(). */
    private final List<UniversalPage> concepts = new ArrayList<UniversalPage>();

    /** Centroid of each concept in {@link #concepts}. */
    private final Map<UniversalPage, Point> locations = new HashMap<UniversalPage, Point>();

    private final Env env;

    /** Writer for the result file; only set while/after {@link #evaluate(File, int)} runs. */
    private BufferedWriter output;


    /**
     * Looks up the data access objects and one "ensemble" SR metric per language
     * from the environment's configurator.
     *
     * @param env environment supplying the language set and the configurator
     * @throws ConfigurationException if a component cannot be obtained from the configurator
     */
    public SimpleToblersLawEvaluator(Env env) throws ConfigurationException {
        this.env = env;
        this.langs = new ArrayList<Language>(env.getLanguages().getLanguages());

        // Get data access objects
        Configurator c = env.getConfigurator();
        this.sdDao = c.get(SpatialDataDao.class);
        this.lpDao = c.get(LocalPageDao.class);
        this.upDao = c.get(UniversalPageDao.class);

        // build SR metrics
        this.metrics = new HashMap<Language, SRMetric>();
        for (Language lang : langs) {
            SRMetric m = c.get(SRMetric.class, "ensemble", "language", lang.getLangCode());
            metrics.put(lang, m);
        }
    }

    /**
     * Loads every geometry returned for ("wikidata", "earth") and keeps the concepts
     * that exist and have an article in all of the environment's languages, storing
     * each kept concept together with the centroid of its geometry.
     *
     * @throws DaoException if a geometry or concept lookup fails
     */
    public void retrieveLocations() throws DaoException {
        // Get all known concept geometries
        Map<Integer, Geometry> geometries = sdDao.getAllGeometriesInLayer("wikidata", "earth");
        LOG.info(String.format("Get %d geometries, now building id-name mapping", geometries.size()));

        // Build up list of concepts in all languages
        for (Map.Entry<Integer, Geometry> entry : geometries.entrySet()) {
            UniversalPage concept = upDao.getById(entry.getKey());
            if (concept == null || !concept.hasAllLanguages(env.getLanguages())) {
                continue;
            }
            concepts.add(concept);
            locations.put(concept, entry.getValue().getCentroid());
            if (concepts.size() % 1000 == 0) {
                LOG.info(String.format("Loaded %d geometries with articles in %s...", concepts.size(), env.getLanguages()));
            }
        }
        LOG.info(String.format("Found %d geometries with articles in %s", concepts.size(), env.getLanguages()));
    }

    /**
     * Writes a header line followed by one row per sampled concept pair to
     * {@code outputPath}. The writer is closed even if sampling fails.
     *
     * <p>{@link #retrieveLocations()} must have been called first so that there are
     * concepts to sample from.
     *
     * @param outputPath file that receives the tab-separated results
     * @param numSamples number of concept pairs to sample
     * @throws IOException if the file cannot be opened, written or closed
     */
    public void evaluate(File outputPath, int numSamples) throws IOException {
        this.output = WpIOUtils.openWriter(outputPath);
        try {
            writeHeader();

            // Presumably invoked from several threads; writeRow() serializes the file access.
            ParallelForEach.range(0, numSamples, new Procedure<Integer>() {
                @Override
                public void call(Integer i) throws Exception {
                    evaluateOneSample();
                }
            });
        } finally {
            // Release the file handle even when the header or the sampling loop throws.
            this.output.close();
        }
    }

    /**
     * Picks two concepts at random (with replacement, so both may be the same),
     * computes their SR score in every language and writes the resulting row.
     *
     * <p>Requires a non-empty concept list: {@link Random#nextInt(int)} rejects a
     * bound of zero with an {@link IllegalArgumentException}.
     *
     * @throws DaoException       if an SR computation fails on data access
     * @throws WikiBrainException if a title for the row cannot be resolved
     * @throws IOException        if the row cannot be written
     */
    public void evaluateOneSample() throws DaoException, WikiBrainException, IOException {
        UniversalPage c1 = concepts.get(random.nextInt(concepts.size()));
        UniversalPage c2 = concepts.get(random.nextInt(concepts.size()));

        // One result per language, in the same order as the header columns.
        List<SRResult> results = new ArrayList<SRResult>(langs.size());
        for (Language lang : langs) {
            SRMetric sr = metrics.get(lang);
            results.add(sr.similarity(c1.getLocalId(lang), c2.getLocalId(lang), false));
        }

        writeRow(c1, c2, results);
    }

    /**
     * Writes the column names: the fixed pair/distance columns followed by one
     * {@code <langCode>_SR} column per language. The line is terminated with a
     * newline so that the first data row starts on its own line.
     */
    private void writeHeader() throws IOException {
        StringBuilder header = new StringBuilder();
        header.append("ITEM_NAME_1");
        header.append("\tITEM_ID_1");
        header.append("\tITEM_NAME_2");
        header.append("\tITEM_ID_2");
        header.append("\tSPATIAL_DISTANCE");
        for (Language lang : langs) {
            header.append("\t").append(lang.getLangCode()).append("_SR");
        }
        header.append("\n");
        output.write(header.toString());
    }

    /**
     * Writes one result row: both titles and universal ids, the distance between
     * the two centroids in kilometers, and the SR score of every result.
     *
     * @param c1      first concept of the pair
     * @param c2      second concept of the pair
     * @param results SR results, one per language in header order
     */
    private void writeRow(UniversalPage c1, UniversalPage c2, List<SRResult> results) throws WikiBrainException, IOException {
        // The stored values already are centroids, so they are used directly.
        double km = distanceInKm(locations.get(c1), locations.get(c2));

        Title t1 = c1.getBestEnglishTitle(lpDao, true);
        Title t2 = c2.getBestEnglishTitle(lpDao, true);

        // Assemble the complete row first so the lock is held only for a single write.
        StringBuilder row = new StringBuilder();
        row.append(t1.getCanonicalTitle())
                .append("\t").append(c1.getUnivId())
                .append("\t").append(t2.getCanonicalTitle())
                .append("\t").append(c2.getUnivId())
                .append("\t").append(km);
        for (SRResult result : results) {
            row.append("\t").append(result.getScore());
        }
        row.append("\n");

        // Rows may be produced concurrently; writing under the lock keeps them from interleaving.
        synchronized (output) {
            output.write(row.toString());
        }
    }

    /**
     * Returns the orthodromic distance between two points, with x passed as the first
     * and y as the second geographic coordinate, divided by 1000 (GeodeticCalculator
     * reports meters, so the result is in kilometers).
     */
    private static double distanceInKm(Point p1, Point p2) {
        GeodeticCalculator geoCalc = new GeodeticCalculator();
        geoCalc.setStartingGeographicPoint(p1.getX(), p1.getY());
        geoCalc.setDestinationGeographicPoint(p2.getX(), p2.getY());
        return geoCalc.getOrthodromicDistance() / 1000;
    }


    /**
     * Builds the environment from the command-line arguments, loads the concept
     * locations and writes {@code NUM_SAMPLES} sampled pairs to {@code toblers_eval.tsv}.
     */
    public static void main(String[] args) throws Exception {
        Env env = EnvBuilder.envFromArgs(args);
        SimpleToblersLawEvaluator eval = new SimpleToblersLawEvaluator(env);
        eval.retrieveLocations();
        eval.evaluate(new File("toblers_eval.tsv"), NUM_SAMPLES);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy