/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * org.wikibrain.spatial.matcher.ShapeFileMatcher Maven / Gradle / Ivy
 *
 * There is a newer version: 0.9.1
 * Show newest version
 */
package org.wikibrain.spatial.matcher;

import com.typesafe.config.Config;
import com.vividsolutions.jts.geom.Geometry;
import org.apache.commons.io.FileUtils;
import org.geotools.data.simple.SimpleFeatureIterator;
import org.opengis.feature.simple.SimpleFeature;
import org.supercsv.io.CsvListWriter;
import org.supercsv.io.CsvMapReader;
import org.supercsv.prefs.CsvPreference;
import org.wikibrain.conf.ConfigurationException;
import org.wikibrain.core.cmd.Env;
import org.wikibrain.core.cmd.EnvBuilder;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.model.LocalPage;
import org.wikibrain.spatial.loader.SpatialDataDownloader;
import org.wikibrain.spatial.loader.SpatialDataFolder;
import org.wikibrain.spatial.WikiBrainShapeFile;
import org.wikibrain.utils.WpIOUtils;

import java.io.*;
import java.text.SimpleDateFormat;
import java.util.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Creates or updates a mapping csv file from a shapefile to WikiBrain.
 *
 * <p>For every feature of a shapefile one CSV row is written containing the
 * feature id, a key built from the configured key fields, a timestamp, a
 * status code, the chosen title, up to three guessed titles, a score, and any
 * extra shapefile fields named in the configuration. Rows of a previously
 * written mapping file whose status is not "unknown" keep their timestamp,
 * status and title.
 *
 * @author Shilad Sen
 */
public class ShapeFileMatcher {
    private static final char STATUS_UNKNOWN = 'U';
    private static final char STATUS_VERIFIED = 'V';

    /** Format of the WB_UPDATED column. */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss";

    /** Number of guesses requested from the resolver and written per row (WB_GUESS1..n). */
    private static final int NUM_GUESSES = 3;

    /** Config paths whose string lists name shapefile fields copied into the CSV. */
    private static final String[] EXTRA_FIELD_KEYS = { "titles", "context", "other" };

    private static final Logger LOG = LoggerFactory.getLogger(ShapeFileMatcher.class);
    private final Env env;
    private final SpatialDataFolder dir;
    private final SpatialDataDownloader downloader;

    /**
     * @param env environment supplying the configuration; its "spatial.dir"
     *            setting locates the spatial data folder
     */
    public ShapeFileMatcher(Env env) {
        this.env = env;
        this.dir = new SpatialDataFolder(new File(env.getConfiguration().get().getString("spatial.dir")));
        this.downloader = new SpatialDataDownloader(env);
    }

    /**
     * Downloads the requested dataset and (re)writes its mapping file.
     *
     * @param refSys      reference system of the dataset
     * @param layerGroup  layer group of the dataset
     * @param datasetName name of the dataset
     */
    public void match(String refSys, String layerGroup, String datasetName) throws IOException, InterruptedException, DaoException, ConfigurationException {
        Config config = env.getConfiguration().getConfig("spatial.datasets", refSys, layerGroup, datasetName);
        WikiBrainShapeFile shapeFile = downloader.download(refSys, layerGroup, datasetName, false);
        writeMatches(config, shapeFile);
    }

    /**
     * Writes a fresh mapping CSV for the shapefile and swaps it into place.
     *
     * <p>The CSV is first written to a temporary file. If a mapping file
     * already exists it is moved to "&lt;mapping file&gt;.bak" (replacing any
     * earlier backup) before the new file is moved to the mapping location.
     * If writing fails the temporary file is deleted and the existing mapping
     * file is left untouched.
     *
     * @param config    dataset configuration; must contain a "key" string list
     *                  and may contain "titles", "context" and "other" lists
     * @param shapeFile shapefile whose features are mapped
     */
    public void writeMatches(Config config, WikiBrainShapeFile shapeFile) throws IOException, ConfigurationException, DaoException {
        Map<String, MappingInfo> existing = readExisting(shapeFile);

        // Fields from the shapefile that should be included in the final CSV
        List<String> extraFields = new ArrayList<String>();
        for (String fieldsKey : EXTRA_FIELD_KEYS) {
            if (config.hasPath(fieldsKey)) {
                extraFields.addAll(config.getStringList(fieldsKey));
            }
        }

        // Everything that can fail without touching the output is done before
        // the temp file and writer are opened, so a failure here leaks nothing.
        List<String> keyFields = config.getStringList("key");
        List<String> featureNames = shapeFile.getFeatureNames();
        GeoResolver resolver = new GeoResolver(env, config);

        File newFile = File.createTempFile("wbmapping", ".csv");
        boolean written = false;
        try {
            CsvListWriter csv = new CsvListWriter(WpIOUtils.openWriter(newFile), CsvPreference.STANDARD_PREFERENCE);
            try {
                writeHeader(csv, extraFields);
                SimpleFeatureIterator iter = shapeFile.getFeatureIter();
                try {
                    int n = 0;
                    while (iter.hasNext()) {
                        if (n++ % 1000 == 0) {
                            LOG.info("Mapping row {} of {}", n, shapeFile.getFile());
                        }
                        SimpleFeature row = iter.next();
                        Map<String, String> rowMap = makeRow(featureNames, keyFields, row);
                        Geometry geometry = (Geometry) row.getDefaultGeometry();
                        writeRow(resolver, csv, extraFields, rowMap, geometry, existing);
                    }
                } finally {
                    // Release the feature iterator even when a row fails.
                    iter.close();
                }
            } finally {
                csv.close();
            }
            written = true;
        } finally {
            if (!written) {
                FileUtils.deleteQuietly(newFile);
            }
        }

        // Move original to a backup if it exists
        if (shapeFile.getMappingFile().exists()) {
            File backup = new File(shapeFile.getMappingFile().getAbsoluteFile() + ".bak");
            FileUtils.deleteQuietly(backup);
            FileUtils.moveFile(shapeFile.getMappingFile(), backup);
        }
        FileUtils.moveFile(newFile, shapeFile.getMappingFile());
    }

    /**
     * Reads the rows of the shapefile's current mapping file, if any, keyed by
     * their WB_KEY value. Rows whose status is "unknown" are skipped.
     *
     * TODO: keep track of duplicate or missing keys with special status codes
     *
     * @param shapeFile shapefile whose mapping file is read
     * @return mapping from WB_KEY to the previously stored info; empty when
     *         there is no mapping file or it has no header
     * @throws IOException if the mapping file cannot be read
     */
    private Map<String, MappingInfo> readExisting(WikiBrainShapeFile shapeFile) throws IOException {
        Map<String, MappingInfo> mapping = new HashMap<String, MappingInfo>();
        if (!shapeFile.hasMappingFile()) {
            return mapping;
        }
        CsvMapReader reader = new CsvMapReader(
                WpIOUtils.openBufferedReader(shapeFile.getMappingFile()),
                CsvPreference.STANDARD_PREFERENCE
        );
        try {
            String[] header = reader.getHeader(true);
            if (header == null) {
                // Empty file: there is no header to map columns with.
                return mapping;
            }
            Map<String, String> row;
            while ((row = reader.read(header)) != null) {
                MappingInfo info = new MappingInfo(row);
                if (!info.isUnknown()) {
                    mapping.put(info.key, info);
                }
            }
        } finally {
            reader.close();
        }
        return mapping;
    }

    /**
     * Converts a feature into a field-name to value map.
     *
     * <p>Attribute names are upper-cased; null attribute values become empty
     * strings. Two synthetic entries are added: "WB_ID" (the feature id) and
     * "WB_KEY" (the values of the key fields joined with '|').
     *
     * @param featureNames attribute names, indexed like the feature's attributes
     * @param keyFields    names of the fields that make up the key
     * @param row          feature to convert
     * @return the row as a map
     */
    private Map<String, String> makeRow(List<String> featureNames, List<String> keyFields, SimpleFeature row) {
        Map<String, String> rowMap = new HashMap<String, String>();
        for (int i = 0; i < row.getAttributeCount(); i++) {
            Object value = row.getAttribute(i);
            // Locale.ROOT keeps the upper-casing independent of the JVM's default locale.
            rowMap.put(featureNames.get(i).toUpperCase(Locale.ROOT), value == null ? "" : value.toString());
        }
        rowMap.put("WB_ID", row.getID());

        // NOTE(review): key fields are looked up exactly as configured, while
        // attribute names are stored upper-cased; a key field that is not found
        // contributes the text "null" (unchanged from the previous behavior so
        // keys stay comparable with existing mapping files) - confirm the
        // configuration always uses upper-case field names.
        StringBuilder key = new StringBuilder();
        for (String field : keyFields) {
            if (key.length() != 0) {
                key.append('|');
            }
            key.append(rowMap.get(field));
        }
        rowMap.put("WB_KEY", key.toString());

        return rowMap;
    }

    /**
     * Writes the CSV header: the fixed WB_* columns followed by the extra fields.
     *
     * @param writer      destination CSV writer
     * @param extraFields names of the additional shapefile columns
     */
    private void writeHeader(CsvListWriter writer, List<String> extraFields) throws IOException {
        List<String> fields = new ArrayList<String>();
        fields.add("WB_ID");
        fields.add("WB_KEY");
        fields.add("WB_UPDATED");
        fields.add("WB_STATUS");
        fields.add("WB_TITLE");
        for (int i = 1; i <= NUM_GUESSES; i++) {
            fields.add("WB_GUESS" + i);
        }
        fields.add("WB_SCORE");
        fields.addAll(extraFields);
        writer.write(fields);
    }

    /**
     * Resolves one feature and writes its CSV row.
     *
     * <p>When {@code existing} holds an entry for the row's WB_KEY, that
     * entry's timestamp, status and title are reused; otherwise the current
     * time, the "unknown" status and the first guess (if any) are written.
     * The score is {@code 2 * first - second} with two or more guesses, the
     * single guess's value with exactly one, and 0 with none.
     *
     * @param resolver    resolver producing candidate pages with scores
     * @param writer      destination CSV writer
     * @param extraFields names of additional row fields to append; a field
     *                    missing from the row is written as an empty string
     * @param row         row map as produced by {@link #makeRow}
     * @param geometry    the feature's default geometry
     * @param existing    previously stored mappings keyed by WB_KEY
     */
    private void writeRow(GeoResolver resolver, CsvListWriter writer, List<String> extraFields, Map<String, String> row, Geometry geometry, Map<String, MappingInfo> existing) throws DaoException, IOException {
        String tstamp = new SimpleDateFormat(TIMESTAMP_PATTERN).format(new Date());

        // The guesses are used in the map's iteration order; presumably the
        // resolver returns them best-first - verify against GeoResolver.
        LinkedHashMap<LocalPage, Double> guesses = resolver.resolve(row, geometry, NUM_GUESSES);
        List<LocalPage> sorted = new ArrayList<LocalPage>(guesses.keySet());

        List<String> guessTitles = new ArrayList<String>(NUM_GUESSES);
        for (int i = 0; i < NUM_GUESSES && i < sorted.size(); i++) {
            guessTitles.add(sorted.get(i).getTitle().getTitleStringWithoutNamespace());
        }

        List<String> newRow = new ArrayList<String>();
        MappingInfo prev = existing.get(row.get("WB_KEY"));

        newRow.add(row.get("WB_ID"));
        newRow.add(row.get("WB_KEY"));
        newRow.add(prev == null ? tstamp : prev.timestamp);
        newRow.add(String.valueOf(prev == null ? STATUS_UNKNOWN : prev.status));

        // Calculate best title
        String title = "";
        if (prev != null) {
            title = prev.title;
        } else if (!guessTitles.isEmpty()) {
            title = guessTitles.get(0);
        }
        newRow.add(title);

        // Always emit NUM_GUESSES guess columns, padding with empty strings.
        for (int i = 0; i < NUM_GUESSES; i++) {
            newRow.add(i < guessTitles.size() ? guessTitles.get(i) : "");
        }

        double score = 0;
        if (sorted.size() >= 2) {
            score = 2 * guesses.get(sorted.get(0)) - guesses.get(sorted.get(1));
        } else if (sorted.size() == 1) {
            score = guesses.get(sorted.get(0));
        }
        newRow.add("" + score);

        for (String f : extraFields) {
            newRow.add(nullToEmpty(row.get(f)));
        }
        writer.write(newRow);
    }

    /** Returns the given string, or an empty string when it is null. */
    private static String nullToEmpty(String s) {
        return s == null ? "" : s;
    }

    /**
     * The values of one mapping-file row that are carried over when the
     * mapping file is regenerated.
     */
    public static class MappingInfo {
        public final String key;
        public final String timestamp;
        public final char status;
        public final String title;

        /**
         * Builds the info from a mapping-file row.
         *
         * <p>Missing values are tolerated (the CSV reader may return null for
         * empty or absent columns): key, timestamp and title default to the
         * empty string, and a missing or blank status is treated as "unknown".
         *
         * @param row column name to value map; reads WB_KEY, WB_UPDATED,
         *            WB_STATUS and WB_TITLE
         */
        public MappingInfo(Map<String, String> row) {
            key = nullToEmpty(row.get("WB_KEY"));
            timestamp = nullToEmpty(row.get("WB_UPDATED"));
            String rawStatus = nullToEmpty(row.get("WB_STATUS")).trim();
            status = rawStatus.isEmpty()
                    ? STATUS_UNKNOWN
                    : rawStatus.toUpperCase(Locale.ROOT).charAt(0);
            title = nullToEmpty(row.get("WB_TITLE"));
        }

        /** @return true when the row's status is the "unknown" code */
        public boolean isUnknown() {
            return status == STATUS_UNKNOWN;
        }
    }

    /**
     * Builds an environment from the command-line arguments and matches the
     * "earth" / "marine" / "naturalEarth" dataset.
     */
    public static void main(String[] args) throws Exception {
        Env env = EnvBuilder.envFromArgs(args);
        ShapeFileMatcher matcher = new ShapeFileMatcher(env);
        matcher.match("earth", "marine", "naturalEarth");
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy