org.wikibrain.spatial.loader.WikidataLayerLoader Maven / Gradle / Ivy
The newest version!
package org.wikibrain.spatial.loader;
import com.vividsolutions.jts.geom.Geometry;
import gnu.trove.TCollections;
import gnu.trove.set.TIntSet;
import gnu.trove.set.hash.TIntHashSet;
import org.wikibrain.core.dao.DaoException;
import org.wikibrain.core.dao.MetaInfoDao;
import org.wikibrain.core.lang.LanguageSet;
import org.wikibrain.core.model.UniversalPage;
import org.wikibrain.spatial.constants.Layers;
import org.wikibrain.spatial.constants.RefSys;
import org.wikibrain.spatial.dao.SpatialDataDao;
import org.wikibrain.spatial.util.WikiBrainSpatialUtils;
import org.wikibrain.utils.ParallelForEach;
import org.wikibrain.utils.Procedure;
import org.wikibrain.utils.WpThreadUtils;
import org.wikibrain.wikidata.WikidataDao;
import org.wikibrain.wikidata.WikidataFilter;
import org.wikibrain.wikidata.WikidataStatement;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Loads points from wikidata as a layer.
*
* @author bjhecht, Shilad
*/
public class WikidataLayerLoader {
private static final Logger LOG = LoggerFactory.getLogger(WikidataLayerLoader.class);
private static final int COORDINATE_LOCATION_PROPERTY_ID = 625;
private final WikidataDao wdDao;
private final SpatialDataDao spatialDao;
private final MetaInfoDao miDao;
public WikidataLayerLoader(MetaInfoDao metaDao, WikidataDao wdDao, SpatialDataDao spatialDao) {
this.wdDao = wdDao;
this.spatialDao = spatialDao;
this.miDao = metaDao;
}
public final void loadData(final LanguageSet langs) throws DaoException {
final TIntSet savedConcepts = TCollections.synchronizedSet(new TIntHashSet());
final AtomicInteger matches = new AtomicInteger();
final AtomicInteger count = new AtomicInteger();
WikidataFilter filter = (new WikidataFilter.Builder()).withPropertyId(COORDINATE_LOCATION_PROPERTY_ID).build();
Iterable statements = wdDao.get(filter);
ParallelForEach.iterate(statements.iterator(), WpThreadUtils.getMaxThreads(), 100, new Procedure() {
@Override
public void call(WikidataStatement statement) throws Exception {
try {
if (storeStatement(savedConcepts, langs, statement)) {
matches.incrementAndGet();
}
} catch (Exception e) {
LOG.error("storage of statement failed: " + statement.toString(), e);
miDao.incrementErrorsQuietly(Geometry.class);
}
if (count.incrementAndGet() % 10000 == 0){
LOG.info("Matched " + matches + " out of " + count + " statements from " + this.getClass().getName());
}
}
}, Integer.MAX_VALUE);
}
private boolean storeStatement(TIntSet savedConcepts, LanguageSet langs, WikidataStatement statement) throws DaoException {
UniversalPage uPage = wdDao.getUniversalPage(statement.getItem().getId());
if (uPage == null || !uPage.isInLanguageSet(langs, false)){
return false;
}
int itemId = statement.getItem().getId();
Geometry g = WikiBrainSpatialUtils.jsonToGeometry(statement.getValue().getJsonValue().getAsJsonObject());
if (g == null) {
return false;
}
if (savedConcepts.contains(itemId)) {
return false;
}
savedConcepts.add(itemId);
spatialDao.saveGeometry(itemId, Layers.WIKIDATA, RefSys.EARTH, g);
miDao.incrementRecords(Geometry.class);
return true;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy