it.uniroma2.art.lime.profiler.impl.AbstractLexicalizationModelProfiler
package it.uniroma2.art.lime.profiler.impl;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import org.eclipse.rdf4j.common.iteration.Iterations;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
import org.eclipse.rdf4j.model.util.Literals;
import org.eclipse.rdf4j.model.vocabulary.VOID;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.TupleQuery;
import org.eclipse.rdf4j.query.TupleQueryResult;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import it.uniroma2.art.lime.model.language.LanguageTagUtils;
import it.uniroma2.art.lime.model.repo.LIMERepositoryConnectionWrapper;
import it.uniroma2.art.lime.profiler.LexicalizationModelProfiler;
import it.uniroma2.art.lime.profiler.LexicalizationSetStatistics;
import it.uniroma2.art.lime.profiler.LexiconStats;
import it.uniroma2.art.lime.profiler.ProfilerContext;
import it.uniroma2.art.lime.profiler.ProfilerException;
import it.uniroma2.art.lime.profiler.ProfilerOptions;
import it.uniroma2.art.lime.profiler.ReferenceDatasetStatistics;
public abstract class AbstractLexicalizationModelProfiler implements LexicalizationModelProfiler {

    public abstract IRI getLexicalizationModel();

    @Override
    public Collection<LexicalizationSetStatistics> profile(RepositoryConnection conn, IRI[] graphs,
            IRI referenceDataset, ReferenceDatasetStatistics referenceDatasetStats,
            Map<String, LexiconStats> lexiconStats) throws ProfilerException {
        throw new UnsupportedOperationException();
    }

    @Override
    public boolean profile(ProfilerContext profilerContext,
            LIMERepositoryConnectionWrapper metadataConnection, RepositoryConnection dataConnection,
            IRI dataGraph, Resource dataset) throws ProfilerException {
        ProfilerOptions options = profilerContext.getOptions();
        TupleQuery statsQuery = prepareQuery(options, metadataConnection, dataConnection, dataGraph);
        // Reference datasets minted on the fly for unrecognized uriSpaces, keyed by uriSpace
        Map<String, ReferenceDatasetStatistics> additionalRefDatasetStats = new HashMap<>();
        Map<String, Resource> additionalRefDatasetRes = new HashMap<>();
        boolean processed = false;
        try (TupleQueryResult statsResults = statsQuery.evaluate()) {
            while (statsResults.hasNext()) {
                BindingSet stats = statsResults.next();
                Literal lang = (Literal) stats.getValue("lang");
                if (lang == null)
                    continue; // a language tag is mandatory for a lexicalization set
                Resource referenceDataset = (Resource) stats.getValue("referenceDataset");
                Literal uriSpace = (Literal) stats.getValue("uriSpace");
                Resource lexiconDataset = (Resource) stats.getValue("lexiconDataset");
                Literal references = (Literal) stats.getValue("references");
                Literal lexicalEntries = (Literal) stats.getValue("lexicalEntries");
                Literal lexicalizations = (Literal) stats.getValue("lexicalizations");
                Resource mainDataset = metadataConnection
                        .getMainDataset(options.isIncludeInferred(), options.getContexts()).get();
                LexicalizationSetStatistics statsObj = new LexicalizationSetStatistics();
                // Resolve the reference dataset: use the bound one, fall back to the main
                // dataset, or mint a fresh dataset resource for a previously unseen uriSpace
                if (referenceDataset != null) {
                    statsObj.setReferenceDataset((IRI) referenceDataset);
                } else {
                    if (uriSpace == null) {
                        referenceDataset = mainDataset;
                        statsObj.setReferenceDataset(mainDataset);
                    } else {
                        Resource newReferenceDataset = additionalRefDatasetRes.get(uriSpace.stringValue());
                        if (newReferenceDataset == null) {
                            newReferenceDataset = profilerContext.mintDatasetResource();
                            // remember the minted resource so later rows with the same
                            // uriSpace reuse it and so it can be serialized after the loop
                            additionalRefDatasetRes.put(uriSpace.stringValue(), newReferenceDataset);
                            ReferenceDatasetStatistics newRefDatStats = new ReferenceDatasetStatistics();
                            newRefDatStats.setUriSpace(uriSpace.stringValue());
                            additionalRefDatasetStats.put(uriSpace.stringValue(), newRefDatStats);
                        }
                        referenceDataset = newReferenceDataset;
                        statsObj.setReferenceDataset(newReferenceDataset);
                    }
                }
                Resource lexicalizationSetResource = profilerContext
                        .mintLexicalizationSetResource(referenceDataset, lexiconDataset, lang.stringValue());
                if (lexiconDataset != null) {
                    statsObj.setLexiconDataset((IRI) lexiconDataset);
                }
                BigInteger referencesBigInteger = Literals.getIntegerValue(references, BigInteger.ZERO);
                statsObj.setReferences(referencesBigInteger);
                if (lexicalEntries != null) {
                    statsObj.setLexicalEntries(Literals.getIntegerValue(lexicalEntries, BigInteger.ZERO));
                }
                BigInteger lexicalizationsBigInteger = Literals.getIntegerValue(lexicalizations,
                        BigInteger.ZERO);
                statsObj.setLexicalizations(lexicalizationsBigInteger);
                // If the reference dataset declares void:entities, derive the coverage
                // percentage and the average number of lexicalizations per entity
                if (referenceDataset != null) {
                    Iterations
                            .asSet(metadataConnection.getPropertyIntegers(referenceDataset, VOID.ENTITIES,
                                    options.isIncludeInferred(), options.getContexts()))
                            .stream().findAny().ifPresent(referenceDatasetEntities -> {
                                BigDecimal referenceDatasetEntitiesDecimal = new BigDecimal(
                                        referenceDatasetEntities);
                                statsObj.setPercentage(new BigDecimal(referencesBigInteger)
                                        .divide(referenceDatasetEntitiesDecimal, 3, RoundingMode.CEILING));
                                statsObj.setAvgNumOfLexicalizations(new BigDecimal(lexicalizationsBigInteger)
                                        .divide(referenceDatasetEntitiesDecimal, 3, RoundingMode.CEILING));
                            });
                }
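                // Worked example of the derived figures (illustrative numbers only):
                // with 500 distinct references and 900 lexicalizations against a
                // reference dataset declaring 2000 void:entities, the coverage
                // percentage is 500/2000 = 0.250 and the average number of
                // lexicalizations per entity is 900/2000 = 0.450 (both at scale 3,
                // rounded toward positive infinity).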
                statsObj.setLexicalizationModel(getLexicalizationModel());
                String langTag = lang.getLabel();
                statsObj.setLanguageTag(langTag);
                LanguageTagUtils.toLexvo(langTag).ifPresent(statsObj::setLanguageLexvo);
                LanguageTagUtils.toLOC(langTag).ifPresent(statsObj::setLanguageLOC);
                // Serialize the lexicalization set and link it to the main dataset
                Model graph = new LinkedHashModel();
                statsObj.serialize(graph, lexicalizationSetResource);
                metadataConnection.add(graph);
                metadataConnection.add(mainDataset, VOID.SUBSET, lexicalizationSetResource);
                processed = true;
            }
        }
        if (processed) {
            Model graph = new LinkedHashModel();
            for (Map.Entry<String, ReferenceDatasetStatistics> entry : additionalRefDatasetStats.entrySet()) {
                Resource refDatRes = additionalRefDatasetRes.get(entry.getKey());
                entry.getValue().serialize(graph, refDatRes);
            }
            metadataConnection.add(graph);
        }
        return processed;
    }

    protected abstract TupleQuery prepareQuery(ProfilerOptions options,
            LIMERepositoryConnectionWrapper metadataConnection, RepositoryConnection dataConnection,
            IRI dataGraph);

    protected abstract boolean requiresLexicon();
}
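For orientation, here is a minimal sketch of a concrete subclass, assuming an rdfs:label-based lexicalization model. It is illustrative only: the class name, the SPARQL query, and the choice to bind only ?lang, ?references, and ?lexicalizations (leaving the optional variables unbound, which profile() above tolerates) are assumptions, not the query used by the library's actual profilers.

package it.uniroma2.art.lime.profiler.impl;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.query.TupleQuery;
import org.eclipse.rdf4j.repository.RepositoryConnection;

import it.uniroma2.art.lime.model.repo.LIMERepositoryConnectionWrapper;
import it.uniroma2.art.lime.profiler.ProfilerOptions;

// Hypothetical subclass: profiles plain rdfs:label lexicalizations
public class SketchRDFSLexicalizationModelProfiler extends AbstractLexicalizationModelProfiler {

    @Override
    public IRI getLexicalizationModel() {
        // conventional LIME identifier of the RDFS lexicalization model
        return SimpleValueFactory.getInstance().createIRI("http://www.w3.org/2000/01/rdf-schema#");
    }

    @Override
    protected TupleQuery prepareQuery(ProfilerOptions options,
            LIMERepositoryConnectionWrapper metadataConnection, RepositoryConnection dataConnection,
            IRI dataGraph) {
        // One row per language: the labeled resources (?references) and the labels
        // themselves (?lexicalizations). For brevity this sketch ignores dataGraph
        // and queries the whole repository.
        String query =
                "SELECT ?lang (COUNT(DISTINCT ?res) AS ?references) (COUNT(?label) AS ?lexicalizations)\n"
                + "WHERE {\n"
                + "  ?res <http://www.w3.org/2000/01/rdf-schema#label> ?label .\n"
                + "  BIND(LANG(?label) AS ?lang)\n"
                + "  FILTER(?lang != \"\")\n"
                + "}\n"
                + "GROUP BY ?lang";
        TupleQuery tupleQuery = dataConnection.prepareTupleQuery(query);
        tupleQuery.setIncludeInferred(options.isIncludeInferred());
        return tupleQuery;
    }

    @Override
    protected boolean requiresLexicon() {
        return false; // rdfs:label lexicalizations involve no lexicon
    }
}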