// it.uniroma2.art.lime.profiler.LIMEProfiler — Maven / Gradle / Ivy (artifact-page header; extraction artifact)
/* This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL
* was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. */
/* Portions created by ART Group, University of Rome Tor Vergata are Copyright (C) 2013 */
package it.uniroma2.art.lime.profiler;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.math.RoundingMode;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Stream;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;

import org.eclipse.rdf4j.common.exception.RDF4JException;
import org.eclipse.rdf4j.common.iteration.Iterations;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.util.Literals;
import org.eclipse.rdf4j.model.vocabulary.DCTERMS;
import org.eclipse.rdf4j.model.vocabulary.FOAF;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.VOID;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.QueryResults;
import org.eclipse.rdf4j.query.TupleQuery;
import org.eclipse.rdf4j.query.TupleQueryResult;
import org.eclipse.rdf4j.query.impl.SimpleDataset;
import org.eclipse.rdf4j.queryrender.RenderUtils;
import org.eclipse.rdf4j.repository.RepositoryConnection;
import org.eclipse.rdf4j.repository.RepositoryResult;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;

import it.uniroma2.art.lime.model.language.LanguageTagUtils;
import it.uniroma2.art.lime.model.repo.LIMERepositoryConnectionWrapper;
import it.uniroma2.art.lime.model.vocabulary.LIME;
import it.uniroma2.art.lime.profiler.impl.OWLSemanticModelProfiler;
import it.uniroma2.art.lime.profiler.impl.OntoLexLexicalizationModelProfiler;
import it.uniroma2.art.lime.profiler.impl.RDFSLexicalizationModelProfiler;
import it.uniroma2.art.lime.profiler.impl.ResourceLocationUtilsInternal;
import it.uniroma2.art.lime.profiler.impl.SKOSLexicalizationModelProfiler;
import it.uniroma2.art.lime.profiler.impl.SKOSSemanticModelProfiler;
import it.uniroma2.art.lime.profiler.impl.SKOSXLLexicalizationModelProfiler;
public class LIMEProfiler {
private LIMERepositoryConnectionWrapper metadataConnection;
private RepositoryConnection dataConnection;
private IRI dataGraph;
private static Map knownSemanticModelProfilers;
private static Map knownLexicalizationModelProfilers;
private IRI metadataBaseURI;
static {
ValueFactory vf = SimpleValueFactory.getInstance();
knownSemanticModelProfilers = new LinkedHashMap<>();
knownSemanticModelProfilers.put(vf.createIRI("http://www.w3.org/2004/02/skos/core"),
new SKOSSemanticModelProfiler());
knownSemanticModelProfilers.put(vf.createIRI("http://www.w3.org/2002/07/owl"),
new OWLSemanticModelProfiler());
knownLexicalizationModelProfilers = new LinkedHashMap<>();
knownLexicalizationModelProfilers.put(vf.createIRI("http://www.w3.org/ns/lemon/ontolex"),
new OntoLexLexicalizationModelProfiler());
knownLexicalizationModelProfilers.put(vf.createIRI("http://www.w3.org/2008/05/skos-xl"),
new SKOSXLLexicalizationModelProfiler());
knownLexicalizationModelProfilers.put(vf.createIRI("http://www.w3.org/2004/02/skos/core"),
new SKOSLexicalizationModelProfiler());
knownLexicalizationModelProfilers.put(vf.createIRI("http://www.w3.org/2000/01/rdf-schema"),
new RDFSLexicalizationModelProfiler());
}
public LIMEProfiler(RepositoryConnection metadataConnection, IRI metadataBaseURI,
RepositoryConnection dataConnection, IRI dataGraph) {
this.metadataConnection = new LIMERepositoryConnectionWrapper(metadataConnection.getRepository(),
metadataConnection);
this.metadataBaseURI = metadataBaseURI;
this.dataConnection = dataConnection;
this.dataGraph = dataGraph;
}
public void profile() throws ProfilerException {
profile(new ProfilerOptions());
}
public void profile(ProfilerOptions options) throws ProfilerException {
ProfilerContext profilerContext = createNewProfilerContext(options);
boolean includeInferred = options.isIncludeInferred();
IRI[] contexts = options.getContexts();
Optional mainDatasetHolder = metadataConnection.getMainDataset(includeInferred, contexts);
Resource mainDataset;
if (mainDatasetHolder.isPresent()) {
mainDataset = mainDatasetHolder.get();
} else {
mainDataset = profilerContext.mintMainDatasetResource();
metadataConnection.add(metadataBaseURI, RDF.TYPE, VOID.DATASET_DESCRIPTION);
metadataConnection.add(metadataBaseURI, FOAF.PRIMARY_TOPIC, mainDataset);
metadataConnection.add(mainDataset, RDF.TYPE, VOID.DATASET);
}
// -- Overall statistics:
// Triple count
BigInteger triples = BigInteger.valueOf(dataConnection.size(dataGraph));
metadataConnection.add(mainDataset, VOID.TRIPLES,
metadataConnection.getValueFactory().createLiteral(triples));
// Distinct subjects
TupleQuery distinctSubjectsQuery = dataConnection.prepareTupleQuery(
// @formatter:off
" SELECT (COUNT(DISTINCT ?s) AS ?c) WHERE {\n " +
" ?s ?p ?o . \n" +
" }\n"
// @formatter:on
);
SimpleDataset dataset = new SimpleDataset();
dataset.addDefaultGraph(dataGraph);
distinctSubjectsQuery.setIncludeInferred(includeInferred);
Literal distinctSubjectsLiteral = (Literal) QueryResults
.singleResult(distinctSubjectsQuery.evaluate()).getValue("c");
metadataConnection.add(mainDataset, VOID.DISTINCT_SUBJECTS, distinctSubjectsLiteral);
// Distinct objects
TupleQuery distinctObjectsQuery = dataConnection.prepareTupleQuery(
// @formatter:off
" SELECT (COUNT(DISTINCT ?o) AS ?c) WHERE {\n " +
" ?s ?p ?o . \n" +
" }\n"
// @formatter:on
);
distinctObjectsQuery.setIncludeInferred(includeInferred);
Literal distinctObjectsLiteral = (Literal) QueryResults.singleResult(distinctObjectsQuery.evaluate())
.getValue("c");
metadataConnection.add(mainDataset, VOID.DISTINCT_OBJECTS, distinctObjectsLiteral);
// -- Reference dataset statistics
Set semanticModels = Sets
.intersection(
QueryResults.asSet(metadataConnection.getPropertyIRIs(mainDataset,
DCTERMS.CONFORMS_TO, includeInferred, contexts)),
knownSemanticModelProfilers.keySet());
if (semanticModels.size() > 1) {
throw new AmbiguousSemanticModelException(mainDataset, semanticModels);
}
for (Map.Entry entry : knownSemanticModelProfilers.entrySet()) {
IRI semanticModel = entry.getKey();
SemanticModelProfiler semanticModelProfiler = entry.getValue();
boolean processed = semanticModelProfiler.profile(profilerContext, metadataConnection,
dataConnection, dataGraph, mainDataset);
if (processed)
break;
}
// -- Lexicons statistics
profileLexicons(this, options, metadataConnection, dataConnection, dataGraph, mainDataset);
// -- LexicalizationSet statistics
for (Entry entry : knownLexicalizationModelProfilers.entrySet()) {
IRI semanticModel = entry.getKey();
LexicalizationModelProfiler lexicalizationModelProfiler = entry.getValue();
boolean processed = lexicalizationModelProfiler.profile(profilerContext, metadataConnection,
dataConnection, dataGraph, mainDataset);
if (processed)
break;
}
// -- ConceptSet statistics
profileConceptSets(this, options, metadataConnection, dataConnection, dataGraph, mainDataset);
// -- ConceptualizationSet statistics
profileConceptualizationSets(this, profilerContext, metadataConnection, dataConnection, dataGraph,
mainDataset);
// -- Recognize void:Linkset(s)
StringBuilder linksetQueryStringBuilder = new StringBuilder();
linksetQueryStringBuilder.append(
// @formatter:off
" PREFIX rdfs: \n" +
" PREFIX owl: \n" +
" PREFIX skos: \n" +
" \n" +
" SELECT ?subject_referenceDataset ?subject_referenceDatasetUriSpace \n" +
" ?object_referenceDataset ?object_referenceDatasetUriSpace ?mappingProp (COUNT(?subject) as ?linkCount) { \n" +
" VALUES(?mappingBaseProp) { \n" +
" (owl:differentFrom) \n" +
" (owl:sameAs) \n" +
" (rdfs:subClassOf) \n" +
" (rdfs:subPropertyOf) \n" +
" (owl:disjointWith) \n" +
" (owl:equivalentClass) \n" +
" (owl:equivalentProperty) \n" +
" (owl:propertyDisjointWith) \n" +
" (skos:mappingRelation) \n" +
" } \n" +
" ?mappingProp rdfs:subPropertyOf* ?mappingBaseProp . \n" +
" GRAPH ?dataGraph { \n" +
" ?subject ?mappingProp ?object . \n"
// @formatter:on
);
ResourceLocationUtilsInternal.appendUriSpaceLogic(options, dataGraph, linksetQueryStringBuilder,
metadataConnection, "subject_", "subject");
ResourceLocationUtilsInternal.appendUriSpaceLogic(options, dataGraph, linksetQueryStringBuilder,
metadataConnection, "object_", "object");
linksetQueryStringBuilder.append(
// @formatter:off
" FILTER (!(BOUND(?subject_referenceDataset) && BOUND(?object_referenceDataset)) || !sameTerm(?subject_referenceDataset, ?object_referenceDataset)) \n" +
" FILTER (!(BOUND(?subject_referenceDatasetUriSpace) && BOUND(?object_referenceDatasetUriSpace)) || !sameTerm(?subject_referenceDatasetUriSpace, ?object_referenceDatasetUriSpace)) \n" +
" FILTER (BOUND(?subject_referenceDataset) || BOUND(?object_referenceDataset) || BOUND(?subject_referenceDatasetUriSpace) || BOUND(?object_referenceDatasetUriSpace))\n"+
" FILTER (!BOUND(?subject_referenceDatasetUriSpace) \n" +
" || ?subject_referenceDatasetUriSpace NOT IN (\"http://www.w3.org/2002/07/owl#\", \n" +
" \"http://www.w3.org/2000/01/rdf-schema#\")) \n" +
" FILTER (!BOUND(?object_referenceDatasetUriSpace) \n" +
" || ?object_referenceDatasetUriSpace NOT IN (\"http://www.w3.org/2002/07/owl#\", \n" +
" \"http://www.w3.org/2000/01/rdf-schema#\")) \n" +
" } \n" +
" } \n" +
" GROUP BY ?subject_referenceDataset ?subject_referenceDatasetUriSpace ?object_referenceDataset ?object_referenceDatasetUriSpace ?mappingProp \n" +
" HAVING (?linkCount > 0) \n"
// @formatter:on
);
TupleQuery linksetQuery = dataConnection.prepareTupleQuery(linksetQueryStringBuilder.toString());
linksetQuery.setBinding("dataGraph", dataGraph);
linksetQuery.setIncludeInferred(includeInferred);
Map additionalDatasets = new HashMap<>();
Multimap, LinksetStats> pair2linksets = HashMultimap.create();
try (TupleQueryResult results = linksetQuery.evaluate()) {
while (results.hasNext()) {
BindingSet bs = results.next();
Resource subjectDataset = ResourceLocationUtilsInternal.getDatasetOrMintNew(profilerContext,
additionalDatasets, mainDataset, (IRI) bs.getValue("subject_referenceDataset"),
Optional.ofNullable(bs.getValue("subject_referenceDatasetUriSpace"))
.map(Value::stringValue).orElse(null));
Resource objectDataset = ResourceLocationUtilsInternal.getDatasetOrMintNew(profilerContext,
additionalDatasets, mainDataset, (IRI) bs.getValue("object_referenceDataset"),
Optional.ofNullable(bs.getValue("object_referenceDatasetUriSpace"))
.map(Value::stringValue).orElse(null));
Pair datasetPair = new ImmutablePair<>(subjectDataset, objectDataset);
LinksetStats linksetStats = new LinksetStats();
linksetStats.setSubjectsTarget(subjectDataset);
linksetStats.setObjectsTarget(objectDataset);
linksetStats.setLinkPredicate((IRI) bs.getValue("mappingProp"));
linksetStats.setTriples(Literals.getIntegerValue(bs.getValue("linkCount"), BigInteger.ZERO));
pair2linksets.put(datasetPair, linksetStats);
}
}
Model linksetStatsModel = new LinkedHashModel();
for (Pair datasetPair : pair2linksets.keySet()) {
Collection relevantLinksets = pair2linksets.get(datasetPair);
Resource parentLinkset = null;
long cumulativeLinks = 0;
if (relevantLinksets.size() > 1) {
parentLinkset = profilerContext.mintLinksetResource(datasetPair.getKey(),
datasetPair.getValue(), null);
}
for (LinksetStats linksetStats : relevantLinksets) {
Resource linkset = profilerContext.mintLinksetResource(datasetPair.getKey(),
datasetPair.getValue(),
parentLinkset != null ? linksetStats.getLinkPredicate() : null);
linksetStats.serialize(linksetStatsModel, linkset);
if (parentLinkset != null) {
linksetStatsModel.add(parentLinkset, VOID.SUBSET, linkset);
cumulativeLinks += linksetStats.getTriples().longValue();
} else {
linksetStatsModel.add(mainDataset, VOID.SUBSET, linkset);
}
}
if (parentLinkset != null) {
LinksetStats parentLinksetStats = new LinksetStats();
parentLinksetStats.setSubjectsTarget(datasetPair.getKey());
parentLinksetStats.setObjectsTarget(datasetPair.getValue());
parentLinksetStats.setTriples(BigInteger.valueOf(cumulativeLinks));
parentLinksetStats.serialize(linksetStatsModel, parentLinkset);
metadataConnection.add(mainDataset, VOID.SUBSET, parentLinkset);
}
}
additionalDatasets.forEach((uriSpace, dat) -> {
linksetStatsModel.add(dat, RDF.TYPE, VOID.DATASET);
linksetStatsModel.add(dat, VOID.URI_SPACE,
SimpleValueFactory.getInstance().createLiteral(uriSpace));
});
metadataConnection.add(linksetStatsModel);
}
protected ProfilerContext createNewProfilerContext(ProfilerOptions options) {
ProfilerContext profilerContext = new ProfilerContext();
profilerContext.setMetadataBaseURI(metadataBaseURI);
profilerContext.setMetadataConnection(metadataConnection);
profilerContext.setOptions(options);
return profilerContext;
}
// public void profileReferenceDataset(Resource dataset, boolean includeInferred, Resource... contexts)
// throws ProfilerException {
// Optional semanticModelProfilerHolder = QueryResults
// .stream(metadataConnection.getProperties(dataset, DCTERMS.CONFORMS_TO, includeInferred,
// contexts))
// .flatMap(new CastFlatMapper<>(IRI.class)).map(this::getSemanticModelProfiler2)
// .filter(Optional::isPresent).map(Optional::get).findAny();
//
// if (semanticModelProfilerHolder.isPresent()) {
// SemanticModelProfiler semanticModelProfiler = semanticModelProfilerHolder.get();
// semanticModelProfiler.profile(metadataConnection, dataConnection, dataset);
// } else {
// throw new SemanticModelNotRecognizedException();
// }
// }
public void profileLexicons(LIMEProfiler profiler, ProfilerOptions options,
LIMERepositoryConnectionWrapper metadataConnection, RepositoryConnection dataConnection,
IRI dataGraph, Resource mainDataset) throws ProfilerException {
TupleQuery query = dataConnection.prepareTupleQuery(
// @formatter:off
" PREFIX ontolex: \n" +
" PREFIX lime: \n" +
" \n" +
" SELECT ?lexiconDataset (GROUP_CONCAT(DISTINCT ?langT) as ?lang) \n" +
" (COUNT(DISTINCT ?lexicalEntry) as ?lexicalEntries){ \n" +
" GRAPH ?dataGraph { \n" +
" ?lexiconDataset lime:entry ?lexicalEntry . \n" +
" ?lexicalEntry ontolex:canonicalForm/ontolex:writtenRep ?label . \n" +
" BIND(LANG(?label) as ?langT) \n" +
" } \n" +
" } \n" +
" GROUP BY ?lexiconDataset \n" +
" HAVING BOUND(?lexiconDataset) \n"
// @formatter:on
);
query.setBinding("dataGraph", dataGraph);
try (TupleQueryResult results = query.evaluate()) {
while (results.hasNext()) {
BindingSet lexiconBindingSet = results.next();
Resource lexiconDataset = (Resource) lexiconBindingSet.getValue("lexiconDataset");
String lang = lexiconBindingSet.getValue("lang").stringValue();
BigInteger lexicalEntries = Literals
.getIntegerValue(lexiconBindingSet.getValue("lexicalEntries"), BigInteger.ZERO);
if (lang.contains(",")) {
throw new ProfilerException("Lexicon \"" + lexiconDataset.stringValue()
+ "\" has ambiguous languages: " + lang);
}
if (lang.isEmpty()) {
throw new ProfilerException(
"No language information can be computed for lexicon: " + lexiconDataset);
}
LexiconStats lexiconStats = new LexiconStats();
lexiconStats.setLanguageTag(lang);
LanguageTagUtils.toLexvo(lang).ifPresent(lexiconStats::setLanguageLexvo);
LanguageTagUtils.toLOC(lang).ifPresent(lexiconStats::setLanguageLOC);
lexiconStats.setLexicalEntries(lexicalEntries);
Model model = new LinkedHashModel();
lexiconStats.serialize(model, lexiconDataset);
metadataConnection.add(model);
try (RepositoryResult repositoryResult = dataConnection.getStatements(lexiconDataset, DCTERMS.TITLE, null, dataGraph);
Stream stream = QueryResults.stream(repositoryResult)) {
stream.forEach(s -> metadataConnection.add(s, (Resource) null));
}
metadataConnection.add(mainDataset, VOID.SUBSET, lexiconDataset);
}
}
}
public void profileConceptSets(LIMEProfiler profiler, ProfilerOptions options,
LIMERepositoryConnectionWrapper metadataConnection, RepositoryConnection dataConnection,
IRI dataGraph, Resource mainDataset) throws ProfilerException {
TupleQuery query = dataConnection.prepareTupleQuery(
// @formatter:off
" PREFIX ontolex: \n" +
" PREFIX skos: \n" +
" \n" +
" SELECT ?conceptSet (COUNT(DISTINCT ?concept) as ?concepts) { \n" +
" ?conceptSetCls rdfs:subClassOf* ontolex:ConceptSet . \n" +
" GRAPH ?dataGraph { \n" +
" ?conceptSet a ?conceptSetCls . \n" +
" ?concept skos:inScheme|skos:topConceptOf ?conceptSet . \n" +
" } \n" +
" } \n" +
" GROUP BY ?conceptSet \n" +
" HAVING BOUND(?conceptSet) \n"
// @formatter:on
);
query.setBinding("dataGraph", dataGraph);
try (TupleQueryResult results = query.evaluate()) {
while (results.hasNext()) {
BindingSet conceptSetBindingSet = results.next();
Resource conceptSet = (Resource) conceptSetBindingSet.getValue("conceptSet");
BigInteger concepts = Literals.getIntegerValue(conceptSetBindingSet.getValue("concepts"),
BigInteger.ZERO);
ConceptSetStats conceptSetStats = new ConceptSetStats();
conceptSetStats.setConcepts(concepts);
Model model = new LinkedHashModel();
conceptSetStats.serialize(model, conceptSet);
try (RepositoryResult repositoryResult = dataConnection.getStatements(conceptSet, DCTERMS.TITLE, null, dataGraph);
Stream stream = QueryResults.stream(repositoryResult)) {
stream.forEach(s -> metadataConnection.add(s, (Resource) null));
}
metadataConnection.add(model);
metadataConnection.add(mainDataset, VOID.SUBSET, conceptSet);
}
}
}
public void profileConceptualizationSets(LIMEProfiler profiler, ProfilerContext profilerContext,
LIMERepositoryConnectionWrapper metadataConnection, RepositoryConnection dataConnection,
IRI dataGraph, Resource mainDataset) throws ProfilerException {
ValueFactory vf = metadataConnection.getValueFactory();
try {
StringBuilder sb = new StringBuilder(
// @formatter:off
"PREFIX rdfs: \n" +
"PREFIX ontolex: \n" +
"PREFIX lime: \n" +
"PREFIX skos: \n" +
"SELECT ?conceptSet ?lexiconDataset \n" +
" ?conceptualization_referenceDatasetUriSpace \n" +
" (COUNT(DISTINCT ?concept) as ?concepts) \n" +
" (COUNT(?concept) as ?conceptualizations) \n" +
" (COUNT(DISTINCT ?lexicalEntry) as ?lexicalEntries) { \n" +
" {SELECT DISTINCT ?lexicalEntry ?concept ?conceptSet ?lexiconDataset { \n" +
" GRAPH " + RenderUtils.toSPARQL(dataGraph) + " { \n" +
" ?lexicalEntry \n" +
" (ontolex:sense|^ontolex:isSenseOf) \n" +
" /(ontolex:isLexicalizedSenseOf|^ontolex:lexicalizedSense) \n" +
" |(ontolex:evokes|^ontolex:isEvokedBy) \n" +
" ?concept . \n" +
" OPTIONAL { \n" +
" ?concept skos:topConceptOf|skos:inScheme ?conceptSet . \n" +
" } \n" +
" ?lexiconDataset lime:entry ?lexicalEntry . \n" +
" } \n" +
" }} \n" +
(!profilerContext.getOptions().isDefaultToLocalReference()
?
" bind(IF(BOUND(?conceptSet), ?unboundVariable, IF(not exists {graph " + RenderUtils.toSPARQL(dataGraph) + " {?concept a []}}, \n" +
" REPLACE(STR(?concept), \"(.+(#|\\\\/)).*\", \"$1\"), \n" +
" ?unboundVariable)) as ?conceptualization_referenceDatasetUriSpace) \n"
:
"") +
" \n" +
" } \n" +
"GROUP BY ?conceptSet ?conceptualization_referenceDatasetUriSpace ?lexiconDataset \n" +
"HAVING BOUND(?lexiconDataset)"
// @formatter:on
);
Map additionalRefDatasetStats = new HashMap<>();
Map additionalRefDatasetRes = new HashMap<>();
TupleQuery query = dataConnection.prepareTupleQuery(sb.toString());
try (TupleQueryResult statResult = query.evaluate()) {
while (statResult.hasNext()) {
BindingSet stats = statResult.next();
/* @Nullable */ Resource conceptSet = (Resource) stats.getValue("conceptSet");
/* @Nullable */ String conceptualization_referenceDatasetUriSpace = Optional
.ofNullable(stats.getValue("conceptualization_referenceDatasetUriSpace"))
.map(Value::stringValue).orElse(null);
Resource lexiconDataset = (Resource) stats.getValue("lexiconDataset");
BigInteger concepts = Literals.getIntegerValue((Literal) stats.getValue("concepts"),
BigInteger.ZERO);
BigInteger conceptualizations = Literals
.getIntegerValue((Literal) stats.getValue("conceptualizations"), BigInteger.ZERO);
BigInteger lexicalEntries = Literals
.getIntegerValue((Literal) stats.getValue("lexicalEntries"), BigInteger.ZERO);
if (conceptSet == null) {
if (conceptualization_referenceDatasetUriSpace == null) {
conceptSet = mainDataset;
} else {
conceptSet = additionalRefDatasetRes
.get(conceptualization_referenceDatasetUriSpace);
if (conceptSet == null) {
conceptSet = profilerContext.mintDatasetResource();
additionalRefDatasetRes.put(conceptualization_referenceDatasetUriSpace,
conceptSet);
ConceptSetStats missingConceptSetStats = new ConceptSetStats();
missingConceptSetStats
.setUriSpace(conceptualization_referenceDatasetUriSpace);
additionalRefDatasetStats.put(conceptualization_referenceDatasetUriSpace,
missingConceptSetStats);
}
}
}
ConceptualizationSetStatistics statsObj = new ConceptualizationSetStatistics();
statsObj.setConcepts(concepts);
statsObj.setConceptualizations(conceptualizations);
statsObj.setLexicalEntries(lexicalEntries);
statsObj.setConceptualDataset(conceptSet);
statsObj.setLexiconDataset(lexiconDataset);
Optional lexiconEntries;
try (RepositoryResult repositoryResult = metadataConnection.getStatements(lexiconDataset, LIME.LEXICAL_ENTRIES,
null, profilerContext.getOptions().getContexts());
Stream stream = QueryResults.stream(repositoryResult)) {
lexiconEntries = stream
.map(Statement::getObject).filter(Literal.class::isInstance)
.map(l -> Literals.getDecimalValue(l, BigDecimal.ZERO)).findFirst();
}
Optional conceptSetConcepts;
try (RepositoryResult repositoryResult = metadataConnection.getStatements(conceptSet, LIME.CONCEPTS, null,
profilerContext.getOptions().getContexts());
Stream stream = QueryResults.stream(repositoryResult)) {
conceptSetConcepts = stream
.map(Statement::getObject).filter(Literal.class::isInstance)
.map(l -> Literals.getDecimalValue(l, BigDecimal.ZERO)).findFirst();
}
BigDecimal conceptualizationsAsDecimal = new BigDecimal(conceptualizations);
lexiconEntries.ifPresent(le -> statsObj.setAvgAmbiguity(
conceptualizationsAsDecimal.divide(le, 3, BigDecimal.ROUND_CEILING)));
conceptSetConcepts.ifPresent(cs -> statsObj.setAvgSynonymy(
conceptualizationsAsDecimal.divide(cs, 3, BigDecimal.ROUND_CEILING)));
Resource conceptualizationSetResource = profilerContext
.mintConceptualizationSetResource(conceptSet, lexiconDataset);
Model graph = new LinkedHashModel();
statsObj.serialize(graph, conceptualizationSetResource);
metadataConnection.add(graph);
metadataConnection.add(mainDataset, VOID.SUBSET, conceptualizationSetResource);
metadataConnection.add(graph);
}
}
Model graph = new LinkedHashModel();
for (Map.Entry entry : additionalRefDatasetStats.entrySet()) {
Resource refDatRes = additionalRefDatasetRes.get(entry.getKey());
entry.getValue().serialize(graph, refDatRes);
}
metadataConnection.add(graph);
} catch (RDF4JException e) {
throw new ProfilerException(e);
}
}
// public void profileLexicalLinksets(RepositoryConnection conn, IRI semanticModel, IRI[] graphs)
// throws ProfilerException {
// Map conceptSetsStats = profileConceptSets(conn, graphs);
// SemanticModelProfiler semanticModelProfiler = getSemanticModelProfiler(semanticModel);
// ReferenceDatasetStatistics referenceDatasetStats = semanticModelProfiler.profile(conn, graphs);
//
// ValueFactory vf = conn.getValueFactory();
//
// try {
// TupleQuery query = conn.prepareTupleQuery(
// // @formatter:off
// " PREFIX rdfs: \n" +
// " PREFIX ontolex: \n" +
// " PREFIX lime: \n" +
// " PREFIX skos: \n" +
// " \n" +
// " SELECT ?conceptSet (COUNT(DISTINCT ?resource) as ?references) (COUNT (DISTINCT ?lexicalConcept) AS ?concepts)(COUNT(?lexicalConcept) as ?links) \n" +
// " WHERE { \n" +
// " ?resource ontolex:concept|^ontolex:isConceptOf ?lexicalConcept . \n" +
// " OPTIONAL { \n" +
// " ?lexicalConcept skos:inScheme ?conceptSet \n" +
// " } \n" +
// " } \n" +
// " GROUP BY ?conceptSet \n"
// // @formatter:on
// );
//
// Optional defaultConceptSet = conceptSetsStats.entrySet().stream()
// .filter(e -> !e.getValue().isEntriesExplicit()).map(e -> e.getKey()).findAny();
//
// SimpleDataset dataset = new SimpleDataset();
// Arrays.stream(graphs).forEach(dataset::addDefaultGraph);
// query.setIncludeInferred(false);
//
// Collection linksetStats = QueryResults.stream(query.evaluate()).map(bs -> {
// LexicalLinksetStats stats = new LexicalLinksetStats();
//
// stats.setLinkPredicate(ONTOLEX.CONCEPT);
// stats.setReferences(
// Literals.getIntegerValue((Literal) bs.getValue("references"), BigInteger.ZERO));
// stats.setConcepts(
// Literals.getIntegerValue((Literal) bs.getValue("concepts"), BigInteger.ZERO));
// stats.setLinks(Literals.getIntegerValue((Literal) bs.getValue("links"), BigInteger.ZERO));
//
// Value conceptSetValue = bs.getValue("conceptSet");
// ConceptSetStats singleConceptSetStats;
//
// if (conceptSetValue == null) {
// if (defaultConceptSet.isPresent()) {
// conceptSetValue = defaultConceptSet.get();
// }
// }
//
// if (conceptSetValue == null) {
// return null;
// }
//
// singleConceptSetStats = conceptSetsStats.get(conceptSetValue);
// if (singleConceptSetStats == null) {
// return null;
// }
//
// stats.setAvgNumOfLinks(new BigDecimal(
// Literals.getIntegerValue((Literal) bs.getValue("links"), BigInteger.ZERO)).divide(
// new BigDecimal(referenceDatasetStats.getEntities()), 3,
// BigDecimal.ROUND_CEILING));
// return stats;
// }).filter(o -> o != null).collect(Collectors.toList());
//
// Model statsModel = new TreeModel();
//
// referenceDatasetStats.serialize(statsModel, vf.createIRI("http://referenceDataset"));
// conceptSetsStats.forEach((iri, stats) -> {
// stats.serialize(statsModel, iri);
// });
//
// int counter = 0;
//
// for (LexicalLinksetStats stats : linksetStats) {
// IRI iri = vf.createIRI("http://lexical_linkset/" + (++counter));
//
// stats.serialize(statsModel, iri);
// }
//
// } catch (RDF4JException e) {
// throw new ProfilerException(e);
// }
// }
protected SemanticModelProfiler getSemanticModelProfiler(IRI semanticModel)
throws UnknownSemanticModelException {
return getSemanticModelProfiler2(semanticModel)
.orElseThrow(() -> new UnknownSemanticModelException(semanticModel));
}
protected Optional getSemanticModelProfiler2(IRI semanticModel) {
if (semanticModel.stringValue().equals("http://www.w3.org/2002/07/owl")) {
return Optional.of(new OWLSemanticModelProfiler());
} else if (semanticModel.stringValue().equals("http://www.w3.org/2004/02/skos/core")) {
return Optional.of(new SKOSSemanticModelProfiler());
} else {
return Optional.empty();
}
}
protected LexicalizationModelProfiler getLexicalizationModelProfiler(IRI lexicalizationModel)
throws UnknownLexicalizationModelException {
return getLexicalizationModelProfiler2(lexicalizationModel)
.orElseThrow(() -> new UnknownLexicalizationModelException(lexicalizationModel));
}
protected Optional getLexicalizationModelProfiler2(IRI lexicalizationModel) {
if (lexicalizationModel.equals(RDFSLexicalizationModelProfiler.RDFS_LEXICALIZATION_MODEL)) {
return Optional.of(new RDFSLexicalizationModelProfiler());
} else if (lexicalizationModel.equals(SKOSLexicalizationModelProfiler.SKOS_LEXICALIZATION_MODEL)) {
return Optional.of(new SKOSLexicalizationModelProfiler());
} else if (lexicalizationModel
.equals(SKOSXLLexicalizationModelProfiler.SKOSXL_LEXICALIZATION_MODEL)) {
return Optional.of(new SKOSXLLexicalizationModelProfiler());
} else if (lexicalizationModel
.equals(OntoLexLexicalizationModelProfiler.ONTOLEX_LEXICALIZATION_MODEL)) {
return Optional.of(new OntoLexLexicalizationModelProfiler());
} else {
return Optional.empty();
}
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy (artifact-page footer; extraction artifact)