All downloads are free. The search and download functionality uses the official Maven repository.

it.uniroma2.art.lime.profiler.impl.OWLSemanticModelProfiler Maven / Gradle / Ivy

The newest version!
package it.uniroma2.art.lime.profiler.impl;

import java.math.BigInteger;
import java.util.Arrays;
import java.util.stream.Collectors;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.impl.LinkedHashModel;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.util.Literals;
import org.eclipse.rdf4j.model.vocabulary.OWL;
import org.eclipse.rdf4j.query.QueryResults;
import org.eclipse.rdf4j.query.TupleQuery;
import org.eclipse.rdf4j.query.impl.SimpleDataset;
import org.eclipse.rdf4j.queryrender.RenderUtils;
import org.eclipse.rdf4j.repository.RepositoryConnection;

import com.google.common.collect.Sets;

import it.uniroma2.art.lime.model.repo.LIMERepositoryConnectionWrapper;
import it.uniroma2.art.lime.profiler.ProfilerContext;
import it.uniroma2.art.lime.profiler.ProfilerException;
import it.uniroma2.art.lime.profiler.ReferenceDatasetStatistics;
import it.uniroma2.art.lime.profiler.SemanticModelProfiler;

/**
 * {@link SemanticModelProfiler} that counts OWL classes, properties and instances by means of
 * SPARQL queries and reports them through an {@link OWLReferenceDatasetStatistics}.
 *
 * <p>
 * Three counts are computed by each entry point:
 * <ul>
 * <li>classes: distinct resources typed with {@code owl:Class} or with a (transitive) subclass of
 * it;</li>
 * <li>properties: distinct resources typed with {@code owl:ObjectProperty},
 * {@code owl:DatatypeProperty}, {@code owl:AnnotationProperty}, {@code owl:OntologyProperty} or
 * with a (transitive) subclass of one of them;</li>
 * <li>instances: distinct resources whose {@code rdf:type} is itself counted as a class by the
 * criterion above.</li>
 * </ul>
 * The number of entities is the plain sum of the three counts (a resource matching more than one
 * criterion is counted once per criterion).
 */
public class OWLSemanticModelProfiler implements SemanticModelProfiler {

	/** Token replaced with the rows of the trailing {@code VALUES} clause. */
	private static final String PLACEHOLDER = "%PLACEHOLDER%";

	/** Name of the projected variable holding the count in every query. */
	private static final String COUNT_VARIABLE = "c";

	/**
	 * Shared SPARQL prologue. Each prefix must be followed by its namespace IRI, otherwise the
	 * query is rejected by the parser.
	 */
	private static final String PREFIXES =
			// @formatter:off
			" prefix owl: <http://www.w3.org/2002/07/owl#>                          \n" +
			" prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>                  \n" +
			"                                                                       \n"
			// @formatter:on
	;

	/** Counts resources typed with (a subclass of) one of the classes listed in VALUES. */
	private static final String TYPED_RESOURCE_QUERY = PREFIXES +
			// @formatter:off
			" select (count(distinct ?resource) as ?c) {                            \n" +
			" 	?metaclass rdfs:subClassOf* ?cls .                                  \n" +
			" 	?resource a ?metaclass .                                            \n" +
			" }                                                                     \n" +
			" VALUES(?cls){%PLACEHOLDER%}\n"
			// @formatter:on
	;

	/** Counts resources whose type is itself an instance of (a subclass of) owl:Class. */
	private static final String INSTANCE_QUERY = PREFIXES +
			// @formatter:off
			" select (count(distinct ?resource) as ?c) {                            \n" +
			" 	?resource a ?cls .                                                  \n" +
			" 	FILTER EXISTS {                                                     \n" +
			" 		?metaclass rdfs:subClassOf* owl:Class .                         \n" +
			" 		?cls a ?metaclass .                                             \n" +
			" 	}                                                                   \n" +
			" }                                                                     \n"
			// @formatter:on
	;

	/** Same as {@link #TYPED_RESOURCE_QUERY}, but the typing triple is matched in ?dataGraph. */
	private static final String GRAPH_TYPED_RESOURCE_QUERY = PREFIXES +
			// @formatter:off
			" select (count(distinct ?resource) as ?c) {                            \n" +
			" 	?metaclass rdfs:subClassOf* ?cls .                                  \n" +
			" 	GRAPH ?dataGraph {                                                  \n" +
			" 	  ?resource a ?metaclass .                                          \n" +
			"   }                                                                   \n" +
			" }                                                                     \n" +
			" VALUES(?cls){%PLACEHOLDER%}\n"
			// @formatter:on
	;

	/** Same as {@link #INSTANCE_QUERY}, but both typing triples are matched in ?dataGraph. */
	private static final String GRAPH_INSTANCE_QUERY = PREFIXES +
			// @formatter:off
			" select (count(distinct ?resource) as ?c) {                            \n" +
			" 	GRAPH ?dataGraph {                                                  \n" +
			" 	  ?resource a ?cls .                                                \n" +
			"   }                                                                   \n" +
			" 	FILTER EXISTS {                                                     \n" +
			"     ?metaclass rdfs:subClassOf* owl:Class .                           \n" +
			" 	  GRAPH ?dataGraph {                                                \n" +
			" 	    ?cls a ?metaclass .                                             \n" +
			"     }                                                                 \n" +
			" 	}                                                                   \n" +
			" }                                                                     \n"
			// @formatter:on
	;

	/**
	 * Profiles the given graphs of a repository, without considering inferred statements.
	 *
	 * @param conn
	 *            connection to the repository holding the data
	 * @param graphs
	 *            graphs used both for the triple count and as default graphs of the query dataset
	 * @return statistics carrying the triple count, the OWL conformance IRI and the entity, class
	 *         and property counts
	 * @throws ProfilerException
	 *             declared by the implemented interface
	 */
	@Override
	public ReferenceDatasetStatistics profile(RepositoryConnection conn, IRI[] graphs)
			throws ProfilerException {
		long triples = conn.size(graphs);

		SimpleDataset dataset = new SimpleDataset();
		Arrays.stream(graphs).forEach(dataset::addDefaultGraph);

		BigInteger classCount = readCount(prepareCounter(conn,
				TYPED_RESOURCE_QUERY.replace(PLACEHOLDER, valuesRows(OWL.CLASS)), dataset, false));

		BigInteger propertyCount = readCount(prepareCounter(conn,
				TYPED_RESOURCE_QUERY.replace(PLACEHOLDER, propertyValuesRows()), dataset, false));

		BigInteger instanceCount = readCount(prepareCounter(conn, INSTANCE_QUERY, dataset, false));

		BigInteger entityCount = classCount.add(propertyCount).add(instanceCount);

		OWLReferenceDatasetStatistics stats = newStatistics(entityCount, classCount, propertyCount);
		stats.setTriples(BigInteger.valueOf(triples));

		return stats;
	}

	/**
	 * Profiles the content of {@code dataGraph} and, if any OWL entity is found, adds the
	 * serialized statistics to the metadata repository.
	 *
	 * @param profilerContext
	 *            supplies the contexts used as default graphs and the inference flag
	 * @param metadataConn
	 *            connection receiving the serialized statistics
	 * @param dataConn
	 *            connection the counting queries are evaluated against
	 * @param dataGraph
	 *            value bound to the {@code ?dataGraph} variable of the queries
	 * @param mainDataset
	 *            resource passed to {@code OWLReferenceDatasetStatistics.serialize}
	 * @return {@code false} if no class, property or instance was counted (nothing is written),
	 *         {@code true} otherwise
	 * @throws ProfilerException
	 *             declared by the implemented interface
	 */
	@Override
	public boolean profile(ProfilerContext profilerContext, LIMERepositoryConnectionWrapper metadataConn,
			RepositoryConnection dataConn, Resource dataGraph, Resource mainDataset)
			throws ProfilerException {
		IRI[] graphs = profilerContext.getOptions().getContexts();
		boolean includeInferred = profilerContext.getOptions().isIncludeInferred();

		// NOTE(review): only default graphs are declared here while the queries use GRAPH ?dataGraph;
		// per the RDF4J Dataset contract a non-empty default graph set with no named graphs may make
		// GRAPH patterns match nothing -- confirm this is intended when contexts are non-empty.
		SimpleDataset dataset = new SimpleDataset();
		Arrays.stream(graphs).forEach(dataset::addDefaultGraph);

		TupleQuery classCounter = prepareCounter(dataConn,
				GRAPH_TYPED_RESOURCE_QUERY.replace(PLACEHOLDER, valuesRows(OWL.CLASS)), dataset,
				includeInferred);
		classCounter.setBinding("dataGraph", dataGraph);
		BigInteger classCount = readCount(classCounter);

		TupleQuery propertyCounter = prepareCounter(dataConn,
				GRAPH_TYPED_RESOURCE_QUERY.replace(PLACEHOLDER, propertyValuesRows()), dataset,
				includeInferred);
		propertyCounter.setBinding("dataGraph", dataGraph);
		BigInteger propertyCount = readCount(propertyCounter);

		TupleQuery instanceCounter = prepareCounter(dataConn, GRAPH_INSTANCE_QUERY, dataset,
				includeInferred);
		instanceCounter.setBinding("dataGraph", dataGraph);
		BigInteger instanceCount = readCount(instanceCounter);

		BigInteger entityCount = classCount.add(propertyCount).add(instanceCount);

		if (entityCount.equals(BigInteger.ZERO)) {
			return false;
		}

		OWLReferenceDatasetStatistics stats = newStatistics(entityCount, classCount, propertyCount);

		Model graph = new LinkedHashModel();
		stats.serialize(graph, mainDataset);

		metadataConn.add(graph);

		return true;
	}

	/** Renders each IRI as a one-column row, {@code (<iri>)}, for a SPARQL VALUES clause. */
	private static String valuesRows(IRI... classes) {
		return Arrays.stream(classes).map(c -> "(" + RenderUtils.toSPARQL(c) + ")")
				.collect(Collectors.joining(""));
	}

	/** VALUES rows for the OWL classes whose instances are counted as properties. */
	private static String propertyValuesRows() {
		return valuesRows(OWL.OBJECTPROPERTY, OWL.DATATYPEPROPERTY, OWL.ANNOTATIONPROPERTY,
				OWL.ONTOLOGYPROPERTY);
	}

	/** Prepares a tuple query and applies the dataset and the inference flag to it. */
	private static TupleQuery prepareCounter(RepositoryConnection conn, String queryString,
			SimpleDataset dataset, boolean includeInferred) {
		TupleQuery counter = conn.prepareTupleQuery(queryString);
		counter.setDataset(dataset);
		counter.setIncludeInferred(includeInferred);
		return counter;
	}

	/** Evaluates a counting query and reads {@code ?c}, falling back to zero if not an integer. */
	private static BigInteger readCount(TupleQuery counter) {
		return Literals.getIntegerValue(
				QueryResults.singleResult(counter.evaluate()).getValue(COUNT_VARIABLE),
				BigInteger.ZERO);
	}

	/** Creates statistics carrying the OWL conformance IRI and the given counts. */
	private static OWLReferenceDatasetStatistics newStatistics(BigInteger entityCount,
			BigInteger classCount, BigInteger propertyCount) {
		OWLReferenceDatasetStatistics stats = new OWLReferenceDatasetStatistics();
		stats.setConformance(
				Sets.newHashSet(SimpleValueFactory.getInstance().createIRI("http://www.w3.org/2002/07/owl")));
		stats.setEntities(entityCount);
		stats.setClassNumber(classCount);
		stats.setPropertyNumber(propertyCount);
		return stats;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy