![JAR search and dependency download from the Maven repository](/logo.png)
net.sansa_stack.rdf.spark.qualityassessment.package.scala Maven / Gradle / Ivy
package net.sansa_stack.rdf.spark
import net.sansa_stack.rdf.spark.qualityassessment.metrics.availability.DereferenceableUris
import net.sansa_stack.rdf.spark.qualityassessment.metrics.completeness._
import net.sansa_stack.rdf.spark.qualityassessment.metrics.conciseness.ExtensionalConciseness
import net.sansa_stack.rdf.spark.qualityassessment.metrics.interlinking.ExternalSameAsLinks
import net.sansa_stack.rdf.spark.qualityassessment.metrics.licensing._
import net.sansa_stack.rdf.spark.qualityassessment.metrics.performance.NoHashURIs
import net.sansa_stack.rdf.spark.qualityassessment.metrics.relevancy._
import net.sansa_stack.rdf.spark.qualityassessment.metrics.reprconciseness._
import net.sansa_stack.rdf.spark.qualityassessment.metrics.syntacticvalidity._
import net.sansa_stack.rdf.spark.qualityassessment.metrics.understandability.LabeledResources
import org.apache.jena.graph.Triple
import org.apache.spark.rdd.RDD
/**
 * Entry point for the RDF quality assessment API.
 *
 * Importing this package object brings [[QualityAssessmentOperations]] into
 * implicit scope, so that every quality metric can be invoked directly on an
 * `RDD[Triple]`.
 *
 * @author Gezim Sejdiu
 */
package object qualityassessment {

  /**
   * Enriches an `RDD[Triple]` with one method per quality metric.
   * Every method simply forwards the wrapped RDD to the metric object of the
   * same name; no computation happens in this class itself.
   *
   * @param triples the RDF triples to be assessed
   */
  implicit class QualityAssessmentOperations(triples: RDD[Triple]) {

    // ---- Availability ------------------------------------------------------

    /**
     * Dereferenceable URIs.
     * Relates the number of valid URI redirects (the "dereferenced URIs",
     * counted over subjects, predicates and objects) to the total number
     * of URIs in the dataset.
     *
     * @return the value computed by `DereferenceableUris.assessDereferenceableUris`
     */
    def assessDereferenceableUris(): Double = {
      DereferenceableUris.assessDereferenceableUris(triples)
    }

    /**
     * Dereferenceable back-links.
     * Measures to what extent a resource includes all triples of the dataset
     * that carry the resource's URI as their object. The ratio relates the
     * objects that are "back-links" (part of the resource's URI) to the
     * total number of objects.
     *
     * @return the value computed by `DereferenceableUris.assessDereferenceableBackLinks`
     */
    def assessDereferenceableBackLinks(): Double = {
      DereferenceableUris.assessDereferenceableBackLinks(triples)
    }

    /**
     * Dereferenceable forward-links.
     * Measures to what extent a resource includes all triples of the dataset
     * that carry the resource's URI as their subject. The ratio relates the
     * subjects that are "forward-links" (part of the resource's URI) to the
     * total number of subjects.
     *
     * @return the value computed by `DereferenceableUris.assessDereferenceableForwardLinks`
     */
    def assessDereferenceableForwardLinks(): Double = {
      DereferenceableUris.assessDereferenceableForwardLinks(triples)
    }

    // ---- Completeness ------------------------------------------------------

    /**
     * Interlinking completeness.
     * Any resource of a dataset can be interlinked with a resource of a
     * foreign dataset; this metric makes a statement about interlinked
     * resources in relation to the resources that could potentially be
     * interlinked.
     *
     * NOTE(review): the description speaks of a ratio, yet the result type is
     * `Long` - confirm against `InterlinkingCompleteness` what is returned.
     *
     * @return the value computed by `InterlinkingCompleteness.assessInterlinkingCompleteness`
     */
    def assessInterlinkingCompleteness(): Long = {
      InterlinkingCompleteness.assessInterlinkingCompleteness(triples)
    }

    /**
     * Property completeness.
     * Checks for missing object values for a given predicate and a given
     * class of subjects, i.e. whether instances of the RDF class contain the
     * RDF predicate.
     *
     * NOTE(review): this method takes no arguments, so the RDF class and
     * predicate are presumably configured elsewhere - confirm where.
     *
     * @return the value computed by `PropertyCompleteness.assessPropertyCompleteness`
     */
    def assessPropertyCompleteness(): Long = {
      PropertyCompleteness.assessPropertyCompleteness(triples)
    }

    /**
     * Schema completeness.
     * Ratio of the number of classes and relations of the gold standard that
     * exist in the graph to the number of classes and relations in the gold
     * standard.
     *
     * @return the value computed by `SchemaCompleteness.assessSchemaCompleteness`
     */
    def assessSchemaCompleteness(): Double = {
      SchemaCompleteness.assessSchemaCompleteness(triples)
    }

    // ---- Conciseness -------------------------------------------------------

    /**
     * Extensional conciseness.
     * Checks the assessed dataset for redundant resources and thereby
     * measures the number of unique instances found in it.
     *
     * @return No. of unique subjects / Total No. of subjects
     */
    def assessExtensionalConciseness(): Double = {
      ExtensionalConciseness.assessExtensionalConciseness(triples)
    }

    // ---- Interlinking ------------------------------------------------------

    /**
     * Checks the external sameAs links.
     *
     * @return the value computed by `ExternalSameAsLinks.assessExternalSameAsLinks`
     */
    def assessExternalSameAsLinks(): Double = {
      ExternalSameAsLinks.assessExternalSameAsLinks(triples)
    }

    // ---- Licensing ---------------------------------------------------------

    /**
     * Human-readable indication of a license.
     * Checks whether a human-readable text stating the licensing model
     * attributed to the resource has been provided as part of the dataset.
     * Objects containing literal values are inspected and their text is
     * searched for key, licensing-related terms.
     *
     * @return the value computed by `HumanReadableLicense.assessHumanReadableLicense`
     */
    def assessHumanReadableLicense(): Double = {
      HumanReadableLicense.assessHumanReadableLicense(triples)
    }

    /**
     * Machine-readable indication of a license.
     * Checks whether a machine-readable statement of the licensing model
     * attributed to the resource has been provided as part of the dataset.
     *
     * NOTE(review): this metric has so far been described with the same
     * literal-text search as `assessHumanReadableLicense`; confirm against
     * `MachineReadableLicense` that this is the actual detection strategy.
     *
     * @return the value computed by `MachineReadableLicense.assessMachineReadableLicense`
     */
    def assessMachineReadableLicense(): Double = {
      MachineReadableLicense.assessMachineReadableLicense(triples)
    }

    // ---- Performance -------------------------------------------------------

    /**
     * Checks whether URIs contain hashes.
     *
     * @return the value computed by `NoHashURIs.assessNoHashUris`
     */
    def assessNoHashUris(): Double = {
      NoHashURIs.assessNoHashUris(triples)
    }

    // ---- Relevancy ---------------------------------------------------------

    /**
     * Computes the size of the triples.
     *
     * @return the value computed by `AmountOfTriples.assessAmountOfTriples`
     */
    def assessAmountOfTriples(): Double = {
      AmountOfTriples.assessAmountOfTriples(triples)
    }

    /**
     * Coverage (detail).
     * Measures the coverage (i.e. number of entities described in a dataset)
     * and the level of detail (i.e. number of properties) of a dataset, to
     * ensure that the data retrieved is appropriate for the task at hand.
     *
     * @return the value computed by `CoverageDetail.assessCoverageDetail`
     */
    def assessCoverageDetail(): Double = {
      CoverageDetail.assessCoverageDetail(triples)
    }

    /**
     * Coverage (scope).
     * Calculates the coverage of a dataset with respect to the covered scope,
     * expressed as the number of 'instances' that statements are made about.
     *
     * @return the value computed by `CoverageScope.assessCoverageScope`
     */
    def assessCoverageScope(): Double = {
      CoverageScope.assessCoverageScope(triples)
    }

    // ---- Representational conciseness --------------------------------------

    /**
     * Query-parameter-free URIs.
     * Described as the ratio between the number of all non-queryable URIs
     * and the total number of URIs in the dataset.
     *
     * NOTE(review): "non-queryable" presumably means "carrying no query
     * parameters", as the method name suggests - confirm against
     * `QueryParamFreeURIs`.
     *
     * @return the value computed by `QueryParamFreeURIs.assessQueryParamFreeURIs`
     */
    def assessQueryParamFreeURIs(): Double = {
      QueryParamFreeURIs.assessQueryParamFreeURIs(triples)
    }

    /**
     * Short URIs.
     * Described as the ratio between the number of all long URIs and the
     * total number of URIs in the dataset.
     *
     * NOTE(review): the method name refers to short URIs while the
     * description counts long ones - confirm against `ShortURIs` which of
     * the two the result expresses.
     *
     * @return the value computed by `ShortURIs.assessShortURIs`
     */
    def assessShortURIs(): Double = {
      ShortURIs.assessShortURIs(triples)
    }

    // ---- Syntactic validity ------------------------------------------------

    /**
     * Literal numeric range check.
     * Checks for incorrect numeric ranges of the values of a given predicate
     * on a given class of subjects; the permitted range is given by a lower
     * and an upper bound.
     *
     * NOTE(review): this method takes no arguments, so the RDF class,
     * property and bounds are presumably configured elsewhere - confirm where.
     *
     * @return the value computed by `LiteralNumericRangeChecker.assessLiteralNumericRangeChecker`
     */
    def assessLiteralNumericRangeChecker(): Long = {
      LiteralNumericRangeChecker.assessLiteralNumericRangeChecker(triples)
    }

    /**
     * Checks whether the value of a typed literal is valid with regard to
     * its given XSD datatype.
     *
     * @return the value computed by `XSDDatatypeCompatibleLiterals.assessXSDDatatypeCompatibleLiterals`
     */
    def assessXSDDatatypeCompatibleLiterals(): Long = {
      XSDDatatypeCompatibleLiterals.assessXSDDatatypeCompatibleLiterals(triples)
    }

    // ---- Understandability -------------------------------------------------

    /**
     * Assesses the labeled resources of the dataset.
     *
     * @return the value computed by `LabeledResources.assessLabeledResources`
     */
    def assessLabeledResources(): Double = {
      LabeledResources.assessLabeledResources(triples)
    }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy