// net.sansa_stack.rdf.spark.mappings.R2RMLMappings.scala

package net.sansa_stack.rdf.spark.mappings
import net.sansa_stack.rdf.common.partition.core.RdfPartitionerDefault
import net.sansa_stack.rdf.spark.partition._
import org.apache.jena.graph.Triple
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.SparkSession
/**
 * Provides a set of functions to deal with SQL tables and R2RML mappings.
 *
 * FIXME We now have a proper R2RML API and we are refactoring partitioning, so this class is a removal candidate ~ Claus 2020-01-16
 *
 * @author dgraux
 */
object R2RMLMappings extends Serializable {

  @transient val spark: SparkSession = SparkSession.builder().getOrCreate()

  /**
   * Returns the instructions to create the SQL tables (one table per predicate partition).
   *
   * @param triples RDD of triples to partition into tables
   * @param spark   current SparkSession
   */
  def loadSQLTables(triples: RDD[Triple], spark: SparkSession): Iterable[String] = {
    val partitions = triples.partitionGraph()
    // Generating commands to create SQL tables.
    val schemaSQLTable = partitions.map {
      case (p, rdd) =>
        /* Since data can be wrongly formatted,
         * just skip those details for the moment…
        p.layout.schema.toString match {
          case "net.sansa_stack.rdf.common.partition.schema.SchemaStringDouble" => "CREATE TABLE " + p.predicate.replaceAll("[^A-Za-z0-9]", "-") + " (s STRING, o FLOAT);" // No 'double' in SQL
          case "net.sansa_stack.rdf.common.partition.schema.SchemaStringLong" => "CREATE TABLE " + p.predicate.replaceAll("[^A-Za-z0-9]", "-") + " (s STRING, o BIGINT);" // No 'long' in SQL
          case "net.sansa_stack.rdf.common.partition.schema.SchemaStringString" => "CREATE TABLE " + p.predicate.replaceAll("[^A-Za-z0-9]", "-") + " (s STRING, o STRING);"
          case "net.sansa_stack.rdf.common.partition.schema.SchemaStringStringLang" => "CREATE TABLE " + p.predicate.replaceAll("[^A-Za-z0-9]", "-") + " (s STRING, o STRING, l STRING);"
          case "net.sansa_stack.rdf.common.partition.schema.SchemaStringStringType" => "CREATE TABLE " + p.predicate.replaceAll("[^A-Za-z0-9]", "-") + " (s STRING, o STRING, t STRING);"
          case _ => println("error: schema unknown")
        }
        */
        "CREATE TABLE IF NOT EXISTS " + p.predicate.replaceAll("[^A-Za-z0-9]", "_") + " (s STRING, o STRING, l STRING)"
    }
    schemaSQLTable
  }
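
  /* Usage sketch: assuming the triples have been loaded into an RDD[Triple], e.g. via the
   * SANSA RDF I/O layer, the generated DDL can be run against the Spark SQL catalog (this
   * assumes a SparkSession with catalog/Hive support; `tripleRDD` and the path are
   * illustrative names only).
   * {{{
   * import net.sansa_stack.rdf.spark.io._
   * import org.apache.jena.riot.Lang
   *
   * val tripleRDD = spark.rdf(Lang.NTRIPLES)("path/to/data.nt")
   * // One CREATE TABLE IF NOT EXISTS ... statement per predicate partition
   * R2RMLMappings.loadSQLTables(tripleRDD, spark).foreach(ddl => spark.sql(ddl))
   * }}}
   */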

  /**
   * Returns the instructions to fill the SQL tables (one INSERT statement per triple).
   *
   * @param triples RDD of triples to insert
   * @param spark   current SparkSession
   */
  def insertSQLTables(triples: RDD[Triple], spark: SparkSession): RDD[String] = {
    val insertSQL = triples.map { // .getTriples.map {
      case t =>
        // Table names are derived from the predicate, as in loadSQLTables.
        val tablename = t.getPredicate.toString().replaceAll("[^A-Za-z0-9]", "_")
        val subj = RdfPartitionerDefault.getUriOrBNodeString(t.getSubject)
        // Literals carry their language tag or datatype URI as third column; resources get an empty one.
        val complement = if (t.getObject.isLiteral) {
          if (t.getObject.getLiteralLanguage != "") {
            "\"" + t.getObject.getLiteralLexicalForm + "\" , \"" + t.getObject.getLiteralLanguage + "\""
          } else {
            "\"" + t.getObject.getLiteralLexicalForm + "\" , \"" + t.getObject.getLiteralDatatypeURI + "\""
          }
        } else {
          "\"" + RdfPartitionerDefault.getUriOrBNodeString(t.getObject) + "\" , \"\""
        }
        val statement = "INSERT INTO " + tablename + " VALUES ( \"" + subj + "\" , " + complement + " ) "
        statement
    }
    insertSQL
  }
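
  /* Usage sketch: the INSERT statements come back as a distributed RDD[String]; a simple,
   * driver-side way to apply them is to collect and feed them to spark.sql one by one
   * (illustrative only, this would not scale to large data; `tripleRDD` is assumed to be
   * an RDD[Triple] as in the sketch above).
   * {{{
   * val insertStatements = R2RMLMappings.insertSQLTables(tripleRDD, spark)
   * insertStatements.collect().foreach(stmt => spark.sql(stmt))
   * }}}
   */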

  /**
   * Returns the R2RML mappings (one triples map per predicate partition).
   *
   * @param triples RDD of triples to map
   * @param spark   current SparkSession
   */
  def generateR2RMLMappings(triples: RDD[Triple], spark: SparkSession): Iterable[String] = {
    var mappingNumber = 1
    val partitions = triples.partitionGraph()
    val r2rmlMappings = partitions.map {
      case (p, rdd) =>
        // p.layout.schema.toString match {
        RdfPartitionerDefault.determineLayout(p).schema.toString match {
          case _ =>
var mapping = " a rr:TriplesMapClass ; "
mapping += "rr:logicalTable [rr:SQLQuery \"\"\"SELECT s , o , l FROM " + p.predicate.replaceAll("[^A-Za-z0-9]", "_") + " \"\"\"] ; "
mapping += "rr:subjectMap [ rr:column \"s\"] ; "
mapping += "rr:predicateObjectMap [ rr:predicate " + p.predicate + " ; rr:objectMap [ rr:column \"o\"] ] . "
mappingNumber += 1;
mapping;
}
      // Since data can be badly formatted we still have to be a bit prudent.
    }
    r2rmlMappings
  }
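
  /* Usage sketch: each returned string is a Turtle fragment using the rr: vocabulary, so a
   * complete R2RML document can be assembled by prepending the prefix declaration and
   * concatenating the mappings (file name and prefix block are illustrative assumptions;
   * `tripleRDD` as in the sketches above).
   * {{{
   * import java.nio.file.{Files, Paths}
   *
   * val mappings = R2RMLMappings.generateR2RMLMappings(tripleRDD, spark)
   * val document = "@prefix rr: <http://www.w3.org/ns/r2rml#> .\n\n" + mappings.mkString("\n")
   * Files.write(Paths.get("mappings.ttl"), document.getBytes("UTF-8"))
   * }}}
   */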
}