All downloads are FREE. Search and download functionality uses the official Maven repository.

software.amazon.neptune.csv2rdf.PropertyGraph2RdfMapping Maven / Gradle / Ivy

Go to download

A tool for Amazon Neptune that converts property graphs stored as comma separated values into RDF graphs.

The newest version!
/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package software.amazon.neptune.csv2rdf;

import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.RDFS;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;

import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;

import lombok.AccessLevel;
import lombok.Getter;
import lombok.NonNull;
import lombok.Setter;
import software.amazon.neptune.csv2rdf.NeptuneCsvUserDefinedColumn.DataType;

/**
 *
 * {@link PropertyGraph2RdfMapping} contains the configuration for mapping
 * property graph vertices and edges to RDF statements. The configuration
 * consists of several namespaces, a default type, a default named graph,
 * {@link PropertyGraph2RdfMapping#pgVertexType2PropertyForRdfsLabel} for
 * mapping certain properties to RDFS labels, and
 * {@link PropertyGraph2RdfMapping#pgProperty2RdfResourcePattern} for creating
 * RDF resources from property values. 
* It provides access to {@link PropertyGraphVertex2RdfMapping} for mapping * vertices and to {@link PropertyGraphEdge2RdfMapping} for mapping edges to RDF * statements. * */ @JsonAutoDetect(fieldVisibility = Visibility.NONE) public class PropertyGraph2RdfMapping { public static final String DEFAULT_TYPE_NAMESPACE = "http://aws.amazon.com/neptune/csv2rdf/class/"; public static final String DEFAULT_VERTEX_NAMESPACE = "http://aws.amazon.com/neptune/csv2rdf/resource/"; public static final String DEFAULT_EDGE_NAMESPACE = "http://aws.amazon.com/neptune/csv2rdf/objectProperty/"; public static final String DEFAULT_VERTEX_PROPERTY_NAMESPACE = "http://aws.amazon.com/neptune/csv2rdf/datatypeProperty/"; public static final String DEFAULT_EDGE_PROPERTY_NAMESPACE = "http://aws.amazon.com/neptune/csv2rdf/datatypeProperty/"; public static final String DEFAULT_TYPE = "http://www.w3.org/2002/07/owl#Thing"; public static final String DEFAULT_PREDICATE = DEFAULT_EDGE_NAMESPACE + "edge"; public static final String DEFAULT_NAMED_GRAPH = "http://aws.amazon.com/neptune/vocab/v01/DefaultNamedGraph"; private final ValueFactory vf = SimpleValueFactory.getInstance(); /** * * Namespace in which types are stored */ @Getter(AccessLevel.PACKAGE) @Setter private String typeNamespace = DEFAULT_TYPE_NAMESPACE; /** * * Namespace in which nodes are stored */ @Getter @Setter private String vertexNamespace = DEFAULT_VERTEX_NAMESPACE; /** * * Namespace in which edges are stored */ @Getter @Setter private String edgeNamespace = DEFAULT_EDGE_NAMESPACE; /** * * Namespace in which vertex properties are stored */ @Getter @Setter private String vertexPropertyNamespace = DEFAULT_VERTEX_PROPERTY_NAMESPACE; /** * * Namespace in which edge properties are stored */ @Getter @Setter private String edgePropertyNamespace = DEFAULT_EDGE_PROPERTY_NAMESPACE; @Getter private IRI defaultType = this.toValidatedIri(DEFAULT_TYPE); @Getter private IRI defaultPredicate = this.toValidatedIri(DEFAULT_PREDICATE); @Getter private 
IRI defaultNamedGraph = this.toValidatedIri(DEFAULT_NAMED_GRAPH); /** * *

Mapping from property graph vertex types to instance label * properties

* * Properties that need to be used for RDFS labels are represented as map from * vertex type to property name. * *
* A property that is selected for an RDFS label will be added as normal * property statement or not depending on the the configuration of * {@link PropertyGraph2RdfMapper#alwaysAddPropertyStatements}. * *

Example:

* * {@code pgVertexType2PropertyForRdfsLabel.country=code}
* defines the property value of code as label for vertices of type * country. */ @Getter @Setter private Map pgVertexType2PropertyForRdfsLabel = new HashMap<>(); /** * *

Mapping from property graph properties to RDF resources

* * Properties that need to be mapped to resources are represented as a map from * a property name to an IRI pattern. The pattern must contain the * {{VALUE}} substring. This will be substituted with the property * value. * *

Example:

* * pgProperty2RdfResourcePattern.country=http://example.org/resource/country/{{VALUE}} *
* converts the property value 'FR' of country into * http://example.org/resource/country/FR. */ @Getter private Map pgProperty2RdfResourcePattern = new HashMap<>(); /** * * A {@link PropertyGraphVertex2RdfMapping} exposing methods for creating RDF * statements for vertices according to this {@link PropertyGraph2RdfMapping}. */ @Getter private PropertyGraphVertex2RdfMapping vertex2RdfMapping = new PropertyGraphVertex2RdfMapping(this); /** * * A {@link PropertyGraphEdge2RdfMapping} exposing methods for creating RDF * statements for edges according to this {@link PropertyGraph2RdfMapping}. * * {@link PropertyGraph2RdfMapping}. */ @Getter private PropertyGraphEdge2RdfMapping edge2RdfMapping = new PropertyGraphEdge2RdfMapping(this); /** * * Set the map from property graph properties to RDF resource patterns. RDF * resource patterns must contain the string {{VALUE}}. * * @param pgProperty2RdfResourcePattern a map from properties to RDF resource * patterns * @throws Csv2RdfException if a pattern in the map does not contain {{VALUE}} */ public void setPgProperty2RdfResourcePattern(Map pgProperty2RdfResourcePattern) { for (String pattern : pgProperty2RdfResourcePattern.values()) { if (!pattern.contains(PropertyGraph2RdfConverter.REPLACEMENT_VARIABLE)) { throw new Csv2RdfException( "The pattern <" + pattern + "> for the new URI must contain the replacement variable " + PropertyGraph2RdfConverter.REPLACEMENT_VARIABLE + "."); } } this.pgProperty2RdfResourcePattern = pgProperty2RdfResourcePattern; } public void setDefaultNamedGraph(String defaultNamedGraph) { this.defaultNamedGraph = toValidatedIri(defaultNamedGraph); } public void setDefaultType(String defaultType) { this.defaultType = toValidatedIri(defaultType); } public void setDefaultPredicate(String defaultProperty) { this.defaultPredicate = toValidatedIri(defaultProperty); } /** * * Create an IRI that represents a vertex type as class (aka type) in RDF * * @param type local name, will be URI encoded * @return {@link 
PropertyGraph2RdfMapping#typeNamespace} + encoded type * @throws Csv2RdfException if the IRI cannot be created */ // visible for testing IRI typeIri(@NonNull String type) { String labelUpperCase; if (type.isEmpty()) { labelUpperCase = type; } else { labelUpperCase = Character.toUpperCase(type.charAt(0)) + type.substring(1); } String iri = typeNamespace + encode(labelUpperCase); return toValidatedIri(iri); } /** * * Create an IRI that represents a vertex in RDF * * @param vertex local name, will be URI encoded * @return {@link PropertyGraph2RdfMapping#vertexNamespace} + encoded vertex * @throws Csv2RdfException if the IRI cannot be created */ // visible for testing IRI vertexIri(@NonNull String vertex) { String iri = vertexNamespace + encode(vertex); return toValidatedIri(iri); } /** * * Create an IRI that represents an edge in RDF * * @param edge local name, will be URI encoded * @return {@link PropertyGraph2RdfMapping#edgeNamespace} + encoded edge * @throws Csv2RdfException if the IRI cannot be created */ // visible for testing IRI edgeIri(@NonNull String edge) { String iri = edgeNamespace + encode(edge); return toValidatedIri(iri); } /** * Create an IRI that represents a vertex property in RDF * * @param vertexProperty local name, will be URI encoded * @return {@link PropertyGraph2RdfMapping#vertexPropertyNamespace} + encoded * vertex property * @throws Csv2RdfException if the IRI cannot be created */ // visible for testing IRI vertexPropertyIri(@NonNull String vertexProperty) { String iri = vertexPropertyNamespace + encode(vertexProperty); return toValidatedIri(iri); } /** * * Create an IRI that represents an edge property in RDF * * @param edgeProperty local name, will be URI encoded * @return {@link PropertyGraph2RdfMapping#edgePropertyNamespace} + encoded edge * property * @throws Csv2RdfException if the IRI cannot be created */ // visible for testing IRI edgePropertyIri(@NonNull String edgeProperty) { String iri = edgePropertyNamespace + 
encode(edgeProperty); return toValidatedIri(iri); } /** * * URI encode a value using the UTF-8 encoding scheme * * @param value * @return URI encoded value * @throws Csv2RdfException if the value could not be encoded */ private String encode(String value) { try { return URLEncoder.encode(value, StandardCharsets.UTF_8.name()); } catch (UnsupportedEncodingException e) { throw new Csv2RdfException("Could not encode '" + value + "' when mapping to RDF.", e); } } /** * * Convert a string into an IRI * * @param iri * @return new {@link IRI} * @throws Csv2RdfException if the IRI cannot be created */ private IRI toValidatedIri(String iri) { try { return vf.createIRI(new URI(iri).toString()); } catch (URISyntaxException | IllegalArgumentException e) { throw new Csv2RdfException("Invalid resource URI <" + iri + "> generated when mapping to RDF.", e); } } /** * * Return a literal value including an XML schema data type for all type in * {@link DataType} except {@link DataType#STRING}: *
    *
  • Values of {@link DataType#STRING} are not appended with an XML schema * data type.
  • *
* * @param value * @param datatype * @return literal with XML schema data type except for strings * @throws IllegalArgumentException if the data type is not recognized */ // visible for testing Literal value(@NonNull String value, @NonNull DataType datatype) { switch (datatype) { case BYTE: return vf.createLiteral(value, XMLSchema.BYTE); case BOOL: return vf.createLiteral(value, XMLSchema.BOOLEAN); case SHORT: return vf.createLiteral(value, XMLSchema.SHORT); case INT: return vf.createLiteral(value, XMLSchema.INTEGER); case LONG: return vf.createLiteral(value, XMLSchema.LONG); case FLOAT: return vf.createLiteral(value, XMLSchema.FLOAT); case DOUBLE: return vf.createLiteral(value, XMLSchema.DOUBLE); case STRING: return vf.createLiteral(value); case DATETIME: return vf.createLiteral(value, XMLSchema.DATE); default: throw new IllegalArgumentException("Data type not recognized: " + datatype + " for value " + value); } } /** * * Create a RDF statement. * * @param subject * @param predicate * @param object * @param graph * @return a new RDF statement */ private Statement statement(@NonNull IRI subject, @NonNull IRI predicate, @NonNull Value object, @NonNull IRI graph) { return vf.createStatement(subject, predicate, object, graph); } /** * * {@link PropertyGraphVertex2RdfMapping} contains methods that are necessary to * create RDF statements from vertices. * */ public static class PropertyGraphVertex2RdfMapping { /** * * This mapping is used for creating RDF statements. */ private final PropertyGraph2RdfMapping mapping; private PropertyGraphVertex2RdfMapping(PropertyGraph2RdfMapping mapping) { this.mapping = mapping; } /** * * Check if values of the given property can be mapped to an RDF resource. 
* * @param property vertex property * @return {@code true} if there is pattern to build a resource, else * {@code false} */ public boolean containsRdfResourcePatternForProperty(String property) { return mapping.pgProperty2RdfResourcePattern.containsKey(property); } /** * * Create a resource for the value of the given property using the configured * resource pattern for the property. The configuration needs to be done in * {@link PropertyGraph2RdfMapping#pgProperty2RdfResourcePattern}. * * @param property vertex property * @param value value of the property * @return a resource IRI */ public IRI mapPropertyValue2RdfResource(String property, String value) { String resourcePattern = mapping.pgProperty2RdfResourcePattern.get(property); if (resourcePattern == null) { return null; } String resource = resourcePattern.replace(PropertyGraph2RdfConverter.REPLACEMENT_VARIABLE, mapping.encode(value)); return mapping.toValidatedIri(resource); } /** * * Get the property whose values should be used as RDFS labels for the given * vertex type. The mapping from vertex type to property needs to be configured * in {@link PropertyGraph2RdfMapping#pgVertexType2PropertyForRdfsLabel}. 
* * @param vertexType type of the vertex (property ~label) * @return property for creating RDFS labels */ public String getPropertyForRdfsLabel(String vertexType) { return mapping.pgVertexType2PropertyForRdfsLabel.get(vertexType); } /** * * @param subject local name of the subject, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param type local name of the type, will be prefixed with * {@link PropertyGraph2RdfMapping#typeIri} * @return a type statement in * {@link PropertyGraph2RdfMapping#defaultNamedGraph} */ public Statement createTypeStatement(@NonNull String subject, @NonNull String type) { return mapping.statement(mapping.vertexIri(subject), RDF.TYPE, mapping.typeIri(type), mapping.getDefaultNamedGraph()); } /** * * @param subject local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @return a type statement in * {@link PropertyGraph2RdfMapping#defaultNamedGraph} using * {@link PropertyGraph2RdfMapping#defaultType} as type */ public Statement createTypeStatement(@NonNull String subject) { return mapping.statement(mapping.vertexIri(subject), RDF.TYPE, mapping.getDefaultType(), mapping.getDefaultNamedGraph()); } /** * * @param subject local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param label RDFS label value * @return a statement in {@link PropertyGraph2RdfMapping#defaultNamedGraph} */ public Statement createRdfsLabelStatement(@NonNull String subject, @NonNull String label) { return mapping.statement(mapping.vertexIri(subject), RDFS.LABEL, mapping.value(label, DataType.STRING), mapping.getDefaultNamedGraph()); } /** * * @param subject local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param predicate local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexPropertyIri} * @param literal literal value * @param dataType data type of the value * @return a statement in {@link PropertyGraph2RdfMapping#defaultNamedGraph} */ public 
Statement createLiteralStatement(@NonNull String subject, @NonNull String predicate, @NonNull String literal, @NonNull DataType dataType) { return mapping.statement(mapping.vertexIri(subject), mapping.vertexPropertyIri(predicate), mapping.value(literal, dataType), mapping.getDefaultNamedGraph()); } /** * * @param subject local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param predicate local name, will be prefixed with * {@link PropertyGraph2RdfMapping#edgeIri} * @param value value will be mapped to an RDF resource by * {@link #mapPropertyValue2RdfResource} * @return a statement in {@link PropertyGraph2RdfMapping#defaultNamedGraph} */ public Statement createRelationStatement(@NonNull String subject, @NonNull String predicate, @NonNull String value) { return mapping.statement(mapping.vertexIri(subject), mapping.edgeIri(predicate), mapPropertyValue2RdfResource(predicate, value), mapping.getDefaultNamedGraph()); } } /** * * {@link PropertyGraphEdge2RdfMapping} contains methods that are necessary to * create RDF statements from edges. */ public static class PropertyGraphEdge2RdfMapping { /** * * This mapping is used for creating RDF statements. 
*/ private final PropertyGraph2RdfMapping mapping; private PropertyGraphEdge2RdfMapping(PropertyGraph2RdfMapping mapping) { this.mapping = mapping; } /** * * @param subject local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param predicate local name, will be prefixed with * {@link PropertyGraph2RdfMapping#edgeIri} * @param object local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param context local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @return a statement in {@link PropertyGraph2RdfMapping#vertexIri}(context) */ public Statement createRelationStatement(@NonNull String subject, @NonNull String predicate, @NonNull String object, @NonNull String context) { return mapping.statement(mapping.vertexIri(subject), mapping.edgeIri(predicate), mapping.vertexIri(object), mapping.vertexIri(context)); } /** * * @param subject local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param object local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param context local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @return a statement in {@link PropertyGraph2RdfMapping#vertexIri}(context) * using {@link PropertyGraph2RdfMapping#defaultPredicate} as predicate */ public Statement createRelationStatement(@NonNull String subject, @NonNull String object, @NonNull String context) { return mapping.statement(mapping.vertexIri(subject), mapping.getDefaultPredicate(), mapping.vertexIri(object), mapping.vertexIri(context)); } /** * * @param subject local name, will be prefixed with * {@link PropertyGraph2RdfMapping#vertexIri} * @param predicate local name, will be prefixed with * {@link PropertyGraph2RdfMapping#edgePropertyIri} * @param literal literal value * @param dataType data type of the value * @return a statement in {@link PropertyGraph2RdfMapping#defaultNamedGraph} */ public Statement 
createLiteralStatement(@NonNull String subject, @NonNull String predicate, @NonNull String literal, @NonNull DataType dataType) { return mapping.statement(mapping.vertexIri(subject), mapping.edgePropertyIri(predicate), mapping.value(literal, dataType), mapping.getDefaultNamedGraph()); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy