/*
 * Source listing of io.carml.engine.rdf.RdfTermGeneratorFactory, as published on a Maven / Gradle / Ivy
 * artifact download page. All downloads on that page are free; its search and download functionality uses the
 * official Maven repository.
 *
 * Note: a newer version of this artifact is available (0.4.11).
 */
package io.carml.engine.rdf;

import static io.carml.engine.rdf.RdfPredicateObjectMapper.createObjectMapGenerators;
import static io.carml.engine.rdf.RdfPredicateObjectMapper.createPredicateGenerators;
import static io.carml.util.Models.streamCartesianProductStatements;

import io.carml.engine.ExpressionEvaluation;
import io.carml.engine.GetTemplateValue;
import io.carml.engine.TermGenerator;
import io.carml.engine.TermGeneratorFactory;
import io.carml.engine.TermGeneratorFactoryException;
import io.carml.engine.function.ExecuteFunction;
import io.carml.engine.template.Template;
import io.carml.engine.template.TemplateParser;
import io.carml.model.DatatypeMap;
import io.carml.model.ExpressionMap;
import io.carml.model.GraphMap;
import io.carml.model.LanguageMap;
import io.carml.model.ObjectMap;
import io.carml.model.PredicateMap;
import io.carml.model.PredicateObjectMap;
import io.carml.model.SubjectMap;
import io.carml.model.TermMap;
import io.carml.model.TermType;
import io.carml.model.TriplesMap;
import io.carml.util.IriSafeMaker;
import io.carml.util.RdfValues;
import io.carml.vocab.Rdf;
import java.text.Normalizer;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import org.eclipse.rdf4j.common.net.ParsedIRI;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Model;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.ValueFactory;
import org.eclipse.rdf4j.model.util.Literals;
import org.eclipse.rdf4j.model.util.ModelCollector;
import org.eclipse.rdf4j.model.util.Models;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@SuppressWarnings("java:S1135")
@AllArgsConstructor(access = AccessLevel.PRIVATE)
public class RdfTermGeneratorFactory implements TermGeneratorFactory {

  // TODO cache results of evaluated expressions?

  private static final Logger LOG = LoggerFactory.getLogger(RdfTermGeneratorFactory.class);

  private final RdfTermGeneratorConfig rdfTermGeneratorConfig;

  private final ValueFactory valueFactory;

  private final UnaryOperator makeIriSafe;

  private final TemplateParser templateParser;

  public static RdfTermGeneratorFactory of(RdfTermGeneratorConfig rdfTermGeneratorConfig,
      TemplateParser templateParser) {
    return new RdfTermGeneratorFactory(rdfTermGeneratorConfig, rdfTermGeneratorConfig.getValueFactory(), IriSafeMaker
        .create(rdfTermGeneratorConfig.getNormalizationForm(), rdfTermGeneratorConfig.isIriUpperCasePercentEncoding()),
        templateParser);
  }

  @Override
  @SuppressWarnings("unchecked")
  public TermGenerator getSubjectGenerator(SubjectMap map) {
    return (TermGenerator) getGenerator(map, Set.of(TermType.BLANK_NODE, TermType.IRI), Set.of(IRI.class));
  }

  @Override
  @SuppressWarnings("unchecked")
  public TermGenerator getPredicateGenerator(PredicateMap map) {
    return (TermGenerator) getGenerator(map, Set.of(TermType.IRI), Set.of(IRI.class));
  }

  @Override
  @SuppressWarnings("unchecked")
  public TermGenerator getObjectGenerator(ObjectMap map) {
    return (TermGenerator) getGenerator(map, Set.of(TermType.IRI, TermType.BLANK_NODE, TermType.LITERAL),
        Set.of(IRI.class, Literal.class));
  }

  @Override
  @SuppressWarnings("unchecked")
  public TermGenerator getGraphGenerator(GraphMap map) {
    return (TermGenerator) getGenerator(map, Set.of(TermType.IRI), Set.of(IRI.class));
  }

  @SuppressWarnings("unchecked")
  private TermGenerator getDatatypeGenerator(DatatypeMap map) {
    return (TermGenerator) getGenerator(map, Set.of(TermType.IRI), Set.of(IRI.class));
  }

  @SuppressWarnings("unchecked")
  private TermGenerator getLanguageGenerator(LanguageMap map) {
    return (TermGenerator) getGenerator(map, Set.of(TermType.LITERAL), Set.of(Literal.class));
  }

  private TermGenerator getGenerator(ExpressionMap map, Set allowedTermTypes,
      Set> allowedConstantTypes) {
    List> generators = Stream.>>>of(

        // constant
        () -> getConstantGenerator(map, allowedConstantTypes),

        // reference
        () -> getReferenceGenerator(map, allowedTermTypes),

        // template
        () -> getTemplateGenerator(map, allowedTermTypes),

        // functionValue
        () -> getFunctionValueGenerator(map, allowedTermTypes)

    )
        .map(Supplier::get)
        .filter(Optional::isPresent)
        .map(Optional::get)
        .collect(Collectors.toList());

    if (generators.isEmpty()) {
      throw new TermGeneratorFactoryException(String
          .format("No constant, reference, template or function value found for term map [%s]", map.getResourceName()));
    }
    if (generators.size() > 1) {
      throw new TermGeneratorFactoryException(
          String.format("%s value generators were created for term map [%s], where only 1 is expected.",
              generators.size(), map.getResourceName()));
    }
    return generators.get(0);
  }

  private TermGenerator getGenerator(ExpressionMap termMap,
      Function> getValue, Set allowedTermTypes, TermType termType) {

    Function, TermGenerator> createGenerator =
        generateTerm -> expressionEvaluation -> generateValues(getValue, expressionEvaluation, generateTerm);

    if (!allowedTermTypes.contains(termType)) {
      throw new TermGeneratorFactoryException(
          String.format("encountered disallowed term type [%s]%nin TermMap:%n%s%n%n allowed TermTypes: %s", termType,
              termMap, allowedTermTypes));
    }

    switch (termType) {

      case IRI:
        return createGenerator.apply(this::generateIriTerm);

      case BLANK_NODE:
        return createGenerator.apply(this::generateBNodeTerm);

      case LITERAL:

        // term map is assumed to be an object map if it has term type literal
        ObjectMap objectMap = (ObjectMap) termMap;

        if (objectMap.getLanguageMap() != null) {
          return getLanguageTaggedLiteralGenerator(objectMap, getValue);
        }

        if (objectMap.getDatatypeMap() != null) {
          return getDatatypedLiteralGenerator(objectMap, getValue);
        }

        // f.createLiteral(label, datatype) // TODO infer datatype, see
        // https://www.w3.org/TR/r2rml/#generated-rdf-term - f.e. xsd:integer for Integer instances
        return createGenerator.apply(valueFactory::createLiteral);

      default:
        throw new TermGeneratorFactoryException(
            String.format("unknown term type [%s]%nin TermMap:%s", termType, termMap));

    }
  }

  private List generateValues(Function> getValue,
      ExpressionEvaluation expressionEvaluation, Function generateTerm) {
    Optional referenceValue = getValue.apply(expressionEvaluation);
    if (LOG.isTraceEnabled()) {
      LOG.trace("with result: {}", referenceValue.orElse("null"));
    }

    return referenceValue.map(value -> unpackEvaluatedExpression(value, generateTerm))
        .orElse(List.of());
  }

  private TermGenerator getDatatypedLiteralGenerator(ObjectMap objectMap,
      Function> getLabelValue) {

    return expressionEvaluation -> {
      // determine label values
      List labels = generateValues(getLabelValue, expressionEvaluation, valueFactory::createLiteral);

      // determine datatypes by creating a nested term generator
      List datatypes = getDatatypeGenerator(objectMap.getDatatypeMap()).apply(expressionEvaluation);

      // return literals for all combinations of label and datatype
      return labels.stream()
          .map(Value::stringValue)
          .flatMap(label -> datatypes.stream()
              .map(datatype -> valueFactory.createLiteral(label, datatype)))
          .collect(Collectors.toUnmodifiableList());
    };
  }

  private TermGenerator getLanguageTaggedLiteralGenerator(ObjectMap objectMap,
      Function> getLabelValue) {

    return expressionEvaluation -> {
      // determine label values
      List labels = generateValues(getLabelValue, expressionEvaluation, valueFactory::createLiteral);

      // determine languages by creating a nested term generator
      // TODO languages arent really literals, but that would require some refactoring
      List languages = getLanguageGenerator(objectMap.getLanguageMap()).apply(expressionEvaluation);

      // return literals for all combinations of label and datatype
      return labels.stream()
          .map(Value::stringValue)
          .flatMap(label -> languages.stream()
              .map(Literal::getLabel)
              .filter(language -> {
                if (!Literals.isValidLanguageTag(language)) {
                  throw new TermGeneratorFactoryException(
                      String.format("Invalid lang tag '%s' used in object map %n%s", language, objectMap));
                }
                return true;
              })
              .map(language -> valueFactory.createLiteral(label, language)))
          .collect(Collectors.toUnmodifiableList());
    };
  }

  @Override
  public Optional> getConstantGenerator(ExpressionMap map,
      Set> allowedConstantTypes) {
    Value constant = map.getConstant();
    if (constant == null) {
      return Optional.empty();
    }
    if (allowedConstantTypes.stream()
        .noneMatch(c -> c.isInstance(constant))) {
      throw new TermGeneratorFactoryException(
          "encountered constant value of type " + constant.getClass() + ", which is not allowed for this term map");
    }
    List constants = List.of(constant);
    if (LOG.isTraceEnabled()) {
      LOG.trace("Generated constant values: {}", constants);
    }

    return Optional.of(e -> constants);
  }

  @Override
  public Optional> getReferenceGenerator(ExpressionMap map,
      Set allowedTermTypes) {

    String reference = map.getReference();
    if (reference == null) {
      return Optional.empty();
    }

    Function> getValue =
        expressionEvaluation -> expressionEvaluation.apply(reference);

    return Optional.of(getGenerator(map, getValue, allowedTermTypes, determineTermType(map)));
  }

  @Override
  public Optional> getTemplateGenerator(ExpressionMap map,
      Set allowedTermTypes) {

    String templateStr = map.getTemplate();
    if (templateStr == null) {
      return Optional.empty();
    }

    Template template = templateParser.parse(templateStr);

    TermType termType = determineTermType(map);

    // for IRI term types, make template values 'IRI-safe'.
    // otherwise, do not transform template values.
    UnaryOperator transformValue = termType == TermType.IRI ? makeIriSafe : v -> v;

    Function> getValue =
        new GetTemplateValue(template, template.getExpressions(), transformValue, this::createNaturalRdfLexicalForm);

    return Optional.of(getGenerator(map, getValue, allowedTermTypes, termType));
  }

  @Override
  public Optional> getFunctionValueGenerator(ExpressionMap expressionMap,
      Set allowedTermTypes) {
    var executionMap = expressionMap.getFunctionValue();
    if (executionMap == null) {
      return Optional.empty();
    }

    Function> getValue =
        expressionEvaluation -> mapFunctionExecution(expressionEvaluation, expressionMap, executionMap);

    return Optional.of(getGenerator(expressionMap, getValue, allowedTermTypes, determineTermType(expressionMap)));
  }

  private Optional mapFunctionExecution(ExpressionEvaluation expressionEvaluation, ExpressionMap expressionMap,
      TriplesMap executionMap) {
    Resource functionExecution = valueFactory.createBNode();

    var executionStatements = executionMap.getPredicateObjectMaps()
        .stream()
        .flatMap(pom -> getFunctionPredicateObjectMapModel(functionExecution, executionMap, pom, expressionEvaluation))
        .collect(new ModelCollector());

    var termType = determineTermType(expressionMap);

    // for IRI term types, make values valid IRIs.
    UnaryOperator returnValueAdapter = termType == TermType.IRI ? this::iriEncodeResult : v -> v;

    return mapExecution(executionStatements, returnValueAdapter);
  }

  private Stream getFunctionPredicateObjectMapModel(Resource functionExecution, TriplesMap executionMap,
      PredicateObjectMap pom, ExpressionEvaluation expressionEvaluation) {
    var predicateGenerators = createPredicateGenerators(pom, executionMap, this);
    var objectGenerators = createObjectMapGenerators(pom.getObjectMaps(), executionMap, this);

    Set predicates = predicateGenerators.stream()
        .map(g -> g.apply(expressionEvaluation))
        .flatMap(List::stream)
        .collect(Collectors.toUnmodifiableSet());

    if (predicates.isEmpty()) {
      return Stream.empty();
    }

    Set objects = objectGenerators.map(g -> g.apply(expressionEvaluation))
        .flatMap(List::stream)
        .collect(Collectors.toUnmodifiableSet());

    if (objects.isEmpty()) {
      return Stream.empty();
    }

    return streamCartesianProductStatements(Set.of(functionExecution), predicates, objects, Set.of());
  }

  private Optional mapExecution(Model executionStatements, UnaryOperator returnValueAdapter) {
    Optional optionalExecution = Models.subject(executionStatements);

    return optionalExecution.map(execution -> {
      IRI functionIri = getFunctionIri(execution, executionStatements);
      ExecuteFunction function = rdfTermGeneratorConfig.getFunctions()
          .getFunction(functionIri)
          .orElseThrow(
              () -> new TermGeneratorFactoryException("no function registered for function IRI [" + functionIri + "]"));

      return function.execute(executionStatements, execution, returnValueAdapter);
    });
  }

  private Object iriEncodeResult(Object result) {
    if (result instanceof Collection) {
      return ((Collection) result).stream()
          .map(this::encodeAsIri)
          .collect(Collectors.toUnmodifiableList());
    } else {
      return encodeAsIri(result);
    }
  }

  private Object encodeAsIri(Object value) {
    String iriValue;

    if (value instanceof Value) {
      iriValue = ((Value) value).stringValue();
    } else {
      iriValue = value.toString();
    }

    // perform unicode normalization
    iriValue = Normalizer.normalize(iriValue, rdfTermGeneratorConfig.getNormalizationForm());

    return ParsedIRI.create(iriValue)
        .toString();
  }

  private IRI getFunctionIri(Resource execution, Model model) {
    return Models.objectIRI(model.filter(execution, Rdf.Fno.executes, null))
        .orElseGet(() -> Models.objectIRI(model.filter(execution, Rdf.Fno.old_executes, null))
            .orElseThrow(
                () -> new TermGeneratorFactoryException("function execution does not have fno:executes value")));
  }

  private TermType determineTermType(ExpressionMap map) {
    if (map instanceof DatatypeMap) {
      return TermType.IRI;
    } else if (map instanceof LanguageMap) {
      return TermType.LITERAL;
    } else if (map instanceof TermMap) {
      TermMap termMap = (TermMap) map;

      TermType termType = termMap.getTermType();
      if (termType != null) {
        return termType;
      }

      if (map instanceof ObjectMap) {
        ObjectMap objectMap = (ObjectMap) map;
        if (isReferenceTermMap(termMap) || objectMap.getLanguageMap() != null || objectMap.getDatatypeMap() != null) {
          return TermType.LITERAL;
        }
      }

      return TermType.IRI;
    } else {
      throw new IllegalStateException(String.format("Unknown expression map type %s for %s", map.getClass()
          .getSimpleName(), map));
    }
  }

  private boolean isReferenceTermMap(TermMap map) {
    return map.getConstant() == null && map.getReference() != null;
  }

  private List unpackEvaluatedExpression(Object result, Function generateTerm) {
    if (result instanceof Collection) {
      return ((Collection) result).stream()
          .filter(Objects::nonNull)
          .map(i -> generateTerm.apply(createNaturalRdfLexicalForm(i)))
          .collect(Collectors.toUnmodifiableList());
    }

    Value value = generateTerm.apply(createNaturalRdfLexicalForm(result));

    return value == null ? List.of() : List.of(value);
  }

  private IRI generateIriTerm(String lexicalForm) {
    if (RdfValues.isValidIri(lexicalForm)) {
      return valueFactory.createIRI(lexicalForm);
    }

    String iri = rdfTermGeneratorConfig.getBaseIri()
        .stringValue() + lexicalForm;
    if (RdfValues.isValidIri(iri)) {
      return valueFactory.createIRI(iri);
    }

    throw new TermGeneratorFactoryException(String.format(
        "Could not generate a valid iri from term lexical form [%s] as-is, or prefixed with base iri [%s]", lexicalForm,
        rdfTermGeneratorConfig.getBaseIri()));
  }

  private BNode generateBNodeTerm(String lexicalForm) {
    String id = createValidBNodeId(lexicalForm);
    return valueFactory.createBNode(id);
  }

  private String createValidBNodeId(String lexicalForm) {
    return lexicalForm.replaceAll("[^a-zA-Z_0-9-]+", "");
  }

  private String createNaturalRdfLexicalForm(Object value) {
    // TODO https://www.w3.org/TR/r2rml/#dfn-natural-rdf-literal
    return value.toString();
  }

}