io.carml.engine.rdf.RdfTriplesMapper Maven / Gradle / Ivy
package io.carml.engine.rdf;
import static io.carml.util.LogUtil.exception;
import io.carml.engine.ExpressionEvaluation;
import io.carml.engine.RefObjectMapper;
import io.carml.engine.TermGenerator;
import io.carml.engine.TriplesMapper;
import io.carml.engine.TriplesMapperException;
import io.carml.engine.join.ParentSideJoinConditionStore;
import io.carml.engine.join.ParentSideJoinConditionStoreProvider;
import io.carml.engine.join.ParentSideJoinKey;
import io.carml.logicalsourceresolver.LogicalSourceRecord;
import io.carml.logicalsourceresolver.LogicalSourceResolver;
import io.carml.model.GraphMap;
import io.carml.model.Join;
import io.carml.model.SubjectMap;
import io.carml.model.TriplesMap;
import io.carml.vocab.Rdf;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import lombok.AccessLevel;
import lombok.AllArgsConstructor;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Statement;
import reactor.core.publisher.Flux;
@Slf4j
@AllArgsConstructor(access = AccessLevel.PRIVATE)
public class RdfTriplesMapper implements TriplesMapper {
static UnaryOperator defaultGraphModifier = graph -> graph.equals(Rdf.Rr.defaultGraph) ? null : graph;
static Consumer logAddStatements = statement -> {
if (LOG.isTraceEnabled()) {
LOG.trace("Adding statement {} {} {} {} to result set", statement.getSubject(), statement.getPredicate(),
statement.getObject(), statement.getContext());
}
};
@NonNull
private final TriplesMap triplesMap;
private final Set subjectMappers;
private final Set predicateObjectMappers;
private final Set incomingRefObjectMappers;
@NonNull
private final LogicalSourceResolver.ExpressionEvaluationFactory expressionEvaluationFactory;
@NonNull
private final RdfMapperConfig rdfMapperConfig;
@NonNull
private final ParentSideJoinConditionStore parentSideJoinConditions;
public static RdfTriplesMapper of(@NonNull TriplesMap triplesMap, Set refObjectMappers,
Set incomingRefObjectMappers,
@NonNull LogicalSourceResolver.ExpressionEvaluationFactory expressionEvaluatorFactory,
@NonNull RdfMapperConfig rdfMapperConfig,
@NonNull ParentSideJoinConditionStoreProvider parentSideJoinConditionStoreProvider) {
if (LOG.isDebugEnabled()) {
LOG.debug("Creating mapper for TriplesMap {}", triplesMap.getResourceName());
}
Set subjectMappers = createSubjectMappers(triplesMap, rdfMapperConfig);
Set predicateObjectMappers =
createPredicateObjectMappers(triplesMap, rdfMapperConfig, refObjectMappers);
return new RdfTriplesMapper<>(triplesMap, subjectMappers, predicateObjectMappers, incomingRefObjectMappers,
expressionEvaluatorFactory, rdfMapperConfig,
parentSideJoinConditionStoreProvider.createParentSideJoinConditionStore(triplesMap.getId()));
}
static Set> createGraphGenerators(Set graphMaps,
RdfTermGeneratorFactory termGeneratorFactory) {
return graphMaps.stream()
.map(termGeneratorFactory::getGraphGenerator)
.collect(Collectors.toUnmodifiableSet());
}
@SuppressWarnings("java:S3864")
private static Set createSubjectMappers(TriplesMap triplesMap, RdfMapperConfig rdfMapperConfig) {
Set subjectMaps = triplesMap.getSubjectMaps();
if (subjectMaps.isEmpty()) {
throw new TriplesMapperException(
String.format("Subject map must be specified in triples map %s", exception(triplesMap, triplesMap)));
}
return subjectMaps.stream()
.peek(sm -> LOG.debug("Creating mapper for SubjectMap {}", sm.getResourceName()))
.map(sm -> RdfSubjectMapper.of(sm, triplesMap, rdfMapperConfig))
.collect(Collectors.toUnmodifiableSet());
}
@SuppressWarnings("java:S3864")
private static Set createPredicateObjectMappers(TriplesMap triplesMap,
RdfMapperConfig rdfMapperConfig, Set refObjectMappers) {
return triplesMap.getPredicateObjectMaps()
.stream()
.peek(pom -> LOG.debug("Creating mapper for PredicateObjectMap {}", pom.getResourceName()))
.map(pom -> RdfPredicateObjectMapper.of(pom, triplesMap, refObjectMappers, rdfMapperConfig))
.collect(Collectors.toUnmodifiableSet());
}
@Override
public TriplesMap getTriplesMap() {
return triplesMap;
}
@Override
public Set> getRefObjectMappers() {
return predicateObjectMappers.stream()
.flatMap(pom -> pom.getRdfRefObjectMappers()
.stream())
.collect(Collectors.toUnmodifiableSet());
}
Set> getConnectedRefObjectMappers() {
return Stream.concat(getRefObjectMappers().stream(), incomingRefObjectMappers.stream())
.collect(Collectors.toUnmodifiableSet());
}
@Override
public ParentSideJoinConditionStore getParentSideJoinConditions() {
return parentSideJoinConditions;
}
@Override
@SuppressWarnings("unchecked")
public Flux map(LogicalSourceRecord> logicalSourceRecord) {
var lsRecord = (R) logicalSourceRecord.getRecord();
LOG.trace("Mapping triples for record {}", logicalSourceRecord);
ExpressionEvaluation expressionEvaluation = expressionEvaluationFactory.apply(lsRecord);
return mapEvaluation(expressionEvaluation);
}
@Override
public Flux mapEvaluation(ExpressionEvaluation expressionEvaluation) {
Set subjectMapperResults = subjectMappers.stream()
.map(subjectMapper -> subjectMapper.map(expressionEvaluation))
.collect(Collectors.toUnmodifiableSet());
Set subjects = subjectMapperResults.stream()
.map(RdfSubjectMapper.Result::getSubjects)
.flatMap(Set::stream)
.collect(Collectors.toUnmodifiableSet());
if (subjects.isEmpty()) {
return Flux.empty();
}
Map, Set> subjectsAndSubjectGraphs = new HashMap<>();
List> subjectStatementFluxes = new ArrayList<>();
for (RdfSubjectMapper.Result subjectMapperResult : subjectMapperResults) {
Set resultSubjects = subjectMapperResult.getSubjects();
if (!resultSubjects.isEmpty()) {
subjectsAndSubjectGraphs.put(resultSubjects, subjectMapperResult.getGraphs());
subjectStatementFluxes.add(subjectMapperResult.getTypeStatements());
}
}
Flux subjectStatements = Flux.merge(subjectStatementFluxes);
Flux pomStatements = Flux.fromIterable(predicateObjectMappers)
.flatMap(predicateObjectMapper -> predicateObjectMapper.map(expressionEvaluation, subjectsAndSubjectGraphs));
cacheParentSideJoinConditions(expressionEvaluation, subjects);
return Flux.merge(subjectStatements, pomStatements);
}
private void cacheParentSideJoinConditions(ExpressionEvaluation expressionEvaluation, Set subjects) {
incomingRefObjectMappers.forEach(incomingRefObjectMapper -> incomingRefObjectMapper.getRefObjectMap()
.getJoinConditions()
.forEach(join -> processJoinCondition(join, expressionEvaluation, subjects)));
}
private void processJoinCondition(Join join, ExpressionEvaluation expressionEvaluation, Set subjects) {
String parentReference = join.getParent();
expressionEvaluation.apply(parentReference)
.ifPresent(referenceResult -> ExpressionEvaluation.extractValues(referenceResult)
.forEach(parentValue -> processJoinConditionParentValue(subjects, parentReference, parentValue)));
}
private void processJoinConditionParentValue(Set subjects, String parentReference, String parentValue) {
ParentSideJoinKey parentSideJoinKey = ParentSideJoinKey.of(parentReference, parentValue);
Set parentSubjects = new HashSet<>(subjects);
if (parentSideJoinConditions.containsKey(parentSideJoinKey)) {
// merge incoming subjects with already cached subjects for key
parentSubjects.addAll(parentSideJoinConditions.get(parentSideJoinKey));
}
parentSideJoinConditions.put(ParentSideJoinKey.of(parentReference, parentValue), parentSubjects);
}
public void cleanup() {
parentSideJoinConditions.clear();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy