/*
* Copyright (c) Telicent Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.kafka;
import static org.apache.jena.kafka.Assem2.onError;
import static org.apache.jena.kafka.utils.EnvVariables.checkForEnvironmentVariableValue;
import java.util.List;
import java.util.Properties;
import java.util.UUID;
import org.apache.commons.lang3.StringUtils;
import org.apache.jena.assembler.Assembler;
import org.apache.jena.assembler.JA;
import org.apache.jena.assembler.Mode;
import org.apache.jena.assembler.assemblers.AssemblerBase;
import org.apache.jena.atlas.lib.IRILib;
import org.apache.jena.atlas.lib.StrUtils;
import org.apache.jena.graph.Graph;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.ResourceFactory;
import org.apache.jena.rdf.model.impl.Util;
import org.apache.jena.riot.out.NodeFmtLib;
import org.apache.jena.sparql.engine.binding.Binding;
import org.apache.jena.sparql.exec.QueryExec;
import org.apache.jena.sparql.exec.RowSet;
import org.apache.jena.system.G;
import org.apache.jena.system.RDFDataException;
import org.apache.kafka.clients.consumer.ConsumerConfig;
/**
* Assembler for a Fuseki-Kafka connector that takes Kafka events and executes them on
* a Fuseki server.
*
* The Kafka event carries a "Content-Type" header, which is interpreted in the
* same way as in HTTP.
*
* This connector consumes an update stream; it does not publish data to Kafka.
*
* Illustration of the required settings for use in Fuseki:
*
* <#connector> rdf:type fk:Connector ;
* # Destination Fuseki service name (when used with Fuseki).
* fk:fusekiServiceName "ds";
*
* # Kafka bootstrap server(s); works with Kafka in KRaft mode.
* fk:bootstrapServers "localhost:9092";
*
* # Kafka topic
* fk:topic "RDF";
*
* # File used to track the state (the last offset processed)
* # Used across Fuseki restarts.
* fk:stateFile "Databases/RDF.state";
* .
*
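* A minimal sketch of loading such a configuration and assembling the
* connector programmatically (file and resource names are hypothetical):
*
*   Model model = RDFDataMgr.loadModel("config-connector.ttl");
*   Resource connector = model.getResource("http://example/connector");
*   KConnectorDesc desc = (KConnectorDesc) new KafkaConnectorAssembler().open(connector);
*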
*/
public class KafkaConnectorAssembler extends AssemblerBase implements Assembler {
private static String NS = "http://jena.apache.org/fuseki/kafka#";
public static String getNS() { return NS; }
/** Type of a connector description */
private static Resource tKafkaConnector = ResourceFactory.createResource(NS+"Connector");
// Preferred: "fusekiServiceName"
// Alternative: "datasetName"
/** Destination dataset and endpoint for dispatching Kafka events. */
public static Node pFusekiServiceName = NodeFactory.createURI(NS+"fusekiServiceName");
/** @deprecated Use {@link #pFusekiServiceName} */
@Deprecated
private static Node pFusekiDatasetName = NodeFactory.createURI(NS+"datasetName"); // Old name.
/** Currently unused - will be a remote SPARQL endpoint to use this connector as a relay. */
public static Node pRemoteEndpointName = NodeFactory.createURI(NS+"remoteEndpoint");
/** Kafka topic to listen to */
public static Node pKafkaTopic = NodeFactory.createURI(NS+"topic");
/** File used to record topic and last read offset */
public static Node pStateFile = NodeFactory.createURI(NS+"stateFile");
/** Sync on startup? */
public static Node pSyncTopic = NodeFactory.createURI(NS+"syncTopic");
/** Replay whole topic on startup? */
private static Node pReplayTopic = NodeFactory.createURI(NS+"replayTopic");
/**
* Destination for dumped events.
* A destination of "" is stdout. "stdout" and "stderr" map to the channels of the same name.
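* For example: fk:eventLog "stderr".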
*/
private static Node pEventLog = NodeFactory.createURI(NS+"eventLog");
/**
* Read events from a source instead of Kafka.
*
* A source is a directory of files, one file per event, and the filenames must
* contain an index number and end ".http".
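* For example (names illustrative): "000001.http", "000002.http", ...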
*/
private static Node pEventSource = NodeFactory.createURI(NS+"eventSource");
// Kafka cluster
public static Node pKafkaProperty = NodeFactory.createURI(NS+"config");
public static Node pKafkaBootstrapServers = NodeFactory.createURI(NS+"bootstrapServers");
public static Node pKafkaGroupId = NodeFactory.createURI(NS+"groupId");
// Default values.
private static boolean dftSyncTopic = true;
private static boolean dftReplayTopic = false;
public static String dftKafkaGroupId = "JenaFusekiKafka";
public static Resource getType() {
return tKafkaConnector;
}
@Override
public Object open(Assembler a, Resource root, Mode mode) {
return create(root.getModel().getGraph(), root.asNode(), tKafkaConnector.asNode());
}
// On error: print to stderr and return null rather than propagate the exception.
private KConnectorDesc create(Graph graph, Node node, Node type) {
try {
return createSub(graph, node, type);
} catch (RuntimeException ex) {
System.err.println(ex.getMessage());
ex.printStackTrace();
return null;
}
}
static RDFDataException dataException(Node node, String msg) {
return new RDFDataException(NodeFmtLib.displayStr(node)+" : "+msg);
}
static RDFDataException dataException(Node node, Node property, String msg) {
return new RDFDataException(NodeFmtLib.displayStr(node)+" "+NodeFmtLib.displayStr(property)+" : "+msg);
}
private static Assem2.OnError errorException = JenaKafkaException::new;
static JenaKafkaException error(Node node, String msg) {
return new JenaKafkaException(NodeFmtLib.displayStr(node)+" : "+msg);
}
static JenaKafkaException error(Node node, Node property, String msg) {
return new JenaKafkaException(NodeFmtLib.displayStr(node)+" "+NodeFmtLib.displayStr(property)+" : "+msg);
}
private KConnectorDesc createSub(Graph graph, Node node, Node type) {
/*
* PREFIX fk:
*
* [] rdf:type fk:Connector ;
* ## Required
* fk:topic "TOPIC";
* fk:bootstrapServers "localhost:9092";
* fk:stateFile "dir/filename.state" ;
* fk:fusekiServiceName "/ds"; ## Or an endpoint, "/ds/service"
*
* ## Optional - with defaults
* ## Root of group name - this is made globally unique
* ## so every message is seen by every connector.
* fk:groupId "JenaFusekiKafka";
*
* ## false means don't sync on startup.
* fk:syncTopic true;
*
* ## true means replay the whole topic from the start (overriding sync)
* fk:replayTopic false;
*
* ## Relay to a remote triplestore.
* fk:remoteEndpoint "http://host/triplestore";
* .
*/
// Required!
String topic = getConfigurationValue(graph, node, pKafkaTopic, errorException);
String datasetName = datasetName(graph, node);
datasetName = /*DataAccessPoint.*/canonical(datasetName);
String remoteEndpoint = remoteEndpointName(graph, node);
String bootstrapServers = getConfigurationValue(graph, node, pKafkaBootstrapServers, errorException);
boolean syncTopic = Assem2.getBooleanOrDft(graph, node, pSyncTopic, dftSyncTopic, errorException);
boolean replayTopic = Assem2.getBooleanOrDft(graph, node, pReplayTopic, dftReplayTopic, errorException);
String stateFile = getConfigurationValue(graph, node, pStateFile, errorException);
// The state file name can be a relative file name (a plain string), or a
// "file:" URL, which allows the state file to be located relative to the
// configuration file. Convert "file:" URLs to a filename.
if ( stateFile.startsWith("file:") )
stateFile = IRILib.IRIToFilename(stateFile);
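// e.g. "file:///fuseki/Databases/RDF.state" becomes "/fuseki/Databases/RDF.state".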
String groupIdAssembler = Assem2.getStringOrDft(graph, node, pKafkaGroupId, dftKafkaGroupId, errorException);
// The group id must be unique so that every server sees all the messages
// on the topic partition (each server is its own Kafka consumer group).
String groupId = groupIdAssembler+"-"+UUID.randomUUID().toString();
// ----
Properties kafkaConsumerProps = kafkaConsumerProps(graph, node, topic, bootstrapServers, groupId);
return new KConnectorDesc(topic, bootstrapServers,
datasetName, remoteEndpoint, stateFile, syncTopic,
replayTopic, kafkaConsumerProps);
}
private Properties kafkaConsumerProps(Graph graph, Node node,
String topic,
String bootstrapServers, String groupId) {
Properties props = SysJenaKafka.consumerProperties(bootstrapServers);
// "group.id"
props.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
// Optional Kafka configuration: (key value) pairs written as RDF lists.
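// For example, in the assembler file (keys are standard Kafka consumer
// settings; values illustrative):
//   fk:config ("max.poll.records" "1000") ;
//   fk:config ("security.protocol" "PLAINTEXT") .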
String queryString = StrUtils.strjoinNL
( "PREFIX ja: <"+JA.getURI()+">"
, "SELECT ?k ?v { ?X ?P (?k ?v) }"
);
QueryExec.graph(graph)
.query(queryString)
.substitution("X", node)
.substitution("P", pKafkaProperty)
.build().select()
.forEachRemaining(row->{
Node nk = row.get("k");
String key = nk.getLiteralLexicalForm();
Node nv = row.get("v");
String value = nv.getLiteralLexicalForm();
props.setProperty(key, value);
});
// // These are ignored if the deserializers are in the Kafka consumer constructor.
// props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
// props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, DeserializerActionFK.class.getName());
return props;
}
private static String PREFIXES = StrUtils.strjoinNL("PREFIX ja: <"+JA.getURI()+">"
,"PREFIX fk: <"+NS+">"
,"" );
private String datasetName(Graph graph, Node node) {
String queryString = StrUtils.strjoinNL
( PREFIXES
, "SELECT ?n { "
, " OPTIONAL { ?X ?fusekiServiceName ?N1 }"
, " OPTIONAL { ?X ?fusekiDatasetName ?N2 }" // Old name.
, " BIND(COALESCE( ?N1, ?N2, '' ) AS ?n)"
, "}"
);
RowSet rowSet = QueryExec.graph(graph)
.query(queryString)
.substitution("X", node)
.substitution("fusekiServiceName", pFusekiServiceName)
.substitution("fusekiDatasetName", pFusekiDatasetName)
.build()
.select();
if ( !rowSet.hasNext() )
throw new JenaKafkaException("Can't find the datasetName: "+NodeFmtLib.displayStr(node));
Binding row = rowSet.next();
if ( rowSet.hasNext() )
throw new JenaKafkaException("Multiple datasetNames: "+NodeFmtLib.displayStr(node));
Node n = row.get("n");
if ( n == null )
throw new JenaKafkaException("Can't find the datasetName: "+NodeFmtLib.displayStr(node));
if ( ! Util.isSimpleString(n) )
throw new JenaKafkaException("Dataset name is not a string: "+NodeFmtLib.displayStr(node));
String name = n.getLiteralLexicalForm();
if ( StringUtils.isBlank(name) )
throw new JenaKafkaException("Dataset name is blank: "+NodeFmtLib.displayStr(node));
return name;
}
private String remoteEndpointName(Graph graph, Node node) {
List<Node> x = G.listSP(graph, node, pRemoteEndpointName);
if ( x.isEmpty() )
return FusekiKafka.noRemoteEndpointName;
if ( x.size() > 1 )
throw onError(node, "Multiple service names", errorException);
Node n = x.get(0);
if ( ! Util.isSimpleString(n) )
throw onError(node, "Service name is not a string", errorException);
String remoteEndpoint = n.getLiteralLexicalForm();
if ( StringUtils.isBlank(remoteEndpoint) )
return FusekiKafka.noRemoteEndpointName;
if ( remoteEndpoint.contains(" ") )
throw onError(node, "Service name can not contain spaces", errorException);
return remoteEndpoint;
}
// Copy of DataAccessPoint.canonical.
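// Examples: canonical(null) == null; canonical("") == "/";
// canonical("ds") == "/ds"; canonical("/ds/") == "/ds".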
public static String canonical(String datasetPath) {
if ( datasetPath == null )
return datasetPath;
if (datasetPath.isEmpty())
return "/";
if ( datasetPath.equals("/") )
return datasetPath;
if ( !datasetPath.startsWith("/") )
datasetPath = "/" + datasetPath;
if ( datasetPath.endsWith("/") )
datasetPath = datasetPath.substring(0, datasetPath.length() - 1);
return datasetPath;
}
public static String getConfigurationValue(Graph graph, Node node, Node configNode, Assem2.OnError errorException) {
String configurationValue = Assem2.getString(graph, node, configNode, errorException);
configurationValue = checkForEnvironmentVariableValue(configNode.getURI(), configurationValue);
return configurationValue;
}
}