All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jena.fuseki.kafka.FMod_FusekiKafka Maven / Gradle / Ivy

The newest version!
/*
 *  Copyright (c) Telicent Ltd.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.apache.jena.fuseki.kafka;

import static org.apache.jena.kafka.FusekiKafka.LOG;

import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.jena.assembler.Assembler;
import org.apache.jena.atlas.lib.Pair;
import org.apache.jena.atlas.logging.FmtLog;
import org.apache.jena.atlas.logging.Log;
import org.apache.jena.fuseki.Fuseki;
import org.apache.jena.fuseki.main.FusekiServer;
import org.apache.jena.fuseki.main.FusekiServer.Builder;
import org.apache.jena.fuseki.main.sys.FusekiAutoModule;
import org.apache.jena.fuseki.server.DataAccessPointRegistry;
import org.apache.jena.kafka.KConnectorDesc;
import org.apache.jena.kafka.KafkaConnectorAssembler;
import org.apache.jena.kafka.RequestFK;
import org.apache.jena.kafka.SysJenaKafka;
import org.apache.jena.kafka.common.DataState;
import org.apache.jena.kafka.common.PersistentState;
import org.apache.jena.rdf.model.Model;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.shared.JenaException;
import org.apache.jena.sparql.core.assembler.AssemblerUtils;
import org.apache.jena.sparql.util.graph.GraphUtils;

/**
 * Connect Kafka to a dataset. Messages on a Kafka topic are HTTP-like: updates (add data), SPARQL Update or RDF patch.
 */
public class FMod_FusekiKafka implements FusekiAutoModule {

    private static AtomicBoolean INITIALIZED = new AtomicBoolean(false);

    private static void init() {
        boolean b = INITIALIZED.getAndSet(true);
        if (b)
        // Already done.
        {
            return;
        }
        AssemblerUtils.registerAssembler(null, KafkaConnectorAssembler.getType(), new KafkaConnectorAssembler());
    }

    public FMod_FusekiKafka() {
    }

    // The Fuseki modules build lifecycle is same-thread.
    // This passes information from 'prepare' to 'server'
    // [BATCHER] Change in concurrent hash map (topic -> Pair>)
    private ThreadLocal>> buildState = ThreadLocal.withInitial(ArrayList::new);

    @Override
    public String name() {
        return "FMod FusekiKafka";
    }

    protected String logMessage() {
        return String.format("Fuseki-Kafka Connector Module (%s)", SysJenaKafka.VERSION);
    }

    @Override
    public void prepare(FusekiServer.Builder builder, Set names, Model configModel) {
        init();
        Log.info(Fuseki.configLog, logMessage());

        if (configModel == null) {
            // FmtLog.error(LOG, "No server configuration. Can't build connector");
            return;
        }
        List connectors = GraphUtils.findRootsByType(configModel, KafkaConnectorAssembler.getType());
        if (connectors.isEmpty()) {
            // FmtLog.error(LOG, "No connector in server configuration");
            return;
        }
        connectors.forEach(connector -> oneConnector(builder, connector, configModel));
    }

    /*package*/ void oneConnector(FusekiServer.Builder builder, Resource connector, Model configModel) {
        KConnectorDesc conn;
        try {
            conn = (KConnectorDesc) Assembler.general.open(connector);
        } catch (JenaException ex) {
            FmtLog.error(LOG, "Failed to build a connector", ex);
            return;
        }

        String dispatchURI = conn.getLocalDispatchPath();
        String remoteEndpoint = conn.getRemoteEndpoint();

//        // Endpoint to dataset. Abbreviate a endpoint-based dispatch name to a dataset.
//        String datasetName = datasetName(dispatchURI);
//        DatasetGraph dsg = builder.getDataset(datasetName);
//        if ( dsg == null )
//            throw new FusekiKafkaException("No datasets for '" + conn.getLocalEndpoint() + "'");
        PersistentState state = new PersistentState(conn.getStateFile());

        DataState dataState = DataState.restoreOrCreate(state, dispatchURI, remoteEndpoint, conn.getTopic());
        long lastOffset = dataState.getLastOffset();
        FmtLog.info(LOG, "Initial offset for topic %s = %d (%s)", conn.getTopic(), lastOffset, dispatchURI);
        recordConnector(builder, conn, dataState);
    }

    // Passing connector across stages by using a ThreadLocal.
    private void recordConnector(Builder builder, KConnectorDesc conn, DataState dataState) {
        Pair pair = Pair.create(conn, dataState);
        buildState.get().add(pair);
        FKRegistry.get().register(conn.getTopic(), conn);
    }

    private List> connectors(FusekiServer server) {
        return buildState.get();
    }

    /**
     * Add each connector, with the state tracker, to the server configured server.
     * 

* See notes on {@link #startKafkaConnectors(FusekiServer)} about potential pitfalls of starting connectors at this * point in the server lifecycle. Derived modules MAY wish to override this method to not start * the connectors at this stage and override {@link #serverAfterStarting(FusekiServer)}, or another lifecycle method * instead. *

*/ @Override public void serverBeforeStarting(FusekiServer server) { // server(FusekiServer server) -- after build, before returning to builder caller // See also serverAfterStarting which is an even later delayed setup point. startKafkaConnectors(server); } /** * Starts the Kafka Connectors *

* This needs to be called once, and only once, by this module. By default this gets called from the * {@link #serverBeforeStarting(FusekiServer)} method, this means that Fuseki will guarantee it is up to date on all * topics before it starts servicing any requests. *

*

* Note that if the server is lagging far behind the Kafka topic(s) it is being subscribed to then it will take a * long time before Fuseki is able to service requests. This may be problematic if you are deployed this in an * environment that relies on HTTP Health Probes as the HTTP Server is not started while Fuseki is catching up on * the Kafka topics. This can result in the service being placed into a Crash Restart Loop as it fails health * checks and is restarted. *

*

* Also bear in mind that if the producers writing data to the Kafka topic(s) that Fuseki is consuming are doing so * at a rate faster than Fuseki can process those updates the service can potentially never reach a ready state. *

*

* Therefore, derived implementations MAY prefer to call this at a different point in the lifecycle * e.g. {@link #serverAfterStarting(FusekiServer)} and live with the graph being eventually consistent *

* * @param server Fuseki Server */ protected void startKafkaConnectors(FusekiServer server) { List> connectors = connectors(server); if (connectors == null) { return; } connectors.forEach(pair -> { KConnectorDesc conn = pair.getLeft(); DataState dataState = pair.getRight(); FmtLog.info(LOG, "[%s] Starting connector between %s topic %s and endpoint %s", conn.getTopic(), conn.getBootstrapServers(), conn.getTopic(), conn.dispatchLocal() ? conn.getLocalDispatchPath() : conn.getRemoteEndpoint()); FKBatchProcessor batchProcessor = makeFKBatchProcessor(conn, server); FKS.addConnectorToServer(conn, server, dataState, batchProcessor); }); } /** * Clear-up build state. */ @Override public void serverAfterStarting(FusekiServer server) { // Clear up thread local. buildState.get().clear(); buildState.remove(); } /** * Make a {@link FKBatchProcessor} for the Fuseki Server being built. The default is one that loops on the * ConsumerRecords ({@link RequestFK}) sending each to the Fuseki server for dispatch. Other policies are possible * such as aggregating batches or directly applying to a dataset. */ protected FKBatchProcessor makeFKBatchProcessor(KConnectorDesc conn, FusekiServer server) { if (DataAccessPointRegistry.get(server.getServletContext()) == null) { DataAccessPointRegistry.set(server.getServletContext(), server.getDataAccessPointRegistry()); } return FKS.plainFKBatchProcessor(conn, server.getServletContext()); } @Override public void serverStopped(FusekiServer server) { List> connectors = connectors(server); if (connectors == null) { return; } connectors.forEach(pair -> { KConnectorDesc conn = pair.getLeft(); DataState dataState = pair.getRight(); FKRegistry.get().unregister(conn.getTopic()); }); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy