All Downloads are FREE. Search and download functionalities are using the official Maven repository.

foxx.spline.services.lineage-overview.js Maven / Gradle / Ivy

There is a newer version: 0.7.9
Show newest version
/*
 * Copyright 2020 ABSA Group Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

"use strict";
const {db, aql} = require('@arangodb');
const {memoize} = require('../utils/common');
const {GraphBuilder} = require('../utils/graph');
const {observedWritesByRead} = require('./observed-writes-by-read');

/**
 * Return a high-level lineage overview of the given write event.
 *
 * @param eventKey write event key
 * @param maxDepth maximum number of job nodes in any path of the resulting graph (excluding cycles).
 * It controls how far the traversal looks for the lineage.
 * @returns za.co.absa.spline.consumer.service.model.LineageOverview,
 * or a falsy value (null) when no lineage graph could be built,
 * i.e. the event does not exist or maxDepth is negative.
 */
function lineageOverview(eventKey, maxDepth) {

    const executionEvent = db._query(aql`
        WITH progress
        RETURN FIRST(FOR p IN progress FILTER p._key == ${eventKey} RETURN p)
    `).next();

    // Build the graph first: when there is nothing to return, the target
    // data source lookup below would be a wasted query.
    const lineageGraph = eventLineageOverviewGraph(executionEvent, maxDepth);
    if (!lineageGraph) {
        return lineageGraph;
    }

    const targetDataSource = db._query(aql`
        WITH progress, progressOf, executionPlan, affects, dataSource
        RETURN FIRST(FOR ds IN 2 OUTBOUND ${executionEvent} progressOf, affects RETURN ds)
    `).next();

    // FIRST() yields null when the traversal finds no data source, and an event
    // may carry no "extra" payload; neither case should crash with a TypeError.
    const extra = executionEvent.extra || {};
    const targetDataSourceId = targetDataSource ? targetDataSource._key : null;

    return {
        "info": {
            "timestamp": executionEvent.timestamp,
            "applicationId": extra.appId,
            "targetDataSourceId": targetDataSourceId
        },
        "graph": {
            "depthRequested": maxDepth,
            // NOTE: any falsy computed depth (0 or NaN) is reported as -1.
            "depthComputed": lineageGraph.depth || -1,
            "nodes": lineageGraph.vertices,
            "edges": lineageGraph.edges
        }
    };
}

/**
 * Build the lineage overview graph reachable from the given write event.
 *
 * The graph is seeded with the data source found two hops OUTBOUND from the
 * start event (via progressOf, affects). Then, for every visited event, the
 * execution node, the data sources it depends on and the connecting edges
 * are added to the graph.
 *
 * @param startEvent the progress (write event) document to start from
 * @param maxDepth how many levels of events to walk; 0 yields the start data source only
 * @returns null when startEvent is falsy or maxDepth is negative,
 * otherwise {depth, vertices, edges} where depth is maxDepth minus
 * the smallest remaining depth reported by the traversal.
 */
function eventLineageOverviewGraph(startEvent, maxDepth) {
    if (!startEvent || maxDepth < 0) {
        return null;
    }

    const rootNodeQuery = aql`
        WITH progress, progressOf, executionPlan, affects, dataSource
        FOR ds IN 2 OUTBOUND ${startEvent} progressOf, affects 
            LIMIT 1
            RETURN {
                "_id": ds._key,
                "_class": "za.co.absa.spline.consumer.service.model.DataSourceNode",
                "name": ds.uri
            }
        `;
    const rootNode = db._query(rootNodeQuery).next();

    const builder = new GraphBuilder([rootNode]);

    // Adds the execution of the given event, the data sources it reads,
    // and the read/write edges between them to the graph under construction.
    function appendEventSubgraph(event) {
        const subgraphQuery = aql`
            WITH progress, progressOf, executionPlan, affects, depends, dataSource

            LET exec = FIRST(FOR ex IN 1 OUTBOUND ${event} progressOf RETURN ex)
            LET affectedDsEdge = FIRST(FOR v, e IN 1 OUTBOUND exec affects RETURN e)
            LET rdsWithInEdges = (FOR ds, e IN 1 OUTBOUND exec depends RETURN [ds, e])
            LET readSources = rdsWithInEdges[*][0]
            LET readDsEdges = rdsWithInEdges[*][1]
            
            LET vertices = (
                FOR vert IN APPEND(readSources, exec)
                    LET vertType = SPLIT(vert._id, '/')[0]
                    RETURN vertType == "dataSource"
                        ? {
                            "_id": vert._key,
                            "_class": "za.co.absa.spline.consumer.service.model.DataSourceNode",
                            "name": vert.uri
                        }
                        : MERGE(KEEP(vert, ["systemInfo", "agentInfo"]), {
                            "_id": vert._key,
                            "_class": "za.co.absa.spline.consumer.service.model.ExecutionNode",
                            "name": vert.name || ""
                        })
            )
            
            LET edges = (
                FOR edge IN APPEND(readDsEdges, affectedDsEdge)
                    LET edgeType = SPLIT(edge._id, '/')[0]
                    LET exKey = SPLIT(edge._from, '/')[1]
                    LET dsKey = SPLIT(edge._to, '/')[1]
                    RETURN {
                        "source": edgeType == "depends" ? dsKey : exKey,
                        "target": edgeType == "affects" ? dsKey : exKey
                    }
            )
            
            RETURN {vertices, edges}
        `;
        const subgraph = db._query(subgraphQuery).next();

        builder.add(subgraph);
    }

    // Post-order walk: upstream write events are visited before the event itself.
    // The walk is wrapped in memoize keyed by the event _id (presumably so that
    // an already visited event is not processed again - see ../utils/common).
    const walk = memoize(visited => visited._id, (event, depth) => {
        let leftover = depth - 1;

        if (depth > 1) {
            const upstreamWrites = observedWritesByRead(event);
            upstreamWrites.forEach(upstreamWrite => {
                const upstreamLeftover = walk(upstreamWrite, depth - 1);
                leftover = Math.min(leftover, upstreamLeftover);
            });
        }

        appendEventSubgraph(event);

        return leftover;
    });

    let unusedDepth = 0;
    if (maxDepth > 0) {
        unusedDepth = walk(startEvent, maxDepth);
    }

    const {vertices, edges} = builder.graph();

    return {
        depth: maxDepth - unusedDepth,
        vertices,
        edges
    };
}

module.exports = {
    lineageOverview
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy