All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.arcadeanalytics.provider.orient2.OrientDBDataSourceGraphDataProvider.kt Maven / Gradle / Ivy

There is a newer version: 1.0.13
Show newest version
/*-
 * #%L
 * Arcade Data
 * %%
 * Copyright (C) 2018 - 2019 ArcadeAnalytics
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package com.arcadeanalytics.provider.orient2

import com.arcadeanalytics.provider.*
import com.google.common.collect.Maps
import com.orientechnologies.common.collection.OMultiValue
import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx
import com.orientechnologies.orient.core.db.record.OIdentifiable
import com.orientechnologies.orient.core.db.record.ridbag.ORidBag
import com.orientechnologies.orient.core.id.ORID
import com.orientechnologies.orient.core.metadata.schema.OType
import com.orientechnologies.orient.core.record.impl.ODocument
import com.orientechnologies.orient.core.record.impl.ODocumentHelper
import com.orientechnologies.orient.core.sql.query.OSQLAsynchQuery
import com.tinkerpop.blueprints.Direction
import com.tinkerpop.blueprints.impls.orient.OrientEdge
import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx
import com.tinkerpop.blueprints.impls.orient.OrientVertex
import org.apache.commons.lang3.RegExUtils.removeFirst
import org.apache.commons.lang3.StringUtils.*
import org.slf4j.LoggerFactory
import java.util.*
import kotlin.collections.HashSet

/**
 * Specialized provider for OrientDB2
 * @author Roberto Franchini
 */
class OrientDBDataSourceGraphDataProvider : DataSourceGraphDataProvider {

    private val log = LoggerFactory.getLogger(OrientDBDataSourceGraphDataProvider::class.java)

    /** Data source type identifiers handled by this provider. */
    override fun supportedDataSourceTypes(): Set<String> = setOf("ORIENTDB")

    /**
     * Opens (and immediately closes) a connection to [dataSource].
     *
     * @return always `true` when the connection could be opened
     * @throws RuntimeException wrapping any exception raised while opening or closing the connection
     */
    override fun testConnection(dataSource: DataSourceInfo): Boolean {

        log.info("testing connection to :: '{}' ", dataSource.id)

        try {
            open(dataSource).use { db ->
                log.info("connection works fine:: '{}' ", db.url)
            }
        } catch (e: Exception) {
            throw RuntimeException(e)
        }

        return true
    }

    /**
     * Runs [query] against [dataSource] and maps the collected documents to [GraphData].
     *
     * Records are gathered by an [OrientDBDocumentCollector] fed through an
     * [OrientDBResultListener] created with [limit]. The database is closed when mapping is done.
     */
    override fun fetchData(dataSource: DataSourceInfo, query: String, limit: Int): GraphData {

        log.info("fetching data from '{}' with query '{}' ", dataSource.id, truncate(query, 256))

        return open(dataSource).use { db ->
            val collector = OrientDBDocumentCollector()

            db.query<List<*>>(OSQLAsynchQuery<ODocument>(query, OrientDBResultListener(collector, limit)))

            log.info("Query executed, returned {} records with limit {} ", collector.size(), limit)

            val data = mapResultSet(db, collector)
            log.info("Fetched {} nodes and {} edges ", data.nodes.size, data.edges.size)

            data
        }
    }

    /**
     * Converts a document (already processed by [mapRid], [countInAndOut] and, for edges,
     * [mapInAndOut]) into a [CytoData] element.
     *
     * Every `in_*` / `out_*` entry of the record is moved into the nested `@in` / `@out` maps,
     * keyed by the name without its prefix; technical attributes are then dropped by [cleanRecord].
     */
    private fun toData(doc: ODocument): CytoData {

        val record = transformToMap(doc)

        val ins = HashMap<String, Any>()
        val outs = HashMap<String, Any>()

        // Snapshot the keys first: the record is modified while the entries are moved.
        val edgeKeys = record.keys.filter { it.startsWith("in_") || it.startsWith("out_") }
        for (key in edgeKeys) {
            val value = record.remove(key) ?: continue
            if (key.startsWith("in_")) {
                ins[key.removePrefix("in_")] = value
            } else {
                outs[key.removePrefix("out_")] = value
            }
        }

        record["@in"] = ins
        record["@out"] = outs

        cleanRecord(record)

        val id = doc.field<String>("@id")
        val classes = doc.field<String>("@class")

        return if (doc.isEdgeType()) {
            val source = doc.field<String>("@outId")
            val target = doc.field<String>("@inId")
            val data = Data(id = id, record = record, source = source, target = target)
            // NOTE(review): singular "edge" here vs plural "nodes" below - kept as-is, confirm with consumers.
            CytoData(group = "edge", data = data, classes = classes)
        } else {
            val data = Data(id = id, record = record)
            CytoData(group = "nodes", data = data, classes = classes)
        }
    }

    /** Removes the technical `@...` attributes from [record]; they are carried by [Data]/[CytoData] instead. */
    private fun cleanRecord(record: MutableMap<String, Any>) {
        listOf("@type", "@rid", "@id", "@inId", "@outId", "@class", "@version", "@fieldtypes")
                .forEach { record.remove(it) }
    }

    /** True when [type] is one of the OrientDB link types (`null` is not a link type). */
    private fun isLinkType(type: OType?): Boolean = when (type) {
        OType.LINK, OType.LINKBAG, OType.LINKLIST, OType.LINKSET, OType.LINKMAP -> true
        else -> false
    }

    /**
     * Copies the fields of [doc] into a mutable map.
     *
     * Link-typed fields and `null` values are skipped, nested documents are converted recursively and
     * [ORID] values are stored as strings. The record id (when valid) and the class name (when present)
     * are added under [ODocumentHelper.ATTRIBUTE_RID] and [ODocumentHelper.ATTRIBUTE_CLASS].
     */
    private fun transformToMap(doc: ODocument): MutableMap<String, Any> {
        val map = HashMap<String, Any>()

        for (field in doc.fieldNames()) {
            if (isLinkType(doc.fieldType(field))) continue

            val value: Any = doc.field<Any?>(field) ?: continue

            map[field] = when (value) {
                is ODocument -> transformToMap(value)
                is ORID -> value.toString()
                else -> value
            }
        }

        val id = doc.identity
        if (id.isValid) {
            map[ODocumentHelper.ATTRIBUTE_RID] = id.toString()
        }

        val className = doc.className
        if (className != null) {
            map[ODocumentHelper.ATTRIBUTE_CLASS] = className
        }

        return map
    }

    /**
     * Returns the value of [fieldName] in [doc].
     *
     * NOTE(review): not called anywhere in this class; the EMBEDDED read below discards its result.
     */
    private fun mapField(doc: ODocument, fieldName: String): Any {

        val type = doc.fieldType(fieldName)

        if (type.isEmbedded) {
            doc.field<Any?>(fieldName, OType.EMBEDDED)
        }

        return doc.field(fieldName)
    }

    /**
     * Splits the collected documents into vertices and edges and maps both to [GraphData].
     *
     * Vertex documents get an `@edgeCount` field; for every edge document both end vertices are added
     * to the node set as well. Documents that are neither vertex nor edge are ignored.
     */
    fun mapResultSet(db: ODatabaseDocumentTx,
                     collector: OrientDBDocumentCollector): GraphData {

        val graph = OrientGraphNoTx(db)

        // DIVIDE VERTICES FROM EDGES
        val nodes = HashSet<OrientVertex>()
        val edges = HashSet<OrientEdge>()

        for (doc in collector.collected()) {
            when {
                doc.isVertexType() -> {
                    val vertex = graph.getVertex(doc)
                    vertex.record.isTrackingChanges = false
                    vertex.record.field("@edgeCount", vertex.countEdges(Direction.BOTH).toInt())
                    nodes.add(vertex)
                }
                doc.isEdgeType() -> {
                    val edge = graph.getEdge(doc)
                    edges.add(edge)
                    nodes.add(graph.getVertex(edge.getVertex(Direction.IN)))
                    nodes.add(graph.getVertex(edge.getVertex(Direction.OUT)))
                }
            }
        }

        log.info("Computing edge map on {} edges...", edges.size)

        val edgeClasses = HashMap<String, MutableMap<String, Any>>()
        val cytoEdges = edges.asSequence()
                .map { it.record }
                .map { populateClasses(edgeClasses, it) }
                .map { mapRid(it) }
                .map { mapInAndOut(it) }
                .map { countInAndOut(it) }
                .map { toData(it) }
                .toSet()

        log.info("Computing vertex map on {} vertices...", nodes.size)

        val nodeClasses = HashMap<String, MutableMap<String, Any>>()
        val cytoNodes = nodes.asSequence()
                .map { it.record }
                .map { populateClasses(nodeClasses, it) }
                .map { mapRid(it) }
                .map { countInAndOut(it) }
                .map { toData(it) }
                .toSet()

        return GraphData(nodeClasses, edgeClasses, cytoNodes, cytoEdges, collector.isTruncated)
    }

    /**
     * Adds the record of [vertex] to [connectedVertices] and, when newly added, stores its edge count.
     *
     * NOTE(review): not called anywhere in this class.
     */
    private fun addConnectedVertex(connectedVertices: MutableSet<ODocument>, vertex: OrientVertex) {
        val record = vertex.record
        if (connectedVertices.add(record)) {
            record.isTrackingChanges = false
            record.field("@edgeCount", vertex.countEdges(Direction.BOTH))
        }
    }

    /** Replaces the value of every `out_*` / `in_*` field of [doc] with the size of that value. */
    private fun countInAndOut(doc: ODocument): ODocument {

        doc.fieldNames()
                .filter { it.startsWith("out_") || it.startsWith("in_") }
                .forEach { name ->
                    val size = OMultiValue.getSize(doc.field<Any?>(name))
                    doc.removeField(name)
                    doc.field(name, size)
                }

        return doc
    }

    /** Removes every field of [d] holding an [ORidBag] or an [OIdentifiable], then detaches the document. */
    protected fun clean(d: ODocument): ODocument {

        for (f in d.fieldNames()) {
            val fieldValue = d.field<Any?>(f)
            if (fieldValue is ORidBag || fieldValue is OIdentifiable) {
                // IGNORE LINKS
                d.removeField(f)
            }
        }
        d.detach()
        return d
    }

    /** Formats [rid] as `<clusterId>_<clusterPosition>`, the id format used for graph elements. */
    private fun ridToId(rid: ORID): String = rid.clusterId.toString() + "_" + rid.clusterPosition

    /**
     * Replaces the `out` / `in` links of an edge document with `@outId` / `@inId` string ids.
     * Documents without an `out` field are returned untouched.
     */
    private fun mapInAndOut(d: ODocument): ODocument {
        if (!d.containsField("out")) return d

        val outRid = (d.rawField<Any?>("out") as OIdentifiable).identity
        d.field("@outId", ridToId(outRid))
        d.removeField("out")

        val inRid = (d.rawField<Any?>("in") as OIdentifiable).identity
        d.field("@inId", ridToId(inRid))
        d.removeField("in")

        return d
    }

    /** Stores the identity of [doc] in its `@id` field, formatted by [ridToId]. */
    private fun mapRid(doc: ODocument): ODocument {
        doc.field("@id", ridToId(doc.identity))
        return doc
    }

    /**
     * Registers the class of [element] in [classes] and records the mapped type of each of its
     * properties (first occurrence wins).
     *
     * Fields starting with `@`, `in_` or `out_`, link-typed fields and fields without a declared
     * type are skipped.
     *
     * @return [element], to allow chaining
     */
    private fun populateClasses(classes: MutableMap<String, MutableMap<String, Any>>, element: ODocument): ODocument {

        val properties = classes.computeIfAbsent(element.className) { HashMap<String, Any>() }

        element.fieldNames()
                .asSequence()
                .filterNot { it.startsWith("@") || it.startsWith("in_") || it.startsWith("out_") }
                .forEach { name ->
                    val type = element.fieldType(name)
                    if (type != null && !isLinkType(type)) {
                        properties.putIfAbsent(name, mapType(type.name))
                    }
                }

        return element
    }

    /** Renders [ids] (in `<cluster>_<position>` form) as a comma separated list of `#cluster:position` RIDs. */
    private fun toRidList(ids: Array<String>): String =
            ids.joinToString { "#" + it.replace('_', ':') }

    /** Escapes backslashes and single quotes so [value] can sit inside a single-quoted SQL string literal. */
    private fun escapeSqlString(value: String): String =
            value.replace("\\", "\\\\").replace("'", "\\'")

    /**
     * Traverses the edges (and their vertices) connected to the elements identified by [ids].
     *
     * @param direction one of `"out"`, `"in"` or `"both"`
     * @param edgeLabel edge class to follow; blank means every edge class
     * @param maxTraversal used both as the SQL `LIMIT` and as the collector limit
     * @throws IllegalArgumentException when [direction] is not one of the supported values
     */
    override fun expand(dataSource: DataSourceInfo,
                        ids: Array<String>,
                        direction: String,
                        edgeLabel: String,
                        maxTraversal: Int): GraphData {

        val cleanedEdgeLabel = wrap(trimToEmpty(edgeLabel), "'")

        // An unknown direction used to yield a TRAVERSE with no fields; fail fast instead.
        val traversal = when (direction) {
            "out" -> "outE($cleanedEdgeLabel), inV()"
            "in" -> "inE($cleanedEdgeLabel), outV()"
            "both" -> "bothE($cleanedEdgeLabel), bothV()"
            else -> throw IllegalArgumentException(
                    "Unsupported direction '$direction': expected 'out', 'in' or 'both'")
        }

        val query = "SELECT FROM (TRAVERSE $traversal FROM [${toRidList(ids)}] MAXDEPTH 2) LIMIT $maxTraversal"

        return fetchData(dataSource, query, maxTraversal)
    }

    /** Loads the elements identified by [ids]; the number of ids is used as the fetch limit. */
    override fun load(dataSource: DataSourceInfo, ids: Array<String>): GraphData {

        val query = "SELECT FROM [${toRidList(ids)}] "

        return fetchData(dataSource, query, ids.size)
    }

    /** Loads at most [limit] records of class [className]. */
    override fun loadFromClass(dataSource: DataSourceInfo, className: String, limit: Int): GraphData {
        val query = "select * from $className limit $limit"
        return fetchData(dataSource, query, limit)
    }

    /**
     * Loads at most [limit] records of class [className] whose [propName] equals [propValue].
     *
     * [propValue] is escaped before being embedded in the query; [className] and [propName] are
     * inserted verbatim and must be trusted identifiers.
     */
    override fun loadFromClass(dataSource: DataSourceInfo, className: String, propName: String, propValue: String, limit: Int): GraphData {
        val query = "select * from $className where $propName = '${escapeSqlString(propValue)}' limit $limit"
        return fetchData(dataSource, query, limit)
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy