// com.arcadeanalytics.provider.orient2.OrientDBDataSourceGraphDataProvider.kt Maven / Gradle / Ivy
/*-
* #%L
* Arcade Data
* %%
* Copyright (C) 2018 - 2019 ArcadeAnalytics
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
package com.arcadeanalytics.provider.orient2
import com.arcadeanalytics.provider.*
import com.google.common.collect.Maps
import com.orientechnologies.common.collection.OMultiValue
import com.orientechnologies.orient.core.db.document.ODatabaseDocumentTx
import com.orientechnologies.orient.core.db.record.OIdentifiable
import com.orientechnologies.orient.core.db.record.ridbag.ORidBag
import com.orientechnologies.orient.core.id.ORID
import com.orientechnologies.orient.core.metadata.schema.OType
import com.orientechnologies.orient.core.record.impl.ODocument
import com.orientechnologies.orient.core.record.impl.ODocumentHelper
import com.orientechnologies.orient.core.sql.query.OSQLAsynchQuery
import com.tinkerpop.blueprints.Direction
import com.tinkerpop.blueprints.impls.orient.OrientEdge
import com.tinkerpop.blueprints.impls.orient.OrientGraphNoTx
import com.tinkerpop.blueprints.impls.orient.OrientVertex
import org.apache.commons.lang3.RegExUtils.removeFirst
import org.apache.commons.lang3.StringUtils.*
import org.slf4j.LoggerFactory
import java.util.*
import kotlin.collections.HashSet
/**
* Specialized provider for OrientDB2
* @author Roberto Franchini
*/
class OrientDBDataSourceGraphDataProvider : DataSourceGraphDataProvider {
private val log = LoggerFactory.getLogger(OrientDBDataSourceGraphDataProvider::class.java)
/**
 * Lists the data source types handled by this provider.
 *
 * @return a set containing the single entry `"ORIENTDB"`
 */
override fun supportedDataSourceTypes(): Set<String> = setOf("ORIENTDB")
/**
 * Checks that the given data source can be opened.
 *
 * The database is opened, its URL is logged, and it is closed again by `use`.
 *
 * @param dataSource descriptor of the data source to connect to
 * @return `true` whenever the database could be opened; failures are reported by exception
 * @throws RuntimeException wrapping any exception raised while opening or using the database
 */
override fun testConnection(dataSource: DataSourceInfo): Boolean {
    log.info("testing connection to :: '{}' ", dataSource.id)
    return try {
        val database = open(dataSource)
        database.use { db -> log.info("connection works fine:: '{}' ", db.url) }
        true
    } catch (failure: Exception) {
        throw RuntimeException(failure)
    }
}
/**
 * Executes [query] against the data source and maps the collected records to graph data.
 *
 * @param dataSource descriptor of the data source to open
 * @param query the query text to execute; only its first 256 characters are logged
 * @param limit limit handed to the result listener
 * @return the nodes and edges produced by [mapResultSet] for the collected records
 */
override fun fetchData(dataSource: DataSourceInfo, query: String, limit: Int): GraphData {
    log.info("fetching data from '{}' with query '{}' ", dataSource.id, truncate(query, 256))

    open(dataSource).use { db ->
        val collector = OrientDBDocumentCollector()

        // The value returned by query() is ignored: results are read back from the collector,
        // which is presumably filled by the listener - confirm against OrientDBResultListener.
        db.query<List<*>>(OSQLAsynchQuery<ODocument>(query, OrientDBResultListener(collector, limit)))
        log.info("Query executed, returned {} records with limit {} ", collector.size(), limit)

        val data = mapResultSet(db, collector)
        log.info("Fetched {} nodes and {} edges ", data.nodes.size, data.edges.size)

        return data
    }
}
/**
 * Converts a prepared document into a [CytoData] element.
 *
 * Fields prefixed with `in_` / `out_` are moved out of the flat record into the nested
 * `@in` / `@out` maps (keyed by the name without the prefix), and the bookkeeping
 * attributes are stripped by [cleanRecord].
 *
 * @param doc document that already carries `@id` (and `@outId` / `@inId` for edges)
 * @return an element in group `"edges"` when the document is an edge, `"nodes"` otherwise
 */
private fun toData(doc: ODocument): CytoData {
    val record = transformToMap(doc)

    val ins = HashMap<String, Any>()
    record["@in"] = ins
    val outs = HashMap<String, Any>()
    record["@out"] = outs

    // Take a snapshot of the matching keys first: the record is modified while they are processed.
    val connectionKeys = record.keys.filter { it.startsWith("in_") || it.startsWith("out_") }
    for (key in connectionKeys) {
        val value = record.remove(key) ?: continue
        if (key.startsWith("in_")) {
            ins[key.removePrefix("in_")] = value
        } else {
            outs[key.removePrefix("out_")] = value
        }
    }

    cleanRecord(record)

    val id = doc.field<String>("@id")
    val classes = doc.field<String>("@class")

    return if (doc.isEdgeType()) {
        val source = doc.field<String>("@outId")
        val target = doc.field<String>("@inId")
        val data = Data(id = id, record = record, source = source, target = target)
        // Plural group name, matching the "nodes" group used for vertices.
        CytoData(group = "edges", data = data, classes = classes)
    } else {
        val data = Data(id = id, record = record)
        CytoData(group = "nodes", data = data, classes = classes)
    }
}
/**
 * Removes the bookkeeping attributes from [record] in place.
 *
 * The keys removed are `@type`, `@rid`, `@id`, `@inId`, `@outId`, `@class`, `@version`
 * and `@fieldtypes`; every other entry is left untouched.
 *
 * @param record the map to strip
 */
private fun cleanRecord(record: MutableMap<String, Any>) {
    val bookkeepingKeys = listOf(
        "@type",
        "@rid",
        "@id",
        "@inId",
        "@outId",
        "@class",
        "@version",
        "@fieldtypes"
    )
    for (key in bookkeepingKeys) {
        record.remove(key)
    }
}
/**
 * Copies the fields of [doc] into a plain map.
 *
 * Fields whose declared type is one of the LINK types are skipped, as are fields with a
 * null value. Nested documents are converted recursively and record ids are stored as
 * strings. The document's own record id (when valid) and class name (when present) are
 * added under the `ODocumentHelper` attribute keys.
 *
 * @param doc the document to convert
 * @return a new mutable map holding the converted fields
 */
private fun transformToMap(doc: ODocument): MutableMap<String, Any> {
    val linkTypes = setOf(OType.LINK, OType.LINKBAG, OType.LINKLIST, OType.LINKSET, OType.LINKMAP)

    val map = HashMap<String, Any>()
    for (field in doc.fieldNames()) {
        // fieldType may be null; a null type is simply not a link type.
        if (doc.fieldType(field) in linkTypes) continue

        val value = doc.field<Any?>(field) ?: continue

        map[field] = when (value) {
            is ODocument -> transformToMap(value)
            is ORID -> value.toString()
            else -> value
        }
    }

    val id = doc.identity
    if (id.isValid) {
        map[ODocumentHelper.ATTRIBUTE_RID] = id.toString()
    }

    val className = doc.className
    if (className != null) {
        map[ODocumentHelper.ATTRIBUTE_CLASS] = className
    }

    return map
}
/**
 * Reads the value of [fieldName] from [doc].
 *
 * When the field has a declared type and that type is an embedded one, the field is first
 * read with an explicit `OType.EMBEDDED` type.
 *
 * @param doc the document to read from
 * @param fieldName name of the field to read
 * @return the field value
 */
private fun mapField(doc: ODocument, fieldName: String): Any {
    // fieldType yields null when the field carries no type; other code in this class
    // guards for that as well, so do not dereference it blindly.
    val type: OType? = doc.fieldType(fieldName)
    if (type != null && type.isEmbedded) {
        // NOTE(review): the result is discarded; presumably the typed read is done for its
        // conversion side effect on the document - confirm against ODocument.field(String, OType).
        doc.field<Any>(fieldName, OType.EMBEDDED)
    }
    return doc.field(fieldName)
}
/**
 * Turns the documents gathered by [collector] into [GraphData].
 *
 * Collected vertex documents get an `@edgeCount` field holding the number of edges in both
 * directions. For collected edge documents, the edge itself plus the vertices at both of
 * its ends are included; those end vertices are added without an `@edgeCount`.
 * Documents that are neither vertices nor edges are ignored.
 *
 * @param db the open database the documents belong to
 * @param collector holder of the documents returned by the query
 * @return the class/property maps and the Cytoscape elements for nodes and edges, together
 *         with the collector's truncation flag
 */
fun mapResultSet(db: ODatabaseDocumentTx,
                 collector: OrientDBDocumentCollector): GraphData {
    val graph = OrientGraphNoTx(db)

    // Separate vertices from edges.
    val nodes = HashSet<OrientVertex>()
    val edges = HashSet<OrientEdge>()

    for (doc in collector.collected()) {
        when {
            doc.isVertexType() -> {
                val vertex = graph.getVertex(doc)
                // Change tracking is switched off before the synthetic field is written.
                vertex.record.isTrackingChanges = false
                vertex.record.field("@edgeCount", vertex.countEdges(Direction.BOTH).toInt())
                nodes.add(vertex)
            }
            doc.isEdgeType() -> {
                val edge = graph.getEdge(doc)
                edges.add(edge)
                nodes.add(graph.getVertex(edge.getVertex(Direction.IN)))
                nodes.add(graph.getVertex(edge.getVertex(Direction.OUT)))
            }
        }
    }

    log.info("Computing edge map on {} edges...", edges.size)
    val edgeClasses = HashMap<String, MutableMap<String, Any>>()
    val cytoEdges = edges.asSequence()
        .map { e -> e.record }
        .map { d -> populateClasses(edgeClasses, d) }
        .map { d -> mapRid(d) }
        .map { d -> mapInAndOut(d) }
        .map { d -> countInAndOut(d) }
        .map { d -> toData(d) }
        .toSet()

    log.info("Computing vertex map on {} vertices...", nodes.size)
    val nodeClasses = HashMap<String, MutableMap<String, Any>>()
    val cytoNodes = nodes.asSequence()
        .map { e -> e.record }
        .map { d -> populateClasses(nodeClasses, d) }
        .map { d -> mapRid(d) }
        .map { d -> countInAndOut(d) }
        .map { d -> toData(d) }
        .toSet()

    return GraphData(nodeClasses, edgeClasses, cytoNodes, cytoEdges, collector.isTruncated)
}
/**
 * Adds the record of [vertex] to [connectedVertices].
 *
 * Only when the record was not already in the set is its change tracking disabled and its
 * `@edgeCount` field set to the number of edges in both directions.
 *
 * @param connectedVertices the set collecting vertex records
 * @param vertex the vertex whose record is added
 */
private fun addConnectedVertex(connectedVertices: MutableSet<ODocument>, vertex: OrientVertex) {
    val record = vertex.record
    if (!connectedVertices.add(record)) return

    record.isTrackingChanges = false
    // Stored as Int, the same representation mapResultSet uses for "@edgeCount".
    record.field("@edgeCount", vertex.countEdges(Direction.BOTH).toInt())
}
/**
 * Replaces the value of every `out_*` / `in_*` field of [doc] with its size.
 *
 * The size is computed with `OMultiValue.getSize`; the field is removed and then written
 * back under the same name with that size as its value.
 *
 * @param doc the document to rewrite in place
 * @return the same document instance
 */
private fun countInAndOut(doc: ODocument): ODocument {
    for (name in doc.fieldNames()) {
        val isConnectionField = name.startsWith("out_") || name.startsWith("in_")
        if (!isConnectionField) continue

        val size = OMultiValue.getSize(doc.field(name))
        doc.removeField(name)
        doc.field(name, size)
    }
    return doc
}
/**
 * Removes link-valued fields from [d] and detaches it.
 *
 * A field is dropped when its value is an `ORidBag` or an `OIdentifiable`.
 *
 * @param d the document to clean in place
 * @return the same document instance, after `detach()` has been called on it
 */
protected fun clean(d: ODocument): ODocument {
    for (f in d.fieldNames()) {
        val fieldValue = d.field<Any?>(f)
        if (fieldValue is ORidBag || fieldValue is OIdentifiable) {
            // Links are ignored.
            d.removeField(f)
        }
    }
    d.detach()
    return d
}
/**
 * Replaces the `out` / `in` fields of [d] with `@outId` / `@inId` string ids.
 *
 * Each id has the form `<clusterId>_<clusterPosition>`. A document without an `out` field
 * is returned unchanged; otherwise both `out` and `in` are read, converted and removed.
 *
 * @param d the document to rewrite in place
 * @return the same document instance
 */
private fun mapInAndOut(d: ODocument): ODocument {
    if (d.containsField("out")) {
        val outRid = (d.rawField("out") as OIdentifiable).identity
        d.field("@outId", "${outRid.clusterId}_${outRid.clusterPosition}")
        d.removeField("out")

        val inRid = (d.rawField("in") as OIdentifiable).identity
        d.field("@inId", "${inRid.clusterId}_${inRid.clusterPosition}")
        d.removeField("in")
    }
    return d
}
/**
 * Stores the identity of [doc] in its `@id` field as `<clusterId>_<clusterPosition>`.
 *
 * @param doc the document to annotate in place
 * @return the same document instance
 */
private fun mapRid(doc: ODocument): ODocument {
    val identity = doc.identity
    val cytoId = "${identity.clusterId}_${identity.clusterPosition}"
    doc.field("@id", cytoId)
    return doc
}
/**
 * Registers the class of [element] in [classes] and records its properties.
 *
 * An empty property map is created for the class name if none exists yet; the properties
 * are then filled in by [populateProperties].
 *
 * @param classes property maps keyed by class name, updated in place
 * @param element the document whose class and fields are recorded
 * @return the same document instance, so the call can be used inside a mapping chain
 */
private fun populateClasses(classes: MutableMap<String, MutableMap<String, Any>>, element: ODocument): ODocument {
    classes.putIfAbsent(element.className, Maps.newHashMap())
    populateProperties(classes, element)
    return element
}
/**
 * Records the typed, non-link fields of [element] in the property map of its class.
 *
 * Fields whose name starts with `@`, `in_` or `out_` are skipped, as are fields without a
 * declared type and fields of any LINK type. A property already present in the map is not
 * overwritten.
 *
 * @param classes property maps keyed by class name; the entry for the element's class is
 *        expected to be a mutable map
 * @param element the document whose fields are inspected
 */
@Suppress("UNCHECKED_CAST")
private fun populateProperties(classes: Map<String, Map<String, Any>>, element: ODocument) {
    val linkTypes = setOf(OType.LINK, OType.LINKMAP, OType.LINKSET, OType.LINKLIST, OType.LINKBAG)
    val properties = classes[element.className]

    for (f in element.fieldNames()) {
        if (f.startsWith("@") || f.startsWith("in_") || f.startsWith("out_")) continue

        // One lookup per field; a null type means there is nothing to record.
        val type = element.fieldType(f) ?: continue
        if (type in linkTypes) continue

        // The cast stays at the point of use so a missing class entry still fails here.
        (properties as MutableMap<String, Any>).putIfAbsent(f, mapType(type.name))
    }
}
/**
 * Expands the graph from the given elements by traversing their edges.
 *
 * Builds a `SELECT FROM (TRAVERSE ... MAXDEPTH 2) LIMIT n` query and runs it through
 * [fetchData].
 *
 * @param dataSource descriptor of the data source to query
 * @param ids element ids in `<clusterId>_<clusterPosition>` form; each `_` is turned into `:`
 *        and the result is prefixed with `#`
 * @param direction one of `"out"`, `"in"` or `"both"`
 * @param edgeLabel edge class to follow; it is trimmed and wrapped in single quotes
 * @param maxTraversal used both as the query LIMIT and as the fetch limit
 * @return the graph data produced by the traversal
 * @throws IllegalArgumentException if [direction] is not one of the supported values
 */
override fun expand(dataSource: DataSourceInfo,
                    ids: Array<String>,
                    direction: String,
                    edgeLabel: String,
                    maxTraversal: Int): GraphData {
    val cleanedEdgeLabel = wrap(trimToEmpty(edgeLabel), "'")

    // Reject unknown directions here instead of sending a TRAVERSE without fields to the database.
    val traversal = when (direction) {
        "out" -> "outE($cleanedEdgeLabel), inV()"
        "in" -> "inE($cleanedEdgeLabel), outV()"
        "both" -> "bothE($cleanedEdgeLabel), bothV()"
        else -> throw IllegalArgumentException(
            "unsupported direction '$direction': expected 'out', 'in' or 'both'"
        )
    }

    val rids = ids.joinToString { "#" + it.replace('_', ':') }
    val query = "SELECT FROM (TRAVERSE $traversal FROM [$rids] MAXDEPTH 2) LIMIT $maxTraversal"

    return fetchData(dataSource, query, maxTraversal)
}
/**
 * Loads the elements with the given ids.
 *
 * @param dataSource descriptor of the data source to query
 * @param ids element ids in `<clusterId>_<clusterPosition>` form; each `_` is turned into `:`
 *        and the result is prefixed with `#`
 * @return the graph data for the selected records, fetched with a limit equal to the
 *         number of ids
 */
override fun load(dataSource: DataSourceInfo, ids: Array<String>): GraphData {
    val query = ids.joinToString(prefix = "SELECT FROM [", postfix = "] ") { "#" + it.replace('_', ':') }
    return fetchData(dataSource, query, ids.size)
}
/**
 * Loads up to [limit] records of the class [className].
 *
 * @param dataSource descriptor of the data source to query
 * @param className class to select from; inserted into the query text as given
 * @param limit used both in the query's `limit` clause and as the fetch limit
 * @return the graph data for the selected records
 */
override fun loadFromClass(dataSource: DataSourceInfo, className: String, limit: Int): GraphData =
    fetchData(dataSource, "select * from $className limit $limit", limit)
/**
 * Loads up to [limit] records of the class [className] whose [propName] equals [propValue].
 *
 * @param dataSource descriptor of the data source to query
 * @param className class to select from; inserted into the query text as given
 * @param propName property to compare; inserted into the query text as given
 * @param propValue value to match; embedded as a single-quoted string literal with
 *        backslashes and single quotes escaped
 * @param limit used both in the query's `limit` clause and as the fetch limit
 * @return the graph data for the selected records
 */
override fun loadFromClass(dataSource: DataSourceInfo, className: String, propName: String, propValue: String, limit: Int): GraphData {
    // Escape backslashes first so the backslashes added for quotes are not escaped again.
    // Without this a value containing ' would terminate the literal and alter the query.
    val escapedValue = propValue
        .replace("\\", "\\\\")
        .replace("'", "\\'")
    val query = "select * from $className where $propName = '$escapedValue' limit $limit"
    return fetchData(dataSource, query, limit)
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy