// overflowdb.formats.graphml.GraphMLExporter.scala (Maven / Gradle / Ivy)
// The newest version!
package overflowdb.formats.graphml
import overflowdb.formats.{ExportResult, Exporter, isList, resolveOutputFileSingle, writeFile}
import overflowdb.{Edge, Element, Node}
import java.lang.System.lineSeparator
import java.nio.file.Path
import java.util.concurrent.atomic.AtomicInteger
import scala.collection.mutable
import scala.jdk.CollectionConverters.MapHasAsScala
import scala.xml.{PrettyPrinter, XML}
/** Exports an OverflowDB graph to GraphML.
  *
  * Warning: list properties are not natively supported by GraphML. We initially built some support for them which
  * deviated from the spec, but given that other tools don't support it (some refusing to import the remainder), we've
  * dropped it. List properties are now discarded during export; the number of discarded properties is reported in the
  * additional-info message of the returned `ExportResult`.
  *
  * NOTE(review): this comment previously claimed that lists are serialised to `;`-separated strings, whereas the
  * implementation below discards them - confirm which behaviour is intended.
  *
  * NOTE(review): as this file appears here, the string templates below contain no XML element markup, although the
  * written file is subsequently parsed with `XML.loadFile`. Presumably the tags were lost when this source was
  * extracted - restore them from the upstream source before use. The templates are kept verbatim.
  *
  * https://en.wikipedia.org/wiki/GraphML http://graphml.graphdrawing.org/primer/graphml-primer.html
  */
object GraphMLExporter extends Exporter {

  /** File extension used to build the default output file name (`export.xml`). */
  override def defaultFileExtension: String = "xml"

  /** Renders all nodes and edges into one document, writes it to the resolved output file and pretty-prints that file
    * in place.
    *
    * @param nodes
    *   nodes to export; consumed exactly once
    * @param edges
    *   edges to export; consumed exactly once
    * @param outputFile
    *   target path, passed through `resolveOutputFileSingle` with the fallback name `export.xml`
    * @return
    *   node/edge counts, the written file, and a warning message if any list properties were discarded
    */
  override def runExport(
      nodes: IterableOnce[Node],
      edges: IterableOnce[Edge],
      outputFile: Path
  ): ExportResult = {
    val outFile = resolveOutputFileSingle(outputFile, s"export.$defaultFileExtension")
    val nodePropertyContextById = mutable.Map.empty[String, PropertyContext]
    val edgePropertyContextById = mutable.Map.empty[String, PropertyContext]
    val discardedListPropertyCount = new AtomicInteger(0)

    // Materialise strictly: rendering the entries fills the property-context maps and the discard counter as a side
    // effect, and both are read further down.
    // Labels are XML-escaped just like property values, so that a label containing `&` or `<` cannot break the
    // document. `scala.xml` is fully qualified so the reference never depends on local names.
    val nodeEntries = nodes.iterator.map { node =>
      s"""
         | ${scala.xml.Utility.escape(node.label)}
         | ${dataEntries("node", node, nodePropertyContextById, discardedListPropertyCount)}
         |
         |""".stripMargin
    }.toVector

    val edgeEntries = edges.iterator.map { edge =>
      s"""
         | ${scala.xml.Utility.escape(edge.label)}
         | ${dataEntries("edge", edge, edgePropertyContextById, discardedListPropertyCount)}
         |
         |""".stripMargin
    }.toVector

    // One entry per property key that was seen while rendering the elements.
    // NOTE(review): `forAttr`, `key`, `name` and `tpe` are unused by the template as it appears here - presumably they
    // fed attributes of a key declaration that was lost in extraction.
    def propertyKeyXml(forAttr: String, propsMap: mutable.Map[String, PropertyContext]): String = {
      propsMap
        .map { case (key, PropertyContext(name, tpe)) =>
          s""" """
        }
        .mkString(lineSeparator)
    }
    val nodePropertyKeyEntries = propertyKeyXml("node", nodePropertyContextById)
    val edgePropertyKeyEntries = propertyKeyXml("edge", edgePropertyContextById)

    val document = s"""
       |
       |
       |
       |
       | $nodePropertyKeyEntries
       | $edgePropertyKeyEntries
       |
       | ${nodeEntries.mkString(lineSeparator)}
       | ${edgeEntries.mkString(lineSeparator)}
       |
       |
       |""".stripMargin.trim

    writeFile(outFile, document)
    xmlFormatInPlace(outFile)

    // Only report a warning when something was actually dropped.
    val discardedCount = discardedListPropertyCount.get
    val additionalInfo = Option.when(discardedCount > 0) {
      s"warning: discarded $discardedCount list properties (because they are not supported by the graphml spec)"
    }

    ExportResult(
      nodeCount = nodeEntries.size,
      edgeCount = edgeEntries.size,
      files = Seq(outFile),
      additionalInfo
    )
  }

  /** Renders the scalar properties of `element`, one entry per property, joined by the platform line separator.
    *
    * Warning: updates type information based on runtime instances (in the mutable map `propertyContextById`): the
    * first value seen for an encoded property name (`<prefix>__<element label>__<property name>`) determines the
    * registered `PropertyContext`.
    *
    * Warning 2: increments `discardedListPropertyCount` for every property for which `isList` is true. Such
    * properties are skipped, and the caller uses the counter to display a warning to the user.
    *
    * @param prefix
    *   element kind used as the first part of the encoded property name (the caller passes "node" or "edge")
    * @param element
    *   node or edge whose properties are rendered
    * @param propertyContextById
    *   registry of property contexts, keyed by encoded property name; mutated by this method
    * @param discardedListPropertyCount
    *   counter of skipped list properties; mutated by this method
    */
  private def dataEntries(
      prefix: String,
      element: Element,
      propertyContextById: mutable.Map[String, PropertyContext],
      discardedListPropertyCount: AtomicInteger
  ): String = {
    element.propertiesMap.asScala.iterator
      .flatMap { case (propertyName, propertyValue) =>
        if (isList(propertyValue.getClass)) {
          // list properties are discarded entirely - no blank placeholder entry is emitted
          discardedListPropertyCount.incrementAndGet()
          None
        } else { // scalar value
          val encodedPropertyName = s"${prefix}__${element.label}__$propertyName"
          // resolved for every value (not only the first per key), exactly as before
          val graphMLTpe = Type.fromRuntimeClass(propertyValue.getClass)

          /* update type information based on runtime instances; the first occurrence wins */
          propertyContextById.getOrElseUpdate(encodedPropertyName, PropertyContext(propertyName, graphMLTpe))

          val xmlEncoded = scala.xml.Utility.escape(propertyValue.toString)
          Some(s"""$xmlEncoded""")
        }
      }
      .mkString(lineSeparator)
  }

  /** Re-reads the given XML file, pretty-prints it (line width 120, indentation step 2) and overwrites the file with
    * the formatted result.
    */
  private def xmlFormatInPlace(xmlFile: Path): Unit = {
    val parsed = XML.loadFile(xmlFile.toFile)
    val formatted = new PrettyPrinter(120, 2).format(parsed)
    writeFile(xmlFile, formatted)
  }
}