All Downloads are FREE. Search and download functionalities are using the official Maven repository.

flatgraph.formats.neo4jcsv.ColumnDefinitions.scala Maven / Gradle / Ivy

There is a newer version: 0.0.91
Show newest version
package flatgraph.formats.neo4jcsv

import flatgraph.formats.iterableForList

import scala.collection.immutable.ArraySeq
import scala.collection.mutable
import scala.jdk.CollectionConverters.IterableHasAsScala

sealed trait ColumnDef
case class ScalarColumnDef(valueType: ColumnType.Value)                                              extends ColumnDef
case class ArrayColumnDef(valueType: Option[ColumnType.Value], iteratorAccessor: Any => Iterable[?]) extends ColumnDef

class ColumnDefinitions(propertyNames: Iterable[String]) {
  private val propertyNamesOrdered     = propertyNames.toSeq.sorted
  private val _columnDefByPropertyName = mutable.Map.empty[String, ColumnDef]

  def columnDefByPropertyName(name: String): Option[ColumnDef] = _columnDefByPropertyName.get(name)

  def updateWith(propertyName: String, value: Any): ColumnDef = {
    _columnDefByPropertyName
      .updateWith(propertyName) {
        case None =>
          // we didn't see this property before - try to derive it's type from the runtime class
          Option(deriveNeo4jType(value))
        case Some(ArrayColumnDef(None, _)) =>
          // value is an array that we've seen before, but we don't have the valueType yet, most likely because previous occurrences were empty arrays
          Option(deriveNeo4jType(value))
        case completeDef =>
          completeDef // we already have everything we need, no need to change anything
      }
      .get
  }

  /** for header file */
  def propertiesWithTypes: Seq[String] = {
    propertyNamesOrdered.map { name =>
      columnDefByPropertyName(name) match {
        case Some(ScalarColumnDef(valueType)) =>
          s"$name:$valueType"
        case Some(ArrayColumnDef(Some(valueType), _)) =>
          s"$name:$valueType[]"
        case _ =>
          name
      }
    }
  }

  /** for data file updates our internal `_columnDefByPropertyName` model with type information based on runtime values, so that we later
    * have all metadata required for the header file
    */
  def propertyValues(byNameAccessor: String => Option[?]): Seq[String] = {
    propertyNamesOrdered.map { propertyName =>
      byNameAccessor(propertyName) match {
        case None =>
          "" // property value empty for this element
        case Some(value) =>
          updateWith(propertyName, value) match {
            case ScalarColumnDef(_) =>
              value.toString // scalar property value
            case ArrayColumnDef(_, iteratorAccessor) =>
              /** Array property value - separated by `;` according to the spec
                *
                * Note: if all instances of this array property type are empty, we will not have the valueType (because it's derived from
                * the runtime class). At the same time, it doesn't matter for serialization, because the csv entry is always empty for all
                * empty arrays.
                */
              iteratorAccessor(value).mkString(";")
          }
      }
    }
  }

  /** for cypher file  why does neo4j have 4 different ways to import a CSV, out of which only one works, and really the only help we
    * get is a csv file reader, and we need to specify exactly how each column needs to be parsed and mapped...? 
    */
  def propertiesMappingsForCypher(startIndex: Int): Seq[String] = {
    var idx = startIndex - 1
    propertyNamesOrdered.map { name =>
      idx += 1
      val accessor = s"line[$idx]"
      columnDefByPropertyName(name) match {
        case Some(ScalarColumnDef(columnType)) =>
          val adaptedAccessor =
            cypherScalarConversionFunctionMaybe(columnType)
              .map(f => s"$f($accessor)")
              .getOrElse(accessor)
          s"$name: $adaptedAccessor"
        case Some(ArrayColumnDef(columnType, _)) =>
          val accessor = s"""split(line[$idx], ";")"""
          val adaptedAccessor =
            columnType
              .flatMap(cypherListConversionFunctionMaybe)
              .map(f => s"$f($accessor)")
              .getOrElse(accessor)
          s"$name: $adaptedAccessor"
        case None =>
          s"$name: $accessor"
      }
    }
  }

  /** optionally choose one of https://neo4j.com/docs/cypher-manual/current/functions/scalar/, depending on the columnType
    */
  private def cypherScalarConversionFunctionMaybe(columnType: ColumnType.Value): Option[String] = {
    columnType match {
      case ColumnType.Id | ColumnType.Int | ColumnType.Long | ColumnType.Byte | ColumnType.Short =>
        Some("toInteger")
      case ColumnType.Float | ColumnType.Double =>
        Some("toFloat")
      case ColumnType.Boolean =>
        Some("toBoolean")
      case _ => None
    }
  }

  /** optionally choose one of https://neo4j.com/docs/cypher-manual/current/functions/list/#functions-tobooleanlist, depending on the
    * columnType
    */
  private def cypherListConversionFunctionMaybe(columnType: ColumnType.Value): Option[String] = {
    columnType match {
      case ColumnType.Id | ColumnType.Int | ColumnType.Long | ColumnType.Byte | ColumnType.Short =>
        Some("toIntegerList")
      case ColumnType.Float | ColumnType.Double =>
        Some("toFloatList")
      case ColumnType.Boolean =>
        Some("toBooleanList")
      case ColumnType.String =>
        Some("toStringList")
      case _ => None
    }
  }

  /** derive property types based on the runtime class note: this ignores the edge case that there may be different runtime types for the
    * same property
    */
  private def deriveNeo4jType(value: Any): ColumnDef = {
    def deriveNeo4jTypeForArray(iteratorAccessor: Any => Iterable[?]): ArrayColumnDef = {
      // Iterable is immutable, so we can safely (try to) get it's first element
      val valueTypeMaybe = iteratorAccessor(value).iterator
        .nextOption()
        .map(_.getClass)
        .map(deriveNeo4jTypeForScalarValue)
      ArrayColumnDef(valueTypeMaybe, iteratorAccessor)
    }

    value match {
      case _: Iterable[_] =>
        deriveNeo4jTypeForArray(_.asInstanceOf[Iterable[?]])
      case _: IterableOnce[_] =>
        deriveNeo4jTypeForArray(_.asInstanceOf[IterableOnce[?]].iterator.toSeq)
      case _: java.lang.Iterable[_] =>
        deriveNeo4jTypeForArray(_.asInstanceOf[java.lang.Iterable[?]].asScala)
      case _: Array[_] =>
        deriveNeo4jTypeForArray(x => ArraySeq.unsafeWrapArray(x.asInstanceOf[Array[?]]))
      case scalarValue =>
        ScalarColumnDef(deriveNeo4jTypeForScalarValue(scalarValue.getClass))
    }
  }

  private def deriveNeo4jTypeForScalarValue(clazz: Class[?]): ColumnType.Value = {
    if (clazz == classOf[String])
      ColumnType.String
    else if (clazz == classOf[Int] || clazz == classOf[Integer])
      ColumnType.Int
    else if (clazz == classOf[Long] || clazz == classOf[java.lang.Long])
      ColumnType.Long
    else if (clazz == classOf[Float] || clazz == classOf[java.lang.Float])
      ColumnType.Float
    else if (clazz == classOf[Double] || clazz == classOf[java.lang.Double])
      ColumnType.Double
    else if (clazz == classOf[Boolean] || clazz == classOf[java.lang.Boolean])
      ColumnType.Boolean
    else if (clazz == classOf[Byte] || clazz == classOf[java.lang.Byte])
      ColumnType.Byte
    else if (clazz == classOf[Short] || clazz == classOf[java.lang.Short])
      ColumnType.Short
    else if (clazz == classOf[Char])
      ColumnType.Char
    else
      throw new NotImplementedError(s"unable to derive a Neo4j type for given runtime type $clazz")
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy