All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opencypher.morpheus.api.GraphSources.scala Maven / Gradle / Ivy

/*
 * Copyright (c) 2016-2019 "Neo4j Sweden, AB" [https://neo4j.com]
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Attribution Notice under the terms of the Apache License 2.0
 *
 * This work was created by the collective efforts of the openCypher community.
 * Without limiting the terms of Section 6, any Derivative Work that is not
 * approved by the public consensus process of the openCypher Implementers Group
 * should not be described as “Cypher” (and Cypher® is a registered trademark of
 * Neo4j Inc.) or as "openCypher". Extensions by implementers or prototypes or
 * proposals for change that have been documented or implemented should only be
 * described as "implementation extensions to Cypher" or as "proposed changes to
 * Cypher that are not yet approved by the openCypher community".
 */
package org.opencypher.morpheus.api

import java.nio.file.Paths

import org.opencypher.graphddl.GraphDdl
import org.opencypher.morpheus.api.io.FileFormat
import org.opencypher.morpheus.api.io.fs.{CsvGraphSource, EscapeAtSymbol, FSGraphSource}
import org.opencypher.morpheus.api.io.neo4j.{Neo4jBulkCSVDataSink, Neo4jPropertyGraphDataSource}
import org.opencypher.morpheus.api.io.sql.IdGenerationStrategy.IdGenerationStrategy
import org.opencypher.morpheus.api.io.sql.{SqlDataSourceConfig, SqlPropertyGraphDataSource}
import org.opencypher.morpheus.api.io.util.FileSystemUtils._
import org.opencypher.okapi.api.schema.PropertyGraphSchema
import org.opencypher.okapi.neo4j.io.Neo4jConfig

import scala.io.Source
import scala.util.Properties

/**
  * Entry point for obtaining the graph source factories defined in this file.
  */
object GraphSources {

  /**
    * Creates a factory for file-system-backed graph sources by delegating to [[FSGraphSources]].
    *
    * @param rootPath         root path forwarded to the factory
    * @param hiveDatabaseName optional Hive database name forwarded to the factory (defaults to `None`)
    * @param filesPerTable    optional files-per-table setting forwarded to the factory (defaults to `Some(1)`)
    * @param session          Morpheus session
    * @return factory from which CSV, Parquet and ORC graph sources can be obtained
    */
  def fs(
    rootPath: String,
    hiveDatabaseName: Option[String] = None,
    filesPerTable: Option[Int] = Some(1)
  )(implicit session: MorpheusSession): FSGraphSources.FSGraphSourceFactory =
    FSGraphSources(rootPath, hiveDatabaseName, filesPerTable)

  /**
    * Gives access to the Cypher-based graph sources.
    *
    * @return the [[CypherGraphSources]] object
    */
  def cypher: CypherGraphSources.type = CypherGraphSources

  /**
    * Creates a SQL graph source factory from a Graph DDL file by delegating to [[SqlGraphSources]].
    *
    * @param graphDdlPath path of the Graph DDL file
    * @param session      Morpheus session
    * @return SQL graph source factory
    */
  def sql(graphDdlPath: String)(implicit session: MorpheusSession): SqlGraphSources.SqlGraphSourceFactory =
    SqlGraphSources(graphDdlPath)

  /**
    * Creates a SQL graph source factory from an already constructed Graph DDL by delegating to
    * [[SqlGraphSources]].
    *
    * @param graphDdl Graph DDL instance
    * @param session  Morpheus session
    * @return SQL graph source factory
    */
  def sql(graphDdl: GraphDdl)(implicit session: MorpheusSession): SqlGraphSources.SqlGraphSourceFactory =
    SqlGraphSources(graphDdl)
}

/**
  * Factories for graph sources and sinks that are backed by a file system.
  */
object FSGraphSources {

  /**
    * Builds an [[FSGraphSourceFactory]] from the given arguments.
    *
    * @param rootPath         root path handed to the created graph sources
    * @param hiveDatabaseName optional Hive database name (defaults to `None`)
    * @param filesPerTable    optional files-per-table setting (defaults to `Some(1)`)
    * @param session          Morpheus session
    * @return factory for the individual file formats
    */
  def apply(
    rootPath: String,
    hiveDatabaseName: Option[String] = None,
    filesPerTable: Option[Int] = Some(1)
  )(implicit session: MorpheusSession): FSGraphSourceFactory =
    FSGraphSourceFactory(
      rootPath = rootPath,
      hiveDatabaseName = hiveDatabaseName,
      filesPerTable = filesPerTable
    )

  /**
    * Holds the settings shared by all file formats and exposes one accessor per format.
    * Each accessor constructs a new graph source on every call.
    *
    * @param rootPath         root path handed to the created graph sources
    * @param hiveDatabaseName optional Hive database name, passed to the Parquet and ORC sources only
    * @param filesPerTable    optional files-per-table setting, passed to every source
    * @param session          Morpheus session
    */
  case class FSGraphSourceFactory(
    rootPath: String,
    hiveDatabaseName: Option[String] = None,
    filesPerTable: Option[Int] = Some(1)
  )(implicit session: MorpheusSession) {

    /** CSV graph source; note that `hiveDatabaseName` is not handed to it. */
    def csv: FSGraphSource =
      new CsvGraphSource(rootPath, filesPerTable)

    /** Graph source using the Parquet file format. */
    def parquet: FSGraphSource =
      new FSGraphSource(rootPath, FileFormat.parquet, hiveDatabaseName, filesPerTable)

    /** Graph source using the ORC file format, with the `EscapeAtSymbol` trait mixed in. */
    def orc: FSGraphSource =
      new FSGraphSource(rootPath, FileFormat.orc, hiveDatabaseName, filesPerTable) with EscapeAtSymbol
  }

  /**
    * Creates a data sink that writes a property graph in the Neo4j bulk import CSV format
    * (see [[https://neo4j.com/docs/operations-manual/current/tools/import/]]). The data sink generates a shell script
    * within the graph output folder that simplifies the import process.
    *
    * @param rootPath       Directory in which the graph is stored
    * @param arrayDelimiter Delimiter for array properties (defaults to `"|"`)
    * @param morpheus       Morpheus session
    * @return Neo4j Bulk CSV data sink
    */
  def neo4jBulk(rootPath: String, arrayDelimiter: String = "|")(implicit morpheus: MorpheusSession): Neo4jBulkCSVDataSink =
    new Neo4jBulkCSVDataSink(rootPath, arrayDelimiter)
}

/**
  * Factories for graph sources that are accessed via Cypher.
  */
object CypherGraphSources {
  /**
    * Creates a Neo4j Property Graph Data Source
    *
    * @param config                     Neo4j connection configuration
    * @param maybeSchema                Optional Neo4j schema to avoid computation on Neo4j server
    * @param omitIncompatibleProperties If set to true, import failures do not throw runtime exceptions but omit the unsupported
    *                                   properties instead and log warnings
    * @param morpheus                   Morpheus session
    * @return Neo4j Property Graph Data Source
    */
  def neo4j(config: Neo4jConfig, maybeSchema: Option[PropertyGraphSchema] = None, omitIncompatibleProperties: Boolean = false)
    (implicit morpheus: MorpheusSession): Neo4jPropertyGraphDataSource =
    Neo4jPropertyGraphDataSource(config, maybeSchema = maybeSchema, omitIncompatibleProperties = omitIncompatibleProperties)

  /**
    * Creates a Neo4j Property Graph Data Source whose schema is loaded from a JSON file.
    *
    * The file content is read as UTF-8 and parsed with `PropertyGraphSchema.fromJson`; the resulting
    * schema is always passed to the data source as `Some(schema)`.
    *
    * @param config                     Neo4j connection configuration
    * @param schemaFile                 Path of the file containing the JSON representation of the schema
    * @param omitIncompatibleProperties If set to true, import failures do not throw runtime exceptions but omit the unsupported
    *                                   properties instead and log warnings
    * @param morpheus                   Morpheus session
    * @return Neo4j Property Graph Data Source
    */
  def neo4j(config: Neo4jConfig, schemaFile: String, omitIncompatibleProperties: Boolean)
    (implicit morpheus: MorpheusSession): Neo4jPropertyGraphDataSource = {
    // Read with an explicit charset instead of the platform default so the result does not depend on
    // the JVM's file.encoding; this matches the other file reads in this file.
    // NOTE(review): `using` is presumably a loan-pattern helper that closes the source — confirm in FileSystemUtils.
    val schemaJson = using(Source.fromFile(Paths.get(schemaFile).toUri, "UTF-8")) { source =>
      source.getLines().mkString(Properties.lineSeparator)
    }
    val schema = PropertyGraphSchema.fromJson(schemaJson)
    Neo4jPropertyGraphDataSource(config, maybeSchema = Some(schema), omitIncompatibleProperties = omitIncompatibleProperties)
  }
}

import org.opencypher.morpheus.api.io.sql.IdGenerationStrategy._

/**
  * Factories for property graph data sources that are defined by a Graph DDL and backed by SQL data sources.
  */
object SqlGraphSources {

  /**
    * Intermediate builder that combines a Graph DDL with an id generation strategy and produces a
    * [[SqlPropertyGraphDataSource]] once the SQL data source configurations are supplied.
    *
    * @param graphDdl             Graph DDL describing the graph
    * @param idGenerationStrategy strategy handed to the created data source
    * @param morpheus             Morpheus session
    */
  case class SqlGraphSourceFactory(graphDdl: GraphDdl, idGenerationStrategy: IdGenerationStrategy)
    (implicit morpheus: MorpheusSession) {

    /**
      * Returns a copy of this factory that uses the given id generation strategy.
      *
      * @param idGenerationStrategy strategy to use instead of the current one
      * @return new factory with the same Graph DDL
      */
    def withIdGenerationStrategy(idGenerationStrategy: IdGenerationStrategy): SqlGraphSourceFactory =
      this.copy(idGenerationStrategy = idGenerationStrategy)

    /**
      * Creates the data source using configurations read from a file.
      *
      * The file is read as UTF-8, its lines are joined with the platform line separator and the
      * result is parsed with `SqlDataSourceConfig.dataSourcesFromString`.
      *
      * @param sqlDataSourceConfigsPath path of the file containing the SQL data source configurations
      * @return SQL property graph data source
      */
    def withSqlDataSourceConfigs(sqlDataSourceConfigsPath: String): SqlPropertyGraphDataSource = {
      val configText = using(Source.fromFile(sqlDataSourceConfigsPath, "UTF-8")) { source =>
        source.getLines().mkString(Properties.lineSeparator)
      }
      val parsedConfigs = SqlDataSourceConfig.dataSourcesFromString(configText)
      withSqlDataSourceConfigs(parsedConfigs)
    }

    /**
      * Creates the data source using the given name/configuration pairs.
      *
      * @param sqlDataSourceConfigs pairs of a name and its SQL data source configuration; converted to a map
      * @return SQL property graph data source
      */
    def withSqlDataSourceConfigs(sqlDataSourceConfigs: (String, SqlDataSourceConfig)*): SqlPropertyGraphDataSource =
      withSqlDataSourceConfigs(sqlDataSourceConfigs.toMap)

    /**
      * Creates the data source from this factory's Graph DDL and id generation strategy and the given
      * configurations.
      *
      * @param sqlDataSourceConfigs SQL data source configurations keyed by name
      * @return SQL property graph data source
      */
    def withSqlDataSourceConfigs(sqlDataSourceConfigs: Map[String, SqlDataSourceConfig]): SqlPropertyGraphDataSource =
      SqlPropertyGraphDataSource(graphDdl, sqlDataSourceConfigs, idGenerationStrategy)
  }

  /**
    * Creates a factory from a Graph DDL file. The whole file is read as UTF-8 and passed to `GraphDdl`.
    *
    * @param graphDdlPath path of the Graph DDL file
    * @param morpheus     Morpheus session
    * @return factory using the `SerializedId` id generation strategy
    */
  def apply(graphDdlPath: String)(implicit morpheus: MorpheusSession): SqlGraphSourceFactory = {
    val ddlText = using(Source.fromFile(graphDdlPath, "UTF-8")) { source =>
      source.mkString
    }
    SqlGraphSources(GraphDdl(ddlText))
  }

  /**
    * Creates a factory from an already constructed Graph DDL.
    *
    * @param graphDdl Graph DDL describing the graph
    * @param morpheus Morpheus session
    * @return factory using the `SerializedId` id generation strategy
    */
  def apply(graphDdl: GraphDdl)(implicit morpheus: MorpheusSession): SqlGraphSourceFactory =
    SqlGraphSourceFactory(graphDdl, SerializedId)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy