All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.spotify.scio.neo4j.syntax.SCollectionSyntax.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2022 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.neo4j.syntax

import com.spotify.scio.coders.{Coder, CoderMaterializer}
import com.spotify.scio.io.ClosedTap
import com.spotify.scio.neo4j.{Neo4jIO, Neo4jOptions}
import com.spotify.scio.values.SCollection
import magnolify.neo4j.ValueType
import org.apache.beam.sdk.io.{neo4j => beam}

/**
 * Neo4J extension methods for [[com.spotify.scio.values.SCollection SCollection]].
 *
 * @tparam T
 *   element type of the wrapped [[SCollection]]
 */
final class Neo4jSCollectionOps[T](private val self: SCollection[T]) extends AnyVal {

  import Neo4jIO._

  /**
   * Run the supplied Cypher query against a Neo4j database once per element of this
   * [[SCollection]], in parallel. The rows returned by every invocation are gathered into the
   * output [[SCollection]], much like a `flatMap` whose function is the query execution (yielding
   * an `Iterable` of results per input element).
   *
   * Each invocation is parameterized by converting the input element with
   * [[magnolify.neo4j.ValueType.to]]. The parameter names used inside the Cypher [[String]] have
   * to line up with the keys of the [[org.neo4j.driver.Value]] produced by the (implicit)
   * [[ValueType]] of the input type. For a case class input this means the query parameters are
   * named after the case class fields.
   *
   * @see
   *   ''Reading from Neo4j'' in the
   *   [[https://beam.apache.org/releases/javadoc/current/org/apache/beam/sdk/io/neo4j/Neo4jIO.html Beam `Neo4jIO` documentation]]
   * @see
   *   the Cypher query parameter syntax described in
   *   [[https://neo4j.com/docs/cypher-manual/current/syntax/parameters Cypher Manual (Syntax / Parameters)]]
   * @tparam U
   *   type each result row is mapped to via its [[ValueType]]
   * @param neo4jConf
   *   options for configuring a Neo4J driver (connection, session and transaction settings)
   * @param cypher
   *   parameterized Cypher query
   * @param neo4jInType
   *   converts each input element into the query parameters
   * @param neo4jOutType
   *   converts each returned row into a `U`
   * @param coder
   *   coder for the output elements
   * @return
   *   [[SCollection]] holding the union of the results of one parameterized query invocation per
   *   element of the original [[SCollection]]
   */
  def neo4jCypher[U](
    neo4jConf: Neo4jOptions,
    cypher: String
  )(implicit
    neo4jInType: ValueType[T],
    neo4jOutType: ValueType[U],
    coder: Coder[U]
  ): SCollection[U] = {
    // Stage 1: driver, session and transaction settings, all read from the supplied options.
    val connected = beam.Neo4jIO
      .readAll[T, U]()
      .withDriverConfiguration(dataSourceConfiguration(neo4jConf.connectionOptions))
      .withSessionConfig(neo4jConf.sessionConfig)
      .withTransactionConfig(neo4jConf.transactionConfig)

    // Stage 2: the query itself plus the conversions into parameters and out of result rows.
    val readAll = connected
      .withCypher(cypher)
      .withParametersFunction(element => neo4jInType.to(element).asMap())
      .withRowMapper(row => neo4jOutType.from(row))
      .withCoder(CoderMaterializer.beam(self.context, coder))

    self.applyTransform(readAll)
  }

  /**
   * Write the elements of this SCollection to a Neo4J database.
   *
   * @param neo4jOptions
   *   options for configuring a Neo4J driver
   * @param unwindCypher
   *   Neo4J cypher statement written as an
   *   [[https://neo4j.com/docs/cypher-manual/current/clauses/unwind/#unwind-creating-nodes-from-a-list-parameter UNWIND parameter]]
   *   statement
   * @param batchSize
   *   batch size used when executing the unwind cypher query; defaults to
   *   `WriteParam.DefaultBatchSize` (5000)
   * @param neo4jType
   *   [[ValueType]] for the element type, required by the underlying `Neo4jIO`
   * @param coder
   *   coder for the element type
   * @return
   *   the [[ClosedTap]] produced by the write, typed as `ClosedTap[Nothing]`
   */
  def saveAsNeo4j(
    neo4jOptions: Neo4jOptions,
    unwindCypher: String,
    batchSize: Long = WriteParam.DefaultBatchSize
  )(implicit neo4jType: ValueType[T], coder: Coder[T]): ClosedTap[Nothing] = {
    val io = Neo4jIO[T](neo4jOptions, unwindCypher)
    val writeParam = WriteParam(batchSize)
    self.write(io)(writeParam)
  }
}

/** Mix-in that makes the Neo4J extension methods available on any [[SCollection]]. */
trait SCollectionSyntax {

  /**
   * Implicitly wraps an [[SCollection]] in a [[Neo4jSCollectionOps]].
   *
   * @param sc
   *   the collection to wrap
   * @return
   *   a [[Neo4jSCollectionOps]] wrapping `sc`
   */
  implicit def neo4jSCollectionOps[T](sc: SCollection[T]): Neo4jSCollectionOps[T] =
    new Neo4jSCollectionOps[T](sc)
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy