All Downloads are FREE. Search and download functionalities are using the official Maven repository.

pigpen.cascading.clj Maven / Gradle / Ivy

There is a newer version: 0.3.3
Show newest version
;;
;;
;;  Copyright 2015 Netflix, Inc.
;;
;;     Licensed under the Apache License, Version 2.0 (the "License");
;;     you may not use this file except in compliance with the License.
;;     You may obtain a copy of the License at
;;
;;         http://www.apache.org/licenses/LICENSE-2.0
;;
;;     Unless required by applicable law or agreed to in writing, software
;;     distributed under the License is distributed on an "AS IS" BASIS,
;;     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;     See the License for the specific language governing permissions and
;;     limitations under the License.
;;
;;

(ns pigpen.cascading
    "Functions to convert a PigPen query into a Cascading flow.
"
  (:require [pigpen.cascading.core :as cascading]
            [pigpen.raw :as raw]
            [pigpen.cascading.oven :as oven])
  (:import [cascading.tap Tap]
           [cascading.flow.hadoop HadoopFlowConnector]))

;; ********** Flow **********

(defn generate-flow
  "Bakes the given PigPen query with pigpen.cascading.oven/bake (using an empty
options map) and hands the result, together with a Cascading FlowConnector, to
pigpen.cascading.core/commands->flow. Returns whatever commands->flow returns.

  connector - optional; when omitted a new HadoopFlowConnector is created.
  query     - the PigPen relation to convert.

  Example:

    (generate-flow (pig/store-clj \"output.clj\" foo))
    (generate-flow connector (pig/store-clj \"output.clj\" foo))
"
  {:added "0.3.0"}
  ([query]
    ;; No connector supplied: fall back to a fresh Hadoop connector.
    (let [default-connector (HadoopFlowConnector.)]
      (generate-flow default-connector query)))
  ([connector query]
    (let [baked (oven/bake {} query)]
      (cascading/commands->flow connector baked))))

;; ********** Custom loaders **********

;; TODO this needs to be a macro
(defn load-tap
  "Builds a load command that reads from a Cascading tap, followed by a bind
command that applies f to the loaded fields.

  tap - the Cascading Tap to read from. Its source fields (converted to
        symbols) become the fields of the load command.
  f   - optional; spliced into a pigpen.runtime/map->bind form. Defaults to
        the symbol clojure.core/vector, which collects the tap's source
        fields into a single vector value.

  Example:

    (load-tap tap)
    (load-tap tap (partial zipmap [:a :b :c]))

"
  {:added "0.3.0"}
  ([^Tap tap]
    (load-tap tap 'clojure.core/vector))
  ([^Tap tap f]
    (let [source-fields (mapv symbol (.getSourceFields tap))
          location      (.toString tap)
          ;; The tap itself travels in the opts map under :tap.
          loaded        (raw/load$ location :tap source-fields {:tap tap})]
      (raw/bind$
        `(pigpen.runtime/map->bind ~f)
        {:field-type-in :native}
        loaded))))

;; TODO take a list of field names to project from a map
(defn store-tap
  "Builds a store command that writes relation to a Cascading sink tap.

  tap      - the Cascading Tap to write to. A precondition requires that it
             declares at most one sink field.
  relation - the PigPen relation whose values are stored.

The relation is first passed through an identity bind whose alias is the tap's
sink field (as a symbol) when it declares one, or the relation's own :fields
otherwise."
  {:added "0.3.0"}
  [^Tap tap relation]
  {:pre [(<= (.size (.getSinkFields tap)) 1)]}
  (let [;; Prefer the tap's declared sink field; an empty declaration falls
        ;; back to the fields of the incoming relation.
        out-fields (if-let [sink-syms (seq (map symbol (.getSinkFields tap)))]
                     sink-syms
                     (:fields relation))
        bound      (raw/bind$ `(pigpen.runtime/map->bind identity)
                              {:alias out-fields
                               :field-type :native}
                              relation)]
    (raw/store$ (.toString tap) :tap {:tap tap} bound)))




© 2015 - 2024 Weber Informatics LLC | Privacy Policy