All downloads are free. Search and download functionality uses the official Maven repository.

pigpen.pig.clj Maven / Gradle / Ivy

There is a newer version: 0.3.3
Show newest version
;;
;;
;;  Copyright 2013-2015 Netflix, Inc.
;;
;;     Licensed under the Apache License, Version 2.0 (the "License");
;;     you may not use this file except in compliance with the License.
;;     You may obtain a copy of the License at
;;
;;         http://www.apache.org/licenses/LICENSE-2.0
;;
;;     Unless required by applicable law or agreed to in writing, software
;;     distributed under the License is distributed on an "AS IS" BASIS,
;;     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;     See the License for the specific language governing permissions and
;;     limitations under the License.
;;
;;

(ns pigpen.pig
  "Functions to convert a PigPen query into a Pig script.
"
  (:require [pigpen.raw :as raw]
            [pigpen.pig.oven :as oven]
            [pigpen.pig.script :as script]))

;; Ask the compiler to emit a warning whenever a Java interop call in this
;; namespace cannot be resolved at compile time and would fall back to reflection.
(set! *warn-on-reflection* true)

(defn generate-script
  "Builds a Pig script for the supplied relation and returns the script text as
a string. The relation is first baked (pigpen.pig.oven/bake) using the options
given, and the resulting commands are then rendered into a script.

  Any relation is accepted - it does not need to be an output. A script without
any store commands won't do much, though. To emit several store commands in one
script, combine them with pigpen.core/store-many first.

  Arities:

    [query]      - same as calling with an empty options map
    [opts query] - opts is a map of options, described below

  Example:

    (generate-script (pig/store-clj \"output.clj\" foo))
    (generate-script {:debug \"/temp/\"} (pig/store-clj \"output.clj\" foo))

  Options:

    :debug - Enables debugging, which writes the output of every step to a file.
             The value is a path to place the debug output.

    :dedupe - Set to false to disable command deduping.

    :pigpen-jar-location - The location where your uberjar resides.
                           Defaults to 'pigpen.jar'.

  See also: pigpen.pig/write-script, pigpen.core/store-many
"
  {:added "0.1.0"}
  ([query]
    ;; No options supplied: delegate with an empty options map.
    (generate-script {} query))
  ([opts query]
    (let [commands (oven/bake opts query)]
      (script/commands->script commands))))

(defn write-script
  "Builds a Pig script for the supplied relation (see generate-script) and
writes the script text to location using spit.

  Any relation is accepted - it does not need to be an output. A script without
any store commands won't do much, though. To emit several store commands in one
script, combine them with pigpen.core/store-many first.

  Arities:

    [location query]      - same as calling with an empty options map
    [location opts query] - opts is a map of options, described below

  Example:

    (write-script \"my-script.pig\" (pig/store-clj \"output.clj\" foo))
    (write-script \"my-script.pig\" {:debug \"/temp/\"} (pig/store-clj \"output.clj\" foo))

  Options:

    :debug - Enables debugging, which writes the output of every step to a file.
             The value is a path to place the debug output.

    :dedupe - Set to false to disable command deduping.

    :pigpen-jar-location - The location where your uberjar resides.
                           Defaults to 'pigpen.jar'.

  See also: pigpen.pig/generate-script, pigpen.core/store-many
"
  {:added "0.1.0"}
  ([location query]
    ;; No options supplied: delegate with an empty options map.
    (write-script location {} query))
  ([location opts query]
    (let [pig-script (generate-script opts query)]
      (spit location pig-script))))

(defn set-options
  "Attaches Pig options to a relation. `opts` is a map whose keys may be
strings, keywords, or symbols and whose values may be anything; str is called
on values.

  The supplied map is merged into whatever is already stored under
[:opts :pig-options] of the relation, so keys in `opts` win over existing
entries. Returns the updated relation.

  Example:

    (set-options {:pig.maxCombinedSplitSize 1000000} relation)

  Note: Pig options are global and apply to the entire script.
"
  [opts relation]
  (let [merge-opts (fn [existing] (merge existing opts))]
    (update-in relation [:opts :pig-options] merge-opts)))




© 2015 - 2024 Weber Informatics LLC | Privacy Policy