
pigpen.map.clj Maven / Gradle / Ivy
The newest version!
;;
;;
;; Copyright 2013-2015 Netflix, Inc.
;;
;; Licensed under the Apache License, Version 2.0 (the "License");
;; you may not use this file except in compliance with the License.
;; You may obtain a copy of the License at
;;
;; http://www.apache.org/licenses/LICENSE-2.0
;;
;; Unless required by applicable law or agreed to in writing, software
;; distributed under the License is distributed on an "AS IS" BASIS,
;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;; See the License for the specific language governing permissions and
;; limitations under the License.
;;
;;
(ns pigpen.map
"Commands to transform data.
Note: Most of these are present in pigpen.core. Normally you should use those instead.
"
(:refer-clojure :exclude [map mapcat map-indexed sort sort-by])
(:require [pigpen.extensions.core :refer [pp-str]]
[pigpen.raw :as raw]
[pigpen.code :as code]))
;; Ask the compiler to emit a warning whenever it has to fall back to
;; reflection for Java interop in the code compiled after this point.
(set! *warn-on-reflection* true)
(defn map*
  "Function form of pigpen.core/map. Rather than a literal function, it takes
  quoted code for f, so the function to apply can be picked at runtime.

  Arguments:
    f        - quoted function code (must be truthy); its arity is checked
               against the number of :fields in relation
    opts     - optional map of options handed to pigpen.raw/bind$
               (defaults to {})
    relation - the source relation; must be a map

  Examples:

    (defn do-stuff [f data]
      (map* f data))

    (do-stuff 'inc data)

    (do-stuff
      (pigpen.core.fn/trap
        (fn [x] (* x x)))
      data)

  The example above would not work with pigpen.core/map, because f would be
  compiled before do-stuff is called.

  See also: pigpen.core/map, pigpen.core.fn/trap
  "
  {:added "0.3.0"}
  ([f relation]
   (map* f {} relation))
  ([f opts relation]
   {:pre [(map? relation) f]}
   (let [field-count (count (:fields relation))]
     ;; Validate f against the relation's field count before building the bind.
     (code/assert-arity f field-count)
     (let [bind-code `(pigpen.runtime/map->bind ~f)]
       (raw/bind$ bind-code opts relation)))))
(defmacro map
  "Produces a relation in which f has been applied to every item of the source
  relation. f should be a function of one argument.

  The macro expands to a call to map*, passing f wrapped in code/trap and the
  pp-str rendering of the form f as the :description option.

  Example:

    (pig/map inc foo)
    (pig/map (fn [x] (* x x)) foo)

  Note: Unlike clojure.core/map, pigpen.core/map accepts a single relation
  only, because pigpen has no defined order. To combine sets of data, see
  pig/join, pig/cogroup, and pig/union.

  See also: pigpen.core/mapcat, pigpen.core/map-indexed, pigpen.core/join,
            pigpen.core/cogroup, pigpen.core/union
  "
  {:added "0.1.0"}
  [f relation]
  ;; The description is computed once, at macro-expansion time, from the
  ;; unevaluated form of f.
  (let [description (pp-str f)]
    `(map* (code/trap ~f) {:description ~description} ~relation)))
(defn mapcat*
  "Function form of pigpen.core/mapcat. Rather than a literal function, it
  takes quoted code for f.

  Arguments:
    f        - quoted function code (must be truthy); its arity is checked
               against the number of :fields in relation
    opts     - optional map of options handed to pigpen.raw/bind$
               (defaults to {})
    relation - the source relation; must be a map

  Examples:

    (mapcat*
      (trap (fn [x] [(dec x) x (inc x)]))
      data)

  See also: pigpen.core/mapcat, pigpen.core.fn/trap
  "
  {:added "0.3.0"}
  ([f relation]
   (mapcat* f {} relation))
  ([f opts relation]
   {:pre [(map? relation) f]}
   (let [field-count (count (:fields relation))]
     ;; Validate f against the relation's field count before building the bind.
     (code/assert-arity f field-count)
     (let [bind-code `(pigpen.runtime/mapcat->bind ~f)]
       (raw/bind$ bind-code opts relation)))))
(defmacro mapcat
  "Applies f to each item in relation and concatenates (flattens) the results
  into a single relation. f should therefore return a collection.

  The macro expands to a call to mapcat*, passing f wrapped in code/trap and
  the pp-str rendering of the form f as the :description option.

  Example:

    (pig/mapcat (fn [x] [(dec x) x (inc x)]) foo)

  See also: pigpen.core/map, pigpen.core/map-indexed
  "
  {:added "0.1.0"}
  [f relation]
  ;; The description is computed once, at macro-expansion time, from the
  ;; unevaluated form of f.
  (let [description (pp-str f)]
    `(mapcat* (code/trap ~f) {:description ~description} ~relation)))
(defn map-indexed*
  "Function form of pigpen.core/map-indexed. Rather than a literal function,
  it takes quoted code for f.

  Arguments:
    f        - quoted function code (must be truthy); checked to accept two
               arguments
    opts     - optional map of options handed to pigpen.raw/rank$
               (defaults to {})
    relation - the source relation; must be a map

  The relation is first passed through pigpen.raw/rank$ and the result is
  then bound to f with pigpen.raw/bind$.

  Examples:

    (map-indexed*
      (trap (fn [i x] (* i x)))
      data)

  See also: pigpen.core/map-indexed, pigpen.core.fn/trap
  "
  {:added "0.3.0"}
  ([f relation]
   (map-indexed* f {} relation))
  ([f opts relation]
   {:pre [(map? relation) f]}
   (code/assert-arity f 2)
   ;; opts go to the rank step; the bind step always receives an empty map.
   (let [ranked (raw/rank$ opts relation)]
     (raw/bind$ `(pigpen.runtime/map->bind ~f) {} ranked))))
(defmacro map-indexed
  "Produces a relation by applying f to the index and value of every item in
  the source relation. f should be a function of two arguments: the index and
  the value. If you require sequential ids, use option {:dense true}.

  The macro expands to a call to map-indexed*, passing f wrapped in code/trap
  and opts with the pp-str rendering of the form f added as :description.

  Example:

    (pig/map-indexed (fn [i x] (* i x)) foo)
    (pig/map-indexed vector {:dense true} foo)

  Options:

    :dense - force sequential ids (pig only)

  Note: If you require sorted data, use sort or sort-by immediately before
        this command.

  Note: Pig will assign the same index to any equal values, regardless of how
        many times they appear.

  Note: The cascading implementation of map-indexed uses a single reducer

  See also: pigpen.core/sort, pigpen.core/sort-by, pigpen.core/map,
            pigpen.core/mapcat
  "
  {:added "0.1.0"}
  ([f relation]
   ;; No options supplied: re-expand through the three-argument arity.
   `(map-indexed ~f {} ~relation))
  ([f opts relation]
   (let [description (pp-str f)]
     `(map-indexed* (code/trap ~f)
                    (assoc ~opts :description ~description)
                    ~relation))))
(defn sort*
  "Similar to pigpen.core/sort-by, but is a function and takes a quoted
  function as an argument.

  Arities:
    [comp relation]                   - uses clojure.core/identity as the
                                        key selector and {} as opts
    [key-selector comp relation]      - uses {} as opts
    [key-selector comp opts relation]

  Arguments:
    key-selector - quoted function code used to build the key for each item
    comp         - the sort direction; must be :asc or :desc
    opts         - map of options handed to pigpen.raw/sort$
    relation     - the source relation; must be a map

  The relation is first bound with pigpen.runtime/key-selector->bind, aliased
  to the fields key and value, and then sorted on key with pigpen.raw/sort$.

  Examples:

    (sort*
      (trap (fn [x] (* x x)))
      :asc
      data)

  See also: pigpen.core/sort, pigpen.core/sort-by, pigpen.core.fn/trap
  "
  {:added "0.3.0"}
  ([comp relation]
   ;; Use the fully qualified symbol, matching what the `sort` macro passes,
   ;; so the quoted code does not depend on how a bare `identity` resolves
   ;; wherever it is later evaluated.
   (sort* 'clojure.core/identity comp {} relation))
  ([key-selector comp relation]
   (sort* key-selector comp {} relation))
  ([key-selector comp opts relation]
   {:pre [(map? relation) (#{:asc :desc} comp)]}
   (->> relation
        (raw/bind$ `(pigpen.runtime/key-selector->bind ~key-selector)
                   {:field-type :native-key-frozen-val
                    :alias      ['key 'value]})
        (raw/sort$ 'key comp opts))))
(defmacro sort
  "Sorts the data, optionally with a comparator and a map of options.

  The macro expands to a call to sort* with clojure.core/identity as the key
  selector. The comparator form is passed quoted, i.e. unevaluated.

  Example:

    (pig/sort foo)
    (pig/sort :desc foo)
    (pig/sort :desc {:parallel 20} foo)

  Notes:
    The default comparator is :asc (ascending sort order).
    Only :asc and :desc are supported comparators.
    The values must be primitive values (string, int, etc).
    Maps, vectors, etc are not supported.

  Options:

    :parallel - The degree of parallelism to use (pig only)

  Note: The cascading implementation of sort uses a single reducer

  See also: pigpen.core/sort-by
  "
  {:added "0.1.0"}
  ([relation]
   ;; No comparator supplied: default to ascending order.
   `(sort :asc {} ~relation))
  ([comp relation]
   `(sort ~comp {} ~relation))
  ([comp opts relation]
   `(sort* 'clojure.core/identity '~comp ~opts ~relation)))
(defmacro sort-by
  "Sorts the data by the result of key-fn, optionally with a comparator and a
  map of options.

  The macro expands to a call to sort*, passing key-fn wrapped in code/trap
  and opts with the pp-str rendering of the form key-fn added as
  :description. The comparator form is passed quoted, i.e. unevaluated.

  Example:

    (pig/sort-by :a foo)
    (pig/sort-by #(count %) :desc foo)
    (pig/sort-by (fn [x] (* x x)) :desc {:parallel 20} foo)

  Notes:
    The default comparator is :asc (ascending sort order).
    Only :asc and :desc are supported comparators.
    The key-fn values must be primitive values (string, int, etc).
    Maps, vectors, etc are not supported.

  Options:

    :parallel - The degree of parallelism to use (pig only)

  Note: The cascading implementation of sort-by uses a single reducer

  See also: pigpen.core/sort
  "
  {:added "0.1.0"}
  ([key-fn relation]
   ;; No comparator supplied: default to ascending order.
   `(sort-by ~key-fn :asc {} ~relation))
  ([key-fn comp relation]
   `(sort-by ~key-fn ~comp {} ~relation))
  ([key-fn comp opts relation]
   (let [description (pp-str key-fn)]
     `(sort* (code/trap ~key-fn)
             '~comp
             (assoc ~opts :description ~description)
             ~relation))))
© 2015 - 2025 Weber Informatics LLC | Privacy Policy