com.basho.riak.client.query.MapReduce Maven / Gradle / Ivy
/*
* This file is provided to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.basho.riak.client.query;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import com.basho.riak.client.IRiakClient;
import com.basho.riak.client.RiakException;
import com.basho.riak.client.operations.RiakOperation;
import com.basho.riak.client.query.functions.Function;
import com.basho.riak.client.query.serialize.FunctionToJson;
import com.basho.riak.client.raw.RawClient;
import com.basho.riak.client.raw.query.MapReduceSpec;
import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
* An operation for defining and running a Map/Reduce query on Riak.
*
*
* See Map/Reduce for details.
*
* @author russell
*
* @see IRiakClient#mapReduce()
* @see IRiakClient#mapReduce(String)
*/
public abstract class MapReduce implements RiakOperation<MapReduceResult> {

    /**
     * Shared Jackson factory used to create the generator for every job spec.
     * Jackson documents {@link JsonFactory} as thread-safe after configuration,
     * so one instance is reused instead of allocating a new one per call.
     */
    private static final JsonFactory JSON_FACTORY = new JsonFactory();

    /**
     * Shared codec installed on each generator so that arbitrary phase
     * arguments can be written with {@code writeObjectField}. Jackson documents
     * {@link ObjectMapper} as thread-safe after configuration.
     */
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();

    private final RawClient client;

    /**
     * The phases of the job, in the order they were added. Also used as the
     * monitor guarding reads and writes of the collection, hence final.
     */
    private final Collection<MapReducePhase> phases = new LinkedList<MapReducePhase>();

    /** Optional job timeout; {@code null} means no timeout field is written. */
    private Long timeout;

    /**
     * Create the MapReduce operation with the {@link RawClient} to delegate to.
     *
     * @param client
     *            a {@link RawClient}
     *
     * @see IRiakClient#mapReduce()
     * @see IRiakClient#mapReduce(String)
     */
    public MapReduce(RawClient client) {
        this.client = client;
    }

    /**
     * Run the Map/Reduce job against the {@link RawClient} the operation was
     * constructed with.
     *
     * Calls {@link #validate()}, serializes the job to JSON and hands the
     * resulting {@link MapReduceSpec} to the client.
     *
     * @return a {@link MapReduceResult} containing the results of the query.
     * @throws RiakException
     *             if the job cannot be serialized or the client raises an
     *             {@link IOException} (which is wrapped as the cause).
     */
    public MapReduceResult execute() throws RiakException {
        validate();
        final MapReduceSpec spec = new MapReduceSpec(writeSpec());
        try {
            return client.mapReduce(spec);
        } catch (IOException e) {
            throw new RiakException(e);
        }
    }

    /**
     * Check that this map/reduce job is valid.
     *
     * This implementation is a no-op: a job is no longer required to have at
     * least one phase. The method is kept so that existing subclasses that
     * override or call it keep compiling.
     */
    protected void validate() {
        // Intentionally empty; see Javadoc.
    }

    /**
     * Creates the JSON string of the M/R job for submitting to the
     * {@link RawClient}.
     *
     * The document has the shape
     * {@code {"inputs": ..., "query": [ phases... ], "timeout": n}} where the
     * {@code timeout} field is only present when a timeout was set.
     *
     * TODO re-evaluate this method, look for something smaller and more elegant.
     *
     * @return a String of JSON
     * @throws RiakException
     *             if, for some reason, we can't create a JSON string.
     */
    private String writeSpec() throws RiakException {
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        try {
            final JsonGenerator jg = JSON_FACTORY.createJsonGenerator(out, JsonEncoding.UTF8);
            try {
                jg.setCodec(OBJECT_MAPPER);

                jg.writeStartObject();

                jg.writeFieldName("inputs");
                writeInput(jg);

                jg.writeFieldName("query");
                jg.writeStartArray();
                writePhases(jg);
                jg.writeEndArray();

                if (timeout != null) {
                    jg.writeNumberField("timeout", timeout);
                }

                jg.writeEndObject();
                jg.flush();
            } finally {
                // Always close the generator, including on failure, so it is
                // not left open after the spec has been produced.
                jg.close();
            }
            return out.toString("UTF8");
        } catch (IOException e) {
            throw new RiakException(e);
        }
    }

    /**
     * Write the collection of phases to the json output generator.
     *
     * Each phase becomes an object keyed by its type, e.g.
     * {@code {"map": {..., "keep": false}}}. The {@code keep} flag is resolved
     * by {@link #isKeepResult(boolean, Boolean)}.
     *
     * @param jg
     *            a {@link JsonGenerator}
     * @throws IOException
     *             if the generator fails to write
     */
    private void writePhases(JsonGenerator jg) throws IOException {
        synchronized (phases) {
            final int phaseCount = phases.size();
            int position = 0;

            for (MapReducePhase phase : phases) {
                position++;

                jg.writeStartObject();
                jg.writeFieldName(phase.getType().toString());
                jg.writeStartObject();

                switch (phase.getType()) {
                case MAP:
                case REDUCE:
                    // Both map and reduce phases are handled through the
                    // MapPhase view: a function plus an optional argument.
                    final MapPhase functionPhase = (MapPhase) phase;
                    FunctionToJson.newWriter(functionPhase.getPhaseFunction(), jg).write();
                    if (functionPhase.getArg() != null) {
                        jg.writeObjectField("arg", functionPhase.getArg());
                    }
                    break;
                case LINK:
                    final LinkPhase linkPhase = (LinkPhase) phase;
                    jg.writeStringField("bucket", linkPhase.getBucket());
                    jg.writeStringField("tag", linkPhase.getTag());
                    break;
                default:
                    // Any other phase type contributes no type-specific fields.
                    break;
                }

                // The final phase's results are returned unless the phase
                // explicitly says otherwise.
                final boolean isLastPhase = position == phaseCount;
                jg.writeBooleanField("keep", isKeepResult(isLastPhase, phase.isKeep()));

                jg.writeEndObject();
                jg.writeEndObject();
            }
        }
    }

    /**
     * Decide if a map/reduce phase result should be kept (returned) or not.
     *
     * @param isLastPhase
     *            is the phase being considered the last phase in an m/r job?
     * @param phaseKeepValue
     *            the Boolean value from a phase (null|true|false)
     * @return {@code phaseKeepValue} if it is not null, otherwise
     *         {@code isLastPhase}
     */
    private boolean isKeepResult(boolean isLastPhase, Boolean phaseKeepValue) {
        return phaseKeepValue != null ? phaseKeepValue : isLastPhase;
    }

    /**
     * Append a phase to the end of the job under the {@code phases} lock.
     *
     * @param phase
     *            the phase to append
     * @return this
     */
    private MapReduce addPhase(MapReducePhase phase) {
        synchronized (phases) {
            phases.add(phase);
        }
        return this;
    }

    /**
     * Set the operation's timeout. The value is written verbatim as the
     * {@code timeout} field of the job spec.
     *
     * @param timeout
     *            the timeout value (presumably milliseconds, as Riak expects
     *            - TODO confirm)
     * @return this
     */
    public MapReduce timeout(long timeout) {
        this.timeout = timeout;
        return this;
    }

    /**
     * Add {@link MapPhase} to the query
     *
     * @param phaseFunction
     *            the {@link Function}
     * @param keep
     *            keep the results and return them with the query results?
     * @return this
     */
    public MapReduce addMapPhase(Function phaseFunction, boolean keep) {
        return addPhase(new MapPhase(phaseFunction, keep));
    }

    /**
     * Add a MapPhase
     *
     * @param phaseFunction
     *            the {@link Function}
     * @param arg
     *            an argument for the phase; if not null it is serialized as
     *            the phase's {@code arg} field using the generator's
     *            {@link ObjectMapper} codec
     * @param keep
     *            if the result should be returned or merely provide input for
     *            the next phase.
     * @return this
     */
    public MapReduce addMapPhase(Function phaseFunction, Object arg, boolean keep) {
        return addPhase(new MapPhase(phaseFunction, arg, keep));
    }

    /**
     * Add a MapPhase
     *
     * @param phaseFunction
     *            the {@link Function}
     * @param arg
     *            an argument for the phase; if not null it is serialized as
     *            the phase's {@code arg} field using the generator's
     *            {@link ObjectMapper} codec
     * @return this
     */
    public MapReduce addMapPhase(Function phaseFunction, Object arg) {
        return addPhase(new MapPhase(phaseFunction, arg));
    }

    /**
     * Add a MapPhase
     *
     * @param phaseFunction
     *            the {@link Function}
     * @return this
     */
    public MapReduce addMapPhase(Function phaseFunction) {
        return addPhase(new MapPhase(phaseFunction));
    }

    /**
     * Add {@link ReducePhase} to the query
     *
     * @param phaseFunction
     *            the {@link Function}
     * @param keep
     *            keep the results and return them with the query results?
     * @return this
     */
    public MapReduce addReducePhase(Function phaseFunction, boolean keep) {
        return addPhase(new ReducePhase(phaseFunction, keep));
    }

    /**
     * Add a {@link ReducePhase}
     *
     * @param phaseFunction
     *            the {@link Function}
     * @param arg
     *            an argument for the phase; if not null it is serialized as
     *            the phase's {@code arg} field using the generator's
     *            {@link ObjectMapper} codec
     * @param keep
     *            if the result should be returned or merely provide input for
     *            the next phase.
     * @return this
     */
    public MapReduce addReducePhase(Function phaseFunction, Object arg, boolean keep) {
        return addPhase(new ReducePhase(phaseFunction, arg, keep));
    }

    /**
     * Add a {@link ReducePhase}
     *
     * @param phaseFunction
     *            the {@link Function}
     * @param arg
     *            an argument for the phase; if not null it is serialized as
     *            the phase's {@code arg} field using the generator's
     *            {@link ObjectMapper} codec
     * @return this
     */
    public MapReduce addReducePhase(Function phaseFunction, Object arg) {
        return addPhase(new ReducePhase(phaseFunction, arg));
    }

    /**
     * Add a {@link ReducePhase}
     *
     * @param phaseFunction
     *            the {@link Function}
     * @return this
     */
    public MapReduce addReducePhase(Function phaseFunction) {
        return addPhase(new ReducePhase(phaseFunction));
    }

    /**
     * Add a Link Phase that points to {@code bucket} / {@code tag}.
     *
     * @param bucket
     *            the bucket at the end of the link (or "_" or "" for wildcard)
     * @param tag
     *            the tag (or "_", or "" for wildcard)
     * @param keep
     *            to keep the result of this phase and return it at the end of
     *            the operation
     * @return this
     */
    public MapReduce addLinkPhase(String bucket, String tag, boolean keep) {
        return addPhase(new LinkPhase(bucket, tag, keep));
    }

    /**
     * Add a Link Phase that points to {@code bucket} / {@code tag} without an
     * explicit {@code keep} argument. The keep value is whatever
     * {@link LinkPhase} defaults to (previously documented here as
     * {@code false} - TODO confirm against LinkPhase).
     *
     * @param bucket
     *            the bucket at the end of the link (or "_" or "" for wildcard)
     * @param tag
     *            the tag (or "_", or "" for wildcard)
     * @return this
     */
    public MapReduce addLinkPhase(String bucket, String tag) {
        return addPhase(new LinkPhase(bucket, tag));
    }

    /**
     * Override to write the input specification of the M/R job.
     *
     * Called after the {@code "inputs"} field name has been written, so the
     * implementation must write exactly one JSON value.
     *
     * @param jsonGenerator
     *            a Jackson {@link JsonGenerator} to write the input spec to
     * @throws IOException
     *             if the generator fails to write
     */
    protected abstract void writeInput(JsonGenerator jsonGenerator) throws IOException;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy