com.basho.riak.client.api.commands.mapreduce.MapReduce Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of riak-client Show documentation
Show all versions of riak-client Show documentation
HttpClient-based client for Riak
The newest version!
/*
* This file is provided to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.basho.riak.client.api.commands.mapreduce;
import com.basho.riak.client.api.RiakException;
import com.basho.riak.client.api.StreamableRiakCommand;
import com.basho.riak.client.api.convert.ConversionException;
import com.basho.riak.client.core.FutureOperation;
import com.basho.riak.client.core.StreamingRiakFuture;
import com.basho.riak.client.core.operations.MapReduceOperation;
import com.basho.riak.client.core.query.functions.Function;
import com.basho.riak.client.core.util.BinaryValue;
import com.fasterxml.jackson.core.JsonEncoding;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.Version;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.fasterxml.jackson.databind.node.ArrayNode;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.TransferQueue;
/**
* Base abstract class for all MapReduce commands.
* See Map/Reduce for details.
*
* @author Dave Rusek
* @since 2.0
*/
public abstract class MapReduce extends StreamableRiakCommand.StreamableRiakCommandWithSameInfo
{
private final MapReduceSpec spec;
@SuppressWarnings("unchecked")
protected MapReduce(MapReduceInput input, Builder builder)
{
this.spec = new MapReduceSpec(input, builder.phases, builder.timeout);
}
@Override
protected MapReduceOperation buildCoreOperation(boolean streamResults)
{
BinaryValue jobSpec;
try
{
String spec = writeSpec();
jobSpec = BinaryValue.create(spec);
}
catch (RiakException e)
{
throw new RuntimeException(e);
}
return new MapReduceOperation.Builder(jobSpec)
.streamResults(streamResults)
.build();
}
@Override
protected Response convertResponse(FutureOperation request,
MapReduceOperation.Response coreResponse)
{
return new Response(coreResponse.getResults());
}
@Override
protected Response createResponse(int timeout, StreamingRiakFuture coreFuture)
{
return new Response(coreFuture, timeout);
}
/**
* Creates the JSON string of the M/R job for submitting to the client
*
* Uses Jackson to write out the JSON string. I'm not very happy with this method, it is a candidate for change.
*
* TODO re-evaluate this method, look for something smaller and more elegant.
*
* @return a String of JSON
* @throws RiakException if, for some reason, we can't create a JSON string.
*/
String writeSpec() throws RiakException
{
final ByteArrayOutputStream out = new ByteArrayOutputStream();
try
{
JsonGenerator jg = new JsonFactory().createGenerator(out, JsonEncoding.UTF8);
jg.setCodec(mrObjectMapper);
List phases = spec.getPhases();
phases.get(phases.size() - 1).setKeep(true);
jg.writeObject(spec);
jg.flush();
return out.toString("UTF-8");
}
catch (IOException e)
{
throw new RiakException(e);
}
}
static ObjectMapper mrObjectMapper = initializeMRObjectMapper();
private static ObjectMapper initializeMRObjectMapper()
{
final ObjectMapper objectMapper = new ObjectMapper();
final SimpleModule specModule = new SimpleModule("SpecModule", Version.unknownVersion());
specModule.addSerializer(LinkPhase.class, new LinkPhaseSerializer());
specModule.addSerializer(FunctionPhase.class, new FunctionPhaseSerializer());
specModule.addSerializer(BucketInput.class, new BucketInputSerializer());
specModule.addSerializer(SearchInput.class, new SearchInputSerializer());
specModule.addSerializer(BucketKeyInput.class, new BucketKeyInputSerializer());
specModule.addSerializer(IndexInput.class, new IndexInputSerializer());
specModule.addSerializer(BinaryValue.class, new BinaryValueSerializer());
objectMapper.registerModule(specModule);
return objectMapper;
}
/**
* Base abstract class for all MapReduce command builders.
*/
protected static abstract class Builder>
{
protected final List phases = new LinkedList<>();
protected Long timeout;
/**
* Set the operations timeout
*
* @param timeout
* @return this
*/
public T timeout(long timeout)
{
this.timeout = timeout;
return self();
}
/**
* Add {@link MapPhase} to the query
*
* @param phaseFunction the {@link Function}
* @param keep keep the results and return them with the query results?
* @return a reference to this object.
*/
public T withMapPhase(Function phaseFunction, boolean keep)
{
synchronized (phases)
{
phases.add(new MapPhase(phaseFunction, keep));
}
return self();
}
/**
* Add a MapPhase
*
* @param phaseFunction the {@link Function}
* @param arg an argument that will be passed to the phase verbatim (Object#toString)
* @param keep if the result should be returned or merely provide input for the next phase.
* @return a reference to this object.
*/
public T withMapPhase(Function phaseFunction, Object arg, boolean keep)
{
synchronized (phases)
{
phases.add(new MapPhase(phaseFunction, arg, keep));
}
return self();
}
/**
* Add a MapPhase
*
* @param phaseFunction the {@link Function}
* @param arg an argument that will be passed to the phase verbatim (Object#toString)
* @return a reference to this object.
*/
public T withMapPhase(Function phaseFunction, Object arg)
{
synchronized (phases)
{
phases.add(new MapPhase(phaseFunction, arg));
}
return self();
}
/**
* Add a MapPhase
*
* @param phaseFunction the {@link Function}
* @return a reference to this object.
*/
public T withMapPhase(Function phaseFunction)
{
synchronized (phases)
{
phases.add(new MapPhase(phaseFunction));
}
return self();
}
/**
* Add {@link ReducePhase} to the query
*
* @param phaseFunction the {@link Function}
* @param keep keep the results and return them with the query results?
* @return a reference to this object.
*/
public T withReducePhase(Function phaseFunction, boolean keep)
{
synchronized (phases)
{
phases.add(new ReducePhase(phaseFunction, keep));
}
return self();
}
/**
* Add a {@link ReducePhase}
*
* @param phaseFunction the {@link Function}
* @param arg an argument that will be passed to the phase verbatim (Object#toString)
* @param keep if the result should be returned or merely provide input for the next phase.
* @return a reference to this object.
*/
public T withReducePhase(Function phaseFunction, Object arg, boolean keep)
{
synchronized (phases)
{
phases.add(new ReducePhase(phaseFunction, arg, keep));
}
return self();
}
/**
* Add a {@link ReducePhase}
*
* @param phaseFunction the {@link Function}
* @param arg an argument that will be passed to the phase verbatim
* @return a reference to this object.
*/
public T withReducePhase(Function phaseFunction, Object arg)
{
synchronized (phases)
{
phases.add(new ReducePhase(phaseFunction, arg));
}
return self();
}
/**
* Add a {@link ReducePhase}
*
* @param phaseFunction
* @return a reference to this object.
*/
public T withReducePhase(Function phaseFunction)
{
synchronized (phases)
{
phases.add(new ReducePhase(phaseFunction));
}
return self();
}
/**
* Add a Link Phase that points to bucket
/ tag
.
*
* @param bucket the bucket at the end of the link (or "_" or "" for wildcard)
* @param tag the tag (or ("_", or "" for wildcard)
* @param keep to keep the result of this phase and return it at the end of the operation
* @return a reference to this object.
*/
public T withLinkPhase(String bucket, String tag, boolean keep)
{
synchronized (phases)
{
phases.add(new LinkPhase(bucket, tag, keep));
}
return self();
}
/**
* Create a Link Phase that points to bucket
/ tag
keep
will be
* false
*
* @param bucket the bucket at the end of the link (or "_" or "" for wildcard)
* @param tag the tag (or ("_", or "" for wildcard)
* @return a reference to this object.
*/
public T withLinkPhase(String bucket, String tag)
{
synchronized (phases)
{
phases.add(new LinkPhase(bucket, tag));
}
return self();
}
protected abstract T self();
}
/**
* Response from a MapReduce command.
*/
public static class Response extends StreamableRiakCommand.StreamableResponse
{
private final Map results;
private final MapReduceResponseIterator responseIterator;
Response(StreamingRiakFuture coreFuture,
int pollTimeout)
{
responseIterator = new MapReduceResponseIterator(coreFuture, pollTimeout);
results = null;
}
public Response(Map results)
{
this.results = results;
responseIterator = null;
}
@Override
public boolean isStreaming()
{
return responseIterator != null;
}
public boolean hasResultForPhase(int i)
{
return results.containsKey(i);
}
public ArrayNode getResultForPhase(int i)
{
return results.get(i);
}
public ArrayNode getResultsFromAllPhases()
{
return flattenResults();
}
public Collection getResultsFromAllPhases(Class resultType)
{
ArrayNode flat = flattenResults();
ObjectMapper mapper = new ObjectMapper();
try
{
return mapper.readValue(flat.toString(),
mapper.getTypeFactory().constructCollectionType(Collection.class, resultType));
}
catch (IOException ex)
{
throw new ConversionException("Could not convert Mapreduce response", ex);
}
}
private ArrayNode flattenResults()
{
final JsonNodeFactory factory = JsonNodeFactory.instance;
ArrayNode flatArray = factory.arrayNode();
for (Map.Entry entry : results.entrySet())
{
flatArray.addAll(entry.getValue());
}
return flatArray;
}
@Override
public Iterator iterator()
{
if (isStreaming()) {
return responseIterator;
}
// TODO: add support for not streamable responses
throw new UnsupportedOperationException("Iterating is only supported for streamable response.");
}
private class MapReduceResponseIterator implements Iterator
{
final StreamingRiakFuture coreFuture;
final TransferQueue resultsQueue;
private final int pollTimeout;
MapReduceResponseIterator(StreamingRiakFuture coreFuture,
int pollTimeout)
{
this.coreFuture = coreFuture;
this.resultsQueue = coreFuture.getResultsQueue();
this.pollTimeout = pollTimeout;
}
/**
* Returns {@code true} if the iteration has more elements.
* (In other words, returns {@code true} if {@link #next} would
* return an element rather than throwing an exception.)
*
* This method will block and wait for more data if none is immediately available.
*
* Riak Java Client Note: Since this class polls for
* new "streaming" data, it is advisable to check {@link Thread#isInterrupted()}
* in environments where thread interrupts must be obeyed.
*
* @return {@code true} if the iteration has more elements
*/
@Override
public boolean hasNext()
{
// Check & clear interrupted flag so we don't get an
// InterruptedException every time if the user
// doesn't clear it / deal with it.
boolean interrupted = Thread.interrupted();
try
{
boolean foundEntry = false;
boolean interruptedLastLoop;
do
{
interruptedLastLoop = false;
try
{
foundEntry = peekWaitForNextQueueEntry();
}
catch (InterruptedException e)
{
interrupted = true;
interruptedLastLoop = true;
}
} while (interruptedLastLoop);
return foundEntry;
}
finally
{
if (interrupted)
{
// Reset interrupted flag if we came in with it
// or we were interrupted while waiting.
Thread.currentThread().interrupt();
}
}
}
private boolean peekWaitForNextQueueEntry() throws InterruptedException
{
while (resultsQueue.isEmpty() && !coreFuture.isDone())
{
if (resultsQueue.isEmpty())
{
Thread.sleep(pollTimeout);
}
}
return !resultsQueue.isEmpty();
}
/**
* Returns the next element in the iteration.
* This method will block and wait for more data if none is immediately available.
*
* Riak Java Client Note: Since this class polls for
* new "streaming" data, it is advisable to check {@link Thread#isInterrupted()}
* in environments where thread interrupts must be obeyed.
*
* @return the next element in the iteration
* @throws NoSuchElementException if the iteration has no more elements
*/
@Override
public Response next()
{
final MapReduceOperation.Response responseChunk = resultsQueue.remove();
return new Response(responseChunk.getResults());
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy