All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.calrissian.flowmix.bolt.JoinBolt Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2014 The Calrissian Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.calrissian.flowmix.bolt;


import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import org.calrissian.flowmix.model.*;
import org.calrissian.flowmix.model.op.FlowOp;
import org.calrissian.flowmix.model.op.JoinOp;
import org.calrissian.flowmix.support.Window;
import org.calrissian.flowmix.support.WindowItem;
import org.calrissian.mango.domain.event.BaseEvent;
import org.calrissian.mango.domain.event.Event;

import java.util.*;
import java.util.concurrent.TimeUnit;

import static com.google.common.collect.Iterables.concat;
import static org.calrissian.flowmix.Constants.*;
import static org.calrissian.flowmix.FlowmixFactory.declareOutputStreams;
import static org.calrissian.flowmix.FlowmixFactory.fields;
import static org.calrissian.flowmix.spout.MockFlowLoaderSpout.FLOW_LOADER_STREAM;

/**
 * Sliding window join semantics are defined very similarly to those of InfoSphere Streams. The join operator,
 * by default, triggers on every single input event from the stream on the right-hand side.
 *
 * The stream on the right is joined with the stream on the left where the stream on the left is collected into a
 * window which is evicted by the given policy. The stream on the right has a default eviction policy of COUNT with
 * a threshold of 1. Every time a tuple on the right stream is encountered, it is compared against the window on the
 * left and a new tuple is emitted for each find in the join.
 *
 * By default, if no partition has been done before the join, every event received on the right stream will be
 * joined with every event currently in the window for the left-hand stream.
 *
 * It's possible for events to have multi-valued keys, thus it's possible for merged tuples to make a single-valued key
 * into a multi-valued key.
 */
public class JoinBolt extends BaseRichBolt {

    Map rulesMap;
    Map> windows;

    OutputCollector collector;

    @Override
    public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
        this.collector = outputCollector;
        rulesMap = new HashMap();
        windows = new HashMap>();
    }

    @Override
    public void execute(Tuple tuple) {

        /**
         * Update rules if necessary
         */
        if(FLOW_LOADER_STREAM.equals(tuple.getSourceStreamId())) {

            Collection rules = (Collection) tuple.getValue(0);
            Set rulesToRemove = new HashSet();

            // find deleted rules and remove them
            for(Flow rule : rulesMap.values()) {
                if(!rules.contains(rule))
                    rulesToRemove.add(rule.getId());
            }

            /**
             * Remove any deleted rules
             */
            for(String ruleId : rulesToRemove) {
                rulesMap.remove(ruleId);
                windows.remove(ruleId);
            }

            for(Flow rule : rules) {
                /**
                 * If a rule has been updated, let's drop the window windows and start out fresh.
                 */
                if(rulesMap.get(rule.getId()) != null && !rulesMap.get(rule.getId()).equals(rule) ||
                        !rulesMap.containsKey(rule.getId())) {
                    rulesMap.put(rule.getId(), rule);
                    windows.remove(rule.getId());
                }
            }

        } else if("tick".equals(tuple.getSourceStreamId())) {

            /**
             * Don't bother evaluating if we don't even have any rules
             */
            if(rulesMap.size() > 0) {

                for(Flow rule : rulesMap.values()) {

                    for(StreamDef stream : rule.getStreams()) {

                        int idx = 0;
                        for(FlowOp curOp : stream.getFlowOps()) {

                            if(curOp instanceof JoinOp) {

                                JoinOp op = (JoinOp) curOp;
                                /**
                                 * If we need to trigger any time-based policies, let's do that here.
                                 */
                                if(op.getEvictionPolicy() == Policy.TIME) {

                                    Cache buffersForRule = windows.get(rule.getId() + "\0" + stream.getName() + "\0" + idx);
                                    if(buffersForRule != null)
                                        for (Window buffer : buffersForRule.asMap().values())
                                            buffer.timeEvict(op.getEvictionThreshold());
                                }
                            }
                            idx++;
                        }

                    }

                }
            }

        } else {

            /**
             * Short circuit if we don't have any rules.
             */
            if (rulesMap.size() > 0) {

                String ruleId = tuple.getStringByField(FLOW_ID);
                String hash = tuple.contains(PARTITION) ? tuple.getStringByField(PARTITION) : "";
                Event event = (Event) tuple.getValueByField(EVENT);
                int idx = tuple.getIntegerByField(FLOW_OP_IDX);
                idx++;

                String streamName = tuple.getStringByField(STREAM_NAME);
                String previousStream = tuple.getStringByField(LAST_STREAM);
                Flow flow = rulesMap.get(ruleId);

                JoinOp op = (JoinOp) flow.getStream(streamName).getFlowOps().get(idx);

                // do processing on lhs
                if(previousStream.equals(op.getLeftStream())) {

                    Cache buffersForRule = windows.get(flow.getId() + "\0" + streamName + "\0" + idx);
                    Window buffer;
                    if (buffersForRule != null) {
                        buffer = buffersForRule.getIfPresent(hash);

                        if (buffer != null) {    // if we have a buffer already, process it
                            /**
                             * If we need to evict any buffered items, let's do it here
                             */
                            if(op.getEvictionPolicy() == Policy.TIME)
                                buffer.timeEvict(op.getEvictionThreshold());
                        }
                    } else {
                        buffersForRule = CacheBuilder.newBuilder().expireAfterAccess(60, TimeUnit.MINUTES).build(); // just in case we get some rogue data, we don't wan ti to sit for too long.
                        buffer = op.getEvictionPolicy() == Policy.TIME ? new Window(hash) :
                                new Window(hash, op.getEvictionThreshold());
                        buffersForRule.put(hash, buffer);
                        windows.put(flow.getId() + "\0" + streamName + "\0" + idx, buffersForRule);
                    }

                    buffer.add(event, previousStream);

                } else if(previousStream.equals(op.getRightStream())) {

                    Cache buffersForRule = windows.get(flow.getId() + "\0" + streamName + "\0" + idx);
                    Window buffer;
                    if (buffersForRule != null) {
                        buffer = buffersForRule.getIfPresent(hash);

                        for(WindowItem bufferedEvent : buffer.getEvents()) {
                          Event joined = new BaseEvent(bufferedEvent.getEvent().getId(), bufferedEvent.getEvent().getTimestamp());
                          // the hashcode will filter duplicates
                          joined.putAll(concat(bufferedEvent.getEvent().getTuples()));
                          joined.putAll(concat(event.getTuples()));
                          String nextStream = idx+1 < flow.getStream(streamName).getFlowOps().size() ? flow.getStream(streamName).getFlowOps().get(idx+1).getComponentName() : "output";

                          if((nextStream.equals("output") && flow.getStream(streamName).isStdOutput()) || !nextStream.equals("output"))
                              collector.emit(nextStream, new Values(flow.getId(), joined, idx, streamName, bufferedEvent.getPreviousStream()));

                          // send to any other streams that are configured (aside from output)
                          if(nextStream.equals("output")) {
                            if(flow.getStream(streamName).getOutputs() != null) {
                              for(String output : flow.getStream(streamName).getOutputs()) {
                                String outputComponent = flow.getStream(output).getFlowOps().get(0).getComponentName();
                                collector.emit(outputComponent, new Values(flow.getId(), joined, -1, output, streamName));
                              }
                            }
                          }
                        }
                    }
                } else {
                    throw new RuntimeException("Received event for stream that does not match the join. Flowbox has been miswired.");
                }
            }

        }

        collector.ack(tuple);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
        declareOutputStreams(outputFieldsDeclarer, fields);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy