All downloads are free. Search and download functionality uses the official Maven repository.

com.datatorrent.lib.math.SumCountMap Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.math;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.mutable.MutableDouble;
import org.apache.commons.lang.mutable.MutableInt;

import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OutputPortFieldAnnotation;
import com.datatorrent.lib.util.BaseNumberKeyValueOperator;
import com.datatorrent.lib.util.UnifierHashMapInteger;
import com.datatorrent.lib.util.UnifierHashMapSumKeys;

/**
 * Emits the sum and count of values for each key at the end of window.
 * 

* Application accumulate sum across streaming window by setting cumulative flag * to true.
* This is an end of window operator
*
* StateFull : Yes, Sum is computed over application window and streaming * window.
* Partitions : Yes, Sum is unified at output port.
*
* Ports:
* data: expects Map<K,V extends Number>
* sum: emits HashMap<K,V>
* count: emits HashMap<K,Integer>
*
* Properties:
* inverse: if set to true the key in the filter will block tuple
* filterBy: List of keys to filter on
* cumulative: boolean flag, if set the sum is not cleared at the end of * window,
* hence generating cumulative sum across streaming windows. Default is false.
*
* @displayName Sum Count Map * @category Math * @tags number, sum, counting, map * @since 0.3.3 */ public class SumCountMap extends BaseNumberKeyValueOperator { /** * Key/double sum map. */ protected HashMap sums = new HashMap(); /** * Key/integer sum map. */ protected HashMap counts = new HashMap(); /** * Cumulative sum flag. */ protected boolean cumulative = false; /** * Input port that takes a map.  It adds the values for each key and counts the number of occurrences for each key. */ public final transient DefaultInputPort> data = new DefaultInputPort>() { /** * For each tuple (a HashMap of keys,val pairs) Adds the values for each * key, Counts the number of occurrences of each key */ @Override public void process(Map tuple) { for (Map.Entry e : tuple.entrySet()) { K key = e.getKey(); if (!doprocessKey(key)) { continue; } if (sum.isConnected()) { MutableDouble val = sums.get(key); if (val == null) { val = new MutableDouble(e.getValue().doubleValue()); } else { val.add(e.getValue().doubleValue()); } sums.put(cloneKey(key), val); } if (SumCountMap.this.count.isConnected()) { MutableInt count = counts.get(key); if (count == null) { count = new MutableInt(0); counts.put(cloneKey(key), count); } count.increment(); } } } }; /** * Key,sum map output port. */ @OutputPortFieldAnnotation(optional = true) public final transient DefaultOutputPort> sum = new DefaultOutputPort>() { @Override public Unifier> getUnifier() { return new UnifierHashMapSumKeys(); } }; /** * Key,double sum map output port. */ @OutputPortFieldAnnotation(optional = true) public final transient DefaultOutputPort> sumDouble = new DefaultOutputPort>() { @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public Unifier> getUnifier() { UnifierHashMapSumKeys ret = new UnifierHashMapSumKeys(); ret.setType(Double.class); return ret; } }; /** * Key,integer sum output port. 
*/ @OutputPortFieldAnnotation(optional = true) public final transient DefaultOutputPort> sumInteger = new DefaultOutputPort>() { @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public Unifier> getUnifier() { UnifierHashMapSumKeys ret = new UnifierHashMapSumKeys(); ret.setType(Integer.class); return ret; } }; /** * Key,long sum output port. */ @OutputPortFieldAnnotation(optional = true) public final transient DefaultOutputPort> sumLong = new DefaultOutputPort>() { @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public Unifier> getUnifier() { UnifierHashMapSumKeys ret = new UnifierHashMapSumKeys(); ret.setType(Long.class); return ret; } }; /** * Key,short sum output port. */ @OutputPortFieldAnnotation(optional = true) public final transient DefaultOutputPort> sumShort = new DefaultOutputPort>() { @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public Unifier> getUnifier() { UnifierHashMapSumKeys ret = new UnifierHashMapSumKeys(); ret.setType(Short.class); return ret; } }; /** * Key,float sum output port. */ @OutputPortFieldAnnotation(optional = true) public final transient DefaultOutputPort> sumFloat = new DefaultOutputPort>() { @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public Unifier> getUnifier() { UnifierHashMapSumKeys ret = new UnifierHashMapSumKeys(); ret.setType(Float.class); return ret; } }; /** * Key,integer sum output port. */ @OutputPortFieldAnnotation(optional = true) public final transient DefaultOutputPort> count = new DefaultOutputPort>() { @Override public Unifier> getUnifier() { return new UnifierHashMapInteger(); } }; /** * Get cumulative flag. * * @return cumulative flag */ public boolean isCumulative() { return cumulative; } /** * set cumulative flag. * * @param cumulative * input flag */ public void setCumulative(boolean cumulative) { this.cumulative = cumulative; } /** * Emits on all ports that are connected. 
Data is precomputed during process * on input port endWindow just emits it for each key Clears the internal data * before return */ @Override public void endWindow() { // Should allow users to send each key as a separate tuple to load balance // This is an aggregate node, so load balancing would most likely not be // needed HashMap tuples = new HashMap(); HashMap ctuples = new HashMap(); HashMap dtuples = new HashMap(); HashMap ituples = new HashMap(); HashMap ftuples = new HashMap(); HashMap ltuples = new HashMap(); HashMap stuples = new HashMap(); for (Map.Entry e : sums.entrySet()) { K key = e.getKey(); MutableDouble val = e.getValue(); tuples.put(key, getValue(val.doubleValue())); dtuples.put(key, val.doubleValue()); ituples.put(key, val.intValue()); ftuples.put(key, val.floatValue()); ltuples.put(key, val.longValue()); stuples.put(key, val.shortValue()); // ctuples.put(key, counts.get(e.getKey()).toInteger()); MutableInt c = counts.get(e.getKey()); if (c != null) { ctuples.put(key, c.toInteger()); } } sum.emit(tuples); sumDouble.emit(dtuples); sumInteger.emit(ituples); sumLong.emit(ltuples); sumShort.emit(stuples); sumFloat.emit(ftuples); count.emit(ctuples); clearCache(); } /** * Clear sum maps. */ private void clearCache() { if (!cumulative) { sums.clear(); counts.clear(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy