// com.datatorrent.lib.algo.MostFrequentKeyValueMap Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.datatorrent.lib.algo;
import java.util.HashMap;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.lib.util.AbstractBaseFrequentKeyValueMap;
/**
* This operator filters the incoming stream of key value pairs by finding the value or values (if there is a tie),
* for each key, that occur the largest number of times within each window.
* Each key and its corresponding most frequent values are emitted at the end of each window.
*
* Occurrences of all values for each key are counted, and at the end of the window the most frequent values are emitted per key on the output port "most".
*
*
* This module is an end of window module
*
* Ports:
* data: expects HashMap<K,V>
* most: emits HashMap<K, HashMap<V, Integer>>(1)
*
*
* Properties: None
*
* Compile time checks: None
* Specific run time checks: None
*
* Benchmarks: Blast as many tuples as possible in inline mode
*
* In-Bound Out-bound Comments
* > 30 Million K,V pairs/s Emits only 1 tuple per window per key In-bound throughput is the main determinant of performance.
* The benchmark was done with immutable objects. If K or V are mutable the benchmark may be lower
*
*
*
* Function Table (K=String, V=Integer):
*
* Tuple Type (api) In-bound (process) Out-bound (emit)
* data(HashMap<K,V>) most(HashMap<K,HashMap<V,Integer>>)
* Begin Window (beginWindow()) N/A N/A
* Data (process()) {a=1,b=5,c=110}
* Data (process()) {a=55,c=2000,b=45}
* Data (process()) {d=2}
* Data (process()) {a=55,b=5,c=22}
* Data (process()) {h=20,a=2,z=5}
* Data (process()) {a=4,c=110}
* Data (process()) {a=4,z=5}
* End Window (endWindow()) N/A {a={4=2,55=2},b={5=2},c={110=2},d={2=1},h={20=1},z={5=2}}
*
*
*
*
*
* @displayName Emit Most Frequent Keyval Pair
* @category Rules and Alerts
* @tags filter, key value, count
*
* @since 0.3.2
*/
@OperatorAnnotation(partitionable = false)
public class MostFrequentKeyValueMap extends AbstractBaseFrequentKeyValueMap
{
/**
* The output port which emits a map from keys to their most values.
*/
public final transient DefaultOutputPort>> most = new DefaultOutputPort>>();
/**
* returns val1 < val2
* @param val1
* @param val2
* @return val1 > val2
*/
@Override
public boolean compareValue(int val1, int val2)
{
return (val1 > val2);
}
/**
* Emits tuple on port "most"
* @param tuple is emitted on port "most"
*/
@Override
public void emitTuple(HashMap> tuple)
{
most.emit(tuple);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy