All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datatorrent.lib.algo.MostFrequentKeyValueMap Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.algo;

import java.util.HashMap;

import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;

import com.datatorrent.lib.util.AbstractBaseFrequentKeyValueMap;

/**
 * This operator filters the incoming stream of key value pairs by finding the value or values (if there is a tie),
 * for each key, that occur the largest number of times within each window. 
 * Each key and its corresponding most frequent values are emitted at the end of each window.
 * 

* Occurrences of all values for each key are counted, and at the end of the window the most frequent values per key are emitted on the output port "most".

*

* This module is an end of window module
*
* Ports:
* data: expects HashMap<K,V>
* most: emits HashMap<K, HashMap<V, Integer>>(1)
*
*
* Properties: None
*
* Compile time checks: None
* Specific run time checks: None
*
* Benchmarks: Blast as many tuples as possible in inline mode
*

* * *
* In-bound | Out-bound | Comments
* > 30 Million K,V pairs/s | Emits only 1 tuple per window per key | In-bound throughput is the main determinant of performance.
* The benchmark was done with immutable objects. If K or V are mutable, the benchmark may be lower.

*

*

* Function Table (K=String, V=Integer):

* Tuple Type (api) | In-bound (process) | Out-bound (emit)
* | data({@code HashMap<K,V>}) | most({@code HashMap<K,HashMap<V,Integer>>})
* Begin Window (beginWindow()) | N/A | N/A
* Data (process()) | {a=1,b=5,c=110} |
* Data (process()) | {a=55,c=2000,b=45} |
* Data (process()) | {d=2} |
* Data (process()) | {a=55,b=5,c=22} |
* Data (process()) | {h=20,a=2,z=5} |
* Data (process()) | {a=4,c=110} |
* Data (process()) | {a=4,z=5} |
* End Window (endWindow()) | N/A | {a={4=2,55=2},b={5=2},c={110=2},d={2=1},h={20=1},z={5=2}}
*
*
*

*
* @displayName Emit Most Frequent Keyval Pair
* @category Rules and Alerts
* @tags filter, key value, count
*
* @param <K> type of the keys in the incoming maps
* @param <V> type of the values whose occurrences are counted per key
*
* @since 0.3.2
*/
@OperatorAnnotation(partitionable = false)
public class MostFrequentKeyValueMap<K, V> extends AbstractBaseFrequentKeyValueMap<K, V>
{
  /**
   * The output port which emits a map from each key to its most frequent
   * values, each paired with its occurrence count.
   */
  public final transient DefaultOutputPort<HashMap<K, HashMap<V, Integer>>> most =
      new DefaultOutputPort<HashMap<K, HashMap<V, Integer>>>();

  /**
   * Tells whether the first count is strictly greater than the second.
   * NOTE(review): presumably the base class uses this to decide which value
   * counts win for a key; confirm against AbstractBaseFrequentKeyValueMap.
   *
   * @param val1 first count to compare
   * @param val2 second count to compare
   * @return {@code true} if {@code val1 > val2}, {@code false} otherwise
   */
  @Override
  public boolean compareValue(int val1, int val2)
  {
    return (val1 > val2);
  }

  /**
   * Emits tuple on port "most".
   *
   * @param tuple is emitted on port "most"
   */
  @Override
  public void emitTuple(HashMap<K, HashMap<V, Integer>> tuple)
  {
    most.emit(tuple);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy