All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datatorrent.lib.algo.MostFrequentValue Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.algo;

import java.util.ArrayList;
import java.util.HashMap;

import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;

import com.datatorrent.lib.util.AbstractBaseFrequentKey;
import com.datatorrent.lib.util.UnifierArrayHashMapFrequent;
import com.datatorrent.lib.util.UnifierHashMapFrequent;

/**
 * This operator filters the incoming stream of values by emitting the value or values (if there is a tie)
 * that occurred the largest number of times within each window to the output port "list". 
 * One of the values is emitted to the output port "least" at the end of each window.
 * 

* Occurrences of each tuple is counted and at the end of window any of the most frequent tuple is emitted on output port least and all least frequent * tuples on output port list *

*

* This module is an end of window module
* In case of a tie any of the least key would be emitted. The list port would however have all the tied keys *
* StateFull : Yes, Values are compared all over application window can be > 1.
* Partitions : Yes, Result is unified on output port.
*
* Ports:
* data: expects K
* most: emits HashMap<K,Integer>(1), Where K is the least occurring key in the window. In case of tie any of the least key would be emitted
* list: emits ArrayList<HashMap<K,Integer>(1)>, Where the list includes all the keys that are least frequent
*
* Properties: None
*
* Compile time checks: None
* Specific run time checks: None
*
*

* @displayName Emit Most Frequent Value * @category Rules and Alerts * @tags filter, count * * @since 0.3.2 */ @OperatorAnnotation(partitionable = true) public class MostFrequentValue extends AbstractBaseFrequentKey { /** * The input port which receives incoming tuples. */ public final transient DefaultInputPort data = new DefaultInputPort() { /** * Calls super.processTuple(tuple) */ @Override public void process(K tuple) { processTuple(tuple); } }; /** * The output port on which all the tuples, * which occurred the most number of times, * is emitted. */ public final transient DefaultOutputPort> most = new DefaultOutputPort>() { @SuppressWarnings({ "rawtypes", "unchecked" }) @Override public Unifier> getUnifier() { UnifierHashMapFrequent ret = new UnifierHashMapFrequent(); ret.setLeast(false); return ret; } }; public final transient DefaultOutputPort>> list = new DefaultOutputPort>>() { @SuppressWarnings({"rawtypes", "ConstantConditions"}) @Override public Unifier>> getUnifier() { Unifier>> ret = new UnifierArrayHashMapFrequent(); ((UnifierHashMapFrequent)ret).setLeast(false); return ret; } }; /** * Emits tuple on port "most" * @param tuple */ @Override public void emitTuple(HashMap tuple) { most.emit(tuple); } /** * Emits tuple on port "list" * @param tlist */ @Override public void emitList(ArrayList> tlist) { list.emit(tlist); } /** * returns val1 < val2 * @param val1 * @param val2 * @return val1 > val2 */ @Override public boolean compareCount(int val1, int val2) { return val1 > val2; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy