All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datatorrent.lib.algo.UniqueCounter Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.algo;

import java.util.HashMap;
import java.util.Map;

import org.apache.commons.lang.mutable.MutableInt;

import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.lib.util.BaseUniqueKeyCounter;
import com.datatorrent.lib.util.UnifierHashMapSumKeys;

/**
 * This operator counts the number of times a tuple exists in a window. A map from tuples to counts is emitted at the end of each window.
 * 

* Counts the number of times a key exists in a window; Count is emitted at end of window in a single HashMap. *

*

* This is an end of window operator
*
* StateFull : yes, Tuples are aggregated over application window(s).
* Partitions : Yes, Unique count is unified at output port.
*
* Ports:
* data: expects K
* count: emits HashMap<K,Integer>
* Properties: None
*
*

* * @displayName Count Unique Tuples * @category Stats and Aggregations * @tags count * * @since 0.3.2 */ @OperatorAnnotation(partitionable = true) public class UniqueCounter extends BaseUniqueKeyCounter { private boolean cumulative; /** * The input port which receives incoming tuples. */ public final transient DefaultInputPort data = new DefaultInputPort() { /** * Reference counts tuples */ @Override public void process(K tuple) { processTuple(tuple); } }; /** * The output port which emits a map from keys to the number of times they occurred within an application window. */ public final transient DefaultOutputPort> count = new DefaultOutputPort>() { @Override public Unifier> getUnifier() { UnifierHashMapSumKeys unifierHashMapSumKeys = new UnifierHashMapSumKeys<>(); unifierHashMapSumKeys.setType(Integer.class); return unifierHashMapSumKeys; } }; /** * Emits one HashMap as tuple */ @Override public void endWindow() { HashMap tuple = null; for (Map.Entry e: map.entrySet()) { if (tuple == null) { tuple = new HashMap(); } tuple.put(e.getKey(), e.getValue().toInteger()); } if (tuple != null) { count.emit(tuple); } if (!cumulative) { map.clear(); } } /** * Gets the cumulative mode. * @return The cumulative mode. */ public boolean isCumulative() { return cumulative; } /** * If enabled then the unique keys is counted and maintained in memory for the life of the operator. If not enabled * keys are counted a per window bases.
* Note: If cumulative mode is enabled and the operator receives many unique keys, then this operator * could eventually run out of memory. * @param cumulative */ public void setCumulative(boolean cumulative) { this.cumulative = cumulative; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy