All downloads are free. Search and download functionality uses the official Maven repository.

com.datatorrent.lib.algo.DistinctMap Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.algo;

import java.util.HashMap;
import java.util.Map;

import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;

import com.datatorrent.lib.util.BaseKeyValueOperator;
import com.datatorrent.lib.util.UnifierHashMap;

/**
 * This operator computes and emits distinct key,val pairs (i.e drops duplicates).
 * 

* Computes and emits distinct key,val pairs (i.e drops duplicates) *

*

* This is a pass through operator
*
* This module is same as a "FirstOf" metric on any key,val pair. At end of window all data is flushed.
*
* StateFull : Yes, tuple are compare across application window(s).
* Partitions : Yes, distinct output is unified by unifier hash map operator.
*
* Ports:
* data: Input data port expects Map<K,V>
* distinct: Output data port, emits HashMap<K,V>(1)
*
*

* * @displayName Distinct Key Value Merge * @category Stream Manipulators * @tags filter, unique, key value * * @since 0.3.2 */ @OperatorAnnotation(partitionable = true) public class DistinctMap extends BaseKeyValueOperator { /** * The input port on which key value pairs are received. */ public final transient DefaultInputPort> data = new DefaultInputPort>() { /** * Process HashMap tuple on input port data, and emits if match not found. Updates the cache * with new key,val pair */ @Override public void process(Map tuple) { for (Map.Entry e: tuple.entrySet()) { HashMap vals = mapkeyval.get(e.getKey()); if ((vals == null) || !vals.containsKey(e.getValue())) { HashMap otuple = new HashMap(1); otuple.put(cloneKey(e.getKey()), cloneValue(e.getValue())); distinct.emit(otuple); if (vals == null) { vals = new HashMap(); mapkeyval.put(cloneKey(e.getKey()), vals); } vals.put(cloneValue(e.getValue()), null); } } } }; /** * The output port on which distinct key value pairs are emitted. */ public final transient DefaultOutputPort> distinct = new DefaultOutputPort>() { @Override public Unifier> getUnifier() { return new UnifierHashMap(); } }; protected HashMap> mapkeyval = new HashMap>(); /** * Clears the cache/hash */ @Override public void endWindow() { mapkeyval.clear(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy