All downloads are free. Search and download functionality uses the official Maven repository.

com.datatorrent.lib.algo.UniqueValueCount Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.algo;

import java.util.Map;
import java.util.Set;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

import com.datatorrent.api.Context;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.api.annotation.Stateless;
import com.datatorrent.common.util.BaseOperator;
import com.datatorrent.lib.util.KeyValPair;

/**
 * This operator counts the number of unique values corresponding to a key within a window. 
 * At the end of each window each key, its count of unique values, and its set of unique values is emitted.
 * 

* Counts no. of unique values of a key within a window.
* Emits {@link InternalCountOutput} which contains the key, count of its unique values.
* When the operator is partitioned, the unifier uses the internal set of values to * compute the count of unique values again.
*
* Partitions: yes, uses {@link UniqueCountUnifier} to merge partitioned output.
* Stateful: no
*

*

* * @param Type of Key objects * @displayName Unique Values Per Key * @category Stream Manipulators * @tags count, key value * @since 0.3.5 */ @Stateless @OperatorAnnotation(partitionable = true) public class UniqueValueCount extends BaseOperator { private final Map> interimUniqueValues; /** * The input port that receives key value pairs. */ public transient DefaultInputPort> input = new DefaultInputPort>() { @Override public void process(KeyValPair pair) { Set values = interimUniqueValues.get(pair.getKey()); if (values == null) { values = Sets.newHashSet(); interimUniqueValues.put(pair.getKey(), values); } values.add(pair.getValue()); } }; /** * The output port which emits key/unique value count pairs. */ public transient DefaultOutputPort> output = new DefaultOutputPort>() { @Override @SuppressWarnings({"rawtypes", "unchecked"}) public Unifier> getUnifier() { return (Unifier)new UniqueCountUnifier(); } }; /** * The output port which emits key and set containing unique values */ public final transient DefaultOutputPort>> outputValues = new DefaultOutputPort>>() { @SuppressWarnings({"unchecked", "rawtypes"}) @Override public Unifier>> getUnifier() { return (Unifier)new UniqueCountSetUnifier(); } }; public UniqueValueCount() { this.interimUniqueValues = Maps.newHashMap(); } @Override public void endWindow() { for (K key : interimUniqueValues.keySet()) { Set values = interimUniqueValues.get(key); if (output.isConnected()) { output.emit(new InternalCountOutput<>(key, values.size(), values)); } if (outputValues.isConnected()) { outputValues.emit(new KeyValPair<>(key, values)); } } interimUniqueValues.clear(); } /** * State which contains a key, a set of values of that key, and a count of unique values of that key.

* * @param Type of key objects */ public static class InternalCountOutput extends KeyValPair { private final Set interimUniqueValues; @SuppressWarnings("unused") protected InternalCountOutput() { this(null, null, null); } public InternalCountOutput(K k, Integer count, Set interimUniqueValues) { super(k, count); this.interimUniqueValues = interimUniqueValues; } public Set getInternalSet() { return interimUniqueValues; } } /** * Unifier for {@link UniqueValueCount} operator.
* It uses the internal set of values emitted by the operator and * emits {@link KeyValPair} of the key and a set of unique values matching key.

* * @param Type of Key objects */ static class UniqueCountSetUnifier implements Unifier>> { public final transient DefaultOutputPort>> output = new DefaultOutputPort<>(); private final Map> finalUniqueValues; public UniqueCountSetUnifier() { this.finalUniqueValues = Maps.newHashMap(); } @Override public void process(KeyValPair> tuple) { Set values = finalUniqueValues.get(tuple.getKey()); if (values == null) { values = Sets.newHashSet(); finalUniqueValues.put(tuple.getKey(), values); } values.addAll(tuple.getValue()); } @Override public void beginWindow(long l) { } @Override public void endWindow() { for (Map.Entry> entry : finalUniqueValues.entrySet()) { output.emit(new KeyValPair<>(entry.getKey(), entry.getValue())); } finalUniqueValues.clear(); } @Override public void setup(Context.OperatorContext operatorContext) { } @Override public void teardown() { } } /** * Unifier for {@link UniqueValueCount} operator.
* It uses the internal set of values emitted by the operator and * emits {@link KeyValPair} of the key and its unique count.

* * @param Type of Key objects */ static class UniqueCountUnifier implements Unifier> { public final transient DefaultOutputPort> output = new DefaultOutputPort<>(); private final Map> finalUniqueValues; public UniqueCountUnifier() { this.finalUniqueValues = Maps.newHashMap(); } @Override public void process(InternalCountOutput tuple) { Set values = finalUniqueValues.get(tuple.getKey()); if (values == null) { values = Sets.newHashSet(); finalUniqueValues.put(tuple.getKey(), values); } values.addAll(tuple.interimUniqueValues); } @Override public void beginWindow(long l) { } @Override public void endWindow() { for (K key : finalUniqueValues.keySet()) { output.emit(new InternalCountOutput<>(key, finalUniqueValues.get(key).size(), finalUniqueValues.get(key))); } finalUniqueValues.clear(); } @Override public void setup(Context.OperatorContext operatorContext) { } @Override public void teardown() { } } }