com.datatorrent.lib.testbench.EventClassifier Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.datatorrent.lib.testbench;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.common.util.BaseOperator;
/**
* An implementation of BaseOperator that creates a load with pairs of keys by taking in an input stream event and adding to incoming keys
* to create a new tuple that is emitted on output port data.
*
* Takes an input stream event and adds to incoming keys to create a new tuple that is emitted on output port data.
*
* Examples of pairs include
* publisher,advertizer
* automobile,model
*
* The keys to be inserted are given by the property keys. Users can choose to insert their
* own values via property values. Insertion can be done as replacement, addition, multiply,
* or append (append is not yet supported). For each incoming key users can provide an insertion
* probability for the insert keys. This allows for randomization of the insert key choice
*
* Tuple Schema: Each tuple is HashMap on both the ports. Currently other schemas are not supported
* Port Interface
* data: emits HashMap
* event: expects HashMap
*
* Properties:
* None
*
* Compile time checks are:
* keys cannot be empty
* values if specified has to be comma separated doubles and their number must match the number of keys
* weights if specified the format has to be "key1:val1,val2,...,valn;key2:val1,val2,...,valn;...", where n has to be
* number of keys in parameter keys. If not specified all weights are equal
*
*
* Benchmarks: This node has been benchmarked at over 5 million tuples/second in local/inline mode
*
* @displayName Event Classifier
* @category Test Bench
* @tags hashmap,classification
* @since 0.3.2
*/
public class EventClassifier extends BaseOperator
{
public final transient DefaultInputPort> event = new DefaultInputPort>()
{
@Override
public void process(HashMap tuple)
{
for (Map.Entry e : tuple.entrySet()) {
String inkey = e.getKey();
ArrayList alist = null;
if (inkeys != null) {
alist = inkeys.get(e.getKey());
}
if (alist == null) {
alist = noweight;
}
// now alist are the weights
int rval = random.nextInt(alist.get(alist.size() - 1));
int j = 0;
int wval = 0;
for (Integer ew : alist) {
wval += ew.intValue();
if (wval >= rval) {
break;
}
j++;
}
HashMap otuple = new HashMap(1);
String key = wtostr_index.get(j); // the key
Double keyval = null;
if (hasvalues) {
if (voper == value_operation.VOPR_REPLACE) { // replace the incoming value
keyval = keys.get(key);
} else if (voper == value_operation.VOPR_ADD) {
keyval = keys.get(key) + e.getValue();
} else if (voper == value_operation.VOPR_MULT) {
keyval = keys.get(key) * e.getValue();
} else if (voper == value_operation.VOPR_APPEND) { // not supported yet
keyval = keys.get(key);
}
} else { // pass on the value from incoming tuple
keyval = e.getValue();
}
otuple.put(key + "," + inkey, keyval);
data.emit(otuple);
}
}
};
/**
* Output data port that emits a hashmap of <string,double>.
*/
public final transient DefaultOutputPort> data = new DefaultOutputPort>();
HashMap keys = new HashMap();
HashMap wtostr_index = new HashMap();
// One of inkeys (Key to weight hash) or noweight (even weight) would be not null
HashMap> inkeys = null;
ArrayList noweight = null;
boolean hasvalues = false;
int total_weight = 0;
private Random random = new Random();
enum value_operation
{
VOPR_REPLACE, VOPR_ADD, VOPR_MULT, VOPR_APPEND
}
value_operation voper = value_operation.VOPR_REPLACE;
public void setOperationReplace()
{
voper = value_operation.VOPR_REPLACE;
}
public void setOperationAdd()
{
voper = value_operation.VOPR_ADD;
}
public void setOperationMult()
{
voper = value_operation.VOPR_MULT;
}
public void setOperationAppend()
{
voper = value_operation.VOPR_MULT;
}
public void setKeyWeights(HashMap> map)
{
if (inkeys == null) {
inkeys = new HashMap>();
}
for (Map.Entry> e: map.entrySet()) {
inkeys.put(e.getKey(), e.getValue());
}
for (Map.Entry> e: inkeys.entrySet()) {
ArrayList list = e.getValue();
int total = 0;
for (Integer i: list) {
total += i.intValue();
}
list.add(total);
}
}
@Override
public void setup(OperatorContext context)
{
noweight = new ArrayList();
for (int i = 0; i < keys.size(); i++) {
noweight.add(100); // Even distribution
total_weight += 100;
}
noweight.add(total_weight);
}
public void setKeyMap(HashMap map)
{
int i = 0;
// First load up the keys and the index hash (wtostr_index) for randomization to work
boolean foundvalue = false;
for (Map.Entry e: map.entrySet()) {
keys.put(e.getKey(), e.getValue());
foundvalue = foundvalue || (e.getValue() != null);
wtostr_index.put(i, e.getKey());
i += 1;
}
hasvalues = foundvalue;
}
}