All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datatorrent.lib.testbench.SeedEventGenerator Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.testbench;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Random;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.InputOperator;
import com.datatorrent.common.util.BaseOperator;
import com.datatorrent.lib.util.KeyValPair;

/**
 * Generates a one time seed load based on the range provided by the keys,
 * and adds new classification to incoming keys. 
 * Generated tuples are emitted on the keyvalpair_list, val_list, string_data, and val_data output ports.
 * 

* Examples of getting seed distributions include
* Clients data of a company for every clientId (key is clienId)
* Persons age, gender, for every phone number (key is phone number)
* Year, color, mileage for every car make (key is car make model)
*
* The classification to be done is based on the value of the property key. This property provides all the classification * information and their ranges
The range of values for the key is given in the format described below
*
* Benchmarks: Generate as many tuples as possible in inline mode
* HashMap: 8 million/sec with no classification; 1.8 million tuples/sec with classification
* HashMap>: 8 million/sec with no classification; 3.5 million tuples/sec with classification
*
* Default schema:
* Schema for port data: The default schema is HashMap>, where valueData is class{String, Integer}
* String schema: The string is "key;valkey1:value1;valkey2:value2;..."
* HashMap schema: Key is String, and Value is a ArrrayList
* The value in both the schemas is an integer (for choice of strings, these are enum values) *
* Port Interface
* data: Output port for emitting the new classified seed
*
* Properties: * seed_start: An integer for the seed to start from
* seed_end: An integer for the seed to end with
*
string_schema: If set to true, operates in string schema mode
*
key: Classifier keys to be inserted randomly. Format is "key1,key1start, key1end; key2, key2start, key2end;..." *
* Compile time checks are:
* seed_startHas to be an integer
* seed_endHas to be an integer
* keyIf provided has to be in format "key1,key1start,key1end;key2, key2start, key2end; ..." *
* Benchmarks: Blast as many tuples as possible in inline mode
* With key: Benchmarked at over 1 million tuples/second in local/in-line mode
* Without key: Benchmarked at over 4 million tuples/second in local/in-line mode
*

* @displayName Seed Event Generator * @category Test Bench * @tags generate * @since 0.3.2 */ public class SeedEventGenerator extends BaseOperator implements InputOperator { @SuppressWarnings("rawtypes") /** * This output port emits generated tuples as a HashMap<String, ArrayList<KeyValPair>>. * The key in the map is a seed integer in the form of a string. * The value in the map is a list of key value pairs. * Each of these key value pairs is comprised of a specified string key, * and a randomly generated integer which lies within a specified range. */ public final transient DefaultOutputPort>> keyvalpair_list = new DefaultOutputPort>>(); /** * This output port emits generator tuples as a HashMap<String, ArrayList<Integer>>>. * The key in the map is a seed integer in the form of a string. * The value in the map is a list of integers. * Each integer is randomly generated and lies within a specified range. */ public final transient DefaultOutputPort>> val_list = new DefaultOutputPort>>(); /** * This output port emits generator tuples as a HashMap<String, String>>. * The key in the map is a seed integer in the form of a string. * The value in the map is a list of integers and their corresponding keys (in string form) that are randomly generated and lies within a specified range. */ public final transient DefaultOutputPort> string_data = new DefaultOutputPort>(); /** * This output port emits generator tuples as a HashMap<String, String>>. * The key in the map is a seed integer in the form of a string. * The value in the map is a list of integers (in string form) that are randomly generated and lies within a specified range. */ public final transient DefaultOutputPort> val_data = new DefaultOutputPort>(); private static Logger LOG = LoggerFactory.getLogger(SeedEventGenerator.class); /** * Data for classification values */ ArrayList keys = null; ArrayList keys_min = null; ArrayList keys_range = null; int s_start = 0; int s_end = 99; private final Random random = new Random(); public void setSeedStart(int i) { s_start = i; } public void setSeedEnd(int i) { s_end = i; } @Override public void emitTuples() { int lstart = s_start; int lend = s_end; if (lstart < lend) { for (int i = lstart; i < lend; i++) { emitTuple(i); } } else { for (int i = lstart; i > lend; i--) { emitTuple(i); } } // done generating data LOG.info("Finished generating data."); BaseOperator.shutdown(); } /** * * Inserts a tuple for a given outbound key * * @param i */ @SuppressWarnings({ "rawtypes", "unchecked" }) public void emitTuple(int i) { HashMap stuple; HashMap> atuple; String key = Integer.toString(i); if (keys == null) { if (string_data.isConnected()) { stuple = new HashMap(1); stuple.put(key, null); string_data.emit(stuple); } if (keyvalpair_list.isConnected()) { atuple = new HashMap>(1); atuple.put(key, null); keyvalpair_list.emit(atuple); } return; } ArrayList alist = null; ArrayList vlist = null; String str = new String(); String vstr = new String(); boolean iskv = keyvalpair_list.isConnected(); boolean isvl = val_list.isConnected(); boolean issd = string_data.isConnected(); boolean isvd = val_data.isConnected(); int j = 0; for (String s: keys) { if (iskv) { if (alist == null) { alist = new ArrayList(keys.size()); } alist.add(new KeyValPair(s, new Integer(keys_min.get(j) + random.nextInt(keys_range.get(j))))); } if (isvl) { if (vlist == null) { vlist = new ArrayList(keys.size()); } vlist.add(new Integer(keys_min.get(j) + random.nextInt(keys_range.get(j)))); } if (issd) { if (!str.isEmpty()) { str += ';'; } str += s + ":" + Integer.toString(keys_min.get(j) + random.nextInt(keys_range.get(j))); } if (isvd) { if (!vstr.isEmpty()) { vstr += ';'; } vstr += Integer.toString(keys_min.get(j) + random.nextInt(keys_range.get(j))); } j++; } if (iskv) { atuple = new HashMap>(1); atuple.put(key, alist); keyvalpair_list.emit(atuple); } if (isvl) { HashMap> ituple = new HashMap>(1); ituple.put(key, vlist); val_list.emit(ituple); } if (issd) { stuple = new HashMap(1); stuple.put(key, str); string_data.emit(stuple); } if (isvd) { HashMap vtuple = new HashMap(1); vtuple.put(key, vstr); val_data.emit(vtuple); } } /** * * Add a key data. By making a single call we ensure that all three Arrays are not corrupt and that the addition is atomic/one place * * @param key * @param low * @param high */ public void addKeyData(String key, int low, int high) { if (keys == null) { keys = new ArrayList(); keys_min = new ArrayList(); keys_range = new ArrayList(); } keys.add(key); keys_min.add(low); keys_range.add(high - low + 1); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy