All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datatorrent.lib.logs.LineToTokenHashMap Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.logs;

import java.util.ArrayList;
import java.util.HashMap;

import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.api.annotation.Stateless;

import com.datatorrent.lib.util.BaseLineTokenizer;
import com.datatorrent.lib.util.UnifierHashMap;

/**
 * This operator splits string objects into tokens. 
 * A key value pair is emitted where the key is the first token in an input tuple
 * and the value is a list of the other tokens in an input tuple., and emits as a HashMap where the first token.
 * 

* This module is a pass through
*
* StateFull : No, tokens are processed in current window.
* Partitions : Yes, output port unifier operator.
*
* Ports:
* data: Input port, expects String
* tokens: Output port, emits HashMap<String, ArrayList<String>>
*
* Properties:
* splitby: The characters used to split the line. Default is ";\t "
* splittokenby: The characters used to split a token into key,val1,val2,.... Default is "", i.e. tokens are not split, and key=token, val=""
*

* * @displayName Line To Token (HashMap) * @category Tuple Converters * @tags string, hashmap * * @since 0.3.2 */ @Stateless @OperatorAnnotation(partitionable = true) public class LineToTokenHashMap extends BaseLineTokenizer { /** * This output port emits the split strings. */ public final transient DefaultOutputPort>> tokens = new DefaultOutputPort>>() { @Override public Unifier>> getUnifier() { return new UnifierHashMap>(); } }; protected transient HashMap> otuple = null; protected transient ArrayList vals = null; protected transient String tok = ""; /** * sets up output tuple */ @Override public void beginProcessTokens() { otuple = new HashMap>(); } /** * clears data for subtokens */ @Override public void beginProcessSubTokens() { vals = null; tok = ""; } /** * first token is key, the rest are added to ArrayList * @param subtok */ @Override public void processSubToken(String subtok) { if (vals == null) { tok = subtok; vals = new ArrayList(); } else { vals.add(subtok); } } /** * Adds key,Arraylist pair to output tuple. */ @Override public void endProcessSubTokens() { addSubToken(tok, vals); tok = ""; vals = null; } /** * If you have multiple subtokens with same value, override and aggregate the values and then put * in the map * @param stok subtoken * @param svals subtoken val list */ public void addSubToken(String stok, ArrayList svals) { otuple.put(stok, svals); } /** * emits output tuple */ @Override public void endProcessTokens() { if (tokens.isConnected()) { tokens.emit(otuple); otuple = null; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy