All downloads are free. Search and download functionality uses the official Maven repository.

com.datatorrent.lib.logs.LineTokenizerKeyVal Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.logs;

import java.util.HashMap;

import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.api.annotation.Stateless;
import com.datatorrent.lib.util.BaseLineTokenizer;
import com.datatorrent.lib.util.UnifierHashMap;

/**
 * This operator splits lines into tokens, and tokens into sub-tokens. 
 * Emitted tuples are key value pairs where tokens are the keys and sub tokens are the values.
 * 

* Useful to convert String (log lines) into a POJO (HashMap) *

*

* This module is a pass through
*
* StateFull : No, tokens are processed in current window.
* Partitions : Yes, output unifier.
*
*
* Ideal for applications like log processing
* Ports:
* data: expects String
* tokens: emits HashMap<String,String>
*
* Properties:
* splitby: The characters used to split the line. Default is ";\t "
* splittokenby: The characters used to split a token into key,val pair. Default is "", i.e. tokens are not * split, and key is set to token, and val is null
*
*

* @displayName Line Tokenizer Key Value * @category Tuple Converters * @tags string, key value * * @since 0.3.2 */ @Stateless @OperatorAnnotation(partitionable = true) public class LineTokenizerKeyVal extends BaseLineTokenizer { /** * This output port emits key value pairs where the key is a token in an input string, * and the value is a sub token of the key token. */ public final transient DefaultOutputPort> tokens = new DefaultOutputPort>() { @Override public Unifier> getUnifier() { return new UnifierHashMap(); } }; private transient HashMap map = null; private transient String skey = ""; private transient String sval = ""; /** * sets up the cache */ @Override public void beginProcessTokens() { map = new HashMap(); } /** * emits tokens on port "tokens", and clears the cache */ @Override public void endProcessTokens() { if (map != null) { tokens.emit(map); map = null; } } /** * clears subtoken key,val pair */ @Override public void beginProcessSubTokens() { skey = ""; sval = ""; } /** * inserts subtoken key,val pair in subtoken hash. If there are multiple keys with the same value * override this call and append values */ @Override public void endProcessSubTokens() { if (!skey.isEmpty()) { map.put(skey, sval); skey = ""; sval = ""; } } /** * first subtoken is the key, the next is the val. * No error is flagged for third token as yet. * @param subtok */ @Override public void processSubToken(String subtok) { if (skey.isEmpty()) { skey = subtok; } else if (sval.isEmpty()) { sval = subtok; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy