All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datatorrent.lib.algo.InvertIndex Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.algo;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.Operator.Unifier;
import com.datatorrent.api.annotation.OperatorAnnotation;

import com.datatorrent.lib.util.BaseKeyValueOperator;

/**
 * This operator takes a stream of key value pairs each window,
 * and outputs a set of inverted key value pairs at the end of each window.
 * 

* Inverts the index and sends out the tuple on output port "index" at the end of the window. *

*

* This is an end of window operator
*
* StateFull : Yes, tuple are compare across application window(s).
* Partitions : Yes, inverted indexes are unified by instance of same operator.
*
*
* Ports:
* data: expects <K,V>
* index: emits <V,ArrayList<K>>(1); one HashMap per V
*
*

* * @displayName Invert Key Value Pairs * @category Stream Manipulators * @tags key value * * @since 0.3.2 */ @OperatorAnnotation(partitionable = true) public class InvertIndex extends BaseKeyValueOperator implements Unifier>> { /** * Inverted key/value map. */ protected HashMap> map = new HashMap>(); /** * The input port on which key value pairs are received. */ public final transient DefaultInputPort> data = new DefaultInputPort>() { /** * Reverse indexes a HashMap> tuple */ @Override public void process(HashMap tuple) { for (Map.Entry e: tuple.entrySet()) { if (e.getValue() == null) { // error tuple? continue; } insert(e.getValue(), cloneKey(e.getKey())); } } }; /** * The output port on which inverted key value pairs are emitted. */ public final transient DefaultOutputPort>> index = new DefaultOutputPort>>() { @Override public Unifier>> getUnifier() { return new InvertIndex(); } }; /** * * Returns the ArrayList stored for a key * * @param key */ void insert(V val, K key) { ArrayList list = map.get(val); if (list == null) { list = new ArrayList(4); map.put(cloneValue(val), list); } list.add(key); } /** * Emit all the data and clear the hash * Clears internal data */ @Override public void endWindow() { for (Map.Entry> e: map.entrySet()) { HashMap> tuple = new HashMap>(1); tuple.put(e.getKey(), e.getValue()); index.emit(tuple); } map.clear(); } /** * Unifier override. */ @Override public void process(HashMap> tuple) { for (Map.Entry> e: tuple.entrySet()) { ArrayList keys; if (map.containsKey(e.getKey())) { keys = map.remove(e.getKey()); } else { keys = new ArrayList(); } keys.addAll(e.getValue()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy