// com.datatorrent.lib.join.SemiJoinOperator Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.datatorrent.lib.join;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.classification.InterfaceStability.Evolving;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.Operator;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.lib.streamquery.condition.Condition;
import com.datatorrent.lib.streamquery.index.Index;
/**
* An implementation of Operator that reads table row data from two table data input ports.
*
* The operator semi-joins rows on the given condition and selected column names, and emits
* the semi-joined result on the output port.
*
* Note: A semi-join is not a left-join or left-outer-join. In a semi-join, only the joined rows
* from the left table are returned. However, in a left-outer-join, all the rows from left table
* will be returned (also padding with nulls for columns from the right table when not joined).
*
* For more information see
* <a href="http://docs.oracle.com/cd/B28359_01/server.111/b28286/statements_10002.htm#i2166436">the Oracle SQL reference</a>.
*
*
* Stateful : Yes, Operator aggregates input over application window.
* Partitions : No, will yield wrong result(s).
*
* Ports :
* inport1 : Input port for table 1, expects HashMap<String, Object>
* inport2 : Input port for table 2, expects HashMap<String, Object>
* outport : Output semi-joined row port, emits HashMap<String, ArrayList<Object>>
*
* Properties :
* joinCondition : Join condition for table rows.
* table1Columns : Columns to be selected from table1.
* table2Columns : Columns to be selected from table2.
*
*
* @displayName Semi join
* @category Join Manipulators
* @tags sql, semi join operator
* @since 0.3.3
*/
@OperatorAnnotation(partitionable = false)
@Evolving
public class SemiJoinOperator implements Operator
{
/**
* Join condition on which rows from the two input tables are semi-joined.
* NOTE(review): how and when the condition is evaluated is not visible in this
* part of the file; confirm against the port/window handling code.
*/
private Condition joinCondition;
/**
* Table1 select columns.
* Note: only the left table (Table1) will be output in a semi-join.
* Declared with an explicit element type ({@link Index}) instead of as a raw
* {@code ArrayList}, so the compiler checks what is stored in the list.
*/
private ArrayList<Index> table1Columns = new ArrayList<>();
/**
* Collect data rows from input port 1.
*/
private List