// All downloads are free. Search and download functionality uses the official Maven repository.
//
// com.datatorrent.lib.streamquery.GroupByHavingOperator (Maven / Gradle / Ivy)

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.streamquery;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import javax.validation.constraints.NotNull;

import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.common.util.BaseOperator;
import com.datatorrent.lib.streamquery.condition.Condition;
import com.datatorrent.lib.streamquery.condition.HavingCondition;
import com.datatorrent.lib.streamquery.function.FunctionIndex;
import com.datatorrent.lib.streamquery.index.ColumnIndex;

/**
 * An implementation of BaseOperator that provides sql group by querying semantics on live data stream. 
*

* Stream rows satisfying given select condition are processed by group by * column names and aggregate column function.
* If having condition is specified for aggregate index(s), it must also be * satisfied by row. HashMap of column name(s) and aggregate alias is emitted on * output port.
*
* StateFull : Yes, Operator aggregates input over application window.
* Partitions : No, will yield wrong result(s).
*
* Ports:
* inport : Input hash map(row) port, expects * HashMap<String,Object><
* outport : Output hash map(row) port, emits * HashMap<String,Object>
*
* Properties :
* condition : Select condition for deleting rows.
* columnGroupIndexes : Group by names list.
* indexes : Select column indexes.
* havingConditions : Having filter conditions for aggregate(s).
*
* @displayName GroupBy Having Operator * @category Stream Manipulators * @tags sql, groupby operator, condition, index * @since 0.3.4 */ @OperatorAnnotation(partitionable = false) public class GroupByHavingOperator extends BaseOperator { /** * aggregate indexes. */ private ArrayList aggregates = new ArrayList(); /** * Column, Group by names */ private ArrayList columnGroupIndexes = new ArrayList(); /** * where condition. */ private Condition condition; /** * having aggregate condtion; */ private ArrayList havingConditions = new ArrayList(); /** * Table rows. */ private ArrayList> rows = new ArrayList>(); public void addAggregateIndex(@NotNull FunctionIndex index) { aggregates.add(index); } public void addColumnGroupByIndex(@NotNull ColumnIndex index) { columnGroupIndexes.add(index); } public void addHavingCondition(@NotNull HavingCondition condition) { havingConditions.add(condition); } /** * @param condition condition */ public void setCondition(Condition condition) { this.condition = condition; } /** * Input port that takes a map of <string,object>. */ public final transient DefaultInputPort> inport = new DefaultInputPort>() { @Override public void process(Map tuple) { if ((condition != null) && (!condition.isValidRow(tuple))) { return; } rows.add(tuple); } }; /** * Output port that emits a map of <string,object>. */ public final transient DefaultOutputPort> outport = new DefaultOutputPort>(); /** * Create aggregate at end window. 
*/ @Override public void endWindow() { // group names if (columnGroupIndexes.size() == 0) { rows = new ArrayList>(); return; } // group rows HashMap>> groups = new HashMap>>(); for (Map row : rows) { MultiKeyCompare key = new MultiKeyCompare(); for (ColumnIndex index : columnGroupIndexes) { key.addCompareKey(row.get(index.getColumn())); } ArrayList> subRows; if (groups.containsKey(key)) { subRows = groups.get(key); } else { subRows = new ArrayList>(); groups.put(key, subRows); } subRows.add(row); } // Iterate over groups and emit aggregate values for (Map.Entry>> entry : groups .entrySet()) { ArrayList> subRows = entry.getValue(); // get result Map result = new HashMap(); for (ColumnIndex index : columnGroupIndexes) { index.filter(subRows.get(0), result); } // append aggregate values for (FunctionIndex aggregate : aggregates) { try { aggregate.filter(subRows, result); } catch (Exception e) { e.printStackTrace(); } } // check valid having aggregate boolean isValidHaving = true; for (HavingCondition condition : havingConditions) { try { isValidHaving &= condition.isValidAggregate(subRows); } catch (Exception e) { e.printStackTrace(); return; } } if (isValidHaving) { outport.emit(result); } } rows = new ArrayList>(); } /** * multi key compare class. */ @SuppressWarnings("rawtypes") private class MultiKeyCompare implements Comparable { /** * compare keys. 
*/ ArrayList compareKeys = new ArrayList(); @Override public boolean equals(Object other) { if (other instanceof MultiKeyCompare) { if (compareKeys.size() != ((MultiKeyCompare)other).compareKeys.size()) { return false; } } for (int i = 0; i < compareKeys.size(); i++) { if (!(compareKeys.get(i).equals(((MultiKeyCompare)other).compareKeys.get(i)))) { return false; } } return true; } @Override public int hashCode() { int hashCode = 0; for (int i = 0; i < compareKeys.size(); i++) { hashCode += compareKeys.get(i).hashCode(); } return hashCode; } @Override public int compareTo(Object other) { if (this.equals(other)) { return 0; } return -1; } /** * Add compare key. */ public void addCompareKey(Object value) { compareKeys.add(value); } } }