com.datatorrent.lib.streamquery.GroupByHavingOperator Maven / Gradle / Ivy

Go to download
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package com.datatorrent.lib.streamquery;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

import javax.validation.constraints.NotNull;

import com.datatorrent.api.DefaultInputPort;
import com.datatorrent.api.DefaultOutputPort;
import com.datatorrent.api.annotation.OperatorAnnotation;
import com.datatorrent.common.util.BaseOperator;
import com.datatorrent.lib.streamquery.condition.Condition;
import com.datatorrent.lib.streamquery.condition.HavingCondition;
import com.datatorrent.lib.streamquery.function.FunctionIndex;
import com.datatorrent.lib.streamquery.index.ColumnIndex;

/**
 * An implementation of BaseOperator that provides sql group by querying semantics on live data stream. 

 * 
 * Stream rows satisfying given select condition are processed by group by
 * column names and aggregate column function. 

 * If having condition is specified for aggregate index(s), it must also be
 * satisfied by row. HashMap of column name(s) and aggregate alias is emitted on
 * output port. 

 * 

 * Stateful : Yes, operator aggregates input over the application window.

 * Partitions : No,  will yield wrong result(s). 

 * 

 * Ports:

 *  inport :  Input hash map(row) port, expects
 * HashMap<String,Object>

 *  outport :  Output hash map(row) port, emits
 * HashMap<String,Object>

 * 

 *  Properties :  

 *  condition :  Select (where) condition; input rows that do not satisfy it are discarded.

 *  columnGroupIndexes :  Group by names list. 

 *  aggregates :  Aggregate function indexes computed for each group.

 *  havingConditions :  Having filter conditions for aggregate(s). 

 * 

 * @displayName GroupBy Having Operator
 * @category Stream Manipulators
 * @tags sql, groupby operator, condition, index
 * @since 0.3.4
 */
@OperatorAnnotation(partitionable = false)
public class GroupByHavingOperator extends BaseOperator
{

  /**
   * aggregate indexes.
   */
  private ArrayList aggregates = new ArrayList();

  /**
   * Column, Group by names
   */
  private ArrayList columnGroupIndexes = new ArrayList();

  /**
   * where condition.
   */
  private Condition condition;

  /**
   * having aggregate condtion;
   */
  private ArrayList havingConditions = new ArrayList();

  /**
   * Table rows.
   */
  private ArrayList> rows = new ArrayList>();

  /**
   * Appends an aggregate function index to the list evaluated for every group
   * at end of window.
   *
   * @param index aggregate function index to register
   */
  public void addAggregateIndex(@NotNull FunctionIndex index)
  {
    this.aggregates.add(index);
  }

  /**
   * Appends a group-by column index; the column's value becomes part of the
   * grouping key built at end of window.
   *
   * @param index group-by column index to register
   */
  public void addColumnGroupByIndex(@NotNull ColumnIndex index)
  {
    this.columnGroupIndexes.add(index);
  }

  /**
   * Appends a having condition; a group's result is emitted only when every
   * registered having condition accepts the group's rows.
   *
   * @param condition having condition to register
   */
  public void addHavingCondition(@NotNull HavingCondition condition)
  {
    this.havingConditions.add(condition);
  }

  /**
   * Sets the where condition applied to each incoming row. When the condition
   * is {@code null} no filtering takes place and every row is buffered.
   *
   * @param condition where condition, may be {@code null}
   */
  public void setCondition(Condition condition)
  {
    this.condition = condition;
  }

  /**
   * Input port that takes a map of <string,object>.
   */
  public final transient DefaultInputPort> inport = new DefaultInputPort>()
  {

    @Override
    public void process(Map tuple)
    {
      if ((condition != null) && (!condition.isValidRow(tuple))) {
        return;
      }
      rows.add(tuple);
    }
  };

  /**
   * Output port that emits a map of <string,object>.
   */
  public final transient DefaultOutputPort> outport = new DefaultOutputPort>();

  /**
   * Create aggregate at end window.
   */
  @Override
  public void endWindow()
  {
    // group names
    if (columnGroupIndexes.size() == 0) {
      rows = new ArrayList>();
      return;
    }

    // group rows
    HashMap>> groups = new HashMap>>();
    for (Map row : rows) {
      MultiKeyCompare key = new MultiKeyCompare();
      for (ColumnIndex index : columnGroupIndexes) {
        key.addCompareKey(row.get(index.getColumn()));
      }
      ArrayList> subRows;
      if (groups.containsKey(key)) {
        subRows = groups.get(key);
      } else {
        subRows = new ArrayList>();
        groups.put(key, subRows);
      }
      subRows.add(row);
    }

    // Iterate over groups and emit aggregate values
    for (Map.Entry>> entry : groups
        .entrySet()) {
      ArrayList> subRows = entry.getValue();

      // get result
      Map result = new HashMap();
      for (ColumnIndex index : columnGroupIndexes) {
        index.filter(subRows.get(0), result);
      }

      // append aggregate values
      for (FunctionIndex aggregate : aggregates) {
        try {
          aggregate.filter(subRows, result);
        } catch (Exception e) {
          e.printStackTrace();
        }
      }

      // check valid having aggregate
      boolean isValidHaving = true;
      for (HavingCondition condition : havingConditions) {
        try {
          isValidHaving &= condition.isValidAggregate(subRows);
        } catch (Exception e) {
          e.printStackTrace();
          return;
        }
      }
      if (isValidHaving) {
        outport.emit(result);
      }
    }

    rows = new ArrayList>();
  }

  /**
   * multi key compare class.
   */
  @SuppressWarnings("rawtypes")
  private class MultiKeyCompare implements Comparable
  {

    /**
     * compare keys.
     */
    ArrayList