// org.apache.lens.cube.parse.AbridgedTimeRangeWriter Maven / Gradle / Ivy
//
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of lens-cube Show documentation
// OLAP cube queries
// The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lens.cube.parse;

import static java.util.stream.Collectors.toMap;

import java.util.*;

import org.apache.lens.cube.metadata.FactPartition;
import org.apache.lens.server.api.error.LensException;

import org.apache.commons.lang.StringUtils;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

/**
 * Collapses the time range filters using IN operators
 */
public class AbridgedTimeRangeWriter implements TimeRangeWriter {

  /**
   * Return IN clause for the partitions selected in the cube query
   *
   * @param cubeQueryContext cube query context
   * @param tableName        table name
   * @param parts            partitions
   * @return
   * @throws LensException
   */
  @Override
  public String getTimeRangeWhereClause(CubeQueryContext cubeQueryContext,
    String tableName,
    Set parts) throws LensException {
    if (parts == null || parts.isEmpty()) {
      return "";
    }
    // Collect partition specs by column in a map
    // All filters which contain only a single column will be combined in an IN operator clause
    // This clause will be ORed with filters which contain multiple columns.
    List subFilters = new ArrayList();
    for (Map.Entry, Set> entry : groupPartitions(parts).entrySet()) {
      List clauses = new ArrayList();
      String clause;
      clause = getClause(cubeQueryContext, tableName, entry.getKey());
      if (clause != null && !clause.isEmpty()) {
        clauses.add(clause);
      }
      clause = getClause(cubeQueryContext, tableName, entry.getValue());
      if (clause != null && !clause.isEmpty()) {
        clauses.add(clause);
      }
      subFilters.add("(" + StringUtils.join(clauses, " AND ") + ")");
    }
    return StringUtils.join(subFilters, " OR ");
  }

  private String getClause(CubeQueryContext cubeQueryContext,
    String tableName,
    Set parts) throws LensException {
    Map> partFilterMap = new HashMap>();
    List allTimeRangeFilters = new ArrayList();

    for (FactPartition factPartition : parts) {
      String filter = TimeRangeUtils.getTimeRangePartitionFilter(factPartition, cubeQueryContext, tableName);
      if (filter.contains("AND")) {
        allTimeRangeFilters.add("(" + filter + ")");
      } else {
        extractColumnAndCondition(filter, partFilterMap);
      }
    }

    List inClauses = new ArrayList(partFilterMap.size());
    for (String column : partFilterMap.keySet()) {
      String clause =
        "(" + StringUtils.join(partFilterMap.get(column), ",") + ")";
      inClauses.add(column + " IN " + clause);
    }

    allTimeRangeFilters.add(StringUtils.join(inClauses, " AND "));
    return StringUtils.join(allTimeRangeFilters, " OR ");
  }

  /**
   * parts is a collection of FactPartition objects. And FactPartition can be viewed as two boolean conditions, one
   * specified by it's containingPart object, and another specified by itself in the form (partCol = partSpec)
   * 
   * Collection of FactPartition objects can be viewed as an OR clause on all the FactPartition objects -- which by
   * itself is a binary AND clause.
   * 

   * So Collection<FactPartition> is nothing but (a AND b) OR (c AND d) OR (e AND f) ...
   * 

   * This function tries to reduce such a big clause by using Boolean arithmetic. The big thing it aims to reduce is the
   * following class of clauses:
   * 

   * (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) => ((a OR b) AND (c OR d))
   * 
   * Equivalent return value for such a reduction would be an entry in the returned map from set(a,b) to set(c,d).
   * Assuming the argument was set(a(containing=c), a(containing=d), b(containing=c), b(containing=d))
   *
   * @param parts
   * @return
   */
  private Map, Set> groupPartitions(Collection parts) {
    Map> partitionSetMap = new HashMap>();
    for (FactPartition part : parts) {
      partitionSetMap.computeIfAbsent(part.getContainingPart(), k -> Sets.newTreeSet()).add(part.withoutContaining());
    }
    Map, Set> setSetOppositeMap = Maps.newHashMap();
    for (Map.Entry> entry : partitionSetMap.entrySet()) {
      setSetOppositeMap.computeIfAbsent(entry.getValue(), k -> Sets.newTreeSet());
      if (entry.getKey() != null) {
        setSetOppositeMap.get(entry.getValue()).add(entry.getKey());
      }
    }
    // inverse again
    return setSetOppositeMap.entrySet().stream().collect(toMap(Map.Entry::getValue, Map.Entry::getKey));
  }

  // This takes the output of filter generated by TimeRangeUtils.getTimeRangePartitionFilter
  // splits the filters by column names and filters are collected by column name in the
  // map passed as argument
  private void extractColumnAndCondition(String token, Map> partFilterMap) {
    token = token.trim();

    String[] subTokens = StringUtils.split(token, '=');

    String column = subTokens[0].trim();
    String filterValue = subTokens[1].trim();

    partFilterMap.computeIfAbsent(column, k -> new ArrayList<>()).add(filterValue);
  }
}