All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lens.cube.parse.AbridgedTimeRangeWriter Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lens.cube.parse;

import static java.util.stream.Collectors.toMap;

import java.util.*;

import org.apache.lens.cube.metadata.FactPartition;
import org.apache.lens.server.api.error.LensException;

import org.apache.commons.lang.StringUtils;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;

/**
 * Collapses the time range filters using IN operators
 */
public class AbridgedTimeRangeWriter implements TimeRangeWriter {

  /**
   * Return IN clause for the partitions selected in the cube query
   *
   * @param cubeQueryContext cube query context
   * @param tableName        table name
   * @param parts            partitions
   * @return
   * @throws LensException
   */
  @Override
  public String getTimeRangeWhereClause(CubeQueryContext cubeQueryContext,
    String tableName,
    Set parts) throws LensException {
    if (parts == null || parts.isEmpty()) {
      return "";
    }
    // Collect partition specs by column in a map
    // All filters which contain only a single column will be combined in an IN operator clause
    // This clause will be ORed with filters which contain multiple columns.
    List subFilters = new ArrayList();
    for (Map.Entry, Set> entry : groupPartitions(parts).entrySet()) {
      List clauses = new ArrayList();
      String clause;
      clause = getClause(cubeQueryContext, tableName, entry.getKey());
      if (clause != null && !clause.isEmpty()) {
        clauses.add(clause);
      }
      clause = getClause(cubeQueryContext, tableName, entry.getValue());
      if (clause != null && !clause.isEmpty()) {
        clauses.add(clause);
      }
      subFilters.add("(" + StringUtils.join(clauses, " AND ") + ")");
    }
    return StringUtils.join(subFilters, " OR ");
  }

  private String getClause(CubeQueryContext cubeQueryContext,
    String tableName,
    Set parts) throws LensException {
    Map> partFilterMap = new HashMap>();
    List allTimeRangeFilters = new ArrayList();

    for (FactPartition factPartition : parts) {
      String filter = TimeRangeUtils.getTimeRangePartitionFilter(factPartition, cubeQueryContext, tableName);
      if (filter.contains("AND")) {
        allTimeRangeFilters.add("(" + filter + ")");
      } else {
        extractColumnAndCondition(filter, partFilterMap);
      }
    }

    List inClauses = new ArrayList(partFilterMap.size());
    for (String column : partFilterMap.keySet()) {
      String clause =
        "(" + StringUtils.join(partFilterMap.get(column), ",") + ")";
      inClauses.add(column + " IN " + clause);
    }

    allTimeRangeFilters.add(StringUtils.join(inClauses, " AND "));
    return StringUtils.join(allTimeRangeFilters, " OR ");
  }

  /**
   * parts is a collection of FactPartition objects. And FactPartition can be viewed as two boolean conditions, one
   * specified by it's containingPart object, and another specified by itself in the form (partCol = partSpec)
   * 

* Collection of FactPartition objects can be viewed as an OR clause on all the FactPartition objects -- which by * itself is a binary AND clause. *

* So Collection<FactPartition> is nothing but (a AND b) OR (c AND d) OR (e AND f) ... *

* This function tries to reduce such a big clause by using Boolean arithmetic. The big thing it aims to reduce is the * following class of clauses: *

* (a AND c) OR (a AND d) OR (b AND c) OR (b AND d) => ((a OR b) AND (c OR d)) *

* Equivalent return value for such a reduction would be an entry in the returned map from set(a,b) to set(c,d). * Assuming the argument was set(a(containing=c), a(containing=d), b(containing=c), b(containing=d)) * * @param parts * @return */ private Map, Set> groupPartitions(Collection parts) { Map> partitionSetMap = new HashMap>(); for (FactPartition part : parts) { partitionSetMap.computeIfAbsent(part.getContainingPart(), k -> Sets.newTreeSet()).add(part.withoutContaining()); } Map, Set> setSetOppositeMap = Maps.newHashMap(); for (Map.Entry> entry : partitionSetMap.entrySet()) { setSetOppositeMap.computeIfAbsent(entry.getValue(), k -> Sets.newTreeSet()); if (entry.getKey() != null) { setSetOppositeMap.get(entry.getValue()).add(entry.getKey()); } } // inverse again return setSetOppositeMap.entrySet().stream().collect(toMap(Map.Entry::getValue, Map.Entry::getKey)); } // This takes the output of filter generated by TimeRangeUtils.getTimeRangePartitionFilter // splits the filters by column names and filters are collected by column name in the // map passed as argument private void extractColumnAndCondition(String token, Map> partFilterMap) { token = token.trim(); String[] subTokens = StringUtils.split(token, '='); String column = subTokens[0].trim(); String filterValue = subTokens[1].trim(); partFilterMap.computeIfAbsent(column, k -> new ArrayList<>()).add(filterValue); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy