
org.apache.lens.cube.parse.SegmentationCandidate

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.lens.cube.parse;

import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toMap;

import static org.apache.lens.cube.metadata.DateUtil.formatAbsDate;
import static org.apache.lens.cube.metadata.MetastoreUtil.getStringLiteralAST;

import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier;
import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_FROM;
import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_HAVING;
import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_INSERT;
import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_ORDERBY;
import static org.apache.hadoop.hive.ql.parse.HiveParser.TOK_SELEXPR;

import java.util.Collection;
import java.util.Comparator;
import java.util.Date;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collector;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.lens.cube.metadata.Cube;
import org.apache.lens.cube.metadata.CubeColumn;
import org.apache.lens.cube.metadata.CubeInterface;
import org.apache.lens.cube.metadata.FactPartition;
import org.apache.lens.cube.metadata.MetastoreUtil;
import org.apache.lens.cube.metadata.Segment;
import org.apache.lens.cube.metadata.Segmentation;
import org.apache.lens.cube.metadata.TimeRange;
import org.apache.lens.server.api.LensConfConstants;
import org.apache.lens.server.api.error.LensException;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.parse.ASTNode;

import org.antlr.runtime.CommonToken;

import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import lombok.Getter;

/**
 * Candidate backed by a {@link Segmentation}: the incoming cube query is rewritten once per
 * {@link Segment} against that segment's cube, and the segmentation candidate is answerable
 * only when every per-segment rewrite manages to pick a candidate of its own.
 *
 * Created on 09/03/17.
 */
public class SegmentationCandidate implements Candidate {

  Collection<String> columns;
  @Getter
  private final CubeQueryContext cubeQueryContext;
  private Segmentation segmentation;
  private Map<String, Cube> cubesOfSegmentation;
  Map<String, CubeQueryContext> cubeQueryContextMap;
  @Getter
  private final Set<Integer> answerableMeasurePhraseIndices = Sets.newHashSet();
  private Map<TimeRange, TimeRange> queriedRangeToMyRange = Maps.newHashMap();

  SegmentationCandidate(CubeQueryContext cubeQueryContext, Segmentation segmentation) throws LensException {
    this.cubeQueryContext = cubeQueryContext;
    this.segmentation = segmentation;
    cubesOfSegmentation = Maps.newHashMap();
    cubeQueryContextMap = Maps.newHashMap();
    for (Segment segment : segmentation.getSegments()) {
      // assuming only base cubes in segmentation
      cubesOfSegmentation.put(segment.getName(), (Cube) getCubeMetastoreClient().getCube(segment.getName()));
    }
  }


  public SegmentationCandidate explode() throws LensException {
    return this;
  }

  private static <T> Predicate<T> not(Predicate<T> predicate) {
    return predicate.negate();
  }

  boolean rewriteInternal(Configuration conf, HiveConf hconf) throws LensException {
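    // For each segment: deep-copy the original query AST, substitute this candidate's time ranges,
    // replace measures it cannot answer with the default measure, point the query at the segment's
    // cube, trim HAVING/ORDER BY to that cube's fields, and re-run the rewriter on the result.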
    CubeInterface cube = getCube();
    if (cube == null) {
      return false;
    }
    for (Segment segment : segmentation.getSegments()) {
      // assuming only base cubes in segmentation
      Cube innerCube = (Cube) getCubeMetastoreClient().getCube(segment.getName());
      cubesOfSegmentation.put(segment.getName(), innerCube);
      Set<QueriedPhraseContext> notAnswerable = cubeQueryContext.getQueriedPhrases().stream()
        .filter(not(this::isPhraseAnswerable)).collect(Collectors.toSet());
      // create ast
      ASTNode ast = MetastoreUtil.copyAST(cubeQueryContext.getAst(),
        astNode -> {
          // replace time range
          for (Map.Entry<TimeRange, TimeRange> timeRangeTimeRangeEntry : queriedRangeToMyRange.entrySet()) {
            TimeRange queriedTimeRange = timeRangeTimeRangeEntry.getKey();
            TimeRange timeRange = timeRangeTimeRangeEntry.getValue();
            if (astNode.getParent() == queriedTimeRange.getAstNode()) {
              if (astNode.getChildIndex() == 2) {
                return Pair.of(getStringLiteralAST(formatAbsDate(timeRange.getFromDate())), false);
              } else if (astNode.getChildIndex() == 3) {
                return Pair.of(getStringLiteralAST(formatAbsDate(timeRange.getToDate())), false);
              }
              break;
            }
          }
          // else, replace unanswerable measures
          for (QueriedPhraseContext phraseContext : notAnswerable) {
            if ((astNode.getType() != TOK_SELEXPR && astNode == phraseContext.getExprAST())
              || astNode.getParent() == phraseContext.getExprAST()) {
              return Pair.of(MetastoreUtil.copyAST(UnionQueryWriter.DEFAULT_MEASURE_AST), false);
            }
          }
          // else, copy token replacing cube name and ask for recursion on child nodes
          // this is hard copy. Default is soft copy, which is new ASTNode(astNode)
          // Soft copy retains the token object inside it, hard copy copies token object
          return Pair.of(new ASTNode(new CommonToken(astNode.getToken())), true);
        });
      addCubeNameAndAlias(ast, innerCube);
      trimHavingAndOrderby(ast, innerCube);
      Configuration innerConf = conf;
      if (conf.get(LensConfConstants.QUERY_METRIC_UNIQUE_ID_CONF_KEY) != null) {
        innerConf = new Configuration(conf);
        innerConf.set(LensConfConstants.QUERY_METRIC_UNIQUE_ID_CONF_KEY,
          conf.get(LensConfConstants.QUERY_METRIC_UNIQUE_ID_CONF_KEY) + "-" + segment.getName());
      }
      CubeQueryRewriter rewriter = new CubeQueryRewriter(innerConf, hconf);
      CubeQueryContext ctx = rewriter.rewrite(ast);
      cubeQueryContextMap.put(segment.getName(), ctx);
      if (!ctx.getCandidates().isEmpty()) {
        ctx.pickCandidateToQuery();
        for (StorageCandidate storageCandidate : CandidateUtil.getStorageCandidates(ctx.getPickedCandidate())) {
          for (Map.Entry<TimeRange, TimeRange> timeRangeTimeRangeEntry : queriedRangeToMyRange.entrySet()) {
            TimeRange timeRange = timeRangeTimeRangeEntry.getKey();
            TimeRange queriedTimeRange = timeRangeTimeRangeEntry.getValue();
            Set<FactPartition> rangeToPartition = storageCandidate.getRangeToPartitions().get(timeRange);
            if (rangeToPartition != null) {
              storageCandidate.getRangeToPartitions().put(queriedTimeRange, rangeToPartition);
            }
            String extraWhere = storageCandidate.getRangeToExtraWhereFallBack().get(timeRange);
            if (extraWhere != null) {
              storageCandidate.getRangeToExtraWhereFallBack().put(queriedTimeRange, extraWhere);
            }
          }
        }
      }
    }
    return areCandidatesPicked();
  }

  private void addCubeNameAndAlias(ASTNode ast, Cube innerCube) {
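    // Point the FROM clause at the segment's cube: rename the cube Identifier and keep (or add)
    // the alias used for the outer cube so that column references continue to resolve.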
    ASTNode cubeNameNode = findCubeNameNode(HQLParser.findNodeByPath(ast, TOK_FROM));
    assert cubeNameNode != null;
    ASTNode tabrefNode = (ASTNode) cubeNameNode.getParent().getParent();
    cubeNameNode.getToken().setText(innerCube.getName());
    ASTNode aliasNode = new ASTNode(new CommonToken(Identifier,
      getCubeQueryContext().getAliasForTableName(getCube().getName())));
    if (tabrefNode.getChildCount() > 1) {
      tabrefNode.setChild(1, aliasNode);
    } else {
      tabrefNode.addChild(aliasNode);
    }
  }

  private ASTNode findCubeNameNode(ASTNode node) {
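    // Depth-first search for the Identifier node that carries the queried (outer) cube's name.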
    if (node.getType() == Identifier) {
      if (node.getText().equalsIgnoreCase(getCubeQueryContext().getCube().getName())) {
        return node;
      } else {
        return null; // should never come here.
      }
    }
    return node.getChildren().stream().map(ASTNode.class::cast).map(this::findCubeNameNode).filter(Objects::nonNull)
      .findFirst().orElse(null);
  }

  private void trimHavingAndOrderby(ASTNode ast, Cube innerCube) {
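    // Drop HAVING/ORDER BY expressions that reference fields the inner cube does not have;
    // remove the clause entirely when nothing survives the trim.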
    ASTNode havingAst = HQLParser.findNodeByPath(ast, TOK_INSERT, TOK_HAVING);
    if (havingAst != null) {
      ASTNode newHavingAst = HQLParser.trimHavingAst(havingAst, innerCube.getAllFieldNames());
      if (newHavingAst != null) {
        havingAst.getParent().setChild(havingAst.getChildIndex(), newHavingAst);
      } else {
        havingAst.getParent().deleteChild(havingAst.getChildIndex());
      }
    }
    ASTNode orderByAst = HQLParser.findNodeByPath(ast, TOK_INSERT, TOK_ORDERBY);
    if (orderByAst != null) {
      ASTNode newOrderByAst = HQLParser.trimOrderByAst(orderByAst, innerCube.getAllFieldNames());
      if (newOrderByAst != null) {
        orderByAst.getParent().setChild(orderByAst.getChildIndex(), newOrderByAst);
      } else {
        orderByAst.getParent().deleteChild(orderByAst.getChildIndex());
      }
    }
  }


  public SegmentationCandidate(SegmentationCandidate segmentationCandidate) throws LensException {
    this(segmentationCandidate.cubeQueryContext, segmentationCandidate.segmentation);

  }

  @Override
  public Collection<String> getColumns() {
    if (columns == null) {
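      // Only columns present in every segment's cube are answerable by this candidate.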
      columns = cubeStream().map(Cube::getAllFieldNames)
        .reduce(Sets::intersection).orElseGet(Sets::newHashSet)
        .stream().collect(Collectors.toSet());
    }
    return columns;
  }

  @Override
  public Date getStartTime() {
    return segmentation.getStartTime();
  }

  @Override
  public Date getEndTime() {
    return segmentation.getEndTime();
  }

  @Override
  public OptionalDouble getCost() {
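    // Cost is the sum of the children's costs; empty until candidates are picked or
    // when any child cannot report a cost.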
    if (areCandidatesPicked()) {
      double cost = 0.0;
      for (Candidate candidate : getChildren()) {
        if (candidate.getCost().isPresent()) {
          cost += candidate.getCost().getAsDouble();
        } else {
          return OptionalDouble.empty();
        }
      }
      return OptionalDouble.of(cost);
    } else {
      return OptionalDouble.empty();
    }
  }

  @Override
  public boolean contains(Candidate candidate) {
    return areCandidatesPicked() && getChildren().contains(candidate);
  }

  @Override
  public Collection<Candidate> getChildren() {
    return candidateStream().collect(Collectors.toSet());
  }

  @Override
  public int getChildrenCount() {
    return segmentation.getSegments().size();
  }

  @Override
  public boolean isTimeRangeCoverable(TimeRange timeRange) throws LensException {
    return true;
  }

  @Override
  public boolean evaluateCompleteness(TimeRange timeRange, TimeRange queriedTimeRange, boolean failOnPartialData)
    throws LensException {
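    // Record which of this candidate's ranges answers which queried range; rewriteInternal()
    // consults this map when rewriting the inner ASTs and when relaying partitions.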
    queriedRangeToMyRange.put(queriedTimeRange, timeRange);
    return true;
  }

  @Override
  public Set<FactPartition> getParticipatingPartitions() {
    Set<FactPartition> partitionSet = Sets.newHashSet();
    for (CubeQueryContext cubeQueryContext : cubeQueryContextMap.values()) {
      if (cubeQueryContext.getPickedCandidate() != null) {
        partitionSet.addAll(cubeQueryContext.getPickedCandidate().getParticipatingPartitions());
      }
    }
    return partitionSet;
  }

  @Override
  public boolean isExpressionEvaluable(ExpressionResolver.ExpressionContext expr) {
    // expression context is specific to cubequerycontext. So for segmentation candidate,
    // I can't ask my children to check this context for evaluability.
    return cubeStream()
      .map(cube -> cube.getExpressionByName(expr.getExprCol().getName()))
      .allMatch(Objects::nonNull);
  }

  private boolean areCandidatesPicked() {
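    // True only when every segment's rewritten query has picked a candidate of its own.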
    return candidateStream().count() == cubesOfSegmentation.size();
  }

  private Stream<Candidate> candidateStream() {
    return contextStream().map(CubeQueryContext::getPickedCandidate).filter(Objects::nonNull);
  }

  private Stream<CubeQueryContext> contextStream() {
    return cubeQueryContextMap.values().stream();
  }

  private Stream<Cube> cubeStream() {
    return cubesOfSegmentation.values().stream();
  }

  @Override
  public boolean isExpressionEvaluable(String expr) {
    return candidateStream().allMatch(cand -> cand.isExpressionEvaluable(expr));
  }

  @Override
  public boolean isDimAttributeEvaluable(String dim) throws LensException {
    if (areCandidatesPicked()) {
      for (Candidate childCandidate : (Iterable<Candidate>) candidateStream()::iterator) {
        if (!childCandidate.isDimAttributeEvaluable(dim)) {
          return false;
        }
      }
      return true;
    }
    return hasColumn(dim);
  }

  @Override
  public Candidate copy() throws LensException {
    return new SegmentationCandidate(this);
  }

  @Override
  public boolean isPhraseAnswerable(QueriedPhraseContext phrase) {
    // TODO consider measure start time etc
    return getColumns().containsAll(phrase.getColumns());
  }

  @Override
  public Optional<Date> getColumnStartTime(String column) {
    if (areCandidatesPicked()) {
      return candidateStream()
        .map(c -> c.getColumnStartTime(column))
        .filter(Optional::isPresent)
        .map(Optional::get)
        .min(Comparator.naturalOrder());
    } else {
      return cubeStream()
        .map(cube -> cube.getColumnByName(column))
        .map(CubeColumn::getStartTime).filter(Objects::nonNull)
        .min(Comparator.naturalOrder());
    }
  }

  @Override
  public Optional<Date> getColumnEndTime(String column) {
    if (areCandidatesPicked()) {
      return candidateStream()
        .map(c -> c.getColumnEndTime(column))
        .filter(Optional::isPresent) // use flatmap(Optional::stream) after migration to java9
        .map(Optional::get)          // https://bugs.openjdk.java.net/browse/JDK-8050820
        .max(Comparator.naturalOrder());
    } else {
      return cubeStream()
        .map(cube -> cube.getColumnByName(column))
        .map(CubeColumn::getEndTime).filter(Objects::nonNull)
        .max(Comparator.naturalOrder());
    }
  }

  public void addAnswerableMeasurePhraseIndices(int index) {
    answerableMeasurePhraseIndices.add(index);
  }


  public String toString() {
    Collector<CharSequence, ?, String> collector = joining("; ", "SEG[", "]");
    if (areCandidatesPicked()) {
      return candidateStream().map(Candidate::toString).collect(collector);
    } else {
      return cubeStream().map(Cube::getName).collect(collector);
    }
  }

  // Prune causes for segments whose rewritten query could not pick any candidate.
  Map<String, PruneCauses<Candidate>> getPruneCausesOfFailedContexts() {
    return cubeQueryContextMap.entrySet().stream().filter(entry -> entry.getValue().getPickedCandidate() == null)
      .collect(toMap(Map.Entry::getKey, entry -> entry.getValue().getStoragePruningMsgs()));
  }

  @Override
  public boolean equals(Object obj) {
    if (super.equals(obj)) {
      return true;
    }

    if (obj == null || !(obj instanceof SegmentationCandidate)) {
      return false;
    }

    SegmentationCandidate segmentationCandidate = (SegmentationCandidate) obj;
    return (segmentationCandidate.segmentation.getSegments().equals(this.segmentation.getSegments())
        && segmentationCandidate.segmentation.getBaseCube().equals(this.segmentation.getBaseCube()));
  }

  @Override
  public int hashCode() {
    return segmentation.hashCode();
  }

}
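
The filter(Optional::isPresent).map(Optional::get) chains in getColumnStartTime/getColumnEndTime and the private static not(...) helper both point at idioms that newer JDKs provide directly (Optional::stream since Java 9, Predicate.not since Java 11). Below is a minimal standalone sketch of that migration; the class name, the sample dates, and the variable names are illustrative assumptions and not part of the Lens source.

import java.util.Comparator;
import java.util.Date;
import java.util.List;
import java.util.Optional;
import java.util.function.Predicate;

public class OptionalStreamSketch {
  public static void main(String[] args) {
    // Illustrative stand-in for candidateStream().map(c -> c.getColumnEndTime(column)).
    List<Optional<Date>> endTimes = List.of(
      Optional.of(new Date(2_000L)), Optional.empty(), Optional.of(new Date(5_000L)));

    // Java 8 idiom used in SegmentationCandidate today.
    Optional<Date> maxJava8 = endTimes.stream()
      .filter(Optional::isPresent)
      .map(Optional::get)
      .max(Comparator.naturalOrder());

    // Java 9+ replacement referenced in the in-code comment: flatMap over Optional.stream().
    Optional<Date> maxJava9 = endTimes.stream()
      .flatMap(Optional::stream)
      .max(Comparator.naturalOrder());

    // Java 11 ships Predicate.not, which would supersede the private not(...) helper.
    Predicate<String> nonEmpty = Predicate.not(String::isEmpty);

    System.out.println(maxJava8.equals(maxJava9)); // true
    System.out.println(nonEmpty.test("segment"));  // true
  }
}

Both pipelines produce the same Optional; the flatMap form simply removes the isPresent/get pair.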



