// All downloads are free. Search and download functionality uses the official Maven repository.
//
// com.github.sadikovi.netflowlib.ScanPlanner Maven / Gradle / Ivy
//
// The newest version!
/*
 * Copyright 2016 sadikovi
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.github.sadikovi.netflowlib;

import java.util.ArrayList;
import java.util.HashMap;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.github.sadikovi.netflowlib.Strategies.ScanStrategy;
import com.github.sadikovi.netflowlib.Strategies.FullScan;
import com.github.sadikovi.netflowlib.Strategies.FilterScan;
import com.github.sadikovi.netflowlib.Strategies.SkipScan;

import com.github.sadikovi.netflowlib.predicate.Columns.Column;
import com.github.sadikovi.netflowlib.predicate.FilterApi;
import com.github.sadikovi.netflowlib.predicate.Inspectors.Inspector;
import com.github.sadikovi.netflowlib.predicate.Inspectors.ValueInspector;
import com.github.sadikovi.netflowlib.predicate.PredicateTransform;

import com.github.sadikovi.netflowlib.predicate.Operators.FilterPredicate;
import com.github.sadikovi.netflowlib.predicate.Operators.Eq;
import com.github.sadikovi.netflowlib.predicate.Operators.Gt;
import com.github.sadikovi.netflowlib.predicate.Operators.Ge;
import com.github.sadikovi.netflowlib.predicate.Operators.Lt;
import com.github.sadikovi.netflowlib.predicate.Operators.Le;
import com.github.sadikovi.netflowlib.predicate.Operators.In;
import com.github.sadikovi.netflowlib.predicate.Operators.And;
import com.github.sadikovi.netflowlib.predicate.Operators.Or;
import com.github.sadikovi.netflowlib.predicate.Operators.Not;
import com.github.sadikovi.netflowlib.predicate.Operators.TrivialPredicate;

import com.github.sadikovi.netflowlib.statistics.Statistics;

/**
 * [[ScanPlanner]] interface defines strategies for parsing a record and resolving predicate tree.
 */
public final class ScanPlanner implements PredicateTransform {
  private static Logger log = LoggerFactory.getLogger(ScanPlanner.class);

  /** Build appropriate strategy based on pruned columns, predicate tree and statistics */
  public static ScanStrategy buildStrategy(
      Column[] columns,
      FilterPredicate predicate,
      HashMap stats) {
    ScanPlanner planner = new ScanPlanner(columns, predicate, stats);
    return planner.getStrategy();
  }

  public static ScanStrategy buildStrategy(Column[] columns) {
    return buildStrategy(columns, null, null);
  }

  public static ScanStrategy buildStrategy(
      Column[] columns,
      FilterPredicate predicate) {
    return buildStrategy(columns, predicate, null);
  }

  private ScanPlanner(
      Column[] columns,
      FilterPredicate predicate,
      HashMap stats) {
    // Building strategy involves several steps, such as variables check, making sure that we can
    // prune columns, predicate is defined, and statistics can be resolved; next is folding
    // predicate tree and applying statistics to modified predicate; next step is converting
    // predicate tree into inspector tree + mapping of columns to [[ValueInspector]], so it can be
    // used in [[RecordMaterializer]].
    if (columns == null) {
      throw new IllegalArgumentException("Expected columns to select, got " + columns +
        ". Make sure that you provide correct Column instances when requesting a scan");
    }

    // Resolve statistics, if available. Note that we manage statistics as map of column name and
    // [[Statistics]] instance, so internally we are dealing with abstract interface without support
    // of parameterized type, so we just rely on correct statistics type provided for a column in
    // constructor.
    HashMap internalStats = new HashMap();
    if (stats != null) {
      for (Column col: stats.keySet()) {
        internalStats.put(col.getColumnName(), stats.get(col));
      }
    }

    // Resolve predicate, if predicate is not defined we automatically assign positive trivial
    // predicate which will result in full scan of the file, otherwise fold it with statistics. Make
    // sure that there are no `TrivialPredicate` instances in the tree.
    FilterPredicate internalFilter = predicate;
    if (internalFilter == null) {
      internalFilter = FilterApi.trivial(true);
    } else {
      internalFilter = predicate.update(this, internalStats);
    }

    log.debug("Predicate after update: " + internalFilter.toString());

    // Flags for predicate, whether it is known or not, whether result is known or not
    boolean known = (internalFilter instanceof TrivialPredicate);
    boolean result = (known) ? ((TrivialPredicate) internalFilter).getResult() : false;

    // Choose strategy based on result
    if (known && result) {
      // Do full scan
      strategy = new FullScan(columns);
    } else if (known && !result) {
      // Skip scan
      strategy = new SkipScan();
    } else {
      // Predicate scan, convert `internalFilter` into inspector tree, extract columns that are
      // filtered
      Inspector inspectorTree = internalFilter.convert();
      strategy = new FilterScan(columns, inspectorTree, inspectors);
    }

    if (strategy == null) {
      throw new IllegalStateException("Cannot find suitable strategy for scan. Make sure you " +
        "provide correct arguments in constructor");
    }

    log.debug("Strategy chosen: " + strategy.toString());
  }

  private ScanStrategy getStrategy() {
    return strategy;
  }

  protected Object resolveMin(Column column, Statistics stats) {
    if (stats == null) {
      return column.getMin();
    } else {
      return stats.getMin();
    }
  }

  protected Object resolveMax(Column column, Statistics stats) {
    if (stats == null) {
      return column.getMax();
    } else {
      return stats.getMax();
    }
  }

  // Method to compare two objects based on provided class. Interface is similar to `compareTo`
  // method from `Comparable` interface. Note that only certain types are supported
  protected int compare(Class clazz, Object it, Object that) {
    if (clazz.equals(Byte.class)) {
      Byte itValue = Byte.class.cast(it);
      Byte thatValue = Byte.class.cast(that);
      return itValue.compareTo(thatValue);
    } else if (clazz.equals(Short.class)) {
      Short itValue = Short.class.cast(it);
      Short thatValue = Short.class.cast(that);
      return itValue.compareTo(thatValue);
    } else if (clazz.equals(Integer.class)) {
      Integer itValue = Integer.class.cast(it);
      Integer thatValue = Integer.class.cast(that);
      return itValue.compareTo(thatValue);
    } else if (clazz.equals(Long.class)) {
      Long itValue = Long.class.cast(it);
      Long thatValue = Long.class.cast(that);
      return itValue.compareTo(thatValue);
    } else {
      throw new UnsupportedOperationException("Unsupported read type " + clazz.toString());
    }
  }

  protected void addInspector(Column column, ValueInspector inspector) {
    if (!inspectors.containsKey(column)) {
      inspectors.put(column, new ArrayList());
    }

    inspectors.get(column).add(inspector);
  }

  //////////////////////////////////////////////////////////////
  // PredicateTransform API (ColumnPredicate)
  //////////////////////////////////////////////////////////////

  @Override
  public FilterPredicate transform(Eq predicate, HashMap stats) {
    if (predicate.getValue() == null) {
      return FilterApi.trivial(false);
    }

    Column col = predicate.getColumn();
    Object min = resolveMin(col, stats.get(col.getColumnName()));
    Object max = resolveMax(col, stats.get(col.getColumnName()));

    if (compare(col.getColumnType(), predicate.getValue(), min) < 0) {
      return FilterApi.trivial(false);
    }

    if (compare(col.getColumnType(), predicate.getValue(), max) > 0) {
      return FilterApi.trivial(false);
    }

    addInspector(col, predicate.inspector());

    return predicate;
  }

  @Override
  public FilterPredicate transform(Gt predicate, HashMap stats) {
    if (predicate.getValue() == null) {
      return FilterApi.trivial(false);
    }

    Column col = predicate.getColumn();
    Object min = resolveMin(col, stats.get(col.getColumnName()));
    Object max = resolveMax(col, stats.get(col.getColumnName()));

    // If predicate value is less than minimum value then predicate is trivial. Note, if predicate
    // greater than minimum value, we still have to scan, since values can be minimal.
    if (compare(col.getColumnType(), predicate.getValue(), min) < 0) {
      return FilterApi.trivial(true);
    }

    // If predicate value is greater or equals than maximum value then predicate "Greater than" is
    // trivial, since there are no such values greater than upper bound.
    if (compare(col.getColumnType(), predicate.getValue(), max) >= 0) {
      return FilterApi.trivial(false);
    }

    addInspector(col, predicate.inspector());

    return predicate;
  }

  @Override
  public FilterPredicate transform(Ge predicate, HashMap stats) {
    if (predicate.getValue() == null) {
      return FilterApi.trivial(false);
    }

    Column col = predicate.getColumn();
    Object min = resolveMin(col, stats.get(col.getColumnName()));
    Object max = resolveMax(col, stats.get(col.getColumnName()));

    // If predicate value is less than or equals minimum value, then predicate is trivial, since it
    // would mean that we scan entire range anyway.
    if (compare(col.getColumnType(), predicate.getValue(), min) <= 0) {
      return FilterApi.trivial(true);
    }

    // If predicate value is greater than maximum value, then predicate is trivial, since there are
    // no such values in range.
    if (compare(col.getColumnType(), predicate.getValue(), max) > 0) {
      return FilterApi.trivial(false);
    }

    // If "GreaterThanOrEqual" value is a maximum value, then this simply becomes equality
    // predicate, since there are no values greater than maximum value.
    // Note since we return new equality predicate we have to add it to the list of inspectors.
    if (compare(col.getColumnType(), predicate.getValue(), max) == 0) {
      Eq equalityPredicate = FilterApi.eq(predicate.getColumn(), max);

      addInspector(col, equalityPredicate.inspector());
      return equalityPredicate;
    }

    addInspector(col, predicate.inspector());
    return predicate;
  }

  @Override
  public FilterPredicate transform(Lt predicate, HashMap stats) {
    if (predicate.getValue() == null) {
      return FilterApi.trivial(false);
    }

    Column col = predicate.getColumn();
    Object min = resolveMin(col, stats.get(col.getColumnName()));
    Object max = resolveMax(col, stats.get(col.getColumnName()));

    // If predicate value is less than or equal minimum value, then predicate is trivial, since
    // there are no values less than minimum value.
    if (compare(col.getColumnType(), predicate.getValue(), min) <= 0) {
      return FilterApi.trivial(false);
    }

    // If predicate value is greater than maximum value, then predicate is trivial, since all
    // values fall into range (< value which is larger than maximum value).
    if (compare(col.getColumnType(), predicate.getValue(), max) > 0) {
      return FilterApi.trivial(true);
    }

    addInspector(col, predicate.inspector());

    return predicate;
  }

  @Override
  public FilterPredicate transform(Le predicate, HashMap stats) {
    if (predicate.getValue() == null) {
      return FilterApi.trivial(false);
    }

    Column col = predicate.getColumn();
    Object min = resolveMin(col, stats.get(col.getColumnName()));
    Object max = resolveMax(col, stats.get(col.getColumnName()));

    // If predicate value is less than minimum value, then predicate is trivial, see above for more
    // information.
    if (compare(col.getColumnType(), predicate.getValue(), min) < 0) {
      return FilterApi.trivial(false);
    }

    // If predicate value is greater than or equal to maximum value, then predicate is trivial, and
    // full scan is performed.
    if (compare(col.getColumnType(), predicate.getValue(), max) >= 0) {
      return FilterApi.trivial(true);
    }

    // If predicate value equals minimum value, then predicate becomes equality operator.
    // Note since we return new equality predicate we have to add it to the list of inspectors.
    if (compare(col.getColumnType(), predicate.getValue(), min) == 0) {
      Eq equalityPredicate = FilterApi.eq(predicate.getColumn(), min);

      addInspector(col, equalityPredicate.inspector());
      return equalityPredicate;
    }

    addInspector(col, predicate.inspector());
    return predicate;
  }

  @Override
  public FilterPredicate transform(In predicate, HashMap stats) {
    // TODO: we do not do any major updates for "In", since there is very low chance of being out
    // of range {min, max}.
    if (predicate.getValues().isEmpty()) {
      return FilterApi.trivial(false);
    }

    Column col = predicate.getColumn();
    // If predicate values length is 1, we transform it into "Eq" predicate,
    // Note that in this case we need to update equality predicate, since it can be out of
    // statistics. `transform` on `Eq` will add it to the value inspectors list, if necessary, so
    // we do not need to do it here.
    if (predicate.getValues().size() == 1) {
      Eq equalityPredicate = FilterApi.eq(col, predicate.getValues().iterator().next());
      return equalityPredicate.update(this, stats);
    }

    addInspector(predicate.getColumn(), predicate.inspector());
    return predicate;
  }

  //////////////////////////////////////////////////////////////
  // PredicateTransform API (Unary/BinaryLogicalPredicate)
  //////////////////////////////////////////////////////////////

  @Override
  public FilterPredicate transform(And predicate) {
    // Note that "transform" on "And" predicate is called after it is called on children of the
    // predicate, so effectively, leaf nodes of "And" predicate are already resolved, so we just
    // need to check result.

    // both children are trivial
    if (predicate.getLeft() instanceof TrivialPredicate &&
        predicate.getRight() instanceof TrivialPredicate) {
      TrivialPredicate left = (TrivialPredicate)(predicate.getLeft());
      TrivialPredicate right = (TrivialPredicate)(predicate.getRight());

      return FilterApi.trivial(left.getResult() && right.getResult());
    }

    // either left or right is trivial, but not both
    if (predicate.getLeft() instanceof TrivialPredicate) {
      TrivialPredicate left = (TrivialPredicate)(predicate.getLeft());
      if (left.getResult()) {
        return predicate.getRight();
      } else {
        return left;
      }
    }

    if (predicate.getRight() instanceof TrivialPredicate) {
      TrivialPredicate right = (TrivialPredicate)(predicate.getRight());
      if (right.getResult()) {
        return predicate.getLeft();
      } else {
        return right;
      }
    }

    // otherwise return unmodified predicate
    return predicate;
  }

  @Override
  public FilterPredicate transform(Or predicate) {
    // Note that "transform" on "Or" predicate is called after it is called on children of the
    // predicate, so effectively, leaf nodes of "Or" predicate are already resolved, so we just
    // need to check result.

    // both children are trivial
    if (predicate.getLeft() instanceof TrivialPredicate &&
        predicate.getRight() instanceof TrivialPredicate) {
      TrivialPredicate left = (TrivialPredicate)(predicate.getLeft());
      TrivialPredicate right = (TrivialPredicate)(predicate.getRight());

      return FilterApi.trivial(left.getResult() || right.getResult());
    }

    // either left or right is trivial, but not both
    if (predicate.getLeft() instanceof TrivialPredicate) {
      TrivialPredicate left = (TrivialPredicate)(predicate.getLeft());
      if (left.getResult()) {
        return left;
      } else {
        return predicate.getRight();
      }
    }

    if (predicate.getRight() instanceof TrivialPredicate) {
      TrivialPredicate right = (TrivialPredicate)(predicate.getRight());
      if (right.getResult()) {
        return right;
      } else {
        return predicate.getLeft();
      }
    }

    // otherwise return itself
    return predicate;
  }

  @Override
  public FilterPredicate transform(Not predicate) {
    // Note that "transform" on "Not" predicate is called after it is called on a child node of the
    // predicate, so effectively, child leaf node of "Not" predicate is already resolved, so we
    // just need to check result.

    // transform trivial predicate directly to minimize recursion depth.
    if (predicate.getChild() instanceof TrivialPredicate) {
      TrivialPredicate child = (TrivialPredicate)(predicate.getChild());
      return FilterApi.trivial(!child.getResult());
    }

    return predicate;
  }

  private ScanStrategy strategy = null;
  private HashMap> inspectors =
    new HashMap>();
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy