All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.drill.common.expression.SchemaPath Maven / Gradle / Ivy

There is a newer version: 1.21.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.drill.common.expression;

import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.Objects;

import org.apache.drill.common.expression.PathSegment.ArraySegment;
import org.apache.drill.common.expression.PathSegment.NameSegment;
import org.apache.drill.common.expression.visitors.ExprVisitor;
import org.apache.drill.common.parser.LogicalExpressionParser;
import org.apache.drill.common.types.TypeProtos;
import org.apache.drill.common.types.TypeProtos.MajorType;
import org.apache.drill.common.types.Types;
import org.apache.drill.exec.proto.UserBitShared.NamePart;
import org.apache.drill.exec.proto.UserBitShared.NamePart.Type;

import com.fasterxml.jackson.core.JsonParser;
import com.fasterxml.jackson.databind.DeserializationContext;
import com.fasterxml.jackson.databind.deser.std.StdDeserializer;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;

/**
 * This is the path for the column in the table
 */
public class SchemaPath extends LogicalExpressionBase {

  // AKA "Wildcard": expand all columns
  public static final String DYNAMIC_STAR = "**";
  public static final SchemaPath STAR_COLUMN = getSimplePath(DYNAMIC_STAR);

  private final NameSegment rootSegment;

  public SchemaPath(SchemaPath path) {
    this(path.rootSegment, path.getPosition());
  }

  public SchemaPath(NameSegment rootSegment) {
    this(rootSegment, ExpressionPosition.UNKNOWN);
  }

  /**
   * @deprecated Use {@link #SchemaPath(NameSegment)}
   * or {@link #SchemaPath(NameSegment, ExpressionPosition)} instead
   */
  @Deprecated
  public SchemaPath(String simpleName, ExpressionPosition pos) {
    this(new NameSegment(simpleName), pos);
  }

  public SchemaPath(NameSegment rootSegment, ExpressionPosition pos) {
    super(pos);
    this.rootSegment = rootSegment;
  }

  public static SchemaPath getSimplePath(String name) {
    return getCompoundPath(name);
  }

  public static SchemaPath getCompoundPath(String... path) {
    return getCompoundPath(path.length, path);
  }

  /**
   * Constructs {@code SchemaPath} based on given {@code path} array up to {@literal n}th element (inclusively).
   *
   * Example: for case when {@code n = 2} and {@code path = {"a", "b", "c", "d", "e", ...}}
   * the method returns {@code a.b}
   *
   * @param n number of elements in {@literal path} array to take when constructing {@code SchemaPath}
   * @param path column path used to construct schema path
   * @return schema path containing {@literal n - 1} children
   */
  public static SchemaPath getCompoundPath(int n, String... path) {
    Preconditions.checkArgument(n > 0);
    NameSegment s = null;
    // loop through strings in reverse order
    for (int i = n - 1; i >= 0; i--) {
      s = new NameSegment(path[i], s);
    }
    return new SchemaPath(s);
  }

  public PathSegment getLastSegment() {
    PathSegment s = rootSegment;
    while (s.getChild() != null) {
      s = s.getChild();
    }
    return s;
  }

  public NamePart getAsNamePart() {
    return getNamePart(rootSegment);
  }

  private static NamePart getNamePart(PathSegment s) {
    if (s == null) {
      return null;
    }
    NamePart.Builder b = NamePart.newBuilder();
    if (s.getChild() != null) {
      NamePart namePart = getNamePart(s.getChild());
      if (namePart != null) {
        b.setChild(namePart);
      }
    }

    if (s.isArray()) {
      if (s.getArraySegment().hasIndex()) {
        throw new IllegalStateException("You cannot convert a indexed schema path to a NamePart.  NameParts can only reference Vectors, not individual records or values.");
      }
      b.setType(Type.ARRAY);
    } else {
      b.setType(Type.NAME);
      b.setName(s.getNameSegment().getPath());
    }
    return b.build();
  }

  private static PathSegment getPathSegment(NamePart n) {
    PathSegment child = n.hasChild() ? getPathSegment(n.getChild()) : null;
    if (n.getType() == Type.ARRAY) {
      return new ArraySegment(child);
    } else {
      return new NameSegment(n.getName(), child);
    }
  }

  public static SchemaPath create(NamePart namePart) {
    Preconditions.checkArgument(namePart.getType() == NamePart.Type.NAME);
    return new SchemaPath((NameSegment) getPathSegment(namePart));
  }

  /**
   * Returns schema path with for arrays without index.
   * Is used to find column statistics in parquet metadata.
   * Example: a.b.c[0] -> a.b.c, a[0].b[1] -> a.b
   *
   * @return un-indexed schema path
   */
  public SchemaPath getUnIndexed() {
    return new SchemaPath(getUnIndexedNameSegment(rootSegment, null));
  }

  /**
   * Traverses path segment to extract named segments, omits all other segments (i.e. array).
   * Order of named segments appearance will be preserved.
   *
   * @param currentSegment current segment
   * @param resultingSegment resulting segment
   * @return named segment
   */
  private NameSegment getUnIndexedNameSegment(PathSegment currentSegment, NameSegment resultingSegment) {
    if (!currentSegment.isLastPath()) {
      resultingSegment = getUnIndexedNameSegment(currentSegment.getChild(), resultingSegment);
    }

    if (currentSegment.isNamed()) {
      String path = currentSegment.getNameSegment().getPath();
      return new NameSegment(path, resultingSegment);
    }

    return resultingSegment;
  }

  /**
   * Parses input string using the same rules which are used for the field in the query.
   * If a string contains dot outside back-ticks, or there are no backticks in the string,
   * will be created {@link SchemaPath} with the {@link NameSegment}
   * which contains one else {@link NameSegment}, etc.
   * If a string contains [] then {@link ArraySegment} will be created.
   *
   * @param expr input string to be parsed
   * @return {@link SchemaPath} instance
   */
  public static SchemaPath parseFromString(String expr) {
    if (expr == null || expr.isEmpty()) {
      return null;
    }

    if (SchemaPath.DYNAMIC_STAR.equals(expr)) {
      return SchemaPath.getSimplePath(expr);
    }

    LogicalExpression logicalExpression = LogicalExpressionParser.parse(expr);
    if (logicalExpression instanceof SchemaPath) {
      return (SchemaPath) logicalExpression;
    } else {
      throw new IllegalStateException(String.format("Schema path is not a valid format: %s.", logicalExpression));
    }
  }

  /**
   * A simple is a path where there are no repeated elements outside the lowest level of the path.
   * @return Whether this path is a simple path.
   */
  public boolean isSimplePath() {
    PathSegment seg = rootSegment;
    while (seg != null) {
      if (seg.isArray() && !seg.isLastPath()) {
        return false;
      }
      seg = seg.getChild();
    }
    return true;
  }

  /**
   * Return whether this name refers to an array. The path must be an array if it
   * ends with an array index; else it may or may not be an entire array.
   *
   * @return true if the path ends with an array index, false otherwise
   */

  public boolean isArray() {
    PathSegment seg = rootSegment;
    while (seg != null) {
      if (seg.isArray()) {
        return true;
      }
      seg = seg.getChild();
    }
    return false;
  }

  /**
   * Determine if this is a one-part name. In general, special columns work only
   * if they are single-part names.
   *
   * @return true if this is a one-part name, false if this is a multi-part
   * name (with either map member or array index parts.)
   */

  public boolean isLeaf() {
    return rootSegment.isLastPath();
  }

  /**
   * Return if this column is the special wildcard ("**") column which means to
   * project all table columns.
   *
   * @return true if the column is "**"
   */

  public boolean isDynamicStar() {
    return isLeaf() && nameEquals(DYNAMIC_STAR);
  }

  /**
   * Returns if this is a simple column and the name matches the given
   * name (ignoring case.) This does not check if the name is an entire
   * match, only the the first (or only) part of the name matches.
   * Also check {@link #isLeaf()} to check for a single-part name.
   *
   * @param name name to match
   * @return true if this is a single-part column with that name.
   */

  public boolean nameEquals(String name) {
    return rootSegment.nameEquals(name);
  }

  /**
   * Return the root name: either the entire name (if one part) or
   * the first part (if multi-part.)
   * 
    *
  • a: returns a
  • *
  • a.b: returns a
  • *
  • a[10]: returns a
  • *
* * @return the root (or only) name */ public String rootName() { return rootSegment.getPath(); } @Override public T accept(ExprVisitor visitor, V value) throws E { return visitor.visitSchemaPath(this, value); } public SchemaPath getChild(String childPath) { NameSegment newRoot = rootSegment.cloneWithNewChild(new NameSegment(childPath)); return new SchemaPath(newRoot); } public SchemaPath getChild(String childPath, Object originalValue, TypeProtos.MajorType valueType) { NameSegment newRoot = rootSegment.cloneWithNewChild(new NameSegment(childPath, originalValue, valueType)); return new SchemaPath(newRoot); } public SchemaPath getChild(int index) { NameSegment newRoot = rootSegment.cloneWithNewChild(new ArraySegment(index)); return new SchemaPath(newRoot); } public SchemaPath getChild(int index, Object originalValue, TypeProtos.MajorType valueType) { NameSegment newRoot = rootSegment.cloneWithNewChild(new ArraySegment(index, originalValue, valueType)); return new SchemaPath(newRoot); } public NameSegment getRootSegment() { return rootSegment; } public String getAsUnescapedPath() { StringBuilder sb = new StringBuilder(); PathSegment seg = getRootSegment(); if (seg.isArray()) { throw new IllegalStateException("Drill doesn't currently support top level arrays"); } sb.append(seg.getNameSegment().getPath()); while ( (seg = seg.getChild()) != null) { if (seg.isNamed()) { sb.append('.'); sb.append(seg.getNameSegment().getPath()); } else { sb.append('['); sb.append(seg.getArraySegment().getIndex()); sb.append(']'); } } return sb.toString(); } @Override public MajorType getMajorType() { return Types.LATE_BIND_TYPE; } @Override public int hashCode() { return ((rootSegment == null) ? 
0 : rootSegment.hashCode()); } @Override public boolean equals(Object obj) { return this == obj || obj instanceof SchemaPath && Objects.equals(rootSegment, ((SchemaPath) obj).rootSegment); } public boolean contains(SchemaPath path) { return this == path || path != null && (rootSegment == null || rootSegment.contains(path.rootSegment)); } @Override public Iterator iterator() { return Collections.emptyIterator(); } @Override public String toString() { return ExpressionStringBuilder.toString(this); } public String toExpr() { return ExpressionStringBuilder.toString(this); } /** * Returns path string of {@code rootSegment} * * @return path string of {@code rootSegment} */ public String getRootSegmentPath() { return rootSegment.getPath(); } @SuppressWarnings("serial") public static class De extends StdDeserializer { public De() { super(LogicalExpression.class); } @Override public SchemaPath deserialize(JsonParser jp, DeserializationContext ctxt) throws IOException { return parseFromString(jp.getText()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy