All downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.iceberg.spark.data.AvroWithSparkSchemaVisitor Maven / Gradle / Ivy

There is a newer version: 1.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.spark.data;

import java.util.Deque;
import java.util.List;
import org.apache.avro.Schema;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.avro.LogicalMap;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.spark.sql.types.ArrayType;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DataTypes;
import org.apache.spark.sql.types.MapType;
import org.apache.spark.sql.types.StringType;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructType;

/**
 * Visitor that walks an Avro {@link Schema} in lock-step with a Spark {@link DataType}.
 *
 * <p>The static {@code visit} methods traverse both schemas depth-first and invoke the matching
 * callback ({@link #record}, {@link #union}, {@link #array}, {@link #map} or {@link #primitive})
 * with the results already produced for the children. Every callback returns {@code null} unless
 * a subclass overrides it.
 *
 * <p>A visitor instance keeps a stack of the record names currently being visited so that
 * recursive Avro records can be rejected.
 *
 * @param <T> the type of result produced by the visitor callbacks
 */
public abstract class AvroWithSparkSchemaVisitor<T> {
  /**
   * Visits a Spark struct together with the Avro schema that describes it.
   *
   * @param struct the Spark struct type
   * @param schema the Avro schema paired with {@code struct}; it is handled as a record
   * @param visitor the visitor whose callbacks are invoked
   * @param <T> the visitor's result type
   * @return the value returned by {@link #record} for the struct
   */
  public static <T> T visit(StructType struct, Schema schema, AvroWithSparkSchemaVisitor<T> visitor) {
    return visitRecord(struct, schema, visitor);
  }

  /**
   * Visits a Spark type together with its Avro schema, dispatching on the Avro schema type.
   *
   * @param type the Spark type
   * @param schema the Avro schema paired with {@code type}
   * @param visitor the visitor whose callbacks are invoked
   * @param <T> the visitor's result type
   * @return the value returned by the callback that matches the Avro schema type
   * @throws IllegalArgumentException if the Spark type does not have the shape required by the
   *     Avro schema (for example a RECORD schema paired with a non-struct type)
   */
  public static <T> T visit(DataType type, Schema schema, AvroWithSparkSchemaVisitor<T> visitor) {
    switch (schema.getType()) {
      case RECORD:
        Preconditions.checkArgument(type instanceof StructType, "Invalid struct: %s is not a struct", type);
        return visitRecord((StructType) type, schema, visitor);

      case UNION:
        return visitUnion(type, schema, visitor);

      case ARRAY:
        return visitArray(type, schema, visitor);

      case MAP:
        Preconditions.checkArgument(type instanceof MapType, "Invalid map: %s is not a map", type);
        MapType map = (MapType) type;
        // this branch only accepts Spark maps whose key type is a string
        Preconditions.checkArgument(map.keyType() instanceof StringType,
            "Invalid map: %s is not a string", map.keyType());
        return visitor.map(map, schema, visit(map.valueType(), schema.getValueType(), visitor));

      default:
        return visitor.primitive(type, schema);
    }
  }

  /**
   * Visits each field of a record, pairing Spark and Avro fields by position.
   *
   * @throws IllegalStateException if the record is already being visited (recursive record)
   * @throws IllegalArgumentException if the field counts differ or a field name does not match
   */
  private static <T> T visitRecord(StructType struct, Schema record, AvroWithSparkSchemaVisitor<T> visitor) {
    // check to make sure this hasn't been visited before
    String name = record.getFullName();
    Preconditions.checkState(!visitor.recordLevels.contains(name),
        "Cannot process recursive Avro record %s", name);
    StructField[] sFields = struct.fields();
    List<Schema.Field> fields = record.getFields();
    Preconditions.checkArgument(sFields.length == fields.size(),
        "Structs do not match: %s != %s", struct, record);

    List<String> names = Lists.newArrayListWithExpectedSize(fields.size());
    List<T> results = Lists.newArrayListWithExpectedSize(fields.size());

    visitor.recordLevels.push(name);
    try {
      for (int i = 0; i < sFields.length; i += 1) {
        StructField sField = sFields[i];
        Schema.Field field = fields.get(i);
        Preconditions.checkArgument(AvroSchemaUtil.makeCompatibleName(sField.name()).equals(field.name()),
            "Structs do not match: field %s != %s", sField.name(), field.name());
        // keep names and results aligned so the record callback can pair them by index
        names.add(field.name());
        results.add(visit(sField.dataType(), field.schema(), visitor));
      }
    } finally {
      // always unwind the stack so a failed visit does not leave a stale record name behind
      visitor.recordLevels.pop();
    }

    return visitor.record(struct, record, names, results);
  }

  /**
   * Visits every branch of an option union. The null branch is paired with Spark's
   * {@code NullType}; any other branch is paired with {@code type}.
   *
   * @throws IllegalArgumentException if the union is not an option schema
   */
  private static <T> T visitUnion(DataType type, Schema union, AvroWithSparkSchemaVisitor<T> visitor) {
    List<Schema> types = union.getTypes();
    Preconditions.checkArgument(AvroSchemaUtil.isOptionSchema(union),
        "Cannot visit non-option union: %s", union);
    List<T> options = Lists.newArrayListWithExpectedSize(types.size());
    for (Schema branch : types) {
      if (branch.getType() == Schema.Type.NULL) {
        options.add(visit(DataTypes.NullType, branch, visitor));
      } else {
        options.add(visit(type, branch, visitor));
      }
    }
    return visitor.union(type, union, options);
  }

  /**
   * Visits an Avro array. The array is treated as a map when it carries the {@link LogicalMap}
   * logical type or when the Spark type is a {@link MapType}; otherwise it is visited as a list.
   *
   * @throws IllegalStateException if a map-like array does not have a key/value element schema
   * @throws IllegalArgumentException if the Spark type is not a map (map case) or not an array
   *     (list case)
   */
  private static <T> T visitArray(DataType type, Schema array, AvroWithSparkSchemaVisitor<T> visitor) {
    if (array.getLogicalType() instanceof LogicalMap || type instanceof MapType) {
      Preconditions.checkState(
          AvroSchemaUtil.isKeyValueSchema(array.getElementType()),
          "Cannot visit invalid logical map type: %s", array);
      Preconditions.checkArgument(type instanceof MapType, "Invalid map: %s is not a map", type);
      MapType map = (MapType) type;
      // the element record's first field is visited as the key, the second as the value
      List<Schema.Field> keyValueFields = array.getElementType().getFields();
      return visitor.map(map, array,
          visit(map.keyType(), keyValueFields.get(0).schema(), visitor),
          visit(map.valueType(), keyValueFields.get(1).schema(), visitor));

    } else {
      Preconditions.checkArgument(type instanceof ArrayType, "Invalid array: %s is not an array", type);
      ArrayType list = (ArrayType) type;
      return visitor.array(list, array, visit(list.elementType(), array.getElementType(), visitor));
    }
  }

  // full names of the records currently being visited, used to detect recursive records
  private final Deque<String> recordLevels = Lists.newLinkedList();

  /**
   * Called after all fields of a record have been visited.
   *
   * @param struct the Spark struct type
   * @param record the Avro record schema
   * @param names the Avro field names, in field order
   * @param fields the results for each field, in the same order as {@code names}
   * @return the result for the record; {@code null} by default
   */
  public T record(StructType struct, Schema record, List<String> names, List<T> fields) {
    return null;
  }

  /**
   * Called after all branches of an option union have been visited.
   *
   * @param type the Spark type paired with the union
   * @param union the Avro union schema
   * @param options the results for each branch, in branch order
   * @return the result for the union; {@code null} by default
   */
  public T union(DataType type, Schema union, List<T> options) {
    return null;
  }

  /**
   * Called after the element of an array has been visited.
   *
   * @param sArray the Spark array type
   * @param array the Avro array schema
   * @param element the result for the element type
   * @return the result for the array; {@code null} by default
   */
  public T array(ArrayType sArray, Schema array, T element) {
    return null;
  }

  /**
   * Called for a map stored as an Avro array of key/value records, after both the key and the
   * value have been visited.
   *
   * @param sMap the Spark map type
   * @param map the Avro array schema that represents the map
   * @param key the result for the key type
   * @param value the result for the value type
   * @return the result for the map; {@code null} by default
   */
  public T map(MapType sMap, Schema map, T key, T value) {
    return null;
  }

  /**
   * Called for an Avro MAP schema after the value has been visited.
   *
   * @param sMap the Spark map type
   * @param map the Avro map schema
   * @param value the result for the value type
   * @return the result for the map; {@code null} by default
   */
  public T map(MapType sMap, Schema map, T value) {
    return null;
  }

  /**
   * Called for any Avro schema that is not a record, union, array or map.
   *
   * @param type the Spark type
   * @param primitive the Avro schema
   * @return the result for the primitive; {@code null} by default
   */
  public T primitive(DataType type, Schema primitive) {
    return null;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy