All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.parquet.ParquetConversions Maven / Gradle / Ivy

There is a newer version: 1.0.0.8
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.parquet;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.UUID;
import java.util.function.Function;
import org.apache.iceberg.expressions.Literal;
import org.apache.iceberg.types.Type;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.schema.PrimitiveType;

class ParquetConversions {
  private ParquetConversions() {
  }

  @SuppressWarnings("unchecked")
  static  Literal fromParquetPrimitive(Type type, PrimitiveType parquetType, Object value) {
    switch (type.typeId()) {
      case BOOLEAN:
        return (Literal) Literal.of((Boolean) value);
      case INTEGER:
      case DATE:
        return (Literal) Literal.of((Integer) value);
      case LONG:
      case TIME:
      case TIMESTAMP:
        return (Literal) Literal.of((Long) value);
      case FLOAT:
        return (Literal) Literal.of((Float) value);
      case DOUBLE:
        return (Literal) Literal.of((Double) value);
      case STRING:
        Function stringConversion = converterFromParquet(parquetType);
        return (Literal) Literal.of((CharSequence) stringConversion.apply(value));
      case UUID:
        Function uuidConversion = converterFromParquet(parquetType);
        return (Literal) Literal.of((UUID) uuidConversion.apply(value));
      case FIXED:
      case BINARY:
        Function binaryConversion = converterFromParquet(parquetType);
        return (Literal) Literal.of((ByteBuffer) binaryConversion.apply(value));
      case DECIMAL:
        Function decimalConversion = converterFromParquet(parquetType);
        return (Literal) Literal.of((BigDecimal) decimalConversion.apply(value));
      default:
        throw new IllegalArgumentException("Unsupported primitive type: " + type);
    }
  }

  static Function converterFromParquet(PrimitiveType parquetType, Type icebergType) {
    Function fromParquet = converterFromParquet(parquetType);
    if (icebergType != null) {
      if (icebergType.typeId() == Type.TypeID.LONG &&
          parquetType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.INT32) {
        return value -> ((Integer) fromParquet.apply(value)).longValue();
      } else if (icebergType.typeId() == Type.TypeID.DOUBLE &&
          parquetType.getPrimitiveTypeName() == PrimitiveType.PrimitiveTypeName.FLOAT) {
        return value -> ((Float) fromParquet.apply(value)).doubleValue();
      }
    }

    return fromParquet;
  }

  static Function converterFromParquet(PrimitiveType type) {
    if (type.getOriginalType() != null) {
      switch (type.getOriginalType()) {
        case UTF8:
          // decode to CharSequence to avoid copying into a new String
          return binary -> StandardCharsets.UTF_8.decode(((Binary) binary).toByteBuffer());
        case DECIMAL:
          int scale = type.getDecimalMetadata().getScale();
          switch (type.getPrimitiveTypeName()) {
            case INT32:
            case INT64:
              return num -> BigDecimal.valueOf(((Number) num).longValue(), scale);
            case FIXED_LEN_BYTE_ARRAY:
            case BINARY:
              return bin -> new BigDecimal(new BigInteger(((Binary) bin).getBytes()), scale);
            default:
              throw new IllegalArgumentException(
                  "Unsupported primitive type for decimal: " + type.getPrimitiveTypeName());
          }
        default:
      }
    }

    switch (type.getPrimitiveTypeName()) {
      case FIXED_LEN_BYTE_ARRAY:
      case BINARY:
        return binary -> ByteBuffer.wrap(((Binary) binary).getBytes());
      default:
    }

    return obj -> obj;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy