All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.orc.OrcValueReaders Maven / Gradle / Ivy

There is a newer version: 1.0.0.5
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.orc;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.iceberg.MetadataColumns;
import org.apache.iceberg.types.Types;
import org.apache.orc.storage.ql.exec.vector.BytesColumnVector;
import org.apache.orc.storage.ql.exec.vector.ColumnVector;
import org.apache.orc.storage.ql.exec.vector.DoubleColumnVector;
import org.apache.orc.storage.ql.exec.vector.LongColumnVector;
import org.apache.orc.storage.ql.exec.vector.StructColumnVector;


public class OrcValueReaders {
  private OrcValueReaders() {
  }

  public static OrcValueReader booleans() {
    return BooleanReader.INSTANCE;
  }

  public static OrcValueReader ints() {
    return IntegerReader.INSTANCE;
  }

  public static OrcValueReader longs() {
    return LongReader.INSTANCE;
  }

  public static OrcValueReader floats() {
    return FloatReader.INSTANCE;
  }

  public static OrcValueReader doubles() {
    return DoubleReader.INSTANCE;
  }

  public static OrcValueReader bytes() {
    return BytesReader.INSTANCE;
  }

  public static  OrcValueReader constants(C constant) {
    return new ConstantReader<>(constant);
  }

  private static class BooleanReader implements OrcValueReader {
    static final BooleanReader INSTANCE = new BooleanReader();

    private BooleanReader() {
    }

    @Override
    public Boolean nonNullRead(ColumnVector vector, int row) {
      return ((LongColumnVector) vector).vector[row] != 0;
    }
  }

  private static class IntegerReader implements OrcValueReader {
    static final IntegerReader INSTANCE = new IntegerReader();

    private IntegerReader() {
    }

    @Override
    public Integer nonNullRead(ColumnVector vector, int row) {
      return (int) ((LongColumnVector) vector).vector[row];
    }
  }

  private static class LongReader implements OrcValueReader {
    static final LongReader INSTANCE = new LongReader();

    private LongReader() {
    }

    @Override
    public Long nonNullRead(ColumnVector vector, int row) {
      return ((LongColumnVector) vector).vector[row];
    }
  }

  private static class FloatReader implements OrcValueReader {
    private static final FloatReader INSTANCE = new FloatReader();

    private FloatReader() {
    }

    @Override
    public Float nonNullRead(ColumnVector vector, int row) {
      return (float) ((DoubleColumnVector) vector).vector[row];
    }
  }

  private static class DoubleReader implements OrcValueReader {
    private static final DoubleReader INSTANCE = new DoubleReader();

    private DoubleReader() {
    }

    @Override
    public Double nonNullRead(ColumnVector vector, int row) {
      return ((DoubleColumnVector) vector).vector[row];
    }
  }

  private static class BytesReader implements OrcValueReader {
    private static final BytesReader INSTANCE = new BytesReader();

    private BytesReader() {
    }

    @Override
    public byte[] nonNullRead(ColumnVector vector, int row) {
      BytesColumnVector bytesVector = (BytesColumnVector) vector;

      return Arrays.copyOfRange(
          bytesVector.vector[row], bytesVector.start[row], bytesVector.start[row] + bytesVector.length[row]);
    }
  }

  public abstract static class StructReader implements OrcValueReader {
    private final OrcValueReader[] readers;
    private final boolean[] isConstantOrMetadataField;

    protected StructReader(List> readers, Types.StructType struct, Map idToConstant) {
      List fields = struct.fields();
      this.readers = new OrcValueReader[fields.size()];
      this.isConstantOrMetadataField = new boolean[fields.size()];
      for (int pos = 0, readerIndex = 0; pos < fields.size(); pos += 1) {
        Types.NestedField field = fields.get(pos);
        if (idToConstant.containsKey(field.fieldId())) {
          this.isConstantOrMetadataField[pos] = true;
          this.readers[pos] = constants(idToConstant.get(field.fieldId()));
        } else if (field.equals(MetadataColumns.ROW_POSITION)) {
          this.isConstantOrMetadataField[pos] = true;
          this.readers[pos] = new RowPositionReader();
        } else {
          this.readers[pos] = readers.get(readerIndex);
          readerIndex++;
        }
      }
    }

    protected abstract T create();

    protected abstract void set(T struct, int pos, Object value);

    public OrcValueReader reader(int pos) {
      return readers[pos];
    }

    @Override
    public T nonNullRead(ColumnVector vector, int row) {
      StructColumnVector structVector = (StructColumnVector) vector;
      return readInternal(create(), structVector.fields, row);
    }

    private T readInternal(T struct, ColumnVector[] columnVectors, int row) {
      for (int c = 0, vectorIndex = 0; c < readers.length; ++c) {
        ColumnVector vector;
        if (isConstantOrMetadataField[c]) {
          vector = null;
        } else {
          vector = columnVectors[vectorIndex];
          vectorIndex++;
        }
        set(struct, c, reader(c).read(vector, row));
      }
      return struct;
    }

    @Override
    public void setBatchContext(long batchOffsetInFile) {
      for (OrcValueReader reader : readers) {
        reader.setBatchContext(batchOffsetInFile);
      }
    }
  }

  private static class ConstantReader implements OrcValueReader {
    private final C constant;

    private ConstantReader(C constant) {
      this.constant = constant;
    }

    @Override
    public C read(ColumnVector ignored, int ignoredRow) {
      return constant;
    }

    @Override
    public C nonNullRead(ColumnVector ignored, int ignoredRow) {
      return constant;
    }
  }

  private static class RowPositionReader implements OrcValueReader {
    private long batchOffsetInFile;

    @Override
    public Long read(ColumnVector ignored, int row) {
      return batchOffsetInFile + row;
    }

    @Override
    public Long nonNullRead(ColumnVector ignored, int row) {
      throw new UnsupportedOperationException("Use RowPositionReader.read()");
    }

    @Override
    public void setBatchContext(long newBatchOffsetInFile) {
      this.batchOffsetInFile = newBatchOffsetInFile;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy