All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.orc.OrcIterable Maven / Gradle / Ivy

There is a newer version: 1.7.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg.orc;

import java.io.IOException;
import java.util.Iterator;
import java.util.function.Function;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.iceberg.Schema;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.io.CloseableGroup;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.InputFile;
import org.apache.orc.OrcFile;
import org.apache.orc.Reader;
import org.apache.orc.TypeDescription;
import org.apache.orc.storage.ql.exec.vector.VectorizedRowBatch;

/**
 * Iterable used to read rows from ORC.
 */
class OrcIterable extends CloseableGroup implements CloseableIterable {
  private final Configuration config;
  private final Schema schema;
  private final InputFile file;
  private final Long start;
  private final Long length;
  private final Function> readerFunction;

  OrcIterable(InputFile file, Configuration config, Schema schema,
                     Long start, Long length,
                     Function> readerFunction) {
    this.schema = schema;
    this.readerFunction = readerFunction;
    this.file = file;
    this.start = start;
    this.length = length;
    this.config = config;
  }

  @SuppressWarnings("unchecked")
  @Override
  public Iterator iterator() {
    return new OrcIterator(
        newOrcIterator(file, TypeConversion.toOrc(schema, new ColumnIdMap()),
            start, length, newFileReader(file, config)),
        readerFunction.apply(schema));
  }

  private static VectorizedRowBatchIterator newOrcIterator(InputFile file,
                                                           TypeDescription readerSchema,
                                                           Long start, Long length,
                                                           Reader orcFileReader) {
    final Reader.Options options = orcFileReader.options();
    if (start != null) {
      options.range(start, length);
    }
    options.schema(readerSchema);

    try {
      return new VectorizedRowBatchIterator(file.location(), readerSchema, orcFileReader.rows(options));
    } catch (IOException ioe) {
      throw new RuntimeIOException(ioe, "Failed to get ORC rows for file: %s", file);
    }
  }

  private static Reader newFileReader(InputFile file, Configuration config) {
    try {
      return OrcFile.createReader(new Path(file.location()),
          OrcFile.readerOptions(config));
    } catch (IOException ioe) {
      throw new RuntimeIOException(ioe, "Failed to open file: %s", file);
    }
  }

  private static class OrcIterator implements Iterator {

    private int nextRow;
    private VectorizedRowBatch current;

    private final VectorizedRowBatchIterator batchIter;
    private final OrcValueReader reader;

    OrcIterator(VectorizedRowBatchIterator batchIter, OrcValueReader reader) {
      this.batchIter = batchIter;
      this.reader = reader;
      current = null;
      nextRow = 0;
    }

    @Override
    public boolean hasNext() {
      return (current != null && nextRow < current.size) || batchIter.hasNext();
    }

    @Override
    public T next() {
      if (current == null || nextRow >= current.size) {
        current = batchIter.next();
        nextRow = 0;
      }

      return this.reader.read(current, nextRow++);
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy