All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.druid.delta.input.DeltaInputSourceReader Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.delta.input;

import io.delta.kernel.data.FilteredColumnarBatch;
import io.delta.kernel.data.Row;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.data.input.InputStats;
import org.apache.druid.java.util.common.parsers.CloseableIterator;

import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;

/**
 * A reader for the Delta Lake input source. It initializes an iterator {@link DeltaInputSourceIterator}
 * for a subset of Delta records given by {@link FilteredColumnarBatch} and schema {@link InputRowSchema}.
 *
 */
public class DeltaInputSourceReader implements InputSourceReader
{
  private final Iterator> filteredColumnarBatchIterators;
  private final InputRowSchema inputRowSchema;

  public DeltaInputSourceReader(
      Iterator> filteredColumnarBatchIterators,
      InputRowSchema inputRowSchema
  )
  {
    this.filteredColumnarBatchIterators = filteredColumnarBatchIterators;
    this.inputRowSchema = inputRowSchema;
  }

  @Override
  public CloseableIterator read()
  {
    return new DeltaInputSourceIterator(filteredColumnarBatchIterators, inputRowSchema);
  }

  @Override
  public CloseableIterator read(InputStats inputStats)
  {
    return new DeltaInputSourceIterator(filteredColumnarBatchIterators, inputRowSchema);
  }

  @Override
  public CloseableIterator sample()
  {

    CloseableIterator inner = read();
    return new CloseableIterator()
    {
      @Override
      public void close() throws IOException
      {
        inner.close();
      }

      @Override
      public boolean hasNext()
      {
        return inner.hasNext();
      }

      @Override
      public InputRowListPlusRawValues next()
      {
        DeltaInputRow deltaInputRow = (DeltaInputRow) inner.next();
        return InputRowListPlusRawValues.of(deltaInputRow, deltaInputRow.getRawRowAsMap());
      }
    };
  }

  private static class DeltaInputSourceIterator implements CloseableIterator
  {
    private final Iterator> filteredColumnarBatchIterators;

    private io.delta.kernel.utils.CloseableIterator currentBatch = null;
    private final InputRowSchema inputRowSchema;

    public DeltaInputSourceIterator(
        Iterator> filteredColumnarBatchCloseableIterator,
        InputRowSchema inputRowSchema
    )
    {
      this.filteredColumnarBatchIterators = filteredColumnarBatchCloseableIterator;
      this.inputRowSchema = inputRowSchema;
    }

    @Override
    public boolean hasNext()
    {
      while (currentBatch == null || !currentBatch.hasNext()) {
        if (!filteredColumnarBatchIterators.hasNext()) {
          return false; // No more batches or records to read!
        }

        final io.delta.kernel.utils.CloseableIterator filteredBatchIterator =
            filteredColumnarBatchIterators.next();

        while (filteredBatchIterator.hasNext()) {
          final FilteredColumnarBatch nextBatch = filteredBatchIterator.next();
          currentBatch = nextBatch.getRows();
          if (currentBatch.hasNext()) {
            return true;
          }
        }
      }
      return true;
    }

    @Override
    public InputRow next()
    {
      if (!hasNext()) {
        throw new NoSuchElementException();
      }

      Row dataRow = currentBatch.next();
      return new DeltaInputRow(dataRow, inputRowSchema);
    }

    @Override
    public void close() throws IOException
    {
      if (currentBatch != null) {
        currentBatch.close();
      }

      if (filteredColumnarBatchIterators.hasNext()) {
        filteredColumnarBatchIterators.next().close();
      }
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy