org.apache.druid.delta.input.DeltaInputSourceReader Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.delta.input;
import io.delta.kernel.data.FilteredColumnarBatch;
import io.delta.kernel.data.Row;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.data.input.InputStats;
import org.apache.druid.java.util.common.parsers.CloseableIterator;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
/**
* A reader for the Delta Lake input source. It initializes an iterator {@link DeltaInputSourceIterator}
* for a subset of Delta records given by {@link FilteredColumnarBatch} and schema {@link InputRowSchema}.
*
*/
public class DeltaInputSourceReader implements InputSourceReader
{
private final Iterator> filteredColumnarBatchIterators;
private final InputRowSchema inputRowSchema;
public DeltaInputSourceReader(
Iterator> filteredColumnarBatchIterators,
InputRowSchema inputRowSchema
)
{
this.filteredColumnarBatchIterators = filteredColumnarBatchIterators;
this.inputRowSchema = inputRowSchema;
}
@Override
public CloseableIterator read()
{
return new DeltaInputSourceIterator(filteredColumnarBatchIterators, inputRowSchema);
}
@Override
public CloseableIterator read(InputStats inputStats)
{
return new DeltaInputSourceIterator(filteredColumnarBatchIterators, inputRowSchema);
}
@Override
public CloseableIterator sample()
{
CloseableIterator inner = read();
return new CloseableIterator()
{
@Override
public void close() throws IOException
{
inner.close();
}
@Override
public boolean hasNext()
{
return inner.hasNext();
}
@Override
public InputRowListPlusRawValues next()
{
DeltaInputRow deltaInputRow = (DeltaInputRow) inner.next();
return InputRowListPlusRawValues.of(deltaInputRow, deltaInputRow.getRawRowAsMap());
}
};
}
private static class DeltaInputSourceIterator implements CloseableIterator
{
private final Iterator> filteredColumnarBatchIterators;
private io.delta.kernel.utils.CloseableIterator currentBatch = null;
private final InputRowSchema inputRowSchema;
public DeltaInputSourceIterator(
Iterator> filteredColumnarBatchCloseableIterator,
InputRowSchema inputRowSchema
)
{
this.filteredColumnarBatchIterators = filteredColumnarBatchCloseableIterator;
this.inputRowSchema = inputRowSchema;
}
@Override
public boolean hasNext()
{
while (currentBatch == null || !currentBatch.hasNext()) {
if (!filteredColumnarBatchIterators.hasNext()) {
return false; // No more batches or records to read!
}
final io.delta.kernel.utils.CloseableIterator filteredBatchIterator =
filteredColumnarBatchIterators.next();
while (filteredBatchIterator.hasNext()) {
final FilteredColumnarBatch nextBatch = filteredBatchIterator.next();
currentBatch = nextBatch.getRows();
if (currentBatch.hasNext()) {
return true;
}
}
}
return true;
}
@Override
public InputRow next()
{
if (!hasNext()) {
throw new NoSuchElementException();
}
Row dataRow = currentBatch.next();
return new DeltaInputRow(dataRow, inputRowSchema);
}
@Override
public void close() throws IOException
{
if (currentBatch != null) {
currentBatch.close();
}
if (filteredColumnarBatchIterators.hasNext()) {
filteredColumnarBatchIterators.next().close();
}
}
}
}