All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.parquet.bytes.MultiBufferInputStream Maven / Gradle / Ivy

/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */

package org.apache.parquet.bytes;

import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;

/**
 * A {@link ByteBufferInputStream} that exposes a list of {@link ByteBuffer}s as one continuous
 * stream of bytes.
 *
 * <p>Each source buffer is duplicated before it is read, so the position and limit of the buffers
 * passed to the constructor are not changed by this stream; the underlying content is shared.
 *
 * <p>Mark/reset is supported: while a mark is valid, a duplicate of every buffer that becomes
 * current is retained so that {@link #reset()} can replay them.
 */
class MultiBufferInputStream extends ByteBufferInputStream {
  private static final ByteBuffer EMPTY = ByteBuffer.allocate(0);

  private final List<ByteBuffer> buffers;
  private final long length;

  // source of the buffers that have not been made current yet
  private Iterator<ByteBuffer> iterator;
  // buffer currently being read; null once every buffer has been exhausted
  private ByteBuffer current = EMPTY;
  private long position = 0;

  // stream position of the mark, or -1 when no mark is set
  private long mark = -1;
  // first stream position at which the mark is no longer valid
  private long markLimit = 0;
  // duplicates of the buffers seen since mark() was called, used to rewind on reset()
  private List<ByteBuffer> markBuffers = new ArrayList<>();

  /**
   * Creates a stream over the given buffers, read in list order.
   *
   * @param buffers the buffers backing this stream; the total stream length is the sum of their
   *     {@code remaining()} bytes at construction time
   */
  MultiBufferInputStream(List<ByteBuffer> buffers) {
    this.buffers = buffers;

    long totalLen = 0;
    for (ByteBuffer buffer : buffers) {
      totalLen += buffer.remaining();
    }
    this.length = totalLen;

    this.iterator = buffers.iterator();

    // position on the first buffer (or mark the stream as exhausted if there is none)
    nextBuffer();
  }

  /**
   * Returns the position in the stream.
   *
   * @return the number of bytes consumed so far
   */
  public long position() {
    return position;
  }

  /**
   * Skips up to {@code n} bytes.
   *
   * @param n the number of bytes to skip
   * @return 0 if {@code n <= 0}; -1 if the stream was already exhausted; otherwise the number of
   *     bytes actually skipped, which may be less than {@code n} when the stream ends first
   */
  @Override
  public long skip(long n) {
    if (n <= 0) {
      return 0;
    }

    if (current == null) {
      return -1;
    }

    long bytesSkipped = 0;
    while (bytesSkipped < n) {
      if (current.remaining() > 0) {
        // bounded by current.remaining(), so the narrowing cast below is safe
        long bytesToSkip = Math.min(n - bytesSkipped, current.remaining());
        current.position(current.position() + (int) bytesToSkip);
        bytesSkipped += bytesToSkip;
        this.position += bytesToSkip;
      } else if (!nextBuffer()) {
        // there are no more buffers
        return bytesSkipped > 0 ? bytesSkipped : -1;
      }
    }

    return bytesSkipped;
  }

  /**
   * Copies bytes from this stream into {@code out} until {@code out} is full or the stream ends.
   *
   * @param out the destination buffer
   * @return 0 if {@code out} has no space remaining; -1 if the stream was already exhausted;
   *     otherwise the number of bytes copied
   */
  @Override
  public int read(ByteBuffer out) {
    int len = out.remaining();
    if (len <= 0) {
      return 0;
    }

    if (current == null) {
      return -1;
    }

    int bytesCopied = 0;
    while (bytesCopied < len) {
      if (current.remaining() > 0) {
        int bytesToCopy;
        ByteBuffer copyBuffer;
        if (current.remaining() <= out.remaining()) {
          // copy all of the current buffer; put() advances current to its limit
          bytesToCopy = current.remaining();
          copyBuffer = current;
        } else {
          // copy a slice of the current buffer: limit a duplicate to what fits in out and
          // advance current past the copied region by hand
          bytesToCopy = out.remaining();
          copyBuffer = current.duplicate();
          copyBuffer.limit(copyBuffer.position() + bytesToCopy);
          current.position(copyBuffer.position() + bytesToCopy);
        }

        out.put(copyBuffer);
        bytesCopied += bytesToCopy;
        this.position += bytesToCopy;

      } else if (!nextBuffer()) {
        // there are no more buffers
        return bytesCopied > 0 ? bytesCopied : -1;
      }
    }

    return bytesCopied;
  }

  /**
   * Returns the next {@code length} bytes of the stream as a single buffer and consumes them.
   *
   * <p>When the request fits in the current buffer the result is a duplicate view that shares
   * content with it. Otherwise the bytes are copied into a newly allocated heap buffer. In the
   * copying case, bytes that were read before the end of the stream was detected remain consumed.
   *
   * @param length the number of bytes to slice
   * @return an empty buffer if {@code length <= 0}, otherwise a buffer with exactly
   *     {@code length} bytes remaining
   * @throws EOFException if fewer than {@code length} bytes are left in the stream
   */
  @Override
  public ByteBuffer slice(int length) throws EOFException {
    if (length <= 0) {
      return EMPTY;
    }

    if (current == null) {
      throw new EOFException();
    }

    ByteBuffer slice;
    if (length > current.remaining()) {
      // a copy is needed to return a single buffer
      // TODO: use an allocator
      slice = ByteBuffer.allocate(length);
      int bytesCopied = read(slice);
      slice.flip();
      if (bytesCopied < length) {
        throw new EOFException();
      }
    } else {
      slice = current.duplicate();
      slice.limit(slice.position() + length);
      current.position(slice.position() + length);
      this.position += length;
    }

    return slice;
  }

  /**
   * Returns the next {@code len} bytes of the stream as a list of buffer views, without copying,
   * and consumes them.
   *
   * @param len the number of bytes to slice
   * @return an empty list if {@code len <= 0}, otherwise duplicates of the underlying buffers
   *     whose remaining bytes total {@code len}
   * @throws EOFException if fewer than {@code len} bytes are left in the stream; bytes
   *     accumulated before the end was detected remain consumed
   */
  public List<ByteBuffer> sliceBuffers(long len) throws EOFException {
    if (len <= 0) {
      return Collections.emptyList();
    }

    if (current == null) {
      throw new EOFException();
    }

    List<ByteBuffer> buffers = new ArrayList<>();
    long bytesAccumulated = 0;
    while (bytesAccumulated < len) {
      if (current.remaining() > 0) {
        // get a slice of the current buffer to return
        // always fits in an int because remaining returns an int that is >= 0
        int bufLen = (int) Math.min(len - bytesAccumulated, current.remaining());
        ByteBuffer slice = current.duplicate();
        slice.limit(slice.position() + bufLen);
        buffers.add(slice);
        bytesAccumulated += bufLen;

        // update state; the bytes are considered read
        current.position(current.position() + bufLen);
        this.position += bufLen;
      } else if (!nextBuffer()) {
        // there are no more buffers
        throw new EOFException();
      }
    }

    return buffers;
  }

  /**
   * Returns everything that is left in the stream as a list of buffer views and consumes it.
   *
   * @return an empty list if the stream is exhausted, otherwise the remaining buffers
   * @throws RuntimeException if the buffers run out before the computed remaining length is
   *     reached, which indicates an inconsistent stream state
   */
  @Override
  public List<ByteBuffer> remainingBuffers() {
    if (position >= length) {
      return Collections.emptyList();
    }

    try {
      return sliceBuffers(length - position);
    } catch (EOFException e) {
      // keep the original exception as the cause so the failure can be diagnosed
      throw new RuntimeException(
          "[Parquet bug] Stream is bad: incorrect bytes remaining " +
              (length - position), e);
    }
  }

  /**
   * Reads up to {@code len} bytes into {@code bytes}, starting at index {@code off}.
   *
   * @param bytes the destination array
   * @param off the index in {@code bytes} at which to start writing
   * @param len the maximum number of bytes to read
   * @return 0 if {@code len} is 0; -1 if the stream was already exhausted; otherwise the number
   *     of bytes read
   * @throws IndexOutOfBoundsException if {@code len} is negative
   */
  @Override
  public int read(byte[] bytes, int off, int len) {
    if (len <= 0) {
      if (len < 0) {
        throw new IndexOutOfBoundsException("Read length must be greater than 0: " + len);
      }
      return 0;
    }

    if (current == null) {
      return -1;
    }

    int bytesRead = 0;
    while (bytesRead < len) {
      if (current.remaining() > 0) {
        int bytesToRead = Math.min(len - bytesRead, current.remaining());
        current.get(bytes, off + bytesRead, bytesToRead);
        bytesRead += bytesToRead;
        this.position += bytesToRead;
      } else if (!nextBuffer()) {
        // there are no more buffers
        return bytesRead > 0 ? bytesRead : -1;
      }
    }

    return bytesRead;
  }

  /**
   * Reads up to {@code bytes.length} bytes into {@code bytes}.
   *
   * @param bytes the destination array
   * @return the result of {@link #read(byte[], int, int)} for the whole array
   */
  @Override
  public int read(byte[] bytes) {
    return read(bytes, 0, bytes.length);
  }

  /**
   * Reads a single byte.
   *
   * <p>Note that the end of the stream is signalled with an exception, not with -1.
   *
   * @return the next byte as an unsigned value in the range 0-255
   * @throws EOFException if no bytes are left in the stream
   */
  @Override
  public int read() throws IOException {
    if (current == null) {
      throw new EOFException();
    }

    while (true) {
      if (current.remaining() > 0) {
        this.position += 1;
        return current.get() & 0xFF; // as unsigned
      } else if (!nextBuffer()) {
        // there are no more buffers
        throw new EOFException();
      }
    }
  }

  /**
   * Returns the number of bytes left in the stream.
   *
   * @return the remaining byte count, capped at {@link Integer#MAX_VALUE}
   */
  @Override
  public int available() {
    return (int) Math.min(length - position, (long) Integer.MAX_VALUE);
  }

  /**
   * Marks the current position, replacing any previous mark.
   *
   * @param readlimit the number of bytes that may be consumed after this call while
   *     {@link #reset()} is still allowed
   */
  @Override
  public void mark(int readlimit) {
    if (mark >= 0) {
      discardMark();
    }
    this.mark = position;
    // long arithmetic: mark is a long, so this cannot overflow for any int readlimit
    this.markLimit = mark + readlimit + 1;
    if (current != null) {
      // the duplicate captures the current read position inside the buffer
      markBuffers.add(current.duplicate());
    }
  }

  /**
   * Rewinds the stream to the position of the last {@link #mark(int)} call and clears the mark.
   *
   * @throws IOException if no mark is set or the read limit of the mark has been exceeded
   */
  @Override
  public void reset() throws IOException {
    if (mark >= 0 && position < markLimit) {
      this.position = mark;
      // replace the current iterator with one that adds back the buffers that
      // have been used since mark was called.
      this.iterator = concat(markBuffers.iterator(), iterator);
      discardMark();
      nextBuffer(); // go back to the marked buffers
    } else {
      throw new IOException("No mark defined or has read past the previous mark limit");
    }
  }

  /** Clears the mark and releases the buffers retained for it. */
  private void discardMark() {
    this.mark = -1;
    this.markLimit = 0;
    // a new list is created rather than clearing the old one because reset() may still be
    // iterating over the previous list
    markBuffers = new ArrayList<>();
  }

  /**
   * Reports that this stream supports {@link #mark(int)} and {@link #reset()}.
   *
   * @return always {@code true}
   */
  @Override
  public boolean markSupported() {
    return true;
  }

  /**
   * Advances to the next buffer, retaining a duplicate of it while a mark is valid.
   *
   * @return {@code true} if a new buffer became current, {@code false} if there are no more
   *     buffers (in which case {@code current} is set to {@code null})
   */
  private boolean nextBuffer() {
    if (!iterator.hasNext()) {
      this.current = null;
      return false;
    }

    // read from a duplicate so the source buffer's position and limit stay untouched
    this.current = iterator.next().duplicate();

    if (mark >= 0) {
      if (position < markLimit) {
        // the mark is defined and valid. save the new buffer
        markBuffers.add(current.duplicate());
      } else {
        // the mark has not been used and is no longer valid
        discardMark();
      }
    }

    return true;
  }

  /**
   * Returns an iterator that yields every element of {@code first} followed by every element of
   * {@code second}.
   */
  private static <E> Iterator<E> concat(Iterator<E> first, Iterator<E> second) {
    return new ConcatIterator<>(first, second);
  }

  /**
   * Iterator that drains one iterator and then continues with a second one.
   *
   * @param <E> the element type
   */
  private static class ConcatIterator<E> implements Iterator<E> {
    private final Iterator<E> first;
    private final Iterator<E> second;
    private boolean useFirst = true;
    // iterator that produced the element most recently returned by next(); null before that
    private Iterator<E> lastReturned = null;

    public ConcatIterator(Iterator<E> first, Iterator<E> second) {
      this.first = first;
      this.second = second;
    }

    @Override
    public boolean hasNext() {
      if (useFirst) {
        if (first.hasNext()) {
          return true;
        } else {
          useFirst = false;
          return second.hasNext();
        }
      }
      return second.hasNext();
    }

    @Override
    public E next() {
      if (useFirst && !first.hasNext()) {
        useFirst = false;
      }

      if (!useFirst && !second.hasNext()) {
        throw new NoSuchElementException();
      }

      Iterator<E> source = useFirst ? first : second;
      E element = source.next();
      lastReturned = source;
      return element;
    }

    /**
     * Removes the element most recently returned by {@link #next()} from the iterator that
     * produced it.
     *
     * @throws IllegalStateException if {@code next()} has not been called yet
     */
    @Override
    public void remove() {
      if (lastReturned == null) {
        throw new IllegalStateException("next() has not been called");
      }
      // delegate only to the iterator that produced the element; useFirst cannot be used here
      // because hasNext() may already have switched it to the second iterator
      lastReturned.remove();
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy