All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.util.PagedBytes Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;

import java.io.IOException;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;

/**
 * Represents a logical byte[] as a series of pages. You can write-once into the logical byte[]
 * (append only), using copy, and then retrieve slices (BytesRef) into it using fill.
 *
 * @lucene.internal
 */
// TODO: refactor this, byteblockpool, fst.bytestore, and any
// other "shift/mask big arrays". there are too many of these classes!
public final class PagedBytes implements Accountable {
  private static final long BASE_RAM_BYTES_USED =
      RamUsageEstimator.shallowSizeOfInstance(PagedBytes.class);
  // Filled pages; grown on demand. Only the first numBlocks entries are valid.
  private byte[][] blocks = new byte[16][];
  private int numBlocks;
  // TODO: these are unused?
  private final int blockSize; // 1 << blockBits; capacity of each page
  private final int blockBits; // shift to map a global offset to a block index
  private final int blockMask; // blockSize - 1; mask to map a global offset within a block
  // Set when copy(BytesRef, BytesRef) left a gap at the end of a block; such an
  // instance can no longer be frozen because global pointers would be wrong.
  private boolean didSkipBytes;
  private boolean frozen;
  // Write position within currentBlock; starts at blockSize so the first write
  // allocates a block lazily.
  private int upto;
  private byte[] currentBlock;
  private final long bytesUsedPerBlock;

  private static final byte[] EMPTY_BYTES = new byte[0];

  /**
   * Provides methods to read BytesRefs from a frozen PagedBytes.
   *
   * @see #freeze
   */
  public static final class Reader implements Accountable {
    private static final long BASE_RAM_BYTES_USED =
        RamUsageEstimator.shallowSizeOfInstance(Reader.class);
    private final byte[][] blocks;
    private final int blockBits;
    private final int blockMask;
    private final int blockSize;
    private final long bytesUsedPerBlock;

    private Reader(PagedBytes pagedBytes) {
      // Snapshot only the filled blocks; the writer may keep growing its array.
      blocks = ArrayUtil.copyOfSubArray(pagedBytes.blocks, 0, pagedBytes.numBlocks);
      blockBits = pagedBytes.blockBits;
      blockMask = pagedBytes.blockMask;
      blockSize = pagedBytes.blockSize;
      bytesUsedPerBlock = pagedBytes.bytesUsedPerBlock;
    }

    /**
     * Gets a slice out of {@link PagedBytes} starting at <code>start</code> with a given length.
     * Iff the slice spans across a block border this method will allocate sufficient resources and
     * copy the paged data.
     *
     * <p>Slices spanning more than two blocks are not supported.
     *
     * @param b destination ref; its bytes/offset/length are overwritten
     * @param start global byte offset into the logical byte[]
     * @param length number of bytes to expose; must be &gt;= 0 and &lt;= blockSize + 1
     * @lucene.internal
     */
    public void fillSlice(BytesRef b, long start, int length) {
      assert length >= 0 : "length=" + length;
      assert length <= blockSize + 1 : "length=" + length;
      b.length = length;
      if (length == 0) {
        return;
      }
      final int index = (int) (start >> blockBits);
      final int offset = (int) (start & blockMask);
      if (blockSize - offset >= length) {
        // Within block: return a view directly into the page, no copy.
        b.bytes = blocks[index];
        b.offset = offset;
      } else {
        // Split: the slice crosses into the next block, so copy both pieces
        // into a freshly allocated array.
        b.bytes = new byte[length];
        b.offset = 0;
        System.arraycopy(blocks[index], offset, b.bytes, 0, blockSize - offset);
        System.arraycopy(
            blocks[1 + index], 0, b.bytes, blockSize - offset, length - (blockSize - offset));
      }
    }

    /**
     * Get the byte at the given offset.
     *
     * @lucene.internal
     */
    public byte getByte(long o) {
      final int index = (int) (o >> blockBits);
      final int offset = (int) (o & blockMask);
      return blocks[index][offset];
    }

    /**
     * Reads length as 1 or 2 byte vInt prefix, starting at <code>start</code>.
     *
     * <p>Note: this method does not support slices spanning across block borders.
     *
     * @param b destination ref pointed directly into the page (no copy)
     * @param start global byte offset of the length prefix written by
     *     {@link PagedBytes#copyUsingLengthPrefix}
     * @lucene.internal
     */
    // TODO: this really needs to be refactored into fieldcacheimpl
    public void fill(BytesRef b, long start) {
      final int index = (int) (start >> blockBits);
      final int offset = (int) (start & blockMask);
      final byte[] block = b.bytes = blocks[index];
      if ((block[offset] & 128) == 0) {
        // High bit clear: single-byte length prefix (length < 128).
        b.length = block[offset];
        b.offset = offset + 1;
      } else {
        // High bit set: two-byte big-endian prefix; mask off the flag bit.
        b.length = ((short) BitUtil.VH_BE_SHORT.get(block, offset)) & 0x7FFF;
        b.offset = offset + 2;
        assert b.length > 0;
      }
    }

    @Override
    public long ramBytesUsed() {
      long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
      if (blocks.length > 0) {
        // All blocks but the last are full-size; the last may have been trimmed.
        size += (blocks.length - 1) * bytesUsedPerBlock;
        size += RamUsageEstimator.sizeOf(blocks[blocks.length - 1]);
      }
      return size;
    }

    @Override
    public String toString() {
      return "PagedBytes(blocksize=" + blockSize + ")";
    }
  }

  /** 1&lt;&lt;blockBits must be bigger than biggest single BytesRef slice that will be pulled */
  public PagedBytes(int blockBits) {
    assert blockBits > 0 && blockBits <= 31 : blockBits;
    this.blockSize = 1 << blockBits;
    this.blockBits = blockBits;
    blockMask = blockSize - 1;
    // Start "full" so the first write triggers lazy allocation of a block.
    upto = blockSize;
    bytesUsedPerBlock =
        RamUsageEstimator.alignObjectSize(blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
    numBlocks = 0;
  }

  // Appends a finished page, growing the page table as needed.
  private void addBlock(byte[] block) {
    blocks = ArrayUtil.grow(blocks, numBlocks + 1);
    blocks[numBlocks++] = block;
  }

  /** Read this many bytes from in */
  public void copy(IndexInput in, long byteCount) throws IOException {
    while (byteCount > 0) {
      int left = blockSize - upto;
      if (left == 0) {
        // Current page is full (or none allocated yet): start a new one.
        if (currentBlock != null) {
          addBlock(currentBlock);
        }
        currentBlock = new byte[blockSize];
        upto = 0;
        left = blockSize;
      }
      if (left < byteCount) {
        // Fill the rest of this page and loop for more.
        in.readBytes(currentBlock, upto, left, false);
        upto = blockSize;
        byteCount -= left;
      } else {
        // Remaining bytes fit in the current page.
        in.readBytes(currentBlock, upto, (int) byteCount, false);
        upto += byteCount;
        break;
      }
    }
  }

  /**
   * Copy BytesRef in, setting BytesRef out to the result. Do not use this if you will use
   * freeze(true). This only supports bytes.length &lt;= blockSize
   */
  public void copy(BytesRef bytes, BytesRef out) {
    int left = blockSize - upto;
    if (bytes.length > left || currentBlock == null) {
      if (currentBlock != null) {
        addBlock(currentBlock);
        // We skipped the tail of the previous block, so global pointers are no
        // longer contiguous; freeze() must be rejected from now on.
        didSkipBytes = true;
      }
      currentBlock = new byte[blockSize];
      upto = 0;
      left = blockSize;
      assert bytes.length <= blockSize;
      // TODO: we could also support variable block sizes
    }
    // Point out directly at the copied-in bytes within the current page.
    out.bytes = currentBlock;
    out.offset = upto;
    out.length = bytes.length;
    System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
    upto += bytes.length;
  }

  /** Commits final byte[], trimming it if necessary and if trim=true */
  public Reader freeze(boolean trim) {
    if (frozen) {
      throw new IllegalStateException("already frozen");
    }
    if (didSkipBytes) {
      throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
    }
    if (trim && upto < blockSize) {
      // Shrink the last page to its used size to save RAM.
      final byte[] newBlock = new byte[upto];
      System.arraycopy(currentBlock, 0, newBlock, 0, upto);
      currentBlock = newBlock;
    }
    if (currentBlock == null) {
      currentBlock = EMPTY_BYTES;
    }
    addBlock(currentBlock);
    frozen = true;
    currentBlock = null;
    return new PagedBytes.Reader(this);
  }

  /** Returns the current global write position (logical length in bytes). */
  public long getPointer() {
    if (currentBlock == null) {
      return 0;
    } else {
      return (numBlocks * ((long) blockSize)) + upto;
    }
  }

  @Override
  public long ramBytesUsed() {
    long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
    if (numBlocks > 0) {
      size += (numBlocks - 1) * bytesUsedPerBlock;
      size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]);
    }
    if (currentBlock != null) {
      size += RamUsageEstimator.sizeOf(currentBlock);
    }
    return size;
  }

  /**
   * Copy bytes in, writing the length as a 1 or 2 byte vInt prefix.
   *
   * @return the global pointer at which the prefix was written; pass it to
   *     {@link Reader#fill} to read the bytes back
   * @throws IllegalArgumentException if length &gt;= 32768 or prefix + bytes cannot fit in a block
   */
  // TODO: this really needs to be refactored into fieldcacheimpl!
  public long copyUsingLengthPrefix(BytesRef bytes) {
    if (bytes.length >= 32768) {
      // 2-byte prefix reserves the high bit as a flag, so max length is 15 bits.
      throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")");
    }
    if (upto + bytes.length + 2 > blockSize) {
      if (bytes.length + 2 > blockSize) {
        throw new IllegalArgumentException(
            "block size " + blockSize + " is too small to store length " + bytes.length + " bytes");
      }
      // Prefix + payload must not span blocks (Reader.fill cannot cross
      // borders), so start a fresh page.
      if (currentBlock != null) {
        addBlock(currentBlock);
      }
      currentBlock = new byte[blockSize];
      upto = 0;
    }
    final long pointer = getPointer();
    if (bytes.length < 128) {
      // Single-byte prefix, high bit clear.
      currentBlock[upto++] = (byte) bytes.length;
    } else {
      // Two-byte big-endian prefix with high bit set as a marker.
      BitUtil.VH_BE_SHORT.set(currentBlock, upto, (short) (bytes.length | 0x8000));
      upto += 2;
    }
    System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length);
    upto += bytes.length;
    return pointer;
  }

  /**
   * Input that transparently iterates over pages
   *
   * @lucene.internal
   */
  public final class PagedBytesDataInput extends DataInput {
    private int currentBlockIndex;
    private int currentBlockUpto;
    private byte[] currentBlock;

    PagedBytesDataInput() {
      currentBlock = blocks[0];
    }

    @Override
    public PagedBytesDataInput clone() {
      PagedBytesDataInput clone = getDataInput();
      clone.setPosition(getPosition());
      return clone;
    }

    /** Returns the current byte position. */
    public long getPosition() {
      return (long) currentBlockIndex * blockSize + currentBlockUpto;
    }

    /** Seek to a position previously obtained from {@link #getPosition}. */
    public void setPosition(long pos) {
      currentBlockIndex = (int) (pos >> blockBits);
      currentBlock = blocks[currentBlockIndex];
      currentBlockUpto = (int) (pos & blockMask);
    }

    @Override
    public byte readByte() {
      if (currentBlockUpto == blockSize) {
        nextBlock();
      }
      return currentBlock[currentBlockUpto++];
    }

    @Override
    public void readBytes(byte[] b, int offset, int len) {
      assert b.length >= offset + len;
      final int offsetEnd = offset + len;
      while (true) {
        final int blockLeft = blockSize - currentBlockUpto;
        final int left = offsetEnd - offset;
        if (blockLeft < left) {
          // Consume the rest of this page, then advance to the next one.
          System.arraycopy(currentBlock, currentBlockUpto, b, offset, blockLeft);
          nextBlock();
          offset += blockLeft;
        } else {
          // Last block
          System.arraycopy(currentBlock, currentBlockUpto, b, offset, left);
          currentBlockUpto += left;
          break;
        }
      }
    }

    @Override
    public void skipBytes(long numBytes) {
      if (numBytes < 0) {
        throw new IllegalArgumentException("numBytes must be >= 0, got " + numBytes);
      }
      // Absolute reposition handles any number of page crossings at once.
      final long skipTo = getPosition() + numBytes;
      setPosition(skipTo);
    }

    private void nextBlock() {
      currentBlockIndex++;
      currentBlockUpto = 0;
      currentBlock = blocks[currentBlockIndex];
    }
  }

  /**
   * Output that transparently spills to new pages as necessary
   *
   * @lucene.internal
   */
  public final class PagedBytesDataOutput extends DataOutput {
    @Override
    public void writeByte(byte b) {
      if (upto == blockSize) {
        if (currentBlock != null) {
          addBlock(currentBlock);
        }
        currentBlock = new byte[blockSize];
        upto = 0;
      }
      currentBlock[upto++] = b;
    }

    @Override
    public void writeBytes(byte[] b, int offset, int length) {
      assert b.length >= offset + length
          : "b.length=" + b.length + " offset=" + offset + " length=" + length;
      if (length == 0) {
        return;
      }

      if (upto == blockSize) {
        if (currentBlock != null) {
          addBlock(currentBlock);
        }
        currentBlock = new byte[blockSize];
        upto = 0;
      }

      final int offsetEnd = offset + length;
      while (true) {
        final int left = offsetEnd - offset;
        final int blockLeft = blockSize - upto;
        if (blockLeft < left) {
          // Fill this page, commit it, and continue into a fresh one.
          System.arraycopy(b, offset, currentBlock, upto, blockLeft);
          addBlock(currentBlock);
          currentBlock = new byte[blockSize];
          upto = 0;
          offset += blockLeft;
        } else {
          // Last block
          System.arraycopy(b, offset, currentBlock, upto, left);
          upto += left;
          break;
        }
      }
    }

    /** Return the current byte position. */
    public long getPosition() {
      return getPointer();
    }
  }

  /** Returns a DataInput to read values from this PagedBytes instance. */
  public PagedBytesDataInput getDataInput() {
    if (!frozen) {
      throw new IllegalStateException("must call freeze() before getDataInput");
    }
    return new PagedBytesDataInput();
  }

  /**
   * Returns a DataOutput that you may use to write into this PagedBytes instance. If you do this,
   * you should not call the other writing methods (eg, copy); results are undefined.
   */
  public PagedBytesDataOutput getDataOutput() {
    if (frozen) {
      throw new IllegalStateException("cannot get DataOutput after freeze()");
    }
    return new PagedBytesDataOutput();
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy