All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.lucene.util.PagedBytes Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.util;


import java.io.IOException;
import java.util.Arrays;

import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;

/** Represents a logical byte[] as a series of pages.  You
 *  can write-once into the logical byte[] (append only),
 *  using copy, and then retrieve slices (BytesRef) into it
 *  using fill.
 *
 * @lucene.internal
 **/
// TODO: refactor this, byteblockpool, fst.bytestore, and any
// other "shift/mask big arrays". there are too many of these classes!
public final class PagedBytes implements Accountable {
  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(PagedBytes.class);
  private byte[][] blocks = new byte[16][];
  private int numBlocks;
  // TODO: these are unused?
  private final int blockSize;
  private final int blockBits;
  private final int blockMask;
  private boolean didSkipBytes;
  private boolean frozen;
  private int upto;
  private byte[] currentBlock;
  private final long bytesUsedPerBlock;

  private static final byte[] EMPTY_BYTES = new byte[0];

  /** Provides methods to read BytesRefs from a frozen
   *  PagedBytes.
   *
   * @see #freeze */
  public final static class Reader implements Accountable {
    private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Reader.class);
    private final byte[][] blocks;
    private final int blockBits;
    private final int blockMask;
    private final int blockSize;
    private final long bytesUsedPerBlock;

    private Reader(PagedBytes pagedBytes) {
      blocks = Arrays.copyOf(pagedBytes.blocks, pagedBytes.numBlocks);
      blockBits = pagedBytes.blockBits;
      blockMask = pagedBytes.blockMask;
      blockSize = pagedBytes.blockSize;
      bytesUsedPerBlock = pagedBytes.bytesUsedPerBlock;
    }

    /**
     * Gets a slice out of {@link PagedBytes} starting at start with a
     * given length. Iff the slice spans across a block border this method will
     * allocate sufficient resources and copy the paged data.
     * 

* Slices spanning more than two blocks are not supported. *

* @lucene.internal **/ public void fillSlice(BytesRef b, long start, int length) { assert length >= 0: "length=" + length; assert length <= blockSize+1: "length=" + length; b.length = length; if (length == 0) { return; } final int index = (int) (start >> blockBits); final int offset = (int) (start & blockMask); if (blockSize - offset >= length) { // Within block b.bytes = blocks[index]; b.offset = offset; } else { // Split b.bytes = new byte[length]; b.offset = 0; System.arraycopy(blocks[index], offset, b.bytes, 0, blockSize-offset); System.arraycopy(blocks[1+index], 0, b.bytes, blockSize-offset, length-(blockSize-offset)); } } /** * Reads length as 1 or 2 byte vInt prefix, starting at start. *

* Note: this method does not support slices spanning across block * borders. *

* * @lucene.internal **/ // TODO: this really needs to be refactored into fieldcacheimpl public void fill(BytesRef b, long start) { final int index = (int) (start >> blockBits); final int offset = (int) (start & blockMask); final byte[] block = b.bytes = blocks[index]; if ((block[offset] & 128) == 0) { b.length = block[offset]; b.offset = offset+1; } else { b.length = ((block[offset] & 0x7f) << 8) | (block[1+offset] & 0xff); b.offset = offset+2; assert b.length > 0; } } @Override public long ramBytesUsed() { long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks); if (blocks.length > 0) { size += (blocks.length - 1) * bytesUsedPerBlock; size += RamUsageEstimator.sizeOf(blocks[blocks.length - 1]); } return size; } @Override public String toString() { return "PagedBytes(blocksize=" + blockSize + ")"; } } /** 1<<blockBits must be bigger than biggest single * BytesRef slice that will be pulled */ public PagedBytes(int blockBits) { assert blockBits > 0 && blockBits <= 31 : blockBits; this.blockSize = 1 << blockBits; this.blockBits = blockBits; blockMask = blockSize-1; upto = blockSize; bytesUsedPerBlock = RamUsageEstimator.alignObjectSize(blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER); numBlocks = 0; } private void addBlock(byte[] block) { if (blocks.length == numBlocks) { blocks = Arrays.copyOf(blocks, ArrayUtil.oversize(numBlocks, RamUsageEstimator.NUM_BYTES_OBJECT_REF)); } blocks[numBlocks++] = block; } /** Read this many bytes from in */ public void copy(IndexInput in, long byteCount) throws IOException { while (byteCount > 0) { int left = blockSize - upto; if (left == 0) { if (currentBlock != null) { addBlock(currentBlock); } currentBlock = new byte[blockSize]; upto = 0; left = blockSize; } if (left < byteCount) { in.readBytes(currentBlock, upto, left, false); upto = blockSize; byteCount -= left; } else { in.readBytes(currentBlock, upto, (int) byteCount, false); upto += byteCount; break; } } } /** Copy BytesRef in, setting BytesRef out to 
the result. * Do not use this if you will use freeze(true). * This only supports bytes.length <= blockSize */ public void copy(BytesRef bytes, BytesRef out) { int left = blockSize - upto; if (bytes.length > left || currentBlock==null) { if (currentBlock != null) { addBlock(currentBlock); didSkipBytes = true; } currentBlock = new byte[blockSize]; upto = 0; left = blockSize; assert bytes.length <= blockSize; // TODO: we could also support variable block sizes } out.bytes = currentBlock; out.offset = upto; out.length = bytes.length; System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length); upto += bytes.length; } /** Commits final byte[], trimming it if necessary and if trim=true */ public Reader freeze(boolean trim) { if (frozen) { throw new IllegalStateException("already frozen"); } if (didSkipBytes) { throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used"); } if (trim && upto < blockSize) { final byte[] newBlock = new byte[upto]; System.arraycopy(currentBlock, 0, newBlock, 0, upto); currentBlock = newBlock; } if (currentBlock == null) { currentBlock = EMPTY_BYTES; } addBlock(currentBlock); frozen = true; currentBlock = null; return new PagedBytes.Reader(this); } public long getPointer() { if (currentBlock == null) { return 0; } else { return (numBlocks * ((long) blockSize)) + upto; } } @Override public long ramBytesUsed() { long size = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);; if (numBlocks > 0) { size += (numBlocks - 1) * bytesUsedPerBlock; size += RamUsageEstimator.sizeOf(blocks[numBlocks - 1]); } if (currentBlock != null) { size += RamUsageEstimator.sizeOf(currentBlock); } return size; } /** Copy bytes in, writing the length as a 1 or 2 byte * vInt prefix. */ // TODO: this really needs to be refactored into fieldcacheimpl! 
public long copyUsingLengthPrefix(BytesRef bytes) { if (bytes.length >= 32768) { throw new IllegalArgumentException("max length is 32767 (got " + bytes.length + ")"); } if (upto + bytes.length + 2 > blockSize) { if (bytes.length + 2 > blockSize) { throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + bytes.length + " bytes"); } if (currentBlock != null) { addBlock(currentBlock); } currentBlock = new byte[blockSize]; upto = 0; } final long pointer = getPointer(); if (bytes.length < 128) { currentBlock[upto++] = (byte) bytes.length; } else { currentBlock[upto++] = (byte) (0x80 | (bytes.length >> 8)); currentBlock[upto++] = (byte) (bytes.length & 0xff); } System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, bytes.length); upto += bytes.length; return pointer; } public final class PagedBytesDataInput extends DataInput { private int currentBlockIndex; private int currentBlockUpto; private byte[] currentBlock; PagedBytesDataInput() { currentBlock = blocks[0]; } @Override public PagedBytesDataInput clone() { PagedBytesDataInput clone = getDataInput(); clone.setPosition(getPosition()); return clone; } /** Returns the current byte position. */ public long getPosition() { return (long) currentBlockIndex * blockSize + currentBlockUpto; } /** Seek to a position previously obtained from * {@link #getPosition}. 
*/ public void setPosition(long pos) { currentBlockIndex = (int) (pos >> blockBits); currentBlock = blocks[currentBlockIndex]; currentBlockUpto = (int) (pos & blockMask); } @Override public byte readByte() { if (currentBlockUpto == blockSize) { nextBlock(); } return currentBlock[currentBlockUpto++]; } @Override public void readBytes(byte[] b, int offset, int len) { assert b.length >= offset + len; final int offsetEnd = offset + len; while (true) { final int blockLeft = blockSize - currentBlockUpto; final int left = offsetEnd - offset; if (blockLeft < left) { System.arraycopy(currentBlock, currentBlockUpto, b, offset, blockLeft); nextBlock(); offset += blockLeft; } else { // Last block System.arraycopy(currentBlock, currentBlockUpto, b, offset, left); currentBlockUpto += left; break; } } } private void nextBlock() { currentBlockIndex++; currentBlockUpto = 0; currentBlock = blocks[currentBlockIndex]; } } public final class PagedBytesDataOutput extends DataOutput { @Override public void writeByte(byte b) { if (upto == blockSize) { if (currentBlock != null) { addBlock(currentBlock); } currentBlock = new byte[blockSize]; upto = 0; } currentBlock[upto++] = b; } @Override public void writeBytes(byte[] b, int offset, int length) { assert b.length >= offset + length; if (length == 0) { return; } if (upto == blockSize) { if (currentBlock != null) { addBlock(currentBlock); } currentBlock = new byte[blockSize]; upto = 0; } final int offsetEnd = offset + length; while(true) { final int left = offsetEnd - offset; final int blockLeft = blockSize - upto; if (blockLeft < left) { System.arraycopy(b, offset, currentBlock, upto, blockLeft); addBlock(currentBlock); currentBlock = new byte[blockSize]; upto = 0; offset += blockLeft; } else { // Last block System.arraycopy(b, offset, currentBlock, upto, left); upto += left; break; } } } /** Return the current byte position. 
*/ public long getPosition() { return getPointer(); } } /** Returns a DataInput to read values from this * PagedBytes instance. */ public PagedBytesDataInput getDataInput() { if (!frozen) { throw new IllegalStateException("must call freeze() before getDataInput"); } return new PagedBytesDataInput(); } /** Returns a DataOutput that you may use to write into * this PagedBytes instance. If you do this, you should * not call the other writing methods (eg, copy); * results are undefined. */ public PagedBytesDataOutput getDataOutput() { if (frozen) { throw new IllegalStateException("cannot get DataOutput after freeze()"); } return new PagedBytesDataOutput(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy