// org.apache.lucene.util.PagedBytes Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.IndexInput;
/** Represents a logical byte[] as a series of pages. You
* can write-once into the logical byte[] (append only),
* using copy, and then retrieve slices (BytesRef) into it
* using fill.
*
* @lucene.internal
**/
// TODO: refactor this, byteblockpool, fst.bytestore, and any
// other "shift/mask big arrays". there are too many of these classes!
public final class PagedBytes implements Accountable {
// Shallow size of a PagedBytes instance; the starting point of ramBytesUsed().
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(PagedBytes.class);
// Completed blocks, in write order. Starts with 16 slots and is grown by addBlock().
private byte[][] blocks = new byte[16][];
// Number of leading entries of `blocks` that are in use.
private int numBlocks;
// Block geometry: blockSize == 1 << blockBits and blockMask == blockSize - 1.
// All three are read in this file (by the copy/write methods, by
// PagedBytesDataInput.setPosition, and copied into Reader).
private final int blockSize;
private final int blockBits;
private final int blockMask;
// Set by copy(BytesRef, BytesRef) when it abandons the current block for a new one;
// freeze() refuses to run once this is true.
private boolean didSkipBytes;
// Set by freeze(); guards getDataInput()/getDataOutput() and a second freeze().
private boolean frozen;
// Next write offset inside currentBlock. The constructor initialises it to blockSize
// so that the very first write allocates a block.
private int upto;
// Block currently being written; it is not yet stored in `blocks`.
// Null until the first write and again after freeze().
private byte[] currentBlock;
// Aligned size estimate of one full block (array header + blockSize bytes).
private final long bytesUsedPerBlock;
// Stored by freeze() as the only block when nothing was ever written.
private static final byte[] EMPTY_BYTES = new byte[0];
/**
 * Read-only accessor that fills {@link BytesRef}s from the blocks of a
 * frozen PagedBytes.
 *
 * <p>The constructor copies the outer block table (truncated to the blocks
 * in use); the individual block arrays are shared, not copied.
 *
 * @see #freeze
 */
public static final class Reader implements Accountable {
  private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Reader.class);

  private final byte[][] blocks;
  private final int blockBits;
  private final int blockMask;
  private final int blockSize;
  private final long bytesUsedPerBlock;

  /** Snapshots the block table and block geometry of {@code pagedBytes}. */
  private Reader(PagedBytes pagedBytes) {
    this.blocks = Arrays.copyOf(pagedBytes.blocks, pagedBytes.numBlocks);
    this.blockBits = pagedBytes.blockBits;
    this.blockMask = pagedBytes.blockMask;
    this.blockSize = pagedBytes.blockSize;
    this.bytesUsedPerBlock = pagedBytes.bytesUsedPerBlock;
  }

  /**
   * Points {@code b} at {@code length} bytes beginning at absolute position
   * {@code start}.
   *
   * <p>If the slice lies inside a single block, {@code b} references that
   * block directly. If it crosses a block border, a new array of
   * {@code length} bytes is allocated and filled from the two adjacent blocks.
   *
   * <p>Slices spanning more than two blocks are not supported.
   *
   * @param b the ref to populate; its bytes, offset and length are overwritten
   * @param start absolute byte position of the first byte of the slice
   * @param length number of bytes in the slice; a length of 0 only sets {@code b.length}
   *
   * @lucene.internal
   **/
  public void fillSlice(BytesRef b, long start, int length) {
    assert length >= 0 : "length=" + length;
    assert length <= blockSize + 1 : "length=" + length;
    b.length = length;
    if (length == 0) {
      return;
    }
    final int blockIndex = (int) (start >> blockBits);
    final int posInBlock = (int) (start & blockMask);
    final int availableInBlock = blockSize - posInBlock;
    if (length <= availableInBlock) {
      // Whole slice lives in one block: hand out a view, no copying.
      b.bytes = blocks[blockIndex];
      b.offset = posInBlock;
      return;
    }
    // Slice straddles a border: stitch the tail of this block to the head of the next.
    b.bytes = new byte[length];
    b.offset = 0;
    System.arraycopy(blocks[blockIndex], posInBlock, b.bytes, 0, availableInBlock);
    System.arraycopy(blocks[blockIndex + 1], 0, b.bytes, availableInBlock, length - availableInBlock);
  }

  /**
   * Points {@code b} at a length-prefixed value stored at {@code start}. The
   * length is decoded from a 1 or 2 byte vInt prefix found at {@code start}.
   *
   * <p>Note: this method does not support slices spanning across block
   * borders.
   *
   * @param b the ref to populate; it will reference the containing block directly
   * @param start absolute byte position of the length prefix
   *
   * @lucene.internal
   **/
  // TODO: this really needs to be refactored into fieldcacheimpl
  public void fill(BytesRef b, long start) {
    final byte[] block = blocks[(int) (start >> blockBits)];
    b.bytes = block;
    final int prefixPos = (int) (start & blockMask);
    final byte first = block[prefixPos];
    if (first >= 0) {
      // High bit clear: the single prefix byte is the length itself.
      b.length = first;
      b.offset = prefixPos + 1;
    } else {
      // High bit set: low 7 bits are the high part, next byte is the low part.
      b.length = ((first & 0x7f) << 8) | (block[prefixPos + 1] & 0xff);
      b.offset = prefixPos + 2;
      assert b.length > 0;
    }
  }

  /**
   * Estimates memory use: every block but the last is charged the per-block
   * estimate, and the last block is measured individually.
   */
  @Override
  public long ramBytesUsed() {
    final long fixedCost = BASE_RAM_BYTES_USED + RamUsageEstimator.shallowSizeOf(blocks);
    final int blockCount = blocks.length;
    if (blockCount == 0) {
      return fixedCost;
    }
    final long leadingBlocks = (blockCount - 1) * bytesUsedPerBlock;
    final long lastBlock = RamUsageEstimator.sizeOf(blocks[blockCount - 1]);
    return fixedCost + leadingBlocks + lastBlock;
  }

  /** Returns a short description that includes the block size. */
  @Override
  public String toString() {
    final String description = "PagedBytes(blocksize=" + blockSize + ")";
    return description;
  }
}
/**
 * Creates an empty PagedBytes whose blocks each hold {@code 1 << blockBits} bytes.
 *
 * <p>{@code 1 << blockBits} must be bigger than the biggest single
 * BytesRef slice that will be pulled.
 *
 * @param blockBits log2 of the block size, in the range 1..30 inclusive. 31 is
 *     rejected because {@code 1 << 31} overflows {@code int} to a negative
 *     value, which cannot be used as an array length.
 */
public PagedBytes(int blockBits) {
  // Upper bound is 30, not 31: 1 << 31 == Integer.MIN_VALUE.
  assert blockBits > 0 && blockBits < 31 : blockBits;
  this.blockSize = 1 << blockBits;
  this.blockBits = blockBits;
  blockMask = blockSize - 1;
  // Start "full" so that the first write allocates the first block.
  upto = blockSize;
  bytesUsedPerBlock = RamUsageEstimator.alignObjectSize(blockSize + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER);
  numBlocks = 0;
}
/** Appends {@code block} to the block table, growing the table first when it is full. */
private void addBlock(byte[] block) {
  final int slot = numBlocks;
  if (slot == blocks.length) {
    final int grownLength = ArrayUtil.oversize(slot, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
    blocks = Arrays.copyOf(blocks, grownLength);
  }
  numBlocks = slot + 1;
  blocks[slot] = block;
}
/**
 * Appends {@code byteCount} bytes read from {@code in}, filling the current
 * block and starting new blocks as needed.
 *
 * @param in source to read from
 * @param byteCount number of bytes to read; nothing happens when it is not positive
 * @throws IOException declared because {@code in.readBytes} is called
 */
public void copy(IndexInput in, long byteCount) throws IOException {
  long remaining = byteCount;
  while (remaining > 0) {
    if (upto == blockSize) {
      // Current block is full (or none exists yet): retire it and start a fresh one.
      if (currentBlock != null) {
        addBlock(currentBlock);
      }
      currentBlock = new byte[blockSize];
      upto = 0;
    }
    final int room = blockSize - upto;
    if (remaining <= room) {
      // Everything left fits in the current block.
      final int tail = (int) remaining;
      in.readBytes(currentBlock, upto, tail, false);
      upto += tail;
      return;
    }
    // Fill the current block to the brim and go around again.
    in.readBytes(currentBlock, upto, room, false);
    upto = blockSize;
    remaining -= room;
  }
}
/**
 * Copies {@code bytes} into a single block and points {@code out} at the copy.
 *
 * <p>The value is never split across blocks: if it does not fit in the space
 * left in the current block, that block is retired and a new one is started.
 * Retiring a block here sets a flag that makes a later {@code freeze} call
 * throw, so do not use this method if you will use freeze(true).
 *
 * <p>This only supports bytes.length &lt;= blockSize.
 *
 * @param bytes the value to copy
 * @param out receives the block, offset and length of the stored copy
 */
public void copy(BytesRef bytes, BytesRef out) {
  final boolean needsFreshBlock = bytes.length > blockSize - upto || currentBlock == null;
  if (needsFreshBlock) {
    if (currentBlock != null) {
      addBlock(currentBlock);
      didSkipBytes = true;
    }
    currentBlock = new byte[blockSize];
    upto = 0;
    assert bytes.length <= blockSize;
    // TODO: we could also support variable block sizes
  }
  final int count = bytes.length;
  out.bytes = currentBlock;
  out.offset = upto;
  out.length = count;
  System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, count);
  upto += count;
}
/**
 * Commits the final block and returns a {@link Reader} over everything written.
 *
 * @param trim if true and the last block is only partly filled, it is
 *     replaced by a copy holding just the bytes written
 * @return a reader over the committed blocks
 * @throws IllegalStateException if this instance is already frozen, or if
 *     copy(BytesRef, BytesRef) previously retired a block
 */
public Reader freeze(boolean trim) {
  if (frozen) {
    throw new IllegalStateException("already frozen");
  }
  if (didSkipBytes) {
    throw new IllegalStateException("cannot freeze when copy(BytesRef, BytesRef) was used");
  }
  byte[] lastBlock = currentBlock;
  if (trim && upto < blockSize) {
    // Shrink the partly filled tail block to exactly the bytes in use.
    lastBlock = Arrays.copyOf(lastBlock, upto);
  }
  if (lastBlock == null) {
    // Nothing was ever written: commit a single empty block.
    lastBlock = EMPTY_BYTES;
  }
  addBlock(lastBlock);
  frozen = true;
  currentBlock = null;
  return new Reader(this);
}
/**
 * Returns the absolute position at which the next byte would be written:
 * the bytes held by completed blocks plus the offset into the current block.
 * Returns 0 when there is no current block.
 */
public long getPointer() {
  return currentBlock == null ? 0 : (long) numBlocks * blockSize + upto;
}
/**
 * Estimates memory use: completed blocks other than the last are charged the
 * per-block estimate, while the last completed block and the block still
 * being written (if any) are measured individually.
 */
@Override
public long ramBytesUsed() {
  long total = BASE_RAM_BYTES_USED;
  total += RamUsageEstimator.shallowSizeOf(blocks);
  final int completed = numBlocks;
  if (completed > 0) {
    total += (completed - 1) * bytesUsedPerBlock;
    total += RamUsageEstimator.sizeOf(blocks[completed - 1]);
  }
  if (currentBlock != null) {
    total += RamUsageEstimator.sizeOf(currentBlock);
  }
  return total;
}
/**
 * Copies {@code bytes} in, writing the length as a 1 or 2 byte vInt prefix
 * (1 byte for lengths below 128, otherwise 2 bytes with the high bit of the
 * first byte set).
 *
 * <p>Prefix and payload always land in the same block. Two bytes are reserved
 * for the prefix when checking for room, whatever the actual prefix size.
 *
 * @param bytes the value to store; its length must be at most 32767
 * @return the absolute position of the prefix
 * @throws IllegalArgumentException if the length is 32768 or more, or if
 *     length + 2 exceeds the block size
 */
// TODO: this really needs to be refactored into fieldcacheimpl!
public long copyUsingLengthPrefix(BytesRef bytes) {
  final int payloadLength = bytes.length;
  if (payloadLength >= 32768) {
    throw new IllegalArgumentException("max length is 32767 (got " + payloadLength + ")");
  }
  final boolean fitsInCurrentBlock = upto + payloadLength + 2 <= blockSize;
  if (!fitsInCurrentBlock) {
    if (payloadLength + 2 > blockSize) {
      throw new IllegalArgumentException("block size " + blockSize + " is too small to store length " + payloadLength + " bytes");
    }
    if (currentBlock != null) {
      addBlock(currentBlock);
    }
    currentBlock = new byte[blockSize];
    upto = 0;
  }
  // Capture the position before the prefix is written.
  final long pointer = getPointer();
  if (payloadLength < 128) {
    currentBlock[upto++] = (byte) payloadLength;
  } else {
    currentBlock[upto++] = (byte) (0x80 | (payloadLength >> 8));
    currentBlock[upto++] = (byte) (payloadLength & 0xff);
  }
  System.arraycopy(bytes.bytes, bytes.offset, currentBlock, upto, payloadLength);
  upto += payloadLength;
  return pointer;
}
/**
 * A {@link DataInput} that reads sequentially through the blocks of the
 * enclosing PagedBytes, starting at the first block.
 */
public final class PagedBytesDataInput extends DataInput {
  private int currentBlockIndex;
  private int currentBlockUpto;
  private byte[] currentBlock;

  PagedBytesDataInput() {
    this.currentBlock = blocks[0];
  }

  /** Returns a new input over the same PagedBytes, positioned where this one is. */
  @Override
  public PagedBytesDataInput clone() {
    final PagedBytesDataInput copy = getDataInput();
    copy.setPosition(getPosition());
    return copy;
  }

  /** Returns the current byte position. */
  public long getPosition() {
    final long blockStart = (long) currentBlockIndex * blockSize;
    return blockStart + currentBlockUpto;
  }

  /** Seek to a position previously obtained from
   *  {@link #getPosition}. */
  public void setPosition(long pos) {
    currentBlockIndex = (int) (pos >> blockBits);
    currentBlock = blocks[currentBlockIndex];
    currentBlockUpto = (int) (pos & blockMask);
  }

  /** Reads one byte, moving to the next block first if the current one is exhausted. */
  @Override
  public byte readByte() {
    if (currentBlockUpto == blockSize) {
      nextBlock();
    }
    final byte value = currentBlock[currentBlockUpto];
    currentBlockUpto++;
    return value;
  }

  /** Reads {@code len} bytes into {@code b} at {@code offset}, crossing block borders as needed. */
  @Override
  public void readBytes(byte[] b, int offset, int len) {
    assert b.length >= offset + len;
    int dest = offset;
    int remaining = len;
    int available = blockSize - currentBlockUpto;
    // Drain whole block tails while the request is larger than what the block still holds.
    while (available < remaining) {
      System.arraycopy(currentBlock, currentBlockUpto, b, dest, available);
      nextBlock();
      dest += available;
      remaining -= available;
      available = blockSize - currentBlockUpto;
    }
    // Last block
    System.arraycopy(currentBlock, currentBlockUpto, b, dest, remaining);
    currentBlockUpto += remaining;
  }

  /** Advances to the start of the following block. */
  private void nextBlock() {
    currentBlockIndex++;
    currentBlockUpto = 0;
    currentBlock = blocks[currentBlockIndex];
  }
}
/**
 * A {@link DataOutput} that appends to the enclosing PagedBytes, writing
 * through its current block and write offset.
 */
public final class PagedBytesDataOutput extends DataOutput {
  /** Appends one byte, starting a new block first if the current one is full. */
  @Override
  public void writeByte(byte b) {
    startBlockIfFull();
    currentBlock[upto++] = b;
  }

  /** Appends {@code length} bytes of {@code b} from {@code offset}, spilling into new blocks as needed. */
  @Override
  public void writeBytes(byte[] b, int offset, int length) {
    assert b.length >= offset + length;
    if (length == 0) {
      return;
    }
    startBlockIfFull();
    int src = offset;
    int remaining = length;
    int room = blockSize - upto;
    // Fill and retire blocks while more remains than the current block can take.
    while (room < remaining) {
      System.arraycopy(b, src, currentBlock, upto, room);
      addBlock(currentBlock);
      currentBlock = new byte[blockSize];
      upto = 0;
      src += room;
      remaining -= room;
      room = blockSize;
    }
    // Last block
    System.arraycopy(b, src, currentBlock, upto, remaining);
    upto += remaining;
  }

  /** Return the current byte position. */
  public long getPosition() {
    return getPointer();
  }

  /** When the write offset is at the block size, retires the current block (if any) and allocates a new one. */
  private void startBlockIfFull() {
    if (upto != blockSize) {
      return;
    }
    if (currentBlock != null) {
      addBlock(currentBlock);
    }
    currentBlock = new byte[blockSize];
    upto = 0;
  }
}
/**
 * Returns a DataInput to read values from this PagedBytes instance.
 *
 * @throws IllegalStateException if this instance has not been frozen yet
 */
public PagedBytesDataInput getDataInput() {
  if (frozen) {
    return new PagedBytesDataInput();
  }
  throw new IllegalStateException("must call freeze() before getDataInput");
}
/**
 * Returns a DataOutput that you may use to write into this PagedBytes
 * instance. If you do this, you should not call the other writing methods
 * (eg, copy); results are undefined.
 *
 * @throws IllegalStateException if this instance has already been frozen
 */
public PagedBytesDataOutput getDataOutput() {
  if (!frozen) {
    return new PagedBytesDataOutput();
  }
  throw new IllegalStateException("cannot get DataOutput after freeze()");
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy